4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 import ganeti.masterd.instance # pylint: disable-msg=W0611
64 def _SupportsOob(cfg, node):
65 """Tells if node supports OOB.
67 @type cfg: L{config.ConfigWriter}
68 @param cfg: The cluster configuration
69 @type node: L{objects.Node}
71 @return: The OOB script if supported or an empty string otherwise
74 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 class LogicalUnit(object):
79 """Logical Unit base class.
81 Subclasses must follow these rules:
82 - implement ExpandNames
83 - implement CheckPrereq (except when tasklets are used)
84 - implement Exec (except when tasklets are used)
85 - implement BuildHooksEnv
86 - redefine HPATH and HTYPE
87 - optionally redefine their run requirements:
88 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
90 Note that all commands require root permissions.
92 @ivar dry_run_result: the value (if any) that will be returned to the caller
93 in dry-run mode (signalled by opcode dry_run parameter)
100 def __init__(self, processor, op, context, rpc):
101 """Constructor for LogicalUnit.
103 This needs to be overridden in derived classes in order to check op
107 self.proc = processor
109 self.cfg = context.cfg
110 self.context = context
112 # Dicts used to declare locking needs to mcpu
113 self.needed_locks = None
114 self.acquired_locks = {}
115 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
117 self.remove_locks = {}
118 # Used to force good behavior when calling helper functions
119 self.recalculate_locks = {}
122 self.Log = processor.Log # pylint: disable-msg=C0103
123 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
124 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
125 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
126 # support for dry-run
127 self.dry_run_result = None
128 # support for generic debug attribute
129 if (not hasattr(self.op, "debug_level") or
130 not isinstance(self.op.debug_level, int)):
131 self.op.debug_level = 0
136 # Validate opcode parameters and set defaults
137 self.op.Validate(True)
139 self.CheckArguments()
142 """Returns the SshRunner object
146 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
149 ssh = property(fget=__GetSSH)
151 def CheckArguments(self):
152 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check to ensure the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as purely a lock-related function
160 - CheckPrereq is run after we have acquired locks (and possible
163 The function is allowed to change the self.op attribute so that
later methods no longer need to worry about missing parameters.
169 def ExpandNames(self):
170 """Expand names for this LU.
172 This method is called before starting to execute the opcode, and it should
173 update all the parameters of the opcode to their canonical form (e.g. a
174 short node name must be fully expanded after this method has successfully
175 completed). This way locking, hooks, logging, etc. can work correctly.
177 LUs which implement this method must also populate the self.needed_locks
178 member, as a dict with lock levels as keys, and a list of needed lock names
181 - use an empty dict if you don't need any lock
182 - if you don't need any lock at a particular level omit that level
183 - don't put anything for the BGL level
184 - if you want all locks at a level use locking.ALL_SET as a value
186 If you need to share locks (rather than acquire them exclusively) at one
187 level you can modify self.share_locks, setting a true value (usually 1) for
188 that level. By default locks are not shared.
190 This function can also define a list of tasklets, which then will be
191 executed in order instead of the usual LU-level CheckPrereq and Exec
192 functions, if those are not defined by the LU.
196 # Acquire all nodes and one instance
197 self.needed_locks = {
198 locking.LEVEL_NODE: locking.ALL_SET,
199 locking.LEVEL_INSTANCE: ['instance1.example.com'],
201 # Acquire just two nodes
202 self.needed_locks = {
203 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206 self.needed_locks = {} # No, you can't leave it to the default value None
209 # The implementation of this method is mandatory only if the new LU is
210 # concurrent, so that old LUs don't need to be changed all at the same
213 self.needed_locks = {} # Exclusive LUs don't need locks.
215 raise NotImplementedError
217 def DeclareLocks(self, level):
218 """Declare LU locking needs for a level
220 While most LUs can just declare their locking needs at ExpandNames time,
221 sometimes there's the need to calculate some locks after having acquired
222 the ones before. This function is called just before acquiring locks at a
223 particular level, but after acquiring the ones at lower levels, and permits
224 such calculations. It can be used to modify self.needed_locks, and by
225 default it does nothing.
227 This function is only called if you have something already set in
228 self.needed_locks for the level.
230 @param level: Locking level which is going to be locked
231 @type level: member of ganeti.locking.LEVELS
235 def CheckPrereq(self):
236 """Check prerequisites for this LU.
238 This method should check that the prerequisites for the execution
239 of this LU are fulfilled. It can do internode communication, but
240 it should be idempotent - no cluster or system changes are
243 The method should raise errors.OpPrereqError in case something is
244 not fulfilled. Its return value is ignored.
246 This method should also update all the parameters of the opcode to
247 their canonical form if it hasn't been done by ExpandNames before.
250 if self.tasklets is not None:
251 for (idx, tl) in enumerate(self.tasklets):
252 logging.debug("Checking prerequisites for tasklet %s/%s",
253 idx + 1, len(self.tasklets))
258 def Exec(self, feedback_fn):
261 This method should implement the actual work. It should raise
262 errors.OpExecError for failures that are somewhat dealt with in
266 if self.tasklets is not None:
267 for (idx, tl) in enumerate(self.tasklets):
268 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271 raise NotImplementedError
273 def BuildHooksEnv(self):
274 """Build hooks environment for this LU.
276 This method should return a three-node tuple consisting of: a dict
277 containing the environment that will be used for running the
278 specific hook for this LU, a list of node names on which the hook
279 should run before the execution, and a list of node names on which
280 the hook should run after the execution.
282 The keys of the dict must not have 'GANETI_' prefixed as this will
283 be handled in the hooks runner. Also note additional keys will be
284 added by the hooks runner. If the LU doesn't define any
285 environment, an empty dict (and not None) should be returned.
If there are no nodes for a phase, an empty list (and not None) should be returned.
289 Note that if the HPATH for a LU class is None, this function will
293 raise NotImplementedError
295 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
296 """Notify the LU about the results of its hooks.
298 This method is called every time a hooks phase is executed, and notifies
299 the Logical Unit about the hooks' result. The LU can then use it to alter
300 its result based on the hooks. By default the method does nothing and the
301 previous result is passed back unchanged but any LU can define it if it
302 wants to use the local cluster hook-scripts somehow.
304 @param phase: one of L{constants.HOOKS_PHASE_POST} or
305 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
306 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
308 @param lu_result: the previous Exec result this LU had, or None
310 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# could-be-a-function warnings
316 # pylint: disable-msg=W0613,R0201
319 def _ExpandAndLockInstance(self):
320 """Helper function to expand and lock an instance.
322 Many LUs that work on an instance take its name in self.op.instance_name
323 and need to expand it and then declare the expanded name for locking. This
324 function does it, and then updates self.op.instance_name to the expanded
325 name. It also initializes needed_locks as a dict, if this hasn't been done
329 if self.needed_locks is None:
330 self.needed_locks = {}
332 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
333 "_ExpandAndLockInstance called with instance-level locks set"
334 self.op.instance_name = _ExpandInstanceName(self.cfg,
335 self.op.instance_name)
336 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
338 def _LockInstancesNodes(self, primary_only=False):
339 """Helper function to declare instances' nodes for locking.
341 This function should be called after locking one or more instances to lock
342 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
343 with all primary or secondary nodes for instances already locked and
344 present in self.needed_locks[locking.LEVEL_INSTANCE].
346 It should be called from DeclareLocks, and for safety only works if
347 self.recalculate_locks[locking.LEVEL_NODE] is set.
349 In the future it may grow parameters to just lock some instance's nodes, or
350 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
354 if level == locking.LEVEL_NODE:
355 self._LockInstancesNodes()
357 @type primary_only: boolean
358 @param primary_only: only lock primary nodes of locked instances
361 assert locking.LEVEL_NODE in self.recalculate_locks, \
362 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
366 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
367 # future we might want to have different behaviors depending on the value
368 # of self.recalculate_locks[locking.LEVEL_NODE]
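# Collect the primary (and optionally secondary) nodes of every instance we already hold a lock on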
370 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
371 instance = self.context.cfg.GetInstanceInfo(instance_name)
372 wanted_nodes.append(instance.primary_node)
374 wanted_nodes.extend(instance.secondary_nodes)
376 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
377 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
378 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
379 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
381 del self.recalculate_locks[locking.LEVEL_NODE]
384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
385 """Simple LU which runs no hooks.
387 This LU is intended as a parent for other LogicalUnits which will
388 run no hooks, in order to reduce duplicate code.
394 def BuildHooksEnv(self):
395 """Empty BuildHooksEnv for NoHooksLu.
397 This just raises an error.
400 assert False, "BuildHooksEnv called for NoHooksLUs"
404 """Tasklet base class.
406 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407 they can mix legacy code with tasklets. Locking needs to be done in the LU,
408 tasklets know nothing about locks.
410 Subclasses must follow these rules:
411 - Implement CheckPrereq
415 def __init__(self, lu):
422 def CheckPrereq(self):
423 """Check prerequisites for this tasklets.
425 This method should check whether the prerequisites for the execution of
426 this tasklet are fulfilled. It can do internode communication, but it
427 should be idempotent - no cluster or system changes are allowed.
429 The method should raise errors.OpPrereqError in case something is not
430 fulfilled. Its return value is ignored.
432 This method should also update all parameters to their canonical form if it
433 hasn't been done before.
438 def Exec(self, feedback_fn):
439 """Execute the tasklet.
441 This method should implement the actual work. It should raise
442 errors.OpExecError for failures that are somewhat dealt with in code, or
446 raise NotImplementedError
450 """Base for query utility classes.
453 #: Attribute holding field definitions
456 def __init__(self, names, fields, use_locking):
457 """Initializes this class.
461 self.use_locking = use_locking
463 self.query = query.Query(self.FIELDS, fields)
464 self.requested_data = self.query.RequestedData()
466 self.do_locking = None
469 def _GetNames(self, lu, all_names, lock_level):
470 """Helper function to determine names asked for in the query.
474 names = lu.acquired_locks[lock_level]
478 if self.wanted == locking.ALL_SET:
479 assert not self.names
480 # caller didn't specify names, so ordering is not important
481 return utils.NiceSort(names)
483 # caller specified names and we must keep the same order
485 assert not self.do_locking or lu.acquired_locks[lock_level]
487 missing = set(self.wanted).difference(names)
489 raise errors.OpExecError("Some items were removed before retrieving"
490 " their data: %s" % missing)
492 # Return expanded names
496 def FieldsQuery(cls, fields):
497 """Returns list of available fields.
499 @return: List of L{objects.QueryFieldDefinition}
502 return query.QueryFields(cls.FIELDS, fields)
504 def ExpandNames(self, lu):
505 """Expand names for this query.
507 See L{LogicalUnit.ExpandNames}.
510 raise NotImplementedError()
512 def DeclareLocks(self, lu, level):
513 """Declare locks for this query.
515 See L{LogicalUnit.DeclareLocks}.
518 raise NotImplementedError()
520 def _GetQueryData(self, lu):
521 """Collects all data for this query.
523 @return: Query data object
526 raise NotImplementedError()
528 def NewStyleQuery(self, lu):
529 """Collect data and execute query.
532 return query.GetQueryResponse(self.query, self._GetQueryData(lu))
534 def OldStyleQuery(self, lu):
535 """Collect data and execute query.
538 return self.query.OldStyleQuery(self._GetQueryData(lu))
541 def _GetWantedNodes(lu, nodes):
542 """Returns list of checked and expanded node names.
544 @type lu: L{LogicalUnit}
545 @param lu: the logical unit on whose behalf we execute
547 @param nodes: list of node names or None for all nodes
549 @return: the list of nodes, sorted
@raise errors.ProgrammerError: if the nodes parameter is of the wrong type
554 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
556 return utils.NiceSort(lu.cfg.GetNodeList())
559 def _GetWantedInstances(lu, instances):
560 """Returns list of checked and expanded instance names.
562 @type lu: L{LogicalUnit}
563 @param lu: the logical unit on whose behalf we execute
564 @type instances: list
565 @param instances: list of instance names or None for all instances
567 @return: the list of instances, sorted
@raise errors.OpPrereqError: if the instances parameter is of the wrong type
569 @raise errors.OpPrereqError: if any of the passed instances is not found
573 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
575 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
579 def _GetUpdatedParams(old_params, update_dict,
580 use_default=True, use_none=False):
581 """Return the new version of a parameter dictionary.
583 @type old_params: dict
584 @param old_params: old parameters
585 @type update_dict: dict
586 @param update_dict: dict containing new parameter values, or
587 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
596 @return: the new parameter dictionary
599 params_copy = copy.deepcopy(old_params)
600 for key, val in update_dict.iteritems():
601 if ((use_default and val == constants.VALUE_DEFAULT) or
602 (use_none and val is None)):
608 params_copy[key] = val
612 def _CheckOutputFields(static, dynamic, selected):
613 """Checks whether all selected fields are valid.
615 @type static: L{utils.FieldSet}
616 @param static: static fields set
617 @type dynamic: L{utils.FieldSet}
618 @param dynamic: dynamic fields set
625 delta = f.NonMatching(selected)
627 raise errors.OpPrereqError("Unknown output fields selected: %s"
628 % ",".join(delta), errors.ECODE_INVAL)
631 def _CheckGlobalHvParams(params):
632 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of global parameters.
638 used_globals = constants.HVC_GLOBALS.intersection(params)
640 msg = ("The following hypervisor parameters are global and cannot"
641 " be customized at instance level, please modify them at"
642 " cluster level: %s" % utils.CommaJoin(used_globals))
643 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
646 def _CheckNodeOnline(lu, node, msg=None):
647 """Ensure that a given node is online.
649 @param lu: the LU on behalf of which we make the check
650 @param node: the node to check
651 @param msg: if passed, should be a message to replace the default one
652 @raise errors.OpPrereqError: if the node is offline
656 msg = "Can't use offline node"
657 if lu.cfg.GetNodeInfo(node).offline:
658 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
661 def _CheckNodeNotDrained(lu, node):
662 """Ensure that a given node is not drained.
664 @param lu: the LU on behalf of which we make the check
665 @param node: the node to check
666 @raise errors.OpPrereqError: if the node is drained
669 if lu.cfg.GetNodeInfo(node).drained:
670 raise errors.OpPrereqError("Can't use drained node %s" % node,
674 def _CheckNodeVmCapable(lu, node):
675 """Ensure that a given node is vm capable.
677 @param lu: the LU on behalf of which we make the check
678 @param node: the node to check
679 @raise errors.OpPrereqError: if the node is not vm capable
682 if not lu.cfg.GetNodeInfo(node).vm_capable:
683 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
688 """Ensure that a node supports a given OS.
690 @param lu: the LU on behalf of which we make the check
691 @param node: the node to check
692 @param os_name: the OS to query about
693 @param force_variant: whether to ignore variant errors
694 @raise errors.OpPrereqError: if the node is not supporting the OS
697 result = lu.rpc.call_os_get(node, os_name)
698 result.Raise("OS '%s' not in supported OS list for node %s" %
700 prereq=True, ecode=errors.ECODE_INVAL)
701 if not force_variant:
702 _CheckOSVariant(result.payload, os_name)
705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
706 """Ensure that a node has the given secondary ip.
708 @type lu: L{LogicalUnit}
709 @param lu: the LU on behalf of which we make the check
711 @param node: the node to check
712 @type secondary_ip: string
713 @param secondary_ip: the ip to check
714 @type prereq: boolean
715 @param prereq: whether to throw a prerequisite or an execute error
716 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
717 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
720 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
721 result.Raise("Failure checking secondary ip on node %s" % node,
722 prereq=prereq, ecode=errors.ECODE_ENVIRON)
723 if not result.payload:
724 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
725 " please fix and re-run this command" % secondary_ip)
727 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
729 raise errors.OpExecError(msg)
732 def _GetClusterDomainSecret():
733 """Reads the cluster domain secret.
736 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
740 def _CheckInstanceDown(lu, instance, reason):
741 """Ensure that an instance is not running."""
742 if instance.admin_up:
743 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
744 (instance.name, reason), errors.ECODE_STATE)
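# The configuration says the instance is down; also ask the primary node's hypervisor in case it is actually running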
746 pnode = instance.primary_node
747 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
748 ins_l.Raise("Can't contact node %s for instance information" % pnode,
749 prereq=True, ecode=errors.ECODE_ENVIRON)
751 if instance.name in ins_l.payload:
752 raise errors.OpPrereqError("Instance %s is running, %s" %
753 (instance.name, reason), errors.ECODE_STATE)
756 def _ExpandItemName(fn, name, kind):
757 """Expand an item name.
759 @param fn: the function to use for expansion
760 @param name: requested item name
761 @param kind: text description ('Node' or 'Instance')
762 @return: the resolved (full) name
763 @raise errors.OpPrereqError: if the item is not found
767 if full_name is None:
768 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
773 def _ExpandNodeName(cfg, name):
774 """Wrapper over L{_ExpandItemName} for nodes."""
775 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
778 def _ExpandInstanceName(cfg, name):
779 """Wrapper over L{_ExpandItemName} for instance."""
780 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
784 memory, vcpus, nics, disk_template, disks,
785 bep, hvp, hypervisor_name):
786 """Builds instance related env variables for hooks
788 This builds the hook environment from individual variables.
791 @param name: the name of the instance
792 @type primary_node: string
793 @param primary_node: the name of the instance's primary node
794 @type secondary_nodes: list
795 @param secondary_nodes: list of secondary nodes as strings
796 @type os_type: string
797 @param os_type: the name of the instance's OS
798 @type status: boolean
799 @param status: the should_run status of the instance
801 @param memory: the memory size of the instance
803 @param vcpus: the count of VCPUs the instance has
805 @param nics: list of tuples (ip, mac, mode, link) representing
806 the NICs the instance has
807 @type disk_template: string
808 @param disk_template: the disk template of the instance
810 @param disks: the list of (size, mode) pairs
812 @param bep: the backend parameters for the instance
814 @param hvp: the hypervisor parameters for the instance
815 @type hypervisor_name: string
816 @param hypervisor_name: the hypervisor for the instance
818 @return: the hook environment for this instance
827 "INSTANCE_NAME": name,
828 "INSTANCE_PRIMARY": primary_node,
829 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
830 "INSTANCE_OS_TYPE": os_type,
831 "INSTANCE_STATUS": str_status,
832 "INSTANCE_MEMORY": memory,
833 "INSTANCE_VCPUS": vcpus,
834 "INSTANCE_DISK_TEMPLATE": disk_template,
835 "INSTANCE_HYPERVISOR": hypervisor_name,
839 nic_count = len(nics)
840 for idx, (ip, mac, mode, link) in enumerate(nics):
843 env["INSTANCE_NIC%d_IP" % idx] = ip
844 env["INSTANCE_NIC%d_MAC" % idx] = mac
845 env["INSTANCE_NIC%d_MODE" % idx] = mode
846 env["INSTANCE_NIC%d_LINK" % idx] = link
847 if mode == constants.NIC_MODE_BRIDGED:
848 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
852 env["INSTANCE_NIC_COUNT"] = nic_count
855 disk_count = len(disks)
856 for idx, (size, mode) in enumerate(disks):
857 env["INSTANCE_DISK%d_SIZE" % idx] = size
858 env["INSTANCE_DISK%d_MODE" % idx] = mode
862 env["INSTANCE_DISK_COUNT"] = disk_count
864 for source, kind in [(bep, "BE"), (hvp, "HV")]:
865 for key, value in source.items():
866 env["INSTANCE_%s_%s" % (kind, key)] = value
871 def _NICListToTuple(lu, nics):
872 """Build a list of nic information tuples.
874 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
875 value in LUInstanceQueryData.
877 @type lu: L{LogicalUnit}
878 @param lu: the logical unit on whose behalf we execute
879 @type nics: list of L{objects.NIC}
880 @param nics: list of nics to convert to hooks tuples
884 cluster = lu.cfg.GetClusterInfo()
888 filled_params = cluster.SimpleFillNIC(nic.nicparams)
889 mode = filled_params[constants.NIC_MODE]
890 link = filled_params[constants.NIC_LINK]
891 hooks_nics.append((ip, mac, mode, link))
895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
896 """Builds instance related env variables for hooks from an object.
898 @type lu: L{LogicalUnit}
899 @param lu: the logical unit on whose behalf we execute
900 @type instance: L{objects.Instance}
901 @param instance: the instance for which we should build the
904 @param override: dictionary with key/values that will override
907 @return: the hook environment dictionary
910 cluster = lu.cfg.GetClusterInfo()
911 bep = cluster.FillBE(instance)
912 hvp = cluster.FillHV(instance)
914 'name': instance.name,
915 'primary_node': instance.primary_node,
916 'secondary_nodes': instance.secondary_nodes,
917 'os_type': instance.os,
918 'status': instance.admin_up,
919 'memory': bep[constants.BE_MEMORY],
920 'vcpus': bep[constants.BE_VCPUS],
921 'nics': _NICListToTuple(lu, instance.nics),
922 'disk_template': instance.disk_template,
923 'disks': [(disk.size, disk.mode) for disk in instance.disks],
926 'hypervisor_name': instance.hypervisor,
929 args.update(override)
930 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
933 def _AdjustCandidatePool(lu, exceptions):
934 """Adjust the candidate pool after node operations.
937 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
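# MaintainCandidatePool returns the list of nodes it newly promoted to master candidate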
939 lu.LogInfo("Promoted nodes to master candidate role: %s",
940 utils.CommaJoin(node.name for node in mod_list))
941 for name in mod_list:
942 lu.context.ReaddNode(name)
943 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
945 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
949 def _DecideSelfPromotion(lu, exceptions=None):
950 """Decide whether I should promote myself as a master candidate.
953 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
954 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max by one, so:
956 mc_should = min(mc_should + 1, cp_size)
957 return mc_now < mc_should
960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
961 """Check that the brigdes needed by a list of nics exist.
964 cluster = lu.cfg.GetClusterInfo()
965 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
966 brlist = [params[constants.NIC_LINK] for params in paramslist
967 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
969 result = lu.rpc.call_bridges_exist(target_node, brlist)
970 result.Raise("Error checking bridges on destination node '%s'" %
971 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
974 def _CheckInstanceBridgesExist(lu, instance, node=None):
975 """Check that the brigdes needed by an instance exist.
979 node = instance.primary_node
980 _CheckNicsBridgesExist(lu, instance.nics, node)
983 def _CheckOSVariant(os_obj, name):
984 """Check whether an OS name conforms to the os variants specification.
986 @type os_obj: L{objects.OS}
987 @param os_obj: OS object to check
989 @param name: OS name passed by the user, to check for validity
992 if not os_obj.supported_variants:
994 variant = objects.OS.GetVariant(name)
996 raise errors.OpPrereqError("OS name must include a variant",
999 if variant not in os_obj.supported_variants:
1000 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1003 def _GetNodeInstancesInner(cfg, fn):
1004 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1007 def _GetNodeInstances(cfg, node_name):
1008 """Returns a list of all primary and secondary instances on a node.
1012 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016 """Returns primary instances on a node.
1019 return _GetNodeInstancesInner(cfg,
1020 lambda inst: node_name == inst.primary_node)
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024 """Returns secondary instances on a node.
1027 return _GetNodeInstancesInner(cfg,
1028 lambda inst: node_name in inst.secondary_nodes)
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032 """Returns the arguments for a storage type.
1035 # Special case for file storage
1036 if storage_type == constants.ST_FILE:
1037 # storage.FileStorage wants a list of storage directories
1038 return [[cfg.GetFileStorageDir()]]
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
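# Attach node-specific disk IDs, then ask the node for the mirror status and flag any disk reported as LDS_FAULTY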
1046 for dev in instance.disks:
1047 cfg.SetDiskID(dev, node_name)
1049 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050 result.Raise("Failed to get disk status from node %s" % node_name,
1051 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1053 for idx, bdev_status in enumerate(result.payload):
1054 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061 """Check the sanity of iallocator and node arguments and use the
1062 cluster-wide iallocator if appropriate.
1064 Check that at most one of (iallocator, node) is specified. If none is
1065 specified, then the LU's opcode's iallocator slot is filled with the
1066 cluster-wide default iallocator.
1068 @type iallocator_slot: string
1069 @param iallocator_slot: the name of the opcode iallocator slot
1070 @type node_slot: string
1071 @param node_slot: the name of the opcode target node slot
1074 node = getattr(lu.op, node_slot, None)
1075 iallocator = getattr(lu.op, iallocator_slot, None)
1077 if node is not None and iallocator is not None:
1078 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1080 elif node is None and iallocator is None:
1081 default_iallocator = lu.cfg.GetDefaultIAllocator()
1082 if default_iallocator:
1083 setattr(lu.op, iallocator_slot, default_iallocator)
1085 raise errors.OpPrereqError("No iallocator or node given and no"
1086 " cluster-wide default iallocator found."
1087 " Please specify either an iallocator or a"
1088 " node, or set a cluster-wide default"
1092 class LUClusterPostInit(LogicalUnit):
1093 """Logical unit for running hooks after cluster initialization.
1096 HPATH = "cluster-init"
1097 HTYPE = constants.HTYPE_CLUSTER
1099 def BuildHooksEnv(self):
1103 env = {"OP_TARGET": self.cfg.GetClusterName()}
1104 mn = self.cfg.GetMasterNode()
1105 return env, [], [mn]
1107 def Exec(self, feedback_fn):
1114 class LUClusterDestroy(LogicalUnit):
1115 """Logical unit for destroying the cluster.
1118 HPATH = "cluster-destroy"
1119 HTYPE = constants.HTYPE_CLUSTER
1121 def BuildHooksEnv(self):
1125 env = {"OP_TARGET": self.cfg.GetClusterName()}
1128 def CheckPrereq(self):
1129 """Check prerequisites.
1131 This checks whether the cluster is empty.
1133 Any errors are signaled by raising errors.OpPrereqError.
1136 master = self.cfg.GetMasterNode()
1138 nodelist = self.cfg.GetNodeList()
1139 if len(nodelist) != 1 or nodelist[0] != master:
1140 raise errors.OpPrereqError("There are still %d node(s) in"
1141 " this cluster." % (len(nodelist) - 1),
1143 instancelist = self.cfg.GetInstanceList()
1145 raise errors.OpPrereqError("There are still %d instance(s) in"
1146 " this cluster." % len(instancelist),
1149 def Exec(self, feedback_fn):
1150 """Destroys the cluster.
1153 master = self.cfg.GetMasterNode()
1155 # Run post hooks on master node before it's removed
1156 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1158 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1160 # pylint: disable-msg=W0702
1161 self.LogWarning("Errors occurred running hooks on %s" % master)
1163 result = self.rpc.call_node_stop_master(master, False)
1164 result.Raise("Could not disable the master role")
1169 def _VerifyCertificate(filename):
1170 """Verifies a certificate for LUClusterVerify.
1172 @type filename: string
1173 @param filename: Path to PEM file
1177 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178 utils.ReadFile(filename))
1179 except Exception, err: # pylint: disable-msg=W0703
1180 return (LUClusterVerify.ETYPE_ERROR,
1181 "Failed to load X509 certificate %s: %s" % (filename, err))
1184 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185 constants.SSL_CERT_EXPIRATION_ERROR)
1188 fnamemsg = "While verifying %s: %s" % (filename, msg)
1193 return (None, fnamemsg)
1194 elif errcode == utils.CERT_WARNING:
1195 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196 elif errcode == utils.CERT_ERROR:
1197 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1199 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1202 class LUClusterVerify(LogicalUnit):
1203 """Verifies the cluster status.
1206 HPATH = "cluster-verify"
1207 HTYPE = constants.HTYPE_CLUSTER
1210 TCLUSTER = "cluster"
1212 TINSTANCE = "instance"
1214 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223 ENODEDRBD = (TNODE, "ENODEDRBD")
1224 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227 ENODEHV = (TNODE, "ENODEHV")
1228 ENODELVM = (TNODE, "ENODELVM")
1229 ENODEN1 = (TNODE, "ENODEN1")
1230 ENODENET = (TNODE, "ENODENET")
1231 ENODEOS = (TNODE, "ENODEOS")
1232 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234 ENODERPC = (TNODE, "ENODERPC")
1235 ENODESSH = (TNODE, "ENODESSH")
1236 ENODEVERSION = (TNODE, "ENODEVERSION")
1237 ENODESETUP = (TNODE, "ENODESETUP")
1238 ENODETIME = (TNODE, "ENODETIME")
1239 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1241 ETYPE_FIELD = "code"
1242 ETYPE_ERROR = "ERROR"
1243 ETYPE_WARNING = "WARNING"
1245 _HOOKS_INDENT_RE = re.compile("^", re.M)
1247 class NodeImage(object):
1248 """A class representing the logical and physical status of a node.
1251 @ivar name: the node name to which this object refers
1252 @ivar volumes: a structure as returned from
1253 L{ganeti.backend.GetVolumeList} (runtime)
1254 @ivar instances: a list of running instances (runtime)
1255 @ivar pinst: list of configured primary instances (config)
1256 @ivar sinst: list of configured secondary instances (config)
1257 @ivar sbp: dictionary of {primary-node: list of instances} for all
1258 instances for which this node is secondary (config)
1259 @ivar mfree: free memory, as reported by hypervisor (runtime)
1260 @ivar dfree: free disk, as reported by the node (runtime)
1261 @ivar offline: the offline status (config)
1262 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call failed (overall,
not whether the individual keys were correct) (runtime)
1265 @type lvm_fail: boolean
1266 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267 @type hyp_fail: boolean
1268 @ivar hyp_fail: whether the RPC call didn't return the instance list
1269 @type ghost: boolean
1270 @ivar ghost: whether this is a known node or not (config)
1271 @type os_fail: boolean
1272 @ivar os_fail: whether the RPC call didn't return valid OS data
1274 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275 @type vm_capable: boolean
1276 @ivar vm_capable: whether the node can host instances
1279 def __init__(self, offline=False, name=None, vm_capable=True):
1288 self.offline = offline
1289 self.vm_capable = vm_capable
1290 self.rpc_fail = False
1291 self.lvm_fail = False
1292 self.hyp_fail = False
1294 self.os_fail = False
1297 def ExpandNames(self):
1298 self.needed_locks = {
1299 locking.LEVEL_NODE: locking.ALL_SET,
1300 locking.LEVEL_INSTANCE: locking.ALL_SET,
1302 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1304 def _Error(self, ecode, item, msg, *args, **kwargs):
1305 """Format an error message.
1307 Based on the opcode's error_codes parameter, either format a
1308 parseable error code, or a simpler error string.
1310 This must be called only from Exec and functions called from Exec.
1313 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1315 # first complete the msg
1318 # then format the whole message
1319 if self.op.error_codes:
1320 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1326 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327 # and finally report it via the feedback_fn
1328 self._feedback_fn(" - %s" % msg)
1330 def _ErrorIf(self, cond, *args, **kwargs):
1331 """Log an error message if the passed condition is True.
1334 cond = bool(cond) or self.op.debug_simulate_errors
1336 self._Error(*args, **kwargs)
1337 # do not mark the operation as failed for WARN cases only
1338 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339 self.bad = self.bad or cond
1341 def _VerifyNode(self, ninfo, nresult):
1342 """Perform some basic validation on data returned from a node.
1344 - check the result data structure is well formed and has all the
1346 - check ganeti version
1348 @type ninfo: L{objects.Node}
1349 @param ninfo: the node to check
1350 @param nresult: the results from the node
1352 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
1357 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1359 # main result, nresult should be a non-empty dict
1360 test = not nresult or not isinstance(nresult, dict)
1361 _ErrorIf(test, self.ENODERPC, node,
1362 "unable to verify node: no data returned")
1366 # compares ganeti version
1367 local_version = constants.PROTOCOL_VERSION
1368 remote_version = nresult.get("version", None)
1369 test = not (remote_version and
1370 isinstance(remote_version, (list, tuple)) and
1371 len(remote_version) == 2)
1372 _ErrorIf(test, self.ENODERPC, node,
1373 "connection to node returned invalid data")
1377 test = local_version != remote_version[0]
1378 _ErrorIf(test, self.ENODEVERSION, node,
1379 "incompatible protocol versions: master %s,"
1380 " node %s", local_version, remote_version[0])
1384 # node seems compatible, we can actually try to look into its results
1386 # full package version
1387 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388 self.ENODEVERSION, node,
1389 "software version mismatch: master %s, node %s",
1390 constants.RELEASE_VERSION, remote_version[1],
1391 code=self.ETYPE_WARNING)
1393 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394 if ninfo.vm_capable and isinstance(hyp_result, dict):
1395 for hv_name, hv_result in hyp_result.iteritems():
1396 test = hv_result is not None
1397 _ErrorIf(test, self.ENODEHV, node,
1398 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1400 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401 if ninfo.vm_capable and isinstance(hvp_result, list):
1402 for item, hv_name, hv_result in hvp_result:
1403 _ErrorIf(True, self.ENODEHV, node,
1404 "hypervisor %s parameter verify failure (source %s): %s",
1405 hv_name, item, hv_result)
1407 test = nresult.get(constants.NV_NODESETUP,
1408 ["Missing NODESETUP results"])
1409 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1414 def _VerifyNodeTime(self, ninfo, nresult,
1415 nvinfo_starttime, nvinfo_endtime):
1416 """Check the node time.
1418 @type ninfo: L{objects.Node}
1419 @param ninfo: the node to check
1420 @param nresult: the remote results for the node
1421 @param nvinfo_starttime: the start time of the RPC call
1422 @param nvinfo_endtime: the end time of the RPC call
1426 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1428 ntime = nresult.get(constants.NV_TIME, None)
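# the node reports its time as a (seconds, microseconds) tuple; merge it into a single timestamp for comparison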
1430 ntime_merged = utils.MergeTime(ntime)
1431 except (ValueError, TypeError):
1432 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1435 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1442 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443 "Node time diverges by at least %s from master node time",
1446 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447 """Check the node time.
1449 @type ninfo: L{objects.Node}
1450 @param ninfo: the node to check
1451 @param nresult: the remote results for the node
1452 @param vg_name: the configured VG name
1459 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1461 # checks vg existence and size > 20G
1462 vglist = nresult.get(constants.NV_VGLIST, None)
1464 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1466 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467 constants.MIN_VG_SIZE)
1468 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1471 pvlist = nresult.get(constants.NV_PVLIST, None)
1472 test = pvlist is None
1473 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1475 # check that ':' is not present in PV names, since it's a
1476 # special character for lvcreate (denotes the range of PEs to
1478 for _, pvname, owner_vg in pvlist:
1479 test = ":" in pvname
1480 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481 " '%s' of VG '%s'", pvname, owner_vg)
1483 def _VerifyNodeNetwork(self, ninfo, nresult):
1484 """Check the node time.
1486 @type ninfo: L{objects.Node}
1487 @param ninfo: the node to check
1488 @param nresult: the remote results for the node
1492 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1494 test = constants.NV_NODELIST not in nresult
1495 _ErrorIf(test, self.ENODESSH, node,
1496 "node hasn't returned node ssh connectivity data")
1498 if nresult[constants.NV_NODELIST]:
1499 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500 _ErrorIf(True, self.ENODESSH, node,
1501 "ssh communication with node '%s': %s", a_node, a_msg)
1503 test = constants.NV_NODENETTEST not in nresult
1504 _ErrorIf(test, self.ENODENET, node,
1505 "node hasn't returned node tcp connectivity data")
1507 if nresult[constants.NV_NODENETTEST]:
1508 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1510 _ErrorIf(True, self.ENODENET, node,
1511 "tcp communication with node '%s': %s",
1512 anode, nresult[constants.NV_NODENETTEST][anode])
1514 test = constants.NV_MASTERIP not in nresult
1515 _ErrorIf(test, self.ENODENET, node,
1516 "node hasn't returned node master IP reachability data")
1518 if not nresult[constants.NV_MASTERIP]:
1519 if node == self.master_node:
1520 msg = "the master node cannot reach the master IP (not configured?)"
1522 msg = "cannot reach the master IP"
1523 _ErrorIf(True, self.ENODENET, node, msg)
1525 def _VerifyInstance(self, instance, instanceconfig, node_image,
1527 """Verify an instance.
1529 This function checks to see if the required block devices are
1530 available on the instance's node.
1533 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534 node_current = instanceconfig.primary_node
1536 node_vol_should = {}
1537 instanceconfig.MapLVsByNode(node_vol_should)
1539 for node in node_vol_should:
1540 n_img = node_image[node]
1541 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542 # ignore missing volumes on offline or broken nodes
1544 for volume in node_vol_should[node]:
1545 test = volume not in n_img.volumes
1546 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547 "volume %s missing on node %s", volume, node)
1549 if instanceconfig.admin_up:
1550 pri_img = node_image[node_current]
1551 test = instance not in pri_img.instances and not pri_img.offline
1552 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553 "instance not running on its primary node %s",
1556 for node, n_img in node_image.items():
if node != node_current:
1558 test = instance in n_img.instances
1559 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560 "instance should not run on node %s", node)
1562 diskdata = [(nname, success, status, idx)
1563 for (nname, disks) in diskstatus.items()
1564 for idx, (success, status) in enumerate(disks)]
1566 for nname, success, bdev_status, idx in diskdata:
1567 _ErrorIf(instanceconfig.admin_up and not success,
1568 self.EINSTANCEFAULTYDISK, instance,
1569 "couldn't retrieve status for disk/%s on %s: %s",
1570 idx, nname, bdev_status)
1571 _ErrorIf((instanceconfig.admin_up and success and
1572 bdev_status.ldisk_status == constants.LDS_FAULTY),
1573 self.EINSTANCEFAULTYDISK, instance,
1574 "disk/%s on %s is faulty", idx, nname)
1576 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1577 """Verify if there are any unknown volumes in the cluster.
1579 The .os, .swap and backup volumes are ignored. All other volumes are
1580 reported as unknown.
1582 @type reserved: L{ganeti.utils.FieldSet}
1583 @param reserved: a FieldSet of reserved volume names
1586 for node, n_img in node_image.items():
1587 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1588 # skip non-healthy nodes
1590 for volume in n_img.volumes:
1591 test = ((node not in node_vol_should or
1592 volume not in node_vol_should[node]) and
1593 not reserved.Matches(volume))
1594 self._ErrorIf(test, self.ENODEORPHANLV, node,
1595 "volume %s is unknown", volume)
1597 def _VerifyOrphanInstances(self, instancelist, node_image):
1598 """Verify the list of running instances.
1600 This checks what instances are running but unknown to the cluster.
1603 for node, n_img in node_image.items():
1604 for o_inst in n_img.instances:
1605 test = o_inst not in instancelist
1606 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1607 "instance %s on node %s should not exist", o_inst, node)
1609 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1610 """Verify N+1 Memory Resilience.
1612 Check that if one single node dies we can still start all the
1613 instances it was primary for.
1616 for node, n_img in node_image.items():
1617 # This code checks that every node which is now listed as
1618 # secondary has enough memory to host all instances it is
# supposed to, should a single other node in the cluster fail.
1620 # FIXME: not ready for failover to an arbitrary node
1621 # FIXME: does not support file-backed instances
1622 # WARNING: we currently take into account down instances as well
1623 # as up ones, considering that even if they're down someone
1624 # might want to start them even in the event of a node failure.
1625 for prinode, instances in n_img.sbp.items():
1627 for instance in instances:
1628 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1629 if bep[constants.BE_AUTO_BALANCE]:
1630 needed_mem += bep[constants.BE_MEMORY]
1631 test = n_img.mfree < needed_mem
1632 self._ErrorIf(test, self.ENODEN1, node,
1633 "not enough memory to accomodate instance failovers"
1634 " should node %s fail", prinode)
1636 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1638 """Verifies and computes the node required file checksums.
1640 @type ninfo: L{objects.Node}
1641 @param ninfo: the node to check
1642 @param nresult: the remote results for the node
1643 @param file_list: required list of files
1644 @param local_cksum: dictionary of local files and their checksums
1645 @param master_files: list of files that only masters should have
1649 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1651 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1652 test = not isinstance(remote_cksum, dict)
1653 _ErrorIf(test, self.ENODEFILECHECK, node,
1654 "node hasn't returned file checksum data")
1658 for file_name in file_list:
1659 node_is_mc = ninfo.master_candidate
1660 must_have = (file_name not in master_files) or node_is_mc
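# test1: the file is missing; test2: present but with a wrong checksum; test3: present with the correct checksum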
1662 test1 = file_name not in remote_cksum
1664 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1666 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1667 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1668 "file '%s' missing", file_name)
1669 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1670 "file '%s' has wrong checksum", file_name)
1671 # not candidate and this is not a must-have file
1672 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1673 "file '%s' should not exist on non master"
1674 " candidates (and the file is outdated)", file_name)
1675 # all good, except non-master/non-must have combination
1676 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1677 "file '%s' should not exist"
1678 " on non master candidates", file_name)
1680 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1682 """Verifies and the node DRBD status.
1684 @type ninfo: L{objects.Node}
1685 @param ninfo: the node to check
1686 @param nresult: the remote results for the node
1687 @param instanceinfo: the dict of instances
1688 @param drbd_helper: the configured DRBD usermode helper
1689 @param drbd_map: the DRBD map as returned by
1690 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1694 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1697 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
test = (helper_result is None)
1699 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1700 "no drbd usermode helper returned")
1702 status, payload = helper_result
1704 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1705 "drbd usermode helper check unsuccessful: %s", payload)
1706 test = status and (payload != drbd_helper)
1707 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1708 "wrong drbd usermode helper: %s", payload)
1710 # compute the DRBD minors
1712 for minor, instance in drbd_map[node].items():
1713 test = instance not in instanceinfo
1714 _ErrorIf(test, self.ECLUSTERCFG, None,
1715 "ghost instance '%s' in temporary DRBD map", instance)
1716 # ghost instance should not be running, but otherwise we
1717 # don't give double warnings (both ghost instance and
1718 # unallocated minor in use)
1720 node_drbd[minor] = (instance, False)
1722 instance = instanceinfo[instance]
1723 node_drbd[minor] = (instance.name, instance.admin_up)
1725 # and now check them
1726 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1727 test = not isinstance(used_minors, (tuple, list))
1728 _ErrorIf(test, self.ENODEDRBD, node,
1729 "cannot parse drbd status file: %s", str(used_minors))
1731 # we cannot check drbd status
1734 for minor, (iname, must_exist) in node_drbd.items():
1735 test = minor not in used_minors and must_exist
1736 _ErrorIf(test, self.ENODEDRBD, node,
1737 "drbd minor %d of instance %s is not active", minor, iname)
1738 for minor in used_minors:
1739 test = minor not in node_drbd
1740 _ErrorIf(test, self.ENODEDRBD, node,
1741 "unallocated drbd minor %d is in use", minor)
1743 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1744 """Builds the node OS structures.
1746 @type ninfo: L{objects.Node}
1747 @param ninfo: the node to check
1748 @param nresult: the remote results for the node
1749 @param nimg: the node image object
1753 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1755 remote_os = nresult.get(constants.NV_OSLIST, None)
1756 test = (not isinstance(remote_os, list) or
1757 not compat.all(isinstance(v, list) and len(v) == 7
1758 for v in remote_os))
1760 _ErrorIf(test, self.ENODEOS, node,
1761 "node hasn't returned valid OS data")
1770 for (name, os_path, status, diagnose,
1771 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1773 if name not in os_dict:
1776 # parameters is a list of lists instead of list of tuples due to
1777 # JSON lacking a real tuple type, fix it:
1778 parameters = [tuple(v) for v in parameters]
1779 os_dict[name].append((os_path, status, diagnose,
1780 set(variants), set(parameters), set(api_ver)))
1782 nimg.oslist = os_dict
1784 def _VerifyNodeOS(self, ninfo, nimg, base):
1785 """Verifies the node OS list.
1787 @type ninfo: L{objects.Node}
1788 @param ninfo: the node to check
1789 @param nimg: the node image object
1790 @param base: the 'template' node we match against (e.g. from the master)
1794 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1796 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1798 for os_name, os_data in nimg.oslist.items():
1799 assert os_data, "Empty OS status for OS %s?!" % os_name
1800 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1801 _ErrorIf(not f_status, self.ENODEOS, node,
1802 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1803 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1804 "OS '%s' has multiple entries (first one shadows the rest): %s",
1805 os_name, utils.CommaJoin([v[0] for v in os_data]))
# this will be caught in the backend too
1807 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1808 and not f_var, self.ENODEOS, node,
1809 "OS %s with API at least %d does not declare any variant",
1810 os_name, constants.OS_API_V15)
1811 # comparisons with the 'base' image
1812 test = os_name not in base.oslist
1813 _ErrorIf(test, self.ENODEOS, node,
1814 "Extra OS %s not present on reference node (%s)",
1818 assert base.oslist[os_name], "Base node has empty OS status?"
1819 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1821 # base OS is invalid, skipping
1823 for kind, a, b in [("API version", f_api, b_api),
1824 ("variants list", f_var, b_var),
1825 ("parameters", f_param, b_param)]:
1826 _ErrorIf(a != b, self.ENODEOS, node,
1827 "OS %s %s differs from reference node %s: %s vs. %s",
1828 kind, os_name, base.name,
1829 utils.CommaJoin(a), utils.CommaJoin(b))
1831 # check any missing OSes
1832 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1833 _ErrorIf(missing, self.ENODEOS, node,
1834 "OSes present on reference node %s but missing on this node: %s",
1835 base.name, utils.CommaJoin(missing))
1837 def _VerifyOob(self, ninfo, nresult):
1838 """Verifies out of band functionality of a node.
1840 @type ninfo: L{objects.Node}
1841 @param ninfo: the node to check
1842 @param nresult: the remote results for the node
1846 # We just have to verify the paths on master and/or master candidates
1847 # as the oob helper is invoked on the master
1848 if ((ninfo.master_candidate or ninfo.master_capable) and
1849 constants.NV_OOB_PATHS in nresult):
1850 for path_result in nresult[constants.NV_OOB_PATHS]:
1851 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1853 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1854 """Verifies and updates the node volume data.
1856 This function will update a L{NodeImage}'s internal structures
1857 with data from the remote call.
1859 @type ninfo: L{objects.Node}
1860 @param ninfo: the node to check
1861 @param nresult: the remote results for the node
1862 @param nimg: the node image object
1863 @param vg_name: the configured VG name
1867 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1869 nimg.lvm_fail = True
1870 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1873 elif isinstance(lvdata, basestring):
1874 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1875 utils.SafeEncode(lvdata))
1876 elif not isinstance(lvdata, dict):
1877 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1879 nimg.volumes = lvdata
1880 nimg.lvm_fail = False
1882 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1883 """Verifies and updates the node instance list.
1885 If the listing was successful, then updates this node's instance
1886 list. Otherwise, it marks the RPC call as failed for the instance
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the remote results for the node
1892 @param nimg: the node image object
1895 idata = nresult.get(constants.NV_INSTANCELIST, None)
1896 test = not isinstance(idata, list)
1897 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1898 " (instancelist): %s", utils.SafeEncode(str(idata)))
1900 nimg.hyp_fail = True
1902 nimg.instances = idata
1904 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1905 """Verifies and computes a node information map
1907 @type ninfo: L{objects.Node}
1908 @param ninfo: the node to check
1909 @param nresult: the remote results for the node
1910 @param nimg: the node image object
1911 @param vg_name: the configured VG name
1915 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1917 # try to read free memory (from the hypervisor)
1918 hv_info = nresult.get(constants.NV_HVINFO, None)
1919 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1920 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1923 nimg.mfree = int(hv_info["memory_free"])
1924 except (ValueError, TypeError):
1925 _ErrorIf(True, self.ENODERPC, node,
1926 "node returned invalid nodeinfo, check hypervisor")
1928 # FIXME: devise a free space model for file based instances as well
1929 if vg_name is not None:
1930 test = (constants.NV_VGLIST not in nresult or
1931 vg_name not in nresult[constants.NV_VGLIST])
1932 _ErrorIf(test, self.ENODELVM, node,
1933 "node didn't return data for the volume group '%s'"
1934 " - it is either missing or broken", vg_name)
1937 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1938 except (ValueError, TypeError):
1939 _ErrorIf(True, self.ENODERPC, node,
1940 "node returned invalid LVM info, check LVM status")
1942 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1943 """Gets per-disk status information for all instances.
1945 @type nodelist: list of strings
1946 @param nodelist: Node names
1947 @type node_image: dict of (name, L{objects.Node})
1948 @param node_image: Node objects
1949 @type instanceinfo: dict of (name, L{objects.Instance})
1950 @param instanceinfo: Instance objects
1951 @rtype: {instance: {node: [(success, payload)]}}
1952 @return: a dictionary of per-instance dictionaries with nodes as
1953 keys and disk information as values; the disk information is a
1954 list of tuples (success, payload)
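As a purely illustrative sketch (the instance/node names and the payload
placeholder below are made up), the result could look like::

{"inst1": {"node1": [(True, payload), (True, payload)],
"node2": [(False, "node offline")]}}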
1957 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1960 node_disks_devonly = {}
1961 diskless_instances = set()
1962 diskless = constants.DT_DISKLESS
1964 for nname in nodelist:
1965 node_instances = list(itertools.chain(node_image[nname].pinst,
1966 node_image[nname].sinst))
1967 diskless_instances.update(inst for inst in node_instances
1968 if instanceinfo[inst].disk_template == diskless)
1969 disks = [(inst, disk)
1970 for inst in node_instances
1971 for disk in instanceinfo[inst].disks]
1974 # No need to collect data
1977 node_disks[nname] = disks
1979 # Creating copies as SetDiskID below will modify the objects and that can
1980 # lead to incorrect data returned from nodes
1981 devonly = [dev.Copy() for (_, dev) in disks]
1984 self.cfg.SetDiskID(dev, nname)
1986 node_disks_devonly[nname] = devonly
1988 assert len(node_disks) == len(node_disks_devonly)
1990 # Collect data from all nodes with disks
1991 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1994 assert len(result) == len(node_disks)
1998 for (nname, nres) in result.items():
1999 disks = node_disks[nname]
2002 # No data from this node
2003 data = len(disks) * [(False, "node offline")]
2006 _ErrorIf(msg, self.ENODERPC, nname,
2007 "while getting disk information: %s", msg)
2009 # No data from this node
2010 data = len(disks) * [(False, msg)]
2013 for idx, i in enumerate(nres.payload):
2014 if isinstance(i, (tuple, list)) and len(i) == 2:
2017 logging.warning("Invalid result from node %s, entry %d: %s",
2019 data.append((False, "Invalid result from the remote node"))
2021 for ((inst, _), status) in zip(disks, data):
2022 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2024 # Add empty entries for diskless instances.
2025 for inst in diskless_instances:
2026 assert inst not in instdisk
2029 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2030 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2031 compat.all(isinstance(s, (tuple, list)) and
2032 len(s) == 2 for s in statuses)
2033 for inst, nnames in instdisk.items()
2034 for nname, statuses in nnames.items())
2035 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2039 def _VerifyHVP(self, hvp_data):
2040 """Verifies locally the syntax of the hypervisor parameters.
2043 for item, hv_name, hv_params in hvp_data:
2044 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2047 hv_class = hypervisor.GetHypervisor(hv_name)
2048 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2049 hv_class.CheckParameterSyntax(hv_params)
2050 except errors.GenericError, err:
2051 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2054 def BuildHooksEnv(self):
2057 Cluster-Verify hooks are run only in the post phase; if they fail, their
2058 output is logged in the verify output and the verification fails.
2061 all_nodes = self.cfg.GetNodeList()
2063 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2065 for node in self.cfg.GetAllNodesInfo().values():
2066 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2068 return env, [], all_nodes
2070 def Exec(self, feedback_fn):
2071 """Verify integrity of cluster, performing various test on nodes.
2074 # This method has too many local variables. pylint: disable-msg=R0914
2076 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2077 verbose = self.op.verbose
2078 self._feedback_fn = feedback_fn
2079 feedback_fn("* Verifying global settings")
2080 for msg in self.cfg.VerifyConfig():
2081 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2083 # Check the cluster certificates
2084 for cert_filename in constants.ALL_CERT_FILES:
2085 (errcode, msg) = _VerifyCertificate(cert_filename)
2086 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2088 vg_name = self.cfg.GetVGName()
2089 drbd_helper = self.cfg.GetDRBDHelper()
2090 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2091 cluster = self.cfg.GetClusterInfo()
2092 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2093 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2094 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2095 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2096 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2097 for iname in instancelist)
2098 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2099 i_non_redundant = [] # Non redundant instances
2100 i_non_a_balanced = [] # Non auto-balanced instances
2101 n_offline = 0 # Count of offline nodes
2102 n_drained = 0 # Count of nodes being drained
2103 node_vol_should = {}
2105 # FIXME: verify OS list
2106 # do local checksums
2107 master_files = [constants.CLUSTER_CONF_FILE]
2108 master_node = self.master_node = self.cfg.GetMasterNode()
2109 master_ip = self.cfg.GetMasterIP()
2111 file_names = ssconf.SimpleStore().GetFileList()
2112 file_names.extend(constants.ALL_CERT_FILES)
2113 file_names.extend(master_files)
2114 if cluster.modify_etc_hosts:
2115 file_names.append(constants.ETC_HOSTS)
2117 local_checksums = utils.FingerprintFiles(file_names)
2119 # Compute the set of hypervisor parameters
2121 for hv_name in hypervisors:
2122 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2123 for os_name, os_hvp in cluster.os_hvp.items():
2124 for hv_name, hv_params in os_hvp.items():
2127 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2128 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2129 # TODO: collapse identical parameter values into a single one
2130 for instance in instanceinfo.values():
2131 if not instance.hvparams:
2133 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2134 cluster.FillHV(instance)))
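# hvp_data is now a list of (source, hypervisor name, parameters) triples,
# e.g. ("cluster", "kvm", {...}) or ("instance web1", "kvm", {...}); the
# hypervisor/instance names in this example are illustrative only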
2135 # and verify them locally
2136 self._VerifyHVP(hvp_data)
2138 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2139 node_verify_param = {
2140 constants.NV_FILELIST: file_names,
2141 constants.NV_NODELIST: [node.name for node in nodeinfo
2142 if not node.offline],
2143 constants.NV_HYPERVISOR: hypervisors,
2144 constants.NV_HVPARAMS: hvp_data,
2145 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2146 node.secondary_ip) for node in nodeinfo
2147 if not node.offline],
2148 constants.NV_INSTANCELIST: hypervisors,
2149 constants.NV_VERSION: None,
2150 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2151 constants.NV_NODESETUP: None,
2152 constants.NV_TIME: None,
2153 constants.NV_MASTERIP: (master_node, master_ip),
2154 constants.NV_OSLIST: None,
2155 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2158 if vg_name is not None:
2159 node_verify_param[constants.NV_VGLIST] = None
2160 node_verify_param[constants.NV_LVLIST] = vg_name
2161 node_verify_param[constants.NV_PVLIST] = [vg_name]
2162 node_verify_param[constants.NV_DRBDLIST] = None
2165 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2167 # Build our expected cluster state
2168 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2170 vm_capable=node.vm_capable))
2171 for node in nodeinfo)
2175 for node in nodeinfo:
2176 path = _SupportsOob(self.cfg, node)
2177 if path and path not in oob_paths:
2178 oob_paths.append(path)
2181 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2183 for instance in instancelist:
2184 inst_config = instanceinfo[instance]
2186 for nname in inst_config.all_nodes:
2187 if nname not in node_image:
2189 gnode = self.NodeImage(name=nname)
2191 node_image[nname] = gnode
2193 inst_config.MapLVsByNode(node_vol_should)
2195 pnode = inst_config.primary_node
2196 node_image[pnode].pinst.append(instance)
2198 for snode in inst_config.secondary_nodes:
2199 nimg = node_image[snode]
2200 nimg.sinst.append(instance)
2201 if pnode not in nimg.sbp:
2202 nimg.sbp[pnode] = []
2203 nimg.sbp[pnode].append(instance)
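# nimg.sbp maps a primary node to the instances that use this node as
# secondary, e.g. (illustrative names only) {"node1": ["inst1", "inst2"]}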
2205 # At this point, we have the in-memory data structures complete,
2206 # except for the runtime information, which we'll gather next
2208 # Due to the way our RPC system works, exact response times cannot be
2209 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2210 # time before and after executing the request, we can at least have a time
2212 nvinfo_starttime = time.time()
2213 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2214 self.cfg.GetClusterName())
2215 nvinfo_endtime = time.time()
2217 all_drbd_map = self.cfg.ComputeDRBDMap()
2219 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2220 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2222 feedback_fn("* Verifying node status")
2226 for node_i in nodeinfo:
2228 nimg = node_image[node]
2232 feedback_fn("* Skipping offline node %s" % (node,))
2236 if node == master_node:
2238 elif node_i.master_candidate:
2239 ntype = "master candidate"
2240 elif node_i.drained:
2246 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2248 msg = all_nvinfo[node].fail_msg
2249 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2251 nimg.rpc_fail = True
2254 nresult = all_nvinfo[node].payload
2256 nimg.call_ok = self._VerifyNode(node_i, nresult)
2257 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2258 self._VerifyNodeNetwork(node_i, nresult)
2259 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2262 self._VerifyOob(node_i, nresult)
2265 self._VerifyNodeLVM(node_i, nresult, vg_name)
2266 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2269 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2270 self._UpdateNodeInstances(node_i, nresult, nimg)
2271 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2272 self._UpdateNodeOS(node_i, nresult, nimg)
2273 if not nimg.os_fail:
2274 if refos_img is None:
2276 self._VerifyNodeOS(node_i, nimg, refos_img)
2278 feedback_fn("* Verifying instance status")
2279 for instance in instancelist:
2281 feedback_fn("* Verifying instance %s" % instance)
2282 inst_config = instanceinfo[instance]
2283 self._VerifyInstance(instance, inst_config, node_image,
2285 inst_nodes_offline = []
2287 pnode = inst_config.primary_node
2288 pnode_img = node_image[pnode]
2289 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2290 self.ENODERPC, pnode, "instance %s, connection to"
2291 " primary node failed", instance)
2293 if pnode_img.offline:
2294 inst_nodes_offline.append(pnode)
2296 # If the instance is non-redundant we cannot survive losing its primary
2297 # node, so we are not N+1 compliant. On the other hand we have no disk
2298 # templates with more than one secondary, so that situation is not well supported either.
2300 # FIXME: does not support file-backed instances
2301 if not inst_config.secondary_nodes:
2302 i_non_redundant.append(instance)
2304 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2305 instance, "instance has multiple secondary nodes: %s",
2306 utils.CommaJoin(inst_config.secondary_nodes),
2307 code=self.ETYPE_WARNING)
2309 if inst_config.disk_template in constants.DTS_NET_MIRROR:
2310 pnode = inst_config.primary_node
2311 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2312 instance_groups = {}
2314 for node in instance_nodes:
2315 instance_groups.setdefault(nodeinfo_byname[node].group,
2319 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2320 # Sort so that we always list the primary node first.
2321 for group, nodes in sorted(instance_groups.items(),
2322 key=lambda (_, nodes): pnode in nodes,
2325 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2326 instance, "instance has primary and secondary nodes in"
2327 " different groups: %s", utils.CommaJoin(pretty_list),
2328 code=self.ETYPE_WARNING)
2330 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2331 i_non_a_balanced.append(instance)
2333 for snode in inst_config.secondary_nodes:
2334 s_img = node_image[snode]
2335 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2336 "instance %s, connection to secondary node failed", instance)
2339 inst_nodes_offline.append(snode)
2341 # warn that the instance lives on offline nodes
2342 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2343 "instance lives on offline node(s) %s",
2344 utils.CommaJoin(inst_nodes_offline))
2345 # ... or ghost/non-vm_capable nodes
2346 for node in inst_config.all_nodes:
2347 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2348 "instance lives on ghost node %s", node)
2349 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2350 instance, "instance lives on non-vm_capable node %s", node)
2352 feedback_fn("* Verifying orphan volumes")
2353 reserved = utils.FieldSet(*cluster.reserved_lvs)
2354 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2356 feedback_fn("* Verifying orphan instances")
2357 self._VerifyOrphanInstances(instancelist, node_image)
2359 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2360 feedback_fn("* Verifying N+1 Memory redundancy")
2361 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2363 feedback_fn("* Other Notes")
2365 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2366 % len(i_non_redundant))
2368 if i_non_a_balanced:
2369 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2370 % len(i_non_a_balanced))
2373 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2376 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2380 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2381 """Analyze the post-hooks' result
2383 This method analyses the hook result, handles it, and sends some
2384 nicely-formatted feedback back to the user.
2386 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2387 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2388 @param hooks_results: the results of the multi-node hooks rpc call
2389 @param feedback_fn: function used to send feedback back to the caller
2390 @param lu_result: previous Exec result
2391 @return: the new Exec result, based on the previous result
2395 # We only really run POST phase hooks, and are only interested in
2397 if phase == constants.HOOKS_PHASE_POST:
2398 # Used to change hooks' output to proper indentation
2399 feedback_fn("* Hooks Results")
2400 assert hooks_results, "invalid result from hooks"
2402 for node_name in hooks_results:
2403 res = hooks_results[node_name]
2405 test = msg and not res.offline
2406 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2407 "Communication failure in hooks execution: %s", msg)
2408 if res.offline or msg:
2409 # No need to investigate payload if node is offline or gave an error.
2410 # manually override lu_result here as _ErrorIf only
2411 # overrides self.bad
2414 for script, hkr, output in res.payload:
2415 test = hkr == constants.HKR_FAIL
2416 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2417 "Script %s failed, output:", script)
2419 output = self._HOOKS_INDENT_RE.sub(' ', output)
2420 feedback_fn("%s" % output)
2426 class LUClusterVerifyDisks(NoHooksLU):
2427 """Verifies the cluster disks status.
2432 def ExpandNames(self):
2433 self.needed_locks = {
2434 locking.LEVEL_NODE: locking.ALL_SET,
2435 locking.LEVEL_INSTANCE: locking.ALL_SET,
2437 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2439 def Exec(self, feedback_fn):
2440 """Verify integrity of cluster disks.
2442 @rtype: tuple of three items
2443 @return: a tuple of (dict of node-to-node_error, list of instances
2444 which need activate-disks, dict of instance: (node, volume) for
2448 result = res_nodes, res_instances, res_missing = {}, [], {}
2450 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2451 instances = self.cfg.GetAllInstancesInfo().values()
2454 for inst in instances:
2456 if not inst.admin_up:
2458 inst.MapLVsByNode(inst_lvs)
2459 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2460 for node, vol_list in inst_lvs.iteritems():
2461 for vol in vol_list:
2462 nv_dict[(node, vol)] = inst
2467 node_lvs = self.rpc.call_lv_list(nodes, [])
2468 for node, node_res in node_lvs.items():
2469 if node_res.offline:
2471 msg = node_res.fail_msg
2473 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2474 res_nodes[node] = msg
2477 lvs = node_res.payload
2478 for lv_name, (_, _, lv_online) in lvs.items():
2479 inst = nv_dict.pop((node, lv_name), None)
2480 if (not lv_online and inst is not None
2481 and inst.name not in res_instances):
2482 res_instances.append(inst.name)
2484 # any leftover items in nv_dict are missing LVs, let's arrange the
2486 for key, inst in nv_dict.iteritems():
2487 if inst.name not in res_missing:
2488 res_missing[inst.name] = []
2489 res_missing[inst.name].append(key)
2494 class LUClusterRepairDiskSizes(NoHooksLU):
2495 """Verifies the cluster disks sizes.
2500 def ExpandNames(self):
2501 if self.op.instances:
2502 self.wanted_names = []
2503 for name in self.op.instances:
2504 full_name = _ExpandInstanceName(self.cfg, name)
2505 self.wanted_names.append(full_name)
2506 self.needed_locks = {
2507 locking.LEVEL_NODE: [],
2508 locking.LEVEL_INSTANCE: self.wanted_names,
2510 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2512 self.wanted_names = None
2513 self.needed_locks = {
2514 locking.LEVEL_NODE: locking.ALL_SET,
2515 locking.LEVEL_INSTANCE: locking.ALL_SET,
2517 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2519 def DeclareLocks(self, level):
2520 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2521 self._LockInstancesNodes(primary_only=True)
2523 def CheckPrereq(self):
2524 """Check prerequisites.
2526 This only checks the optional instance list against the existing names.
2529 if self.wanted_names is None:
2530 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2532 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2533 in self.wanted_names]
2535 def _EnsureChildSizes(self, disk):
2536 """Ensure children of the disk have the needed disk size.
2538 This is valid mainly for DRBD8 and fixes an issue where the
2539 children have a smaller disk size than the parent.
2541 @param disk: an L{ganeti.objects.Disk} object
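@rtype: boolean
@return: whether any child disk size had to be corrected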
2544 if disk.dev_type == constants.LD_DRBD8:
2545 assert disk.children, "Empty children for DRBD8?"
2546 fchild = disk.children[0]
2547 mismatch = fchild.size < disk.size
2549 self.LogInfo("Child disk has size %d, parent %d, fixing",
2550 fchild.size, disk.size)
2551 fchild.size = disk.size
2553 # and we recurse on this child only, not on the metadev
2554 return self._EnsureChildSizes(fchild) or mismatch
2558 def Exec(self, feedback_fn):
2559 """Verify the size of cluster disks.
2562 # TODO: check child disks too
2563 # TODO: check differences in size between primary/secondary nodes
2565 for instance in self.wanted_instances:
2566 pnode = instance.primary_node
2567 if pnode not in per_node_disks:
2568 per_node_disks[pnode] = []
2569 for idx, disk in enumerate(instance.disks):
2570 per_node_disks[pnode].append((instance, idx, disk))
2573 for node, dskl in per_node_disks.items():
2574 newl = [v[2].Copy() for v in dskl]
2576 self.cfg.SetDiskID(dsk, node)
2577 result = self.rpc.call_blockdev_getsizes(node, newl)
2579 self.LogWarning("Failure in blockdev_getsizes call to node"
2580 " %s, ignoring", node)
2582 if len(result.data) != len(dskl):
2583 self.LogWarning("Invalid result from node %s, ignoring node results",
2586 for ((instance, idx, disk), size) in zip(dskl, result.data):
2588 self.LogWarning("Disk %d of instance %s did not return size"
2589 " information, ignoring", idx, instance.name)
2591 if not isinstance(size, (int, long)):
2592 self.LogWarning("Disk %d of instance %s did not return valid"
2593 " size information, ignoring", idx, instance.name)
2596 if size != disk.size:
2597 self.LogInfo("Disk %d of instance %s has mismatched size,"
2598 " correcting: recorded %d, actual %d", idx,
2599 instance.name, disk.size, size)
2601 self.cfg.Update(instance, feedback_fn)
2602 changed.append((instance.name, idx, size))
2603 if self._EnsureChildSizes(disk):
2604 self.cfg.Update(instance, feedback_fn)
2605 changed.append((instance.name, idx, disk.size))
2609 class LUClusterRename(LogicalUnit):
2610 """Rename the cluster.
2613 HPATH = "cluster-rename"
2614 HTYPE = constants.HTYPE_CLUSTER
2616 def BuildHooksEnv(self):
2621 "OP_TARGET": self.cfg.GetClusterName(),
2622 "NEW_NAME": self.op.name,
2624 mn = self.cfg.GetMasterNode()
2625 all_nodes = self.cfg.GetNodeList()
2626 return env, [mn], all_nodes
2628 def CheckPrereq(self):
2629 """Verify that the passed name is a valid one.
2632 hostname = netutils.GetHostname(name=self.op.name,
2633 family=self.cfg.GetPrimaryIPFamily())
2635 new_name = hostname.name
2636 self.ip = new_ip = hostname.ip
2637 old_name = self.cfg.GetClusterName()
2638 old_ip = self.cfg.GetMasterIP()
2639 if new_name == old_name and new_ip == old_ip:
2640 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2641 " cluster has changed",
2643 if new_ip != old_ip:
2644 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2645 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2646 " reachable on the network" %
2647 new_ip, errors.ECODE_NOTUNIQUE)
2649 self.op.name = new_name
2651 def Exec(self, feedback_fn):
2652 """Rename the cluster.
2655 clustername = self.op.name
2658 # shutdown the master IP
2659 master = self.cfg.GetMasterNode()
2660 result = self.rpc.call_node_stop_master(master, False)
2661 result.Raise("Could not disable the master role")
2664 cluster = self.cfg.GetClusterInfo()
2665 cluster.cluster_name = clustername
2666 cluster.master_ip = ip
2667 self.cfg.Update(cluster, feedback_fn)
2669 # update the known hosts file
2670 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2671 node_list = self.cfg.GetOnlineNodeList()
2673 node_list.remove(master)
2676 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2678 result = self.rpc.call_node_start_master(master, False, False)
2679 msg = result.fail_msg
2681 self.LogWarning("Could not re-enable the master role on"
2682 " the master, please restart manually: %s", msg)
2687 class LUClusterSetParams(LogicalUnit):
2688 """Change the parameters of the cluster.
2691 HPATH = "cluster-modify"
2692 HTYPE = constants.HTYPE_CLUSTER
2695 def CheckArguments(self):
2699 if self.op.uid_pool:
2700 uidpool.CheckUidPool(self.op.uid_pool)
2702 if self.op.add_uids:
2703 uidpool.CheckUidPool(self.op.add_uids)
2705 if self.op.remove_uids:
2706 uidpool.CheckUidPool(self.op.remove_uids)
2708 def ExpandNames(self):
2709 # FIXME: in the future maybe other cluster params won't require checking on
2710 # all nodes to be modified.
2711 self.needed_locks = {
2712 locking.LEVEL_NODE: locking.ALL_SET,
2714 self.share_locks[locking.LEVEL_NODE] = 1
2716 def BuildHooksEnv(self):
2721 "OP_TARGET": self.cfg.GetClusterName(),
2722 "NEW_VG_NAME": self.op.vg_name,
2724 mn = self.cfg.GetMasterNode()
2725 return env, [mn], [mn]
2727 def CheckPrereq(self):
2728 """Check prerequisites.
2730 This checks that the given parameters do not conflict and
2731 that the given volume group is valid.
2734 if self.op.vg_name is not None and not self.op.vg_name:
2735 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2736 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2737 " instances exist", errors.ECODE_INVAL)
2739 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2740 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2741 raise errors.OpPrereqError("Cannot disable drbd helper while"
2742 " drbd-based instances exist",
2745 node_list = self.acquired_locks[locking.LEVEL_NODE]
2747 # if vg_name not None, checks given volume group on all nodes
2749 vglist = self.rpc.call_vg_list(node_list)
2750 for node in node_list:
2751 msg = vglist[node].fail_msg
2753 # ignoring down node
2754 self.LogWarning("Error while gathering data on node %s"
2755 " (ignoring node): %s", node, msg)
2757 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2759 constants.MIN_VG_SIZE)
2761 raise errors.OpPrereqError("Error on node '%s': %s" %
2762 (node, vgstatus), errors.ECODE_ENVIRON)
2764 if self.op.drbd_helper:
2765 # checks given drbd helper on all nodes
2766 helpers = self.rpc.call_drbd_helper(node_list)
2767 for node in node_list:
2768 ninfo = self.cfg.GetNodeInfo(node)
2770 self.LogInfo("Not checking drbd helper on offline node %s", node)
2772 msg = helpers[node].fail_msg
2774 raise errors.OpPrereqError("Error checking drbd helper on node"
2775 " '%s': %s" % (node, msg),
2776 errors.ECODE_ENVIRON)
2777 node_helper = helpers[node].payload
2778 if node_helper != self.op.drbd_helper:
2779 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2780 (node, node_helper), errors.ECODE_ENVIRON)
2782 self.cluster = cluster = self.cfg.GetClusterInfo()
2783 # validate params changes
2784 if self.op.beparams:
2785 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2786 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2788 if self.op.ndparams:
2789 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2790 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2792 if self.op.nicparams:
2793 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2794 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2795 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2798 # check all instances for consistency
2799 for instance in self.cfg.GetAllInstancesInfo().values():
2800 for nic_idx, nic in enumerate(instance.nics):
2801 params_copy = copy.deepcopy(nic.nicparams)
2802 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2804 # check parameter syntax
2806 objects.NIC.CheckParameterSyntax(params_filled)
2807 except errors.ConfigurationError, err:
2808 nic_errors.append("Instance %s, nic/%d: %s" %
2809 (instance.name, nic_idx, err))
2811 # if we're moving instances to routed, check that they have an ip
2812 target_mode = params_filled[constants.NIC_MODE]
2813 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2814 nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
2815 (instance.name, nic_idx))
2817 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2818 "\n".join(nic_errors))
2820 # hypervisor list/parameters
2821 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2822 if self.op.hvparams:
2823 for hv_name, hv_dict in self.op.hvparams.items():
2824 if hv_name not in self.new_hvparams:
2825 self.new_hvparams[hv_name] = hv_dict
2827 self.new_hvparams[hv_name].update(hv_dict)
2829 # os hypervisor parameters
2830 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2832 for os_name, hvs in self.op.os_hvp.items():
2833 if os_name not in self.new_os_hvp:
2834 self.new_os_hvp[os_name] = hvs
2836 for hv_name, hv_dict in hvs.items():
2837 if hv_name not in self.new_os_hvp[os_name]:
2838 self.new_os_hvp[os_name][hv_name] = hv_dict
2840 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2843 self.new_osp = objects.FillDict(cluster.osparams, {})
2844 if self.op.osparams:
2845 for os_name, osp in self.op.osparams.items():
2846 if os_name not in self.new_osp:
2847 self.new_osp[os_name] = {}
2849 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2852 if not self.new_osp[os_name]:
2853 # we removed all parameters
2854 del self.new_osp[os_name]
2856 # check the parameter validity (remote check)
2857 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2858 os_name, self.new_osp[os_name])
2860 # changes to the hypervisor list
2861 if self.op.enabled_hypervisors is not None:
2862 self.hv_list = self.op.enabled_hypervisors
2863 for hv in self.hv_list:
2864 # if the hypervisor doesn't already exist in the cluster
2865 # hvparams, we initialize it to empty, and then (in both
2866 # cases) we make sure to fill the defaults, as we might not
2867 # have a complete defaults list if the hypervisor wasn't
2869 if hv not in new_hvp:
2871 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2872 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2874 self.hv_list = cluster.enabled_hypervisors
2876 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2877 # either the enabled list has changed, or the parameters have, validate
2878 for hv_name, hv_params in self.new_hvparams.items():
2879 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2880 (self.op.enabled_hypervisors and
2881 hv_name in self.op.enabled_hypervisors)):
2882 # either this is a new hypervisor, or its parameters have changed
2883 hv_class = hypervisor.GetHypervisor(hv_name)
2884 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2885 hv_class.CheckParameterSyntax(hv_params)
2886 _CheckHVParams(self, node_list, hv_name, hv_params)
2889 # no need to check any newly-enabled hypervisors, since the
2890 # defaults have already been checked in the above code-block
2891 for os_name, os_hvp in self.new_os_hvp.items():
2892 for hv_name, hv_params in os_hvp.items():
2893 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2894 # we need to fill in the new os_hvp on top of the actual hv_p
2895 cluster_defaults = self.new_hvparams.get(hv_name, {})
2896 new_osp = objects.FillDict(cluster_defaults, hv_params)
2897 hv_class = hypervisor.GetHypervisor(hv_name)
2898 hv_class.CheckParameterSyntax(new_osp)
2899 _CheckHVParams(self, node_list, hv_name, new_osp)
2901 if self.op.default_iallocator:
2902 alloc_script = utils.FindFile(self.op.default_iallocator,
2903 constants.IALLOCATOR_SEARCH_PATH,
2905 if alloc_script is None:
2906 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2907 " specified" % self.op.default_iallocator,
2910 def Exec(self, feedback_fn):
2911 """Change the parameters of the cluster.
2914 if self.op.vg_name is not None:
2915 new_volume = self.op.vg_name
2918 if new_volume != self.cfg.GetVGName():
2919 self.cfg.SetVGName(new_volume)
2921 feedback_fn("Cluster LVM configuration already in desired"
2922 " state, not changing")
2923 if self.op.drbd_helper is not None:
2924 new_helper = self.op.drbd_helper
2927 if new_helper != self.cfg.GetDRBDHelper():
2928 self.cfg.SetDRBDHelper(new_helper)
2930 feedback_fn("Cluster DRBD helper already in desired state,"
2932 if self.op.hvparams:
2933 self.cluster.hvparams = self.new_hvparams
2935 self.cluster.os_hvp = self.new_os_hvp
2936 if self.op.enabled_hypervisors is not None:
2937 self.cluster.hvparams = self.new_hvparams
2938 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2939 if self.op.beparams:
2940 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2941 if self.op.nicparams:
2942 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2943 if self.op.osparams:
2944 self.cluster.osparams = self.new_osp
2945 if self.op.ndparams:
2946 self.cluster.ndparams = self.new_ndparams
2948 if self.op.candidate_pool_size is not None:
2949 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2950 # we need to update the pool size here, otherwise the save will fail
2951 _AdjustCandidatePool(self, [])
2953 if self.op.maintain_node_health is not None:
2954 self.cluster.maintain_node_health = self.op.maintain_node_health
2956 if self.op.prealloc_wipe_disks is not None:
2957 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2959 if self.op.add_uids is not None:
2960 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2962 if self.op.remove_uids is not None:
2963 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2965 if self.op.uid_pool is not None:
2966 self.cluster.uid_pool = self.op.uid_pool
2968 if self.op.default_iallocator is not None:
2969 self.cluster.default_iallocator = self.op.default_iallocator
2971 if self.op.reserved_lvs is not None:
2972 self.cluster.reserved_lvs = self.op.reserved_lvs
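# helper applying DDM_ADD/DDM_REMOVE modifications to the hidden_os and
# blacklisted_os lists kept on the cluster object (used right below)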
2974 def helper_os(aname, mods, desc):
2976 lst = getattr(self.cluster, aname)
2977 for key, val in mods:
2978 if key == constants.DDM_ADD:
2980 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2983 elif key == constants.DDM_REMOVE:
2987 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2989 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2991 if self.op.hidden_os:
2992 helper_os("hidden_os", self.op.hidden_os, "hidden")
2994 if self.op.blacklisted_os:
2995 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2997 if self.op.master_netdev:
2998 master = self.cfg.GetMasterNode()
2999 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3000 self.cluster.master_netdev)
3001 result = self.rpc.call_node_stop_master(master, False)
3002 result.Raise("Could not disable the master ip")
3003 feedback_fn("Changing master_netdev from %s to %s" %
3004 (self.cluster.master_netdev, self.op.master_netdev))
3005 self.cluster.master_netdev = self.op.master_netdev
3007 self.cfg.Update(self.cluster, feedback_fn)
3009 if self.op.master_netdev:
3010 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3011 self.op.master_netdev)
3012 result = self.rpc.call_node_start_master(master, False, False)
3014 self.LogWarning("Could not re-enable the master ip on"
3015 " the master, please restart manually: %s",
3019 def _UploadHelper(lu, nodes, fname):
3020 """Helper for uploading a file and showing warnings.
3023 if os.path.exists(fname):
3024 result = lu.rpc.call_upload_file(nodes, fname)
3025 for to_node, to_result in result.items():
3026 msg = to_result.fail_msg
3028 msg = ("Copy of file %s to node %s failed: %s" %
3029 (fname, to_node, msg))
3030 lu.proc.LogWarning(msg)
3033 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3034 """Distribute additional files which are part of the cluster configuration.
3036 ConfigWriter takes care of distributing the config and ssconf files, but
3037 there are more files which should be distributed to all nodes. This function
3038 makes sure those are copied.
3040 @param lu: calling logical unit
3041 @param additional_nodes: list of nodes not in the config to distribute to
3042 @type additional_vm: boolean
3043 @param additional_vm: whether the additional nodes are vm-capable or not
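A typical caller is L{LUClusterRedistConf} below, which simply invokes
this as _RedistributeAncillaryFiles(self) to push the files to all
online nodes.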
3046 # 1. Gather target nodes
3047 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3048 dist_nodes = lu.cfg.GetOnlineNodeList()
3049 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3050 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3051 if additional_nodes is not None:
3052 dist_nodes.extend(additional_nodes)
3054 vm_nodes.extend(additional_nodes)
3055 if myself.name in dist_nodes:
3056 dist_nodes.remove(myself.name)
3057 if myself.name in vm_nodes:
3058 vm_nodes.remove(myself.name)
3060 # 2. Gather files to distribute
3061 dist_files = set([constants.ETC_HOSTS,
3062 constants.SSH_KNOWN_HOSTS_FILE,
3063 constants.RAPI_CERT_FILE,
3064 constants.RAPI_USERS_FILE,
3065 constants.CONFD_HMAC_KEY,
3066 constants.CLUSTER_DOMAIN_SECRET_FILE,
3070 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3071 for hv_name in enabled_hypervisors:
3072 hv_class = hypervisor.GetHypervisor(hv_name)
3073 vm_files.update(hv_class.GetAncillaryFiles())
3075 # 3. Perform the files upload
3076 for fname in dist_files:
3077 _UploadHelper(lu, dist_nodes, fname)
3078 for fname in vm_files:
3079 _UploadHelper(lu, vm_nodes, fname)
3082 class LUClusterRedistConf(NoHooksLU):
3083 """Force the redistribution of cluster configuration.
3085 This is a very simple LU.
3090 def ExpandNames(self):
3091 self.needed_locks = {
3092 locking.LEVEL_NODE: locking.ALL_SET,
3094 self.share_locks[locking.LEVEL_NODE] = 1
3096 def Exec(self, feedback_fn):
3097 """Redistribute the configuration.
3100 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3101 _RedistributeAncillaryFiles(self)
3104 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3105 """Sleep and poll for an instance's disk to sync.
3108 if not instance.disks or disks is not None and not disks:
3111 disks = _ExpandCheckDisks(instance, disks)
3114 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3116 node = instance.primary_node
3119 lu.cfg.SetDiskID(dev, node)
3121 # TODO: Convert to utils.Retry
3124 degr_retries = 10 # in seconds, as we sleep 1 second each time
3128 cumul_degraded = False
3129 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3130 msg = rstats.fail_msg
3132 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3135 raise errors.RemoteError("Can't contact node %s for mirror data,"
3136 " aborting." % node)
3139 rstats = rstats.payload
3141 for i, mstat in enumerate(rstats):
3143 lu.LogWarning("Can't compute data for node %s/%s",
3144 node, disks[i].iv_name)
3147 cumul_degraded = (cumul_degraded or
3148 (mstat.is_degraded and mstat.sync_percent is None))
3149 if mstat.sync_percent is not None:
3151 if mstat.estimated_time is not None:
3152 rem_time = ("%s remaining (estimated)" %
3153 utils.FormatSeconds(mstat.estimated_time))
3154 max_time = mstat.estimated_time
3156 rem_time = "no time estimate"
3157 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3158 (disks[i].iv_name, mstat.sync_percent, rem_time))
3160 # if we're done but degraded, let's do a few small retries, to
3161 # make sure we see a stable and not transient situation; therefore
3162 # we force restart of the loop
3163 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3164 logging.info("Degraded disks found, %d retries left", degr_retries)
3172 time.sleep(min(60, max_time))
3175 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3176 return not cumul_degraded
3179 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3180 """Check that mirrors are not degraded.
3182 The ldisk parameter, if True, will change the test from the
3183 is_degraded attribute (which represents overall non-ok status for
3184 the device(s)) to the ldisk (representing the local storage status).
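@return: True if the device (and, recursively, its children) passes the
chosen test on the given node, False otherwise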
3187 lu.cfg.SetDiskID(dev, node)
3191 if on_primary or dev.AssembleOnSecondary():
3192 rstats = lu.rpc.call_blockdev_find(node, dev)
3193 msg = rstats.fail_msg
3195 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3197 elif not rstats.payload:
3198 lu.LogWarning("Can't find disk on node %s", node)
3202 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3204 result = result and not rstats.payload.is_degraded
3207 for child in dev.children:
3208 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3213 class LUOobCommand(NoHooksLU):
3214 """Logical unit for OOB handling.
3219 def CheckPrereq(self):
3220 """Check prerequisites.
3223 - the node exists in the configuration
3226 Any errors are signaled by raising errors.OpPrereqError.
3230 master_node = self.cfg.GetMasterNode()
3231 for node_name in self.op.node_names:
3232 node = self.cfg.GetNodeInfo(node_name)
3235 raise errors.OpPrereqError("Node %s not found" % node_name,
3238 self.nodes.append(node)
3240 if (not self.op.ignore_status and
3241 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3242 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3243 " not marked offline") % node_name,
3246 if self.op.command in (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE):
3247 # This does two things: it checks whether the master is in the list and,
3248 # if so and force_master is set, moves it to the end so the master is done last
3250 self.op.node_names.remove(master_node)
3254 if self.op.force_master:
3255 self.op.node_names.append(master_node)
3257 self.LogWarning("Master %s was skipped, use the force master"
3258 " option to operate on the master too",
3260 if not self.op.node_names:
3261 raise errors.OpPrereqError("No nodes left to operate on, aborting",
3264 assert (master_node not in self.op.node_names or
3265 self.op.node_names[-1] == master_node)
3267 def ExpandNames(self):
3268 """Gather locks we need.
3271 if self.op.node_names:
3272 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3273 for name in self.op.node_names]
3275 self.op.node_names = self.cfg.GetNodeList()
3277 self.needed_locks = {
3278 locking.LEVEL_NODE: self.op.node_names,
3281 def Exec(self, feedback_fn):
3282 """Execute OOB and return result if we expect any.
3285 master_node = self.cfg.GetMasterNode()
3288 for node in self.nodes:
3289 node_entry = [(constants.RS_NORMAL, node.name)]
3290 ret.append(node_entry)
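# node_entry accumulates (status, data) tuples for this node; the first
# entry names the node, later entries carry the per-command results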
3292 oob_program = _SupportsOob(self.cfg, node)
3295 node_entry.append((constants.RS_UNAVAIL, None))
3298 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3299 self.op.command, oob_program, node.name)
3300 result = self.rpc.call_run_oob(master_node, oob_program,
3301 self.op.command, node.name,
3305 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3306 node.name, result.fail_msg)
3307 node_entry.append((constants.RS_NODATA, None))
3310 self._CheckPayload(result)
3311 except errors.OpExecError, err:
3312 self.LogWarning("The payload returned by '%s' is not valid: %s",
3314 node_entry.append((constants.RS_NODATA, None))
3316 if self.op.command == constants.OOB_HEALTH:
3317 # For health we should log important events
3318 for item, status in result.payload:
3319 if status in [constants.OOB_STATUS_WARNING,
3320 constants.OOB_STATUS_CRITICAL]:
3321 self.LogWarning("On node '%s' item '%s' has status '%s'",
3322 node.name, item, status)
3324 if self.op.command == constants.OOB_POWER_ON:
3326 elif self.op.command == constants.OOB_POWER_OFF:
3327 node.powered = False
3328 elif self.op.command == constants.OOB_POWER_STATUS:
3329 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3330 if powered != node.powered:
3331 logging.warning(("Recorded power state (%s) of node '%s' does not"
3332 " match actual power state (%s)"), node.powered,
3335 # For configuration changing commands we should update the node
3336 if self.op.command in (constants.OOB_POWER_ON,
3337 constants.OOB_POWER_OFF):
3338 self.cfg.Update(node, feedback_fn)
3340 node_entry.append((constants.RS_NORMAL, result.payload))
3344 def _CheckPayload(self, result):
3345 """Checks if the payload is valid.
3347 @param result: RPC result
3348 @raises errors.OpExecError: If payload is not valid
3352 if self.op.command == constants.OOB_HEALTH:
3353 if not isinstance(result.payload, list):
3354 errs.append("command 'health' is expected to return a list but got %s" %
3355 type(result.payload))
3357 for item, status in result.payload:
3358 if status not in constants.OOB_STATUSES:
3359 errs.append("health item '%s' has invalid status '%s'" %
3362 if self.op.command == constants.OOB_POWER_STATUS:
3363 if not isinstance(result.payload, dict):
3364 errs.append("power-status is expected to return a dict but got %s" %
3365 type(result.payload))
3367 if self.op.command in [
3368 constants.OOB_POWER_ON,
3369 constants.OOB_POWER_OFF,
3370 constants.OOB_POWER_CYCLE,
3372 if result.payload is not None:
3373 errs.append("%s is expected to not return payload but got '%s'" %
3374 (self.op.command, result.payload))
3377 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3378 utils.CommaJoin(errs))
3382 class LUOsDiagnose(NoHooksLU):
3383 """Logical unit for OS diagnose/query.
3388 _BLK = "blacklisted"
3390 _FIELDS_STATIC = utils.FieldSet()
3391 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3392 "parameters", "api_versions", _HID, _BLK)
3394 def CheckArguments(self):
3396 raise errors.OpPrereqError("Selective OS query not supported",
3399 _CheckOutputFields(static=self._FIELDS_STATIC,
3400 dynamic=self._FIELDS_DYNAMIC,
3401 selected=self.op.output_fields)
3403 def ExpandNames(self):
3404 # Lock all nodes, in shared mode
3405 # Temporary removal of locks, should be reverted later
3406 # TODO: reintroduce locks when they are lighter-weight
3407 self.needed_locks = {}
3408 #self.share_locks[locking.LEVEL_NODE] = 1
3409 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3412 def _DiagnoseByOS(rlist):
3413 """Remaps a per-node return list into an a per-os per-node dictionary
3415 @param rlist: a map with node names as keys and OS objects as values
3418 @return: a dictionary with osnames as keys and as value another
3419 map, with nodes as keys and tuples of (path, status, diagnose,
3420 variants, parameters, api_versions) as values, eg::
3422 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3423 (/srv/..., False, "invalid api")],
3424 "node2": [(/srv/..., True, "", [], [])]}
3429 # we build here the list of nodes that didn't fail the RPC (at RPC
3430 # level), so that nodes with a non-responding node daemon don't
3431 # make all OSes invalid
3432 good_nodes = [node_name for node_name in rlist
3433 if not rlist[node_name].fail_msg]
3434 for node_name, nr in rlist.items():
3435 if nr.fail_msg or not nr.payload:
3437 for (name, path, status, diagnose, variants,
3438 params, api_versions) in nr.payload:
3439 if name not in all_os:
3440 # build a list of nodes for this os containing empty lists
3441 # for each node in node_list
3443 for nname in good_nodes:
3444 all_os[name][nname] = []
3445 # convert params from [name, help] to (name, help)
3446 params = [tuple(v) for v in params]
3447 all_os[name][node_name].append((path, status, diagnose,
3448 variants, params, api_versions))
3451 def Exec(self, feedback_fn):
3452 """Compute the list of OSes.
3455 valid_nodes = [node.name
3456 for node in self.cfg.GetAllNodesInfo().values()
3457 if not node.offline and node.vm_capable]
3458 node_data = self.rpc.call_os_diagnose(valid_nodes)
3459 pol = self._DiagnoseByOS(node_data)
3461 cluster = self.cfg.GetClusterInfo()
3463 for os_name in utils.NiceSort(pol.keys()):
3464 os_data = pol[os_name]
3467 (variants, params, api_versions) = null_state = (set(), set(), set())
3468 for idx, osl in enumerate(os_data.values()):
3469 valid = bool(valid and osl and osl[0][1])
3471 (variants, params, api_versions) = null_state
3473 node_variants, node_params, node_api = osl[0][3:6]
3474 if idx == 0: # first entry
3475 variants = set(node_variants)
3476 params = set(node_params)
3477 api_versions = set(node_api)
3478 else: # keep consistency
3479 variants.intersection_update(node_variants)
3480 params.intersection_update(node_params)
3481 api_versions.intersection_update(node_api)
3483 is_hid = os_name in cluster.hidden_os
3484 is_blk = os_name in cluster.blacklisted_os
3485 if ((self._HID not in self.op.output_fields and is_hid) or
3486 (self._BLK not in self.op.output_fields and is_blk) or
3487 (self._VLD not in self.op.output_fields and not valid)):
3490 for field in self.op.output_fields:
3493 elif field == self._VLD:
3495 elif field == "node_status":
3496 # this is just a copy of the dict
3498 for node_name, nos_list in os_data.items():
3499 val[node_name] = nos_list
3500 elif field == "variants":
3501 val = utils.NiceSort(list(variants))
3502 elif field == "parameters":
3504 elif field == "api_versions":
3505 val = list(api_versions)
3506 elif field == self._HID:
3508 elif field == self._BLK:
3511 raise errors.ParameterError(field)
3518 class LUNodeRemove(LogicalUnit):
3519 """Logical unit for removing a node.
3522 HPATH = "node-remove"
3523 HTYPE = constants.HTYPE_NODE
3525 def BuildHooksEnv(self):
3528 This doesn't run on the target node in the pre phase as a failed
3529 node would then be impossible to remove.
3533 "OP_TARGET": self.op.node_name,
3534 "NODE_NAME": self.op.node_name,
3536 all_nodes = self.cfg.GetNodeList()
3538 all_nodes.remove(self.op.node_name)
3540 logging.warning("Node %s which is about to be removed not found"
3541 " in the all nodes list", self.op.node_name)
3542 return env, all_nodes, all_nodes
3544 def CheckPrereq(self):
3545 """Check prerequisites.
3548 - the node exists in the configuration
3549 - it does not have primary or secondary instances
3550 - it's not the master
3552 Any errors are signaled by raising errors.OpPrereqError.
3555 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3556 node = self.cfg.GetNodeInfo(self.op.node_name)
3557 assert node is not None
3559 instance_list = self.cfg.GetInstanceList()
3561 masternode = self.cfg.GetMasterNode()
3562 if node.name == masternode:
3563 raise errors.OpPrereqError("Node is the master node,"
3564 " you need to failover first.",
3567 for instance_name in instance_list:
3568 instance = self.cfg.GetInstanceInfo(instance_name)
3569 if node.name in instance.all_nodes:
3570 raise errors.OpPrereqError("Instance %s is still running on the node,"
3571 " please remove first." % instance_name,
3573 self.op.node_name = node.name
3576 def Exec(self, feedback_fn):
3577 """Removes the node from the cluster.
3581 logging.info("Stopping the node daemon and removing configs from node %s",
3584 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3586 # Promote nodes to master candidate as needed
3587 _AdjustCandidatePool(self, exceptions=[node.name])
3588 self.context.RemoveNode(node.name)
3590 # Run post hooks on the node before it's removed
3591 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3593 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3595 # pylint: disable-msg=W0702
3596 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3598 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3599 msg = result.fail_msg
3601 self.LogWarning("Errors encountered on the remote node while leaving"
3602 " the cluster: %s", msg)
3604 # Remove node from our /etc/hosts
3605 if self.cfg.GetClusterInfo().modify_etc_hosts:
3606 master_node = self.cfg.GetMasterNode()
3607 result = self.rpc.call_etc_hosts_modify(master_node,
3608 constants.ETC_HOSTS_REMOVE,
3610 result.Raise("Can't update hosts file with new host data")
3611 _RedistributeAncillaryFiles(self)
3614 class _NodeQuery(_QueryBase):
3615 FIELDS = query.NODE_FIELDS
3617 def ExpandNames(self, lu):
3618 lu.needed_locks = {}
3619 lu.share_locks[locking.LEVEL_NODE] = 1
3622 self.wanted = _GetWantedNodes(lu, self.names)
3624 self.wanted = locking.ALL_SET
3626 self.do_locking = (self.use_locking and
3627 query.NQ_LIVE in self.requested_data)
3630 # if we don't request only static fields, we need to lock the nodes
3631 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3633 def DeclareLocks(self, lu, level):
3636 def _GetQueryData(self, lu):
3637 """Computes the list of nodes and their attributes.
3640 all_info = lu.cfg.GetAllNodesInfo()
3642 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3644 # Gather data as requested
3645 if query.NQ_LIVE in self.requested_data:
3646 node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3647 lu.cfg.GetHypervisorType())
3648 live_data = dict((name, nresult.payload)
3649 for (name, nresult) in node_data.items()
3650 if not nresult.fail_msg and nresult.payload)
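# live_data maps node name to the node_info RPC payload for nodes that
# answered the call; failed or empty results are simply left out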
3654 if query.NQ_INST in self.requested_data:
3655 node_to_primary = dict([(name, set()) for name in nodenames])
3656 node_to_secondary = dict([(name, set()) for name in nodenames])
3658 inst_data = lu.cfg.GetAllInstancesInfo()
3660 for inst in inst_data.values():
3661 if inst.primary_node in node_to_primary:
3662 node_to_primary[inst.primary_node].add(inst.name)
3663 for secnode in inst.secondary_nodes:
3664 if secnode in node_to_secondary:
3665 node_to_secondary[secnode].add(inst.name)
3667 node_to_primary = None
3668 node_to_secondary = None
3670 if query.NQ_OOB in self.requested_data:
3671 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3672 for name, node in all_info.iteritems())
3676 if query.NQ_GROUP in self.requested_data:
3677 groups = lu.cfg.GetAllNodeGroupsInfo()
3681 return query.NodeQueryData([all_info[name] for name in nodenames],
3682 live_data, lu.cfg.GetMasterNode(),
3683 node_to_primary, node_to_secondary, groups,
3684 oob_support, lu.cfg.GetClusterInfo())
3687 class LUNodeQuery(NoHooksLU):
3688 """Logical unit for querying nodes.
3691 # pylint: disable-msg=W0142
3694 def CheckArguments(self):
3695 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3696 self.op.use_locking)
3698 def ExpandNames(self):
3699 self.nq.ExpandNames(self)
3701 def Exec(self, feedback_fn):
3702 return self.nq.OldStyleQuery(self)
3705 class LUNodeQueryvols(NoHooksLU):
3706 """Logical unit for getting volumes on node(s).
3710 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3711 _FIELDS_STATIC = utils.FieldSet("node")
3713 def CheckArguments(self):
3714 _CheckOutputFields(static=self._FIELDS_STATIC,
3715 dynamic=self._FIELDS_DYNAMIC,
3716 selected=self.op.output_fields)
3718 def ExpandNames(self):
3719 self.needed_locks = {}
3720 self.share_locks[locking.LEVEL_NODE] = 1
3721 if not self.op.nodes:
3722 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3724 self.needed_locks[locking.LEVEL_NODE] = \
3725 _GetWantedNodes(self, self.op.nodes)
3727 def Exec(self, feedback_fn):
3728 """Computes the list of nodes and their attributes.
3731 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3732 volumes = self.rpc.call_node_volumes(nodenames)
3734 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3735 in self.cfg.GetInstanceList()]
3737 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3740 for node in nodenames:
3741 nresult = volumes[node]
3744 msg = nresult.fail_msg
3746 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3749 node_vols = nresult.payload[:]
3750 node_vols.sort(key=lambda vol: vol['dev'])
3752 for vol in node_vols:
3754 for field in self.op.output_fields:
3757 elif field == "phys":
3761 elif field == "name":
3763 elif field == "size":
3764 val = int(float(vol['size']))
3765 elif field == "instance":
3767 if node not in lv_by_node[inst]:
3769 if vol['name'] in lv_by_node[inst][node]:
3775 raise errors.ParameterError(field)
3776 node_output.append(str(val))
3778 output.append(node_output)
3783 class LUNodeQueryStorage(NoHooksLU):
3784 """Logical unit for getting information on storage units on node(s).
3787 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3790 def CheckArguments(self):
3791 _CheckOutputFields(static=self._FIELDS_STATIC,
3792 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3793 selected=self.op.output_fields)
3795 def ExpandNames(self):
3796 self.needed_locks = {}
3797 self.share_locks[locking.LEVEL_NODE] = 1
3800 self.needed_locks[locking.LEVEL_NODE] = \
3801 _GetWantedNodes(self, self.op.nodes)
3803 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3805 def Exec(self, feedback_fn):
3806 """Computes the list of nodes and their attributes.
3809 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3811 # Always get name to sort by
3812 if constants.SF_NAME in self.op.output_fields:
3813 fields = self.op.output_fields[:]
3815 fields = [constants.SF_NAME] + self.op.output_fields
3817 # Never ask for node or type as it's only known to the LU
3818 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3819 while extra in fields:
3820 fields.remove(extra)
3822 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3823 name_idx = field_idx[constants.SF_NAME]
3825 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3826 data = self.rpc.call_storage_list(self.nodes,
3827 self.op.storage_type, st_args,
3828 self.op.name, fields)
3832 for node in utils.NiceSort(self.nodes):
3833 nresult = data[node]
3837 msg = nresult.fail_msg
3839 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3842 rows = dict([(row[name_idx], row) for row in nresult.payload])
3844 for name in utils.NiceSort(rows.keys()):
3849 for field in self.op.output_fields:
3850 if field == constants.SF_NODE:
3852 elif field == constants.SF_TYPE:
3853 val = self.op.storage_type
3854 elif field in field_idx:
3855 val = row[field_idx[field]]
3857 raise errors.ParameterError(field)
3866 class _InstanceQuery(_QueryBase):
3867 FIELDS = query.INSTANCE_FIELDS
3869 def ExpandNames(self, lu):
3870 lu.needed_locks = {}
3871 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3872 lu.share_locks[locking.LEVEL_NODE] = 1
3875 self.wanted = _GetWantedInstances(lu, self.names)
3877 self.wanted = locking.ALL_SET
3879 self.do_locking = (self.use_locking and
3880 query.IQ_LIVE in self.requested_data)
3882 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3883 lu.needed_locks[locking.LEVEL_NODE] = []
3884 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3886 def DeclareLocks(self, lu, level):
3887 if level == locking.LEVEL_NODE and self.do_locking:
3888 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3890 def _GetQueryData(self, lu):
3891 """Computes the list of instances and their attributes.
3894 all_info = lu.cfg.GetAllInstancesInfo()
3896 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3898 instance_list = [all_info[name] for name in instance_names]
3899 nodes = frozenset([inst.primary_node for inst in instance_list])
3900 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3904 # Gather data as requested
3905 if query.IQ_LIVE in self.requested_data:
3907 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3909 result = node_data[name]
3911 # offline nodes will be in both lists
3912 assert result.fail_msg
3913 offline_nodes.append(name)
3915 bad_nodes.append(name)
3916 elif result.payload:
3917 live_data.update(result.payload)
3918 # else no instance is alive
3922 if query.IQ_DISKUSAGE in self.requested_data:
3923 disk_usage = dict((inst.name,
3924 _ComputeDiskSize(inst.disk_template,
3925 [{"size": disk.size}
3926 for disk in inst.disks]))
3927 for inst in instance_list)
3931 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3932 disk_usage, offline_nodes, bad_nodes,
3936 class LUQuery(NoHooksLU):
3937 """Query for resources/items of a certain kind.
3940 # pylint: disable-msg=W0142
3943 def CheckArguments(self):
3944 qcls = _GetQueryImplementation(self.op.what)
3945 names = qlang.ReadSimpleFilter("name", self.op.filter)
3947 self.impl = qcls(names, self.op.fields, False)
3949 def ExpandNames(self):
3950 self.impl.ExpandNames(self)
3952 def DeclareLocks(self, level):
3953 self.impl.DeclareLocks(self, level)
3955 def Exec(self, feedback_fn):
3956 return self.impl.NewStyleQuery(self)
3959 class LUQueryFields(NoHooksLU):
3960 """Query for resources/items of a certain kind.
3963 # pylint: disable-msg=W0142
3966 def CheckArguments(self):
3967 self.qcls = _GetQueryImplementation(self.op.what)
3969 def ExpandNames(self):
3970 self.needed_locks = {}
3972 def Exec(self, feedback_fn):
3973 return self.qcls.FieldsQuery(self.op.fields)
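# LUQueryFields only describes the fields available for a resource kind, so
# it needs no locks; Exec simply delegates to the query implementation's
# FieldsQuery helper.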
3976 class LUNodeModifyStorage(NoHooksLU):
3977 """Logical unit for modifying a storage volume on a node.
3982 def CheckArguments(self):
3983 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3985 storage_type = self.op.storage_type
3988 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3990 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3991 " modified" % storage_type,
3994 diff = set(self.op.changes.keys()) - modifiable
3996 raise errors.OpPrereqError("The following fields can not be modified for"
3997 " storage units of type '%s': %r" %
3998 (storage_type, list(diff)),
4001 def ExpandNames(self):
4002 self.needed_locks = {
4003 locking.LEVEL_NODE: self.op.node_name,
4006 def Exec(self, feedback_fn):
4007 """Computes the list of nodes and their attributes.
4010 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4011 result = self.rpc.call_storage_modify(self.op.node_name,
4012 self.op.storage_type, st_args,
4013 self.op.name, self.op.changes)
4014 result.Raise("Failed to modify storage unit '%s' on %s" %
4015 (self.op.name, self.op.node_name))
4018 class LUNodeAdd(LogicalUnit):
4019 """Logical unit for adding node to the cluster.
4023 HTYPE = constants.HTYPE_NODE
4024 _NFLAGS = ["master_capable", "vm_capable"]
4026 def CheckArguments(self):
4027 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4028 # validate/normalize the node name
4029 self.hostname = netutils.GetHostname(name=self.op.node_name,
4030 family=self.primary_ip_family)
4031 self.op.node_name = self.hostname.name
4032 if self.op.readd and self.op.group:
4033 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4034 " being readded", errors.ECODE_INVAL)
4036 def BuildHooksEnv(self):
4039 This will run on all nodes before, and on all nodes + the new node after.
4043 "OP_TARGET": self.op.node_name,
4044 "NODE_NAME": self.op.node_name,
4045 "NODE_PIP": self.op.primary_ip,
4046 "NODE_SIP": self.op.secondary_ip,
4047 "MASTER_CAPABLE": str(self.op.master_capable),
4048 "VM_CAPABLE": str(self.op.vm_capable),
4050 nodes_0 = self.cfg.GetNodeList()
4051 nodes_1 = nodes_0 + [self.op.node_name, ]
4052 return env, nodes_0, nodes_1
4054 def CheckPrereq(self):
4055 """Check prerequisites.
4058 - the new node is not already in the config
4060 - its parameters (single/dual homed) match the cluster
4062 Any errors are signaled by raising errors.OpPrereqError.
4066 hostname = self.hostname
4067 node = hostname.name
4068 primary_ip = self.op.primary_ip = hostname.ip
4069 if self.op.secondary_ip is None:
4070 if self.primary_ip_family == netutils.IP6Address.family:
4071 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4072 " IPv4 address must be given as secondary",
4074 self.op.secondary_ip = primary_ip
4076 secondary_ip = self.op.secondary_ip
4077 if not netutils.IP4Address.IsValid(secondary_ip):
4078 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4079 " address" % secondary_ip, errors.ECODE_INVAL)
4081 node_list = cfg.GetNodeList()
4082 if not self.op.readd and node in node_list:
4083 raise errors.OpPrereqError("Node %s is already in the configuration" %
4084 node, errors.ECODE_EXISTS)
4085 elif self.op.readd and node not in node_list:
4086 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4089 self.changed_primary_ip = False
4091 for existing_node_name in node_list:
4092 existing_node = cfg.GetNodeInfo(existing_node_name)
4094 if self.op.readd and node == existing_node_name:
4095 if existing_node.secondary_ip != secondary_ip:
4096 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4097 " address configuration as before",
4099 if existing_node.primary_ip != primary_ip:
4100 self.changed_primary_ip = True
4104 if (existing_node.primary_ip == primary_ip or
4105 existing_node.secondary_ip == primary_ip or
4106 existing_node.primary_ip == secondary_ip or
4107 existing_node.secondary_ip == secondary_ip):
4108 raise errors.OpPrereqError("New node ip address(es) conflict with"
4109 " existing node %s" % existing_node.name,
4110 errors.ECODE_NOTUNIQUE)
4112 # After this 'if' block, None is no longer a valid value for the
4113 # _capable op attributes
4115 old_node = self.cfg.GetNodeInfo(node)
4116 assert old_node is not None, "Can't retrieve locked node %s" % node
4117 for attr in self._NFLAGS:
4118 if getattr(self.op, attr) is None:
4119 setattr(self.op, attr, getattr(old_node, attr))
4121 for attr in self._NFLAGS:
4122 if getattr(self.op, attr) is None:
4123 setattr(self.op, attr, True)
4125 if self.op.readd and not self.op.vm_capable:
4126 pri, sec = cfg.GetNodeInstances(node)
4128 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4129 " flag set to false, but it already holds"
4130 " instances" % node,
4133 # check that the type of the node (single versus dual homed) is the
4134 # same as for the master
4135 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4136 master_singlehomed = myself.secondary_ip == myself.primary_ip
4137 newbie_singlehomed = secondary_ip == primary_ip
4138 if master_singlehomed != newbie_singlehomed:
4139 if master_singlehomed:
4140 raise errors.OpPrereqError("The master has no secondary ip but the"
4141 " new node has one",
4144 raise errors.OpPrereqError("The master has a secondary ip but the"
4145 " new node doesn't have one",
4148 # checks reachability
4149 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4150 raise errors.OpPrereqError("Node not reachable by ping",
4151 errors.ECODE_ENVIRON)
4153 if not newbie_singlehomed:
4154 # check reachability from my secondary ip to newbie's secondary ip
4155 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4156 source=myself.secondary_ip):
4157 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4158 " based ping to node daemon port",
4159 errors.ECODE_ENVIRON)
4166 if self.op.master_capable:
4167 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4169 self.master_candidate = False
4172 self.new_node = old_node
4174 node_group = cfg.LookupNodeGroup(self.op.group)
4175 self.new_node = objects.Node(name=node,
4176 primary_ip=primary_ip,
4177 secondary_ip=secondary_ip,
4178 master_candidate=self.master_candidate,
4179 offline=False, drained=False,
4182 if self.op.ndparams:
4183 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4185 def Exec(self, feedback_fn):
4186 """Adds the new node to the cluster.
4189 new_node = self.new_node
4190 node = new_node.name
4192 # We are adding a new node, so we assume it's powered
4193 new_node.powered = True
4195 # for re-adds, reset the offline/drained/master-candidate flags;
4196 # we need to reset here, otherwise offline would prevent RPC calls
4197 # later in the procedure; this also means that if the re-add
4198 # fails, we are left with a non-offlined, broken node
4200 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4201 self.LogInfo("Readding a node, the offline/drained flags were reset")
4202 # if we demote the node, we do cleanup later in the procedure
4203 new_node.master_candidate = self.master_candidate
4204 if self.changed_primary_ip:
4205 new_node.primary_ip = self.op.primary_ip
4207 # copy the master/vm_capable flags
4208 for attr in self._NFLAGS:
4209 setattr(new_node, attr, getattr(self.op, attr))
4211 # notify the user about any possible mc promotion
4212 if new_node.master_candidate:
4213 self.LogInfo("Node will be a master candidate")
4215 if self.op.ndparams:
4216 new_node.ndparams = self.op.ndparams
4218 new_node.ndparams = {}
4220 # check connectivity
4221 result = self.rpc.call_version([node])[node]
4222 result.Raise("Can't get version information from node %s" % node)
4223 if constants.PROTOCOL_VERSION == result.payload:
4224 logging.info("Communication to node %s fine, sw version %s match",
4225 node, result.payload)
4227 raise errors.OpExecError("Version mismatch master version %s,"
4228 " node version %s" %
4229 (constants.PROTOCOL_VERSION, result.payload))
4231 # Add node to our /etc/hosts, and add key to known_hosts
4232 if self.cfg.GetClusterInfo().modify_etc_hosts:
4233 master_node = self.cfg.GetMasterNode()
4234 result = self.rpc.call_etc_hosts_modify(master_node,
4235 constants.ETC_HOSTS_ADD,
4238 result.Raise("Can't update hosts file with new host data")
4240 if new_node.secondary_ip != new_node.primary_ip:
4241 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4244 node_verify_list = [self.cfg.GetMasterNode()]
4245 node_verify_param = {
4246 constants.NV_NODELIST: [node],
4247 # TODO: do a node-net-test as well?
4250 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4251 self.cfg.GetClusterName())
4252 for verifier in node_verify_list:
4253 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4254 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4256 for failed in nl_payload:
4257 feedback_fn("ssh/hostname verification failed"
4258 " (checking from %s): %s" %
4259 (verifier, nl_payload[failed]))
4260 raise errors.OpExecError("ssh/hostname verification failed.")
4263 _RedistributeAncillaryFiles(self)
4264 self.context.ReaddNode(new_node)
4265 # make sure we redistribute the config
4266 self.cfg.Update(new_node, feedback_fn)
4267 # and make sure the new node will not have old files around
4268 if not new_node.master_candidate:
4269 result = self.rpc.call_node_demote_from_mc(new_node.name)
4270 msg = result.fail_msg
4272 self.LogWarning("Node failed to demote itself from master"
4273 " candidate status: %s" % msg)
4275 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4276 additional_vm=self.op.vm_capable)
4277 self.context.AddNode(new_node, self.proc.GetECId())
4280 class LUNodeSetParams(LogicalUnit):
4281 """Modifies the parameters of a node.
4283 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4284 to the node role (as _ROLE_*)
4285 @cvar _R2F: a dictionary from node role to tuples of flags
4286 @cvar _FLAGS: a list of attribute names corresponding to the flags
4289 HPATH = "node-modify"
4290 HTYPE = constants.HTYPE_NODE
4292 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4294 (True, False, False): _ROLE_CANDIDATE,
4295 (False, True, False): _ROLE_DRAINED,
4296 (False, False, True): _ROLE_OFFLINE,
4297 (False, False, False): _ROLE_REGULAR,
4299 _R2F = dict((v, k) for k, v in _F2R.items())
4300 _FLAGS = ["master_candidate", "drained", "offline"]
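# Illustration of the mapping above: the flag tuple (master_candidate,
# drained, offline) == (True, False, False) corresponds to _ROLE_CANDIDATE,
# and _R2F inverts the table so that Exec below can write the flag tuple for
# the new role back onto the node object.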
4302 def CheckArguments(self):
4303 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4304 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4305 self.op.master_capable, self.op.vm_capable,
4306 self.op.secondary_ip, self.op.ndparams]
4307 if all_mods.count(None) == len(all_mods):
4308 raise errors.OpPrereqError("Please pass at least one modification",
4310 if all_mods.count(True) > 1:
4311 raise errors.OpPrereqError("Can't set the node into more than one"
4312 " state at the same time",
4315 # Boolean value that tells us whether we might be demoting from MC
4316 self.might_demote = (self.op.master_candidate == False or
4317 self.op.offline == True or
4318 self.op.drained == True or
4319 self.op.master_capable == False)
4321 if self.op.secondary_ip:
4322 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4323 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4324 " address" % self.op.secondary_ip,
4327 self.lock_all = self.op.auto_promote and self.might_demote
4328 self.lock_instances = self.op.secondary_ip is not None
4330 def ExpandNames(self):
4332 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4334 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4336 if self.lock_instances:
4337 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4339 def DeclareLocks(self, level):
4340 # If we have locked all instances, before waiting to lock nodes, release
4341 # all the ones living on nodes unrelated to the current operation.
4342 if level == locking.LEVEL_NODE and self.lock_instances:
4343 instances_release = []
4345 self.affected_instances = []
4346 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4347 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4348 instance = self.context.cfg.GetInstanceInfo(instance_name)
4349 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4350 if i_mirrored and self.op.node_name in instance.all_nodes:
4351 instances_keep.append(instance_name)
4352 self.affected_instances.append(instance)
4354 instances_release.append(instance_name)
4355 if instances_release:
4356 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4357 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4359 def BuildHooksEnv(self):
4362 This runs on the master node.
4366 "OP_TARGET": self.op.node_name,
4367 "MASTER_CANDIDATE": str(self.op.master_candidate),
4368 "OFFLINE": str(self.op.offline),
4369 "DRAINED": str(self.op.drained),
4370 "MASTER_CAPABLE": str(self.op.master_capable),
4371 "VM_CAPABLE": str(self.op.vm_capable),
4373 nl = [self.cfg.GetMasterNode(),
4377 def CheckPrereq(self):
4378 """Check prerequisites.
4380 This only checks the instance list against the existing names.
4383 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4385 if (self.op.master_candidate is not None or
4386 self.op.drained is not None or
4387 self.op.offline is not None):
4388 # we can't change the master's node flags
4389 if self.op.node_name == self.cfg.GetMasterNode():
4390 raise errors.OpPrereqError("The master role can be changed"
4391 " only via master-failover",
4394 if self.op.master_candidate and not node.master_capable:
4395 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4396 " it a master candidate" % node.name,
4399 if self.op.vm_capable == False:
4400 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4402 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4403 " the vm_capable flag" % node.name,
4406 if node.master_candidate and self.might_demote and not self.lock_all:
4407 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4408 # check if after removing the current node, we're missing master candidates
4410 (mc_remaining, mc_should, _) = \
4411 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4412 if mc_remaining < mc_should:
4413 raise errors.OpPrereqError("Not enough master candidates, please"
4414 " pass auto promote option to allow"
4415 " promotion", errors.ECODE_STATE)
4417 self.old_flags = old_flags = (node.master_candidate,
4418 node.drained, node.offline)
4419 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4420 self.old_role = old_role = self._F2R[old_flags]
4422 # Check for ineffective changes
4423 for attr in self._FLAGS:
4424 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4425 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4426 setattr(self.op, attr, None)
4428 # Past this point, any flag change to False means a transition
4429 # away from the respective state, as only real changes are kept
4431 # TODO: We might query the real power state if it supports OOB
4432 if _SupportsOob(self.cfg, node):
4433 if self.op.offline is False and not (node.powered or
4434 self.op.powered == True):
4435 raise errors.OpPrereqError(("Please power on node %s first before you"
4436 " can reset offline state") %
4438 elif self.op.powered is not None:
4439 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4440 " which does not support out-of-band"
4441 " handling") % self.op.node_name)
4443 # If we're being deofflined/drained, we'll MC ourself if needed
4444 if (self.op.drained == False or self.op.offline == False or
4445 (self.op.master_capable and not node.master_capable)):
4446 if _DecideSelfPromotion(self):
4447 self.op.master_candidate = True
4448 self.LogInfo("Auto-promoting node to master candidate")
4450 # If we're no longer master capable, we'll demote ourselves from MC
4451 if self.op.master_capable == False and node.master_candidate:
4452 self.LogInfo("Demoting from master candidate")
4453 self.op.master_candidate = False
4456 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4457 if self.op.master_candidate:
4458 new_role = self._ROLE_CANDIDATE
4459 elif self.op.drained:
4460 new_role = self._ROLE_DRAINED
4461 elif self.op.offline:
4462 new_role = self._ROLE_OFFLINE
4463 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4464 # False is still in new flags, which means we're un-setting (the respective flag)
4466 new_role = self._ROLE_REGULAR
4467 else: # no new flags, nothing, keep old role
4470 self.new_role = new_role
4472 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4473 # Trying to transition out of offline status
4474 result = self.rpc.call_version([node.name])[node.name]
4476 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4477 " to report its version: %s" %
4478 (node.name, result.fail_msg),
4481 self.LogWarning("Transitioning node from offline to online state"
4482 " without using re-add. Please make sure the node"
4485 if self.op.secondary_ip:
4486 # Ok even without locking, because this can't be changed by any LU
4487 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4488 master_singlehomed = master.secondary_ip == master.primary_ip
4489 if master_singlehomed and self.op.secondary_ip:
4490 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4491 " homed cluster", errors.ECODE_INVAL)
4494 if self.affected_instances:
4495 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4496 " node has instances (%s) configured"
4497 " to use it" % self.affected_instances)
4499 # On online nodes, check that no instances are running, and that
4500 # the node has the new ip and we can reach it.
4501 for instance in self.affected_instances:
4502 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4504 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4505 if master.name != node.name:
4506 # check reachability from master secondary ip to new secondary ip
4507 if not netutils.TcpPing(self.op.secondary_ip,
4508 constants.DEFAULT_NODED_PORT,
4509 source=master.secondary_ip):
4510 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4511 " based ping to node daemon port",
4512 errors.ECODE_ENVIRON)
4514 if self.op.ndparams:
4515 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4516 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4517 self.new_ndparams = new_ndparams
4519 def Exec(self, feedback_fn):
4524 old_role = self.old_role
4525 new_role = self.new_role
4529 if self.op.ndparams:
4530 node.ndparams = self.new_ndparams
4532 if self.op.powered is not None:
4533 node.powered = self.op.powered
4535 for attr in ["master_capable", "vm_capable"]:
4536 val = getattr(self.op, attr)
4538 setattr(node, attr, val)
4539 result.append((attr, str(val)))
4541 if new_role != old_role:
4542 # Tell the node to demote itself, if no longer MC and not offline
4543 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4544 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4546 self.LogWarning("Node failed to demote itself: %s", msg)
4548 new_flags = self._R2F[new_role]
4549 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4551 result.append((desc, str(nf)))
4552 (node.master_candidate, node.drained, node.offline) = new_flags
4554 # we locked all nodes, we adjust the CP before updating this node
4556 _AdjustCandidatePool(self, [node.name])
4558 if self.op.secondary_ip:
4559 node.secondary_ip = self.op.secondary_ip
4560 result.append(("secondary_ip", self.op.secondary_ip))
4562 # this will trigger configuration file update, if needed
4563 self.cfg.Update(node, feedback_fn)
4565 # this will trigger job queue propagation or cleanup if the mc flag changed
4567 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4568 self.context.ReaddNode(node)
4573 class LUNodePowercycle(NoHooksLU):
4574 """Powercycles a node.
4579 def CheckArguments(self):
4580 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4581 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4582 raise errors.OpPrereqError("The node is the master and the force"
4583 " parameter was not set",
4586 def ExpandNames(self):
4587 """Locking for PowercycleNode.
4589 This is a last-resort option and shouldn't block on other
4590 jobs. Therefore, we grab no locks.
4593 self.needed_locks = {}
4595 def Exec(self, feedback_fn):
4599 result = self.rpc.call_node_powercycle(self.op.node_name,
4600 self.cfg.GetHypervisorType())
4601 result.Raise("Failed to schedule the reboot")
4602 return result.payload
4605 class LUClusterQuery(NoHooksLU):
4606 """Query cluster configuration.
4611 def ExpandNames(self):
4612 self.needed_locks = {}
4614 def Exec(self, feedback_fn):
4615 """Return cluster config.
4618 cluster = self.cfg.GetClusterInfo()
4621 # Filter just for enabled hypervisors
4622 for os_name, hv_dict in cluster.os_hvp.items():
4623 os_hvp[os_name] = {}
4624 for hv_name, hv_params in hv_dict.items():
4625 if hv_name in cluster.enabled_hypervisors:
4626 os_hvp[os_name][hv_name] = hv_params
4628 # Convert ip_family to ip_version
4629 primary_ip_version = constants.IP4_VERSION
4630 if cluster.primary_ip_family == netutils.IP6Address.family:
4631 primary_ip_version = constants.IP6_VERSION
4634 "software_version": constants.RELEASE_VERSION,
4635 "protocol_version": constants.PROTOCOL_VERSION,
4636 "config_version": constants.CONFIG_VERSION,
4637 "os_api_version": max(constants.OS_API_VERSIONS),
4638 "export_version": constants.EXPORT_VERSION,
4639 "architecture": (platform.architecture()[0], platform.machine()),
4640 "name": cluster.cluster_name,
4641 "master": cluster.master_node,
4642 "default_hypervisor": cluster.enabled_hypervisors[0],
4643 "enabled_hypervisors": cluster.enabled_hypervisors,
4644 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4645 for hypervisor_name in cluster.enabled_hypervisors]),
4647 "beparams": cluster.beparams,
4648 "osparams": cluster.osparams,
4649 "nicparams": cluster.nicparams,
4650 "ndparams": cluster.ndparams,
4651 "candidate_pool_size": cluster.candidate_pool_size,
4652 "master_netdev": cluster.master_netdev,
4653 "volume_group_name": cluster.volume_group_name,
4654 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4655 "file_storage_dir": cluster.file_storage_dir,
4656 "maintain_node_health": cluster.maintain_node_health,
4657 "ctime": cluster.ctime,
4658 "mtime": cluster.mtime,
4659 "uuid": cluster.uuid,
4660 "tags": list(cluster.GetTags()),
4661 "uid_pool": cluster.uid_pool,
4662 "default_iallocator": cluster.default_iallocator,
4663 "reserved_lvs": cluster.reserved_lvs,
4664 "primary_ip_version": primary_ip_version,
4665 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4666 "hidden_os": cluster.hidden_os,
4667 "blacklisted_os": cluster.blacklisted_os,
4673 class LUClusterConfigQuery(NoHooksLU):
4674 """Return configuration values.
4678 _FIELDS_DYNAMIC = utils.FieldSet()
4679 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4680 "watcher_pause", "volume_group_name")
4682 def CheckArguments(self):
4683 _CheckOutputFields(static=self._FIELDS_STATIC,
4684 dynamic=self._FIELDS_DYNAMIC,
4685 selected=self.op.output_fields)
4687 def ExpandNames(self):
4688 self.needed_locks = {}
4690 def Exec(self, feedback_fn):
4691 """Dump a representation of the cluster config to the standard output.
4695 for field in self.op.output_fields:
4696 if field == "cluster_name":
4697 entry = self.cfg.GetClusterName()
4698 elif field == "master_node":
4699 entry = self.cfg.GetMasterNode()
4700 elif field == "drain_flag":
4701 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4702 elif field == "watcher_pause":
4703 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4704 elif field == "volume_group_name":
4705 entry = self.cfg.GetVGName()
4707 raise errors.ParameterError(field)
4708 values.append(entry)
4712 class LUInstanceActivateDisks(NoHooksLU):
4713 """Bring up an instance's disks.
4718 def ExpandNames(self):
4719 self._ExpandAndLockInstance()
4720 self.needed_locks[locking.LEVEL_NODE] = []
4721 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4723 def DeclareLocks(self, level):
4724 if level == locking.LEVEL_NODE:
4725 self._LockInstancesNodes()
4727 def CheckPrereq(self):
4728 """Check prerequisites.
4730 This checks that the instance is in the cluster.
4733 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4734 assert self.instance is not None, \
4735 "Cannot retrieve locked instance %s" % self.op.instance_name
4736 _CheckNodeOnline(self, self.instance.primary_node)
4738 def Exec(self, feedback_fn):
4739 """Activate the disks.
4742 disks_ok, disks_info = \
4743 _AssembleInstanceDisks(self, self.instance,
4744 ignore_size=self.op.ignore_size)
4746 raise errors.OpExecError("Cannot activate block devices")
4751 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4753 """Prepare the block devices for an instance.
4755 This sets up the block devices on all nodes.
4757 @type lu: L{LogicalUnit}
4758 @param lu: the logical unit on whose behalf we execute
4759 @type instance: L{objects.Instance}
4760 @param instance: the instance for whose disks we assemble
4761 @type disks: list of L{objects.Disk} or None
4762 @param disks: which disks to assemble (or all, if None)
4763 @type ignore_secondaries: boolean
4764 @param ignore_secondaries: if true, errors on secondary nodes
4765 won't result in an error return from the function
4766 @type ignore_size: boolean
4767 @param ignore_size: if true, the current known size of the disk
4768 will not be used during the disk activation, useful for cases
4769 when the size is wrong
4770 @return: False if the operation failed, otherwise a list of
4771 (host, instance_visible_name, node_visible_name)
4772 with the mapping from node devices to instance devices
4777 iname = instance.name
4778 disks = _ExpandCheckDisks(instance, disks)
4780 # With the two-pass mechanism we try to reduce the window of
4781 # opportunity for the race condition of switching DRBD to primary
4782 # before handshaking occurred, but we do not eliminate it
4784 # The proper fix would be to wait (with some limits) until the
4785 # connection has been made and drbd transitions from WFConnection
4786 # into any other network-connected state (Connected, SyncTarget,
4789 # 1st pass, assemble on all nodes in secondary mode
4790 for inst_disk in disks:
4791 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4793 node_disk = node_disk.Copy()
4794 node_disk.UnsetSize()
4795 lu.cfg.SetDiskID(node_disk, node)
4796 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4797 msg = result.fail_msg
4799 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4800 " (is_primary=False, pass=1): %s",
4801 inst_disk.iv_name, node, msg)
4802 if not ignore_secondaries:
4805 # FIXME: race condition on drbd migration to primary
4807 # 2nd pass, do only the primary node
4808 for inst_disk in disks:
4811 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4812 if node != instance.primary_node:
4815 node_disk = node_disk.Copy()
4816 node_disk.UnsetSize()
4817 lu.cfg.SetDiskID(node_disk, node)
4818 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4819 msg = result.fail_msg
4821 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4822 " (is_primary=True, pass=2): %s",
4823 inst_disk.iv_name, node, msg)
4826 dev_path = result.payload
4828 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4830 # leave the disks configured for the primary node
4831 # this is a workaround that would be fixed better by
4832 # improving the logical/physical id handling
4834 lu.cfg.SetDiskID(disk, instance.primary_node)
4836 return disks_ok, device_info
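# Typical caller pattern (illustrative sketch only, mirroring
# LUInstanceActivateDisks.Exec above):
#
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")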
4839 def _StartInstanceDisks(lu, instance, force):
4840 """Start the disks of an instance.
4843 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4844 ignore_secondaries=force)
4846 _ShutdownInstanceDisks(lu, instance)
4847 if force is not None and not force:
4848 lu.proc.LogWarning("", hint="If the message above refers to a"
4850 " you can retry the operation using '--force'.")
4851 raise errors.OpExecError("Disk consistency error")
4854 class LUInstanceDeactivateDisks(NoHooksLU):
4855 """Shutdown an instance's disks.
4860 def ExpandNames(self):
4861 self._ExpandAndLockInstance()
4862 self.needed_locks[locking.LEVEL_NODE] = []
4863 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4865 def DeclareLocks(self, level):
4866 if level == locking.LEVEL_NODE:
4867 self._LockInstancesNodes()
4869 def CheckPrereq(self):
4870 """Check prerequisites.
4872 This checks that the instance is in the cluster.
4875 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4876 assert self.instance is not None, \
4877 "Cannot retrieve locked instance %s" % self.op.instance_name
4879 def Exec(self, feedback_fn):
4880 """Deactivate the disks
4883 instance = self.instance
4885 _ShutdownInstanceDisks(self, instance)
4887 _SafeShutdownInstanceDisks(self, instance)
4890 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4891 """Shutdown block devices of an instance.
4893 This function checks if an instance is running before calling
4894 _ShutdownInstanceDisks.
4897 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4898 _ShutdownInstanceDisks(lu, instance, disks=disks)
4901 def _ExpandCheckDisks(instance, disks):
4902 """Return the instance disks selected by the disks list
4904 @type disks: list of L{objects.Disk} or None
4905 @param disks: selected disks
4906 @rtype: list of L{objects.Disk}
4907 @return: selected instance disks to act on
4911 return instance.disks
4913 if not set(disks).issubset(instance.disks):
4914 raise errors.ProgrammerError("Can only act on disks belonging to the"
4919 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4920 """Shutdown block devices of an instance.
4922 This does the shutdown on all nodes of the instance.
4924 If ignore_primary is false, errors on the primary node are
4929 disks = _ExpandCheckDisks(instance, disks)
4932 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4933 lu.cfg.SetDiskID(top_disk, node)
4934 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4935 msg = result.fail_msg
4937 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4938 disk.iv_name, node, msg)
4939 if ((node == instance.primary_node and not ignore_primary) or
4940 (node != instance.primary_node and not result.offline)):
4945 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4946 """Checks if a node has enough free memory.
4948 This function checks if a given node has the needed amount of free
4949 memory. In case the node has less memory or we cannot get the
4950 information from the node, this function raises an OpPrereqError
4953 @type lu: C{LogicalUnit}
4954 @param lu: a logical unit from which we get configuration data
4956 @param node: the node to check
4957 @type reason: C{str}
4958 @param reason: string to use in the error message
4959 @type requested: C{int}
4960 @param requested: the amount of memory in MiB to check for
4961 @type hypervisor_name: C{str}
4962 @param hypervisor_name: the hypervisor to ask for memory stats
4963 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4964 we cannot check the node
4967 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4968 nodeinfo[node].Raise("Can't get data from node %s" % node,
4969 prereq=True, ecode=errors.ECODE_ENVIRON)
4970 free_mem = nodeinfo[node].payload.get('memory_free', None)
4971 if not isinstance(free_mem, int):
4972 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4973 " was '%s'" % (node, free_mem),
4974 errors.ECODE_ENVIRON)
4975 if requested > free_mem:
4976 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4977 " needed %s MiB, available %s MiB" %
4978 (node, reason, requested, free_mem),
4982 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4983 """Checks if nodes have enough free disk space in the all VGs.
4985 This function checks if all given nodes have the needed amount of
4986 free disk. In case any node has less disk or we cannot get the
4987 information from the node, this function raises an OpPrereqError
4990 @type lu: C{LogicalUnit}
4991 @param lu: a logical unit from which we get configuration data
4992 @type nodenames: C{list}
4993 @param nodenames: the list of node names to check
4994 @type req_sizes: C{dict}
4995 @param req_sizes: the hash of vg and corresponding amount of disk in
4997 @raise errors.OpPrereqError: if the node doesn't have enough disk,
4998 or we cannot check the node
5001 for vg, req_size in req_sizes.items():
5002 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
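# Illustrative req_sizes value (hypothetical VG name): {"xenvg": 10240}
# would request a check for 10 GiB (10240 MiB) of free space in volume group
# "xenvg" on every node in nodenames; each (vg, size) pair is forwarded to
# _CheckNodesFreeDiskOnVG.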
5005 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5006 """Checks if nodes have enough free disk space in the specified VG.
5008 This function checks if all given nodes have the needed amount of
5009 free disk. In case any node has less disk or we cannot get the
5010 information from the node, this function raises an OpPrereqError
5013 @type lu: C{LogicalUnit}
5014 @param lu: a logical unit from which we get configuration data
5015 @type nodenames: C{list}
5016 @param nodenames: the list of node names to check
5018 @param vg: the volume group to check
5019 @type requested: C{int}
5020 @param requested: the amount of disk in MiB to check for
5021 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5022 or we cannot check the node
5025 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5026 for node in nodenames:
5027 info = nodeinfo[node]
5028 info.Raise("Cannot get current information from node %s" % node,
5029 prereq=True, ecode=errors.ECODE_ENVIRON)
5030 vg_free = info.payload.get("vg_free", None)
5031 if not isinstance(vg_free, int):
5032 raise errors.OpPrereqError("Can't compute free disk space on node"
5033 " %s for vg %s, result was '%s'" %
5034 (node, vg, vg_free), errors.ECODE_ENVIRON)
5035 if requested > vg_free:
5036 raise errors.OpPrereqError("Not enough disk space on target node %s"
5037 " vg %s: required %d MiB, available %d MiB" %
5038 (node, vg, requested, vg_free),
5042 class LUInstanceStartup(LogicalUnit):
5043 """Starts an instance.
5046 HPATH = "instance-start"
5047 HTYPE = constants.HTYPE_INSTANCE
5050 def CheckArguments(self):
5052 if self.op.beparams:
5053 # fill the beparams dict
5054 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5056 def ExpandNames(self):
5057 self._ExpandAndLockInstance()
5059 def BuildHooksEnv(self):
5062 This runs on master, primary and secondary nodes of the instance.
5066 "FORCE": self.op.force,
5068 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5069 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5072 def CheckPrereq(self):
5073 """Check prerequisites.
5075 This checks that the instance is in the cluster.
5078 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5079 assert self.instance is not None, \
5080 "Cannot retrieve locked instance %s" % self.op.instance_name
5083 if self.op.hvparams:
5084 # check hypervisor parameter syntax (locally)
5085 cluster = self.cfg.GetClusterInfo()
5086 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5087 filled_hvp = cluster.FillHV(instance)
5088 filled_hvp.update(self.op.hvparams)
5089 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5090 hv_type.CheckParameterSyntax(filled_hvp)
5091 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5093 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5095 if self.primary_offline and self.op.ignore_offline_nodes:
5096 self.proc.LogWarning("Ignoring offline primary node")
5098 if self.op.hvparams or self.op.beparams:
5099 self.proc.LogWarning("Overridden parameters are ignored")
5101 _CheckNodeOnline(self, instance.primary_node)
5103 bep = self.cfg.GetClusterInfo().FillBE(instance)
5105 # check bridges existence
5106 _CheckInstanceBridgesExist(self, instance)
5108 remote_info = self.rpc.call_instance_info(instance.primary_node,
5110 instance.hypervisor)
5111 remote_info.Raise("Error checking node %s" % instance.primary_node,
5112 prereq=True, ecode=errors.ECODE_ENVIRON)
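# If the instance is not already running on its primary node, make sure the
# node has enough free memory (per the filled BE_MEMORY backend parameter)
# to start it.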
5113 if not remote_info.payload: # not running already
5114 _CheckNodeFreeMemory(self, instance.primary_node,
5115 "starting instance %s" % instance.name,
5116 bep[constants.BE_MEMORY], instance.hypervisor)
5118 def Exec(self, feedback_fn):
5119 """Start the instance.
5122 instance = self.instance
5123 force = self.op.force
5125 self.cfg.MarkInstanceUp(instance.name)
5127 if self.primary_offline:
5128 assert self.op.ignore_offline_nodes
5129 self.proc.LogInfo("Primary node offline, marked instance as started")
5131 node_current = instance.primary_node
5133 _StartInstanceDisks(self, instance, force)
5135 result = self.rpc.call_instance_start(node_current, instance,
5136 self.op.hvparams, self.op.beparams)
5137 msg = result.fail_msg
5139 _ShutdownInstanceDisks(self, instance)
5140 raise errors.OpExecError("Could not start instance: %s" % msg)
5143 class LUInstanceReboot(LogicalUnit):
5144 """Reboot an instance.
5147 HPATH = "instance-reboot"
5148 HTYPE = constants.HTYPE_INSTANCE
5151 def ExpandNames(self):
5152 self._ExpandAndLockInstance()
5154 def BuildHooksEnv(self):
5157 This runs on master, primary and secondary nodes of the instance.
5161 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5162 "REBOOT_TYPE": self.op.reboot_type,
5163 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5165 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5166 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5169 def CheckPrereq(self):
5170 """Check prerequisites.
5172 This checks that the instance is in the cluster.
5175 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5176 assert self.instance is not None, \
5177 "Cannot retrieve locked instance %s" % self.op.instance_name
5179 _CheckNodeOnline(self, instance.primary_node)
5181 # check bridges existence
5182 _CheckInstanceBridgesExist(self, instance)
5184 def Exec(self, feedback_fn):
5185 """Reboot the instance.
5188 instance = self.instance
5189 ignore_secondaries = self.op.ignore_secondaries
5190 reboot_type = self.op.reboot_type
5192 node_current = instance.primary_node
5194 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5195 constants.INSTANCE_REBOOT_HARD]:
5196 for disk in instance.disks:
5197 self.cfg.SetDiskID(disk, node_current)
5198 result = self.rpc.call_instance_reboot(node_current, instance,
5200 self.op.shutdown_timeout)
5201 result.Raise("Could not reboot instance")
5203 result = self.rpc.call_instance_shutdown(node_current, instance,
5204 self.op.shutdown_timeout)
5205 result.Raise("Could not shutdown instance for full reboot")
5206 _ShutdownInstanceDisks(self, instance)
5207 _StartInstanceDisks(self, instance, ignore_secondaries)
5208 result = self.rpc.call_instance_start(node_current, instance, None, None)
5209 msg = result.fail_msg
5211 _ShutdownInstanceDisks(self, instance)
5212 raise errors.OpExecError("Could not start instance for"
5213 " full reboot: %s" % msg)
5215 self.cfg.MarkInstanceUp(instance.name)
5218 class LUInstanceShutdown(LogicalUnit):
5219 """Shutdown an instance.
5222 HPATH = "instance-stop"
5223 HTYPE = constants.HTYPE_INSTANCE
5226 def ExpandNames(self):
5227 self._ExpandAndLockInstance()
5229 def BuildHooksEnv(self):
5232 This runs on master, primary and secondary nodes of the instance.
5235 env = _BuildInstanceHookEnvByObject(self, self.instance)
5236 env["TIMEOUT"] = self.op.timeout
5237 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5240 def CheckPrereq(self):
5241 """Check prerequisites.
5243 This checks that the instance is in the cluster.
5246 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5247 assert self.instance is not None, \
5248 "Cannot retrieve locked instance %s" % self.op.instance_name
5250 self.primary_offline = \
5251 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5253 if self.primary_offline and self.op.ignore_offline_nodes:
5254 self.proc.LogWarning("Ignoring offline primary node")
5256 _CheckNodeOnline(self, self.instance.primary_node)
5258 def Exec(self, feedback_fn):
5259 """Shutdown the instance.
5262 instance = self.instance
5263 node_current = instance.primary_node
5264 timeout = self.op.timeout
5266 self.cfg.MarkInstanceDown(instance.name)
5268 if self.primary_offline:
5269 assert self.op.ignore_offline_nodes
5270 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5272 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5273 msg = result.fail_msg
5275 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5277 _ShutdownInstanceDisks(self, instance)
5280 class LUInstanceReinstall(LogicalUnit):
5281 """Reinstall an instance.
5284 HPATH = "instance-reinstall"
5285 HTYPE = constants.HTYPE_INSTANCE
5288 def ExpandNames(self):
5289 self._ExpandAndLockInstance()
5291 def BuildHooksEnv(self):
5294 This runs on master, primary and secondary nodes of the instance.
5297 env = _BuildInstanceHookEnvByObject(self, self.instance)
5298 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5301 def CheckPrereq(self):
5302 """Check prerequisites.
5304 This checks that the instance is in the cluster and is not running.
5307 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5308 assert instance is not None, \
5309 "Cannot retrieve locked instance %s" % self.op.instance_name
5310 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5311 " offline, cannot reinstall")
5312 for node in instance.secondary_nodes:
5313 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5314 " cannot reinstall")
5316 if instance.disk_template == constants.DT_DISKLESS:
5317 raise errors.OpPrereqError("Instance '%s' has no disks" %
5318 self.op.instance_name,
5320 _CheckInstanceDown(self, instance, "cannot reinstall")
5322 if self.op.os_type is not None:
5324 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5325 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5326 instance_os = self.op.os_type
5328 instance_os = instance.os
5330 nodelist = list(instance.all_nodes)
5332 if self.op.osparams:
5333 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5334 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5335 self.os_inst = i_osdict # the new dict (without defaults)
5339 self.instance = instance
5341 def Exec(self, feedback_fn):
5342 """Reinstall the instance.
5345 inst = self.instance
5347 if self.op.os_type is not None:
5348 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5349 inst.os = self.op.os_type
5350 # Write to configuration
5351 self.cfg.Update(inst, feedback_fn)
5353 _StartInstanceDisks(self, inst, None)
5355 feedback_fn("Running the instance OS create scripts...")
5356 # FIXME: pass debug option from opcode to backend
5357 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5358 self.op.debug_level,
5359 osparams=self.os_inst)
5360 result.Raise("Could not install OS for instance %s on node %s" %
5361 (inst.name, inst.primary_node))
5363 _ShutdownInstanceDisks(self, inst)
5366 class LUInstanceRecreateDisks(LogicalUnit):
5367 """Recreate an instance's missing disks.
5370 HPATH = "instance-recreate-disks"
5371 HTYPE = constants.HTYPE_INSTANCE
5374 def ExpandNames(self):
5375 self._ExpandAndLockInstance()
5377 def BuildHooksEnv(self):
5380 This runs on master, primary and secondary nodes of the instance.
5383 env = _BuildInstanceHookEnvByObject(self, self.instance)
5384 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5387 def CheckPrereq(self):
5388 """Check prerequisites.
5390 This checks that the instance is in the cluster and is not running.
5393 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5394 assert instance is not None, \
5395 "Cannot retrieve locked instance %s" % self.op.instance_name
5396 _CheckNodeOnline(self, instance.primary_node)
5398 if instance.disk_template == constants.DT_DISKLESS:
5399 raise errors.OpPrereqError("Instance '%s' has no disks" %
5400 self.op.instance_name, errors.ECODE_INVAL)
5401 _CheckInstanceDown(self, instance, "cannot recreate disks")
5403 if not self.op.disks:
5404 self.op.disks = range(len(instance.disks))
5406 for idx in self.op.disks:
5407 if idx >= len(instance.disks):
5408 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5411 self.instance = instance
5413 def Exec(self, feedback_fn):
5414 """Recreate the disks.
5418 for idx, _ in enumerate(self.instance.disks):
5419 if idx not in self.op.disks: # disk idx has not been passed in
5423 _CreateDisks(self, self.instance, to_skip=to_skip)
5426 class LUInstanceRename(LogicalUnit):
5427 """Rename an instance.
5430 HPATH = "instance-rename"
5431 HTYPE = constants.HTYPE_INSTANCE
5433 def CheckArguments(self):
5437 if self.op.ip_check and not self.op.name_check:
5438 # TODO: make the ip check more flexible and not depend on the name check
5439 raise errors.OpPrereqError("Cannot do ip check without a name check",
5442 def BuildHooksEnv(self):
5445 This runs on master, primary and secondary nodes of the instance.
5448 env = _BuildInstanceHookEnvByObject(self, self.instance)
5449 env["INSTANCE_NEW_NAME"] = self.op.new_name
5450 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5453 def CheckPrereq(self):
5454 """Check prerequisites.
5456 This checks that the instance is in the cluster and is not running.
5459 self.op.instance_name = _ExpandInstanceName(self.cfg,
5460 self.op.instance_name)
5461 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5462 assert instance is not None
5463 _CheckNodeOnline(self, instance.primary_node)
5464 _CheckInstanceDown(self, instance, "cannot rename")
5465 self.instance = instance
5467 new_name = self.op.new_name
5468 if self.op.name_check:
5469 hostname = netutils.GetHostname(name=new_name)
5470 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5472 new_name = self.op.new_name = hostname.name
5473 if (self.op.ip_check and
5474 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5475 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5476 (hostname.ip, new_name),
5477 errors.ECODE_NOTUNIQUE)
5479 instance_list = self.cfg.GetInstanceList()
5480 if new_name in instance_list and new_name != instance.name:
5481 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5482 new_name, errors.ECODE_EXISTS)
5484 def Exec(self, feedback_fn):
5485 """Rename the instance.
5488 inst = self.instance
5489 old_name = inst.name
5491 rename_file_storage = False
5492 if (inst.disk_template == constants.DT_FILE and
5493 self.op.new_name != inst.name):
5494 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5495 rename_file_storage = True
5497 self.cfg.RenameInstance(inst.name, self.op.new_name)
5498 # Change the instance lock. This is definitely safe while we hold the BGL
5499 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5500 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5502 # re-read the instance from the configuration after rename
5503 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5505 if rename_file_storage:
5506 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5507 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5508 old_file_storage_dir,
5509 new_file_storage_dir)
5510 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5511 " (but the instance has been renamed in Ganeti)" %
5512 (inst.primary_node, old_file_storage_dir,
5513 new_file_storage_dir))
5515 _StartInstanceDisks(self, inst, None)
5517 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5518 old_name, self.op.debug_level)
5519 msg = result.fail_msg
5521 msg = ("Could not run OS rename script for instance %s on node %s"
5522 " (but the instance has been renamed in Ganeti): %s" %
5523 (inst.name, inst.primary_node, msg))
5524 self.proc.LogWarning(msg)
5526 _ShutdownInstanceDisks(self, inst)
5531 class LUInstanceRemove(LogicalUnit):
5532 """Remove an instance.
5535 HPATH = "instance-remove"
5536 HTYPE = constants.HTYPE_INSTANCE
5539 def ExpandNames(self):
5540 self._ExpandAndLockInstance()
5541 self.needed_locks[locking.LEVEL_NODE] = []
5542 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5544 def DeclareLocks(self, level):
5545 if level == locking.LEVEL_NODE:
5546 self._LockInstancesNodes()
5548 def BuildHooksEnv(self):
5551 This runs on master, primary and secondary nodes of the instance.
5554 env = _BuildInstanceHookEnvByObject(self, self.instance)
5555 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5556 nl = [self.cfg.GetMasterNode()]
5557 nl_post = list(self.instance.all_nodes) + nl
5558 return env, nl, nl_post
5560 def CheckPrereq(self):
5561 """Check prerequisites.
5563 This checks that the instance is in the cluster.
5566 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5567 assert self.instance is not None, \
5568 "Cannot retrieve locked instance %s" % self.op.instance_name
5570 def Exec(self, feedback_fn):
5571 """Remove the instance.
5574 instance = self.instance
5575 logging.info("Shutting down instance %s on node %s",
5576 instance.name, instance.primary_node)
5578 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5579 self.op.shutdown_timeout)
5580 msg = result.fail_msg
5582 if self.op.ignore_failures:
5583 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5585 raise errors.OpExecError("Could not shutdown instance %s on"
5587 (instance.name, instance.primary_node, msg))
5589 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
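# The actual removal is a two-step process: the shutdown above stops the
# instance on its primary node, then _RemoveInstance below drops the block
# devices and the configuration entry, and schedules the instance lock for
# removal via remove_locks.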
5592 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5593 """Utility function to remove an instance.
5596 logging.info("Removing block devices for instance %s", instance.name)
5598 if not _RemoveDisks(lu, instance):
5599 if not ignore_failures:
5600 raise errors.OpExecError("Can't remove instance's disks")
5601 feedback_fn("Warning: can't remove instance's disks")
5603 logging.info("Removing instance %s out of cluster config", instance.name)
5605 lu.cfg.RemoveInstance(instance.name)
5607 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5608 "Instance lock removal conflict"
5610 # Remove lock for the instance
5611 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5614 class LUInstanceQuery(NoHooksLU):
5615 """Logical unit for querying instances.
5618 # pylint: disable-msg=W0142
5621 def CheckArguments(self):
5622 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5623 self.op.use_locking)
5625 def ExpandNames(self):
5626 self.iq.ExpandNames(self)
5628 def DeclareLocks(self, level):
5629 self.iq.DeclareLocks(self, level)
5631 def Exec(self, feedback_fn):
5632 return self.iq.OldStyleQuery(self)
5635 class LUInstanceFailover(LogicalUnit):
5636 """Failover an instance.
5639 HPATH = "instance-failover"
5640 HTYPE = constants.HTYPE_INSTANCE
5643 def ExpandNames(self):
5644 self._ExpandAndLockInstance()
5645 self.needed_locks[locking.LEVEL_NODE] = []
5646 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5648 def DeclareLocks(self, level):
5649 if level == locking.LEVEL_NODE:
5650 self._LockInstancesNodes()
5652 def BuildHooksEnv(self):
5655 This runs on master, primary and secondary nodes of the instance.
5658 instance = self.instance
5659 source_node = instance.primary_node
5660 target_node = instance.secondary_nodes[0]
5662 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5663 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5664 "OLD_PRIMARY": source_node,
5665 "OLD_SECONDARY": target_node,
5666 "NEW_PRIMARY": target_node,
5667 "NEW_SECONDARY": source_node,
5669 env.update(_BuildInstanceHookEnvByObject(self, instance))
5670 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5672 nl_post.append(source_node)
5673 return env, nl, nl_post
5675 def CheckPrereq(self):
5676 """Check prerequisites.
5678 This checks that the instance is in the cluster.
5681 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5682 assert self.instance is not None, \
5683 "Cannot retrieve locked instance %s" % self.op.instance_name
5685 bep = self.cfg.GetClusterInfo().FillBE(instance)
5686 if instance.disk_template not in constants.DTS_NET_MIRROR:
5687 raise errors.OpPrereqError("Instance's disk layout is not"
5688 " network mirrored, cannot failover.",
5691 secondary_nodes = instance.secondary_nodes
5692 if not secondary_nodes:
5693 raise errors.ProgrammerError("no secondary node but using "
5694 "a mirrored disk template")
5696 target_node = secondary_nodes[0]
5697 _CheckNodeOnline(self, target_node)
5698 _CheckNodeNotDrained(self, target_node)
5699 if instance.admin_up:
5700 # check memory requirements on the secondary node
5701 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5702 instance.name, bep[constants.BE_MEMORY],
5703 instance.hypervisor)
5705 self.LogInfo("Not checking memory on the secondary node as"
5706 " instance will not be started")
5708 # check bridge existence
5709 _CheckInstanceBridgesExist(self, instance, node=target_node)
5711 def Exec(self, feedback_fn):
5712 """Failover an instance.
5714 The failover is done by shutting it down on its present node and
5715 starting it on the secondary.
5718 instance = self.instance
5719 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5721 source_node = instance.primary_node
5722 target_node = instance.secondary_nodes[0]
5724 if instance.admin_up:
5725 feedback_fn("* checking disk consistency between source and target")
5726 for dev in instance.disks:
5727 # for drbd, these are drbd over lvm
5728 if not _CheckDiskConsistency(self, dev, target_node, False):
5729 if not self.op.ignore_consistency:
5730 raise errors.OpExecError("Disk %s is degraded on target node,"
5731 " aborting failover." % dev.iv_name)
5733 feedback_fn("* not checking disk consistency as instance is not running")
5735 feedback_fn("* shutting down instance on source node")
5736 logging.info("Shutting down instance %s on node %s",
5737 instance.name, source_node)
5739 result = self.rpc.call_instance_shutdown(source_node, instance,
5740 self.op.shutdown_timeout)
5741 msg = result.fail_msg
5743 if self.op.ignore_consistency or primary_node.offline:
5744 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5745 " Proceeding anyway. Please make sure node"
5746 " %s is down. Error details: %s",
5747 instance.name, source_node, source_node, msg)
5749 raise errors.OpExecError("Could not shutdown instance %s on"
5751 (instance.name, source_node, msg))
5753 feedback_fn("* deactivating the instance's disks on source node")
5754 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5755 raise errors.OpExecError("Can't shut down the instance's disks.")
5757 instance.primary_node = target_node
5758 # distribute new instance config to the other nodes
5759 self.cfg.Update(instance, feedback_fn)
5761 # Only start the instance if it's marked as up
5762 if instance.admin_up:
5763 feedback_fn("* activating the instance's disks on target node")
5764 logging.info("Starting instance %s on node %s",
5765 instance.name, target_node)
5767 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5768 ignore_secondaries=True)
5770 _ShutdownInstanceDisks(self, instance)
5771 raise errors.OpExecError("Can't activate the instance's disks")
5773 feedback_fn("* starting the instance on the target node")
5774 result = self.rpc.call_instance_start(target_node, instance, None, None)
5775 msg = result.fail_msg
5777 _ShutdownInstanceDisks(self, instance)
5778 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5779 (instance.name, target_node, msg))
5782 class LUInstanceMigrate(LogicalUnit):
5783 """Migrate an instance.
5785 This is migration without shutting down, compared to the failover,
5786 which is done with shutdown.
5789 HPATH = "instance-migrate"
5790 HTYPE = constants.HTYPE_INSTANCE
5793 def ExpandNames(self):
5794 self._ExpandAndLockInstance()
5796 self.needed_locks[locking.LEVEL_NODE] = []
5797 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5799 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5801 self.tasklets = [self._migrater]
5803 def DeclareLocks(self, level):
5804 if level == locking.LEVEL_NODE:
5805 self._LockInstancesNodes()
5807 def BuildHooksEnv(self):
5810 This runs on master, primary and secondary nodes of the instance.
5813 instance = self._migrater.instance
5814 source_node = instance.primary_node
5815 target_node = instance.secondary_nodes[0]
5816 env = _BuildInstanceHookEnvByObject(self, instance)
5817 env["MIGRATE_LIVE"] = self._migrater.live
5818 env["MIGRATE_CLEANUP"] = self.op.cleanup
5820 "OLD_PRIMARY": source_node,
5821 "OLD_SECONDARY": target_node,
5822 "NEW_PRIMARY": target_node,
5823 "NEW_SECONDARY": source_node,
5825 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5827 nl_post.append(source_node)
5828 return env, nl, nl_post
5831 class LUInstanceMove(LogicalUnit):
5832 """Move an instance by data-copying.
5835 HPATH = "instance-move"
5836 HTYPE = constants.HTYPE_INSTANCE
5839 def ExpandNames(self):
5840 self._ExpandAndLockInstance()
5841 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5842 self.op.target_node = target_node
5843 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5846 def DeclareLocks(self, level):
5847 if level == locking.LEVEL_NODE:
5848 self._LockInstancesNodes(primary_only=True)
5850 def BuildHooksEnv(self):
5853 This runs on master, primary and secondary nodes of the instance.
5857 "TARGET_NODE": self.op.target_node,
5858 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5860 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5861 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5862 self.op.target_node]
5865 def CheckPrereq(self):
5866 """Check prerequisites.
5868 This checks that the instance is in the cluster.
5871 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5872 assert self.instance is not None, \
5873 "Cannot retrieve locked instance %s" % self.op.instance_name
5875 node = self.cfg.GetNodeInfo(self.op.target_node)
5876 assert node is not None, \
5877 "Cannot retrieve locked node %s" % self.op.target_node
5879 self.target_node = target_node = node.name
5881 if target_node == instance.primary_node:
5882 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5883 (instance.name, target_node),
5886 bep = self.cfg.GetClusterInfo().FillBE(instance)
5888 for idx, dsk in enumerate(instance.disks):
5889 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5890 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5891 " cannot copy" % idx, errors.ECODE_STATE)
5893 _CheckNodeOnline(self, target_node)
5894 _CheckNodeNotDrained(self, target_node)
5895 _CheckNodeVmCapable(self, target_node)
5897 if instance.admin_up:
5898 # check memory requirements on the target node
5899 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5900 instance.name, bep[constants.BE_MEMORY],
5901 instance.hypervisor)
5903 self.LogInfo("Not checking memory on the target node as the"
5904 " instance will not be started")
5906 # check bridge existence
5907 _CheckInstanceBridgesExist(self, instance, node=target_node)
5909 def Exec(self, feedback_fn):
5910 """Move an instance.
5912 The move is done by shutting it down on its present node, copying
5913 the data over (slow) and starting it on the new node.
5916 instance = self.instance
5918 source_node = instance.primary_node
5919 target_node = self.target_node
5921 self.LogInfo("Shutting down instance %s on source node %s",
5922 instance.name, source_node)
5924 result = self.rpc.call_instance_shutdown(source_node, instance,
5925 self.op.shutdown_timeout)
5926 msg = result.fail_msg
5928 if self.op.ignore_consistency:
5929 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5930 " Proceeding anyway. Please make sure node"
5931 " %s is down. Error details: %s",
5932 instance.name, source_node, source_node, msg)
5934 raise errors.OpExecError("Could not shutdown instance %s on"
5936 (instance.name, source_node, msg))
5938 # create the target disks
5940 _CreateDisks(self, instance, target_node=target_node)
5941 except errors.OpExecError:
5942 self.LogWarning("Device creation failed, reverting...")
5944 _RemoveDisks(self, instance, target_node=target_node)
5946 self.cfg.ReleaseDRBDMinors(instance.name)
5949 cluster_name = self.cfg.GetClusterInfo().cluster_name
5952 # activate, get path, copy the data over
5953 for idx, disk in enumerate(instance.disks):
5954 self.LogInfo("Copying data for disk %d", idx)
5955 result = self.rpc.call_blockdev_assemble(target_node, disk,
5956 instance.name, True)
5958 self.LogWarning("Can't assemble newly created disk %d: %s",
5959 idx, result.fail_msg)
5960 errs.append(result.fail_msg)
5962 dev_path = result.payload
5963 result = self.rpc.call_blockdev_export(source_node, disk,
5964 target_node, dev_path,
5967 self.LogWarning("Can't copy data over for disk %d: %s",
5968 idx, result.fail_msg)
5969 errs.append(result.fail_msg)
5973 self.LogWarning("Some disks failed to copy, aborting")
5975 _RemoveDisks(self, instance, target_node=target_node)
5977 self.cfg.ReleaseDRBDMinors(instance.name)
5978 raise errors.OpExecError("Errors during disk copy: %s" %
5981 instance.primary_node = target_node
5982 self.cfg.Update(instance, feedback_fn)
5984 self.LogInfo("Removing the disks on the original node")
5985 _RemoveDisks(self, instance, target_node=source_node)
5987 # Only start the instance if it's marked as up
5988 if instance.admin_up:
5989 self.LogInfo("Starting instance %s on node %s",
5990 instance.name, target_node)
5992 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5993 ignore_secondaries=True)
5995 _ShutdownInstanceDisks(self, instance)
5996 raise errors.OpExecError("Can't activate the instance's disks")
5998 result = self.rpc.call_instance_start(target_node, instance, None, None)
5999 msg = result.fail_msg
6001 _ShutdownInstanceDisks(self, instance)
6002 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6003 (instance.name, target_node, msg))
6006 class LUNodeMigrate(LogicalUnit):
6007 """Migrate all instances from a node.
6010 HPATH = "node-migrate"
6011 HTYPE = constants.HTYPE_NODE
6014 def ExpandNames(self):
6015 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6017 self.needed_locks = {
6018 locking.LEVEL_NODE: [self.op.node_name],
6021 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6023 # Create tasklets for migrating instances for all instances on this node
6027 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6028 logging.debug("Migrating instance %s", inst.name)
6029 names.append(inst.name)
6031 tasklets.append(TLMigrateInstance(self, inst.name, False))
6033 self.tasklets = tasklets
6035 # Declare instance locks
6036 self.needed_locks[locking.LEVEL_INSTANCE] = names
6038 def DeclareLocks(self, level):
6039 if level == locking.LEVEL_NODE:
6040 self._LockInstancesNodes()
6042 def BuildHooksEnv(self):
6045 This runs on the master, the primary and all the secondaries.
6049 "NODE_NAME": self.op.node_name,
6052 nl = [self.cfg.GetMasterNode()]
6054 return (env, nl, nl)
6057 class TLMigrateInstance(Tasklet):
6058 """Tasklet class for instance migration.
6061 @ivar live: whether the migration will be done live or non-live;
6062 this variable is initialized only after CheckPrereq has run
6065 def __init__(self, lu, instance_name, cleanup):
6066 """Initializes this class.
6069 Tasklet.__init__(self, lu)
6072 self.instance_name = instance_name
6073 self.cleanup = cleanup
6074 self.live = False # will be overridden later
6076 def CheckPrereq(self):
6077 """Check prerequisites.
6079 This checks that the instance is in the cluster.
6082 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6083 instance = self.cfg.GetInstanceInfo(instance_name)
6084 assert instance is not None
6086 if instance.disk_template != constants.DT_DRBD8:
6087 raise errors.OpPrereqError("Instance's disk layout is not"
6088 " drbd8, cannot migrate.", errors.ECODE_STATE)
6090 secondary_nodes = instance.secondary_nodes
6091 if not secondary_nodes:
6092 raise errors.ConfigurationError("No secondary node but using"
6093 " drbd8 disk template")
6095 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6097 target_node = secondary_nodes[0]
6098 # check memory requirements on the secondary node
6099 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6100 instance.name, i_be[constants.BE_MEMORY],
6101 instance.hypervisor)
6103 # check bridge existence
6104 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6106 if not self.cleanup:
6107 _CheckNodeNotDrained(self.lu, target_node)
6108 result = self.rpc.call_instance_migratable(instance.primary_node,
6110 result.Raise("Can't migrate, please use failover",
6111 prereq=True, ecode=errors.ECODE_STATE)
6113 self.instance = instance
6115 if self.lu.op.live is not None and self.lu.op.mode is not None:
6116 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6117 " parameters are accepted",
6119 if self.lu.op.live is not None:
6121 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6123 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6124 # reset the 'live' parameter to None so that repeated
6125 # invocations of CheckPrereq do not raise an exception
6126 self.lu.op.live = None
6127 elif self.lu.op.mode is None:
6128 # read the default value from the hypervisor
6129 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6130 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6132 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
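# Illustrative summary (a sketch, not additional logic) of the precedence
# implemented above for choosing the migration mode:
#   op.live is True,  op.mode is None     -> HT_MIGRATION_LIVE
#   op.live is False, op.mode is None     -> HT_MIGRATION_NONLIVE
#   op.live is None,  op.mode is given    -> the given mode is used as-is
#   op.live is None,  op.mode is None     -> hypervisor's HV_MIGRATION_MODE
#   both op.live and op.mode are given    -> OpPrereqError is raised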
6134 def _WaitUntilSync(self):
6135 """Poll with custom rpc for disk sync.
6137 This uses our own step-based rpc call.
6140 self.feedback_fn("* wait until resync is done")
6144 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6146 self.instance.disks)
6148 for node, nres in result.items():
6149 nres.Raise("Cannot resync disks on node %s" % node)
6150 node_done, node_percent = nres.payload
6151 all_done = all_done and node_done
6152 if node_percent is not None:
6153 min_percent = min(min_percent, node_percent)
6155 if min_percent < 100:
6156 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6159 def _EnsureSecondary(self, node):
6160 """Demote a node to secondary.
6163 self.feedback_fn("* switching node %s to secondary mode" % node)
6165 for dev in self.instance.disks:
6166 self.cfg.SetDiskID(dev, node)
6168 result = self.rpc.call_blockdev_close(node, self.instance.name,
6169 self.instance.disks)
6170 result.Raise("Cannot change disk to secondary on node %s" % node)
6172 def _GoStandalone(self):
6173 """Disconnect from the network.
6176 self.feedback_fn("* changing into standalone mode")
6177 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6178 self.instance.disks)
6179 for node, nres in result.items():
6180 nres.Raise("Cannot disconnect disks node %s" % node)
6182 def _GoReconnect(self, multimaster):
6183 """Reconnect to the network.
6189 msg = "single-master"
6190 self.feedback_fn("* changing disks into %s mode" % msg)
6191 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6192 self.instance.disks,
6193 self.instance.name, multimaster)
6194 for node, nres in result.items():
6195 nres.Raise("Cannot change disks config on node %s" % node)
6197 def _ExecCleanup(self):
6198 """Try to cleanup after a failed migration.
6200 The cleanup is done by:
6201 - check that the instance is running only on one node
6202 (and update the config if needed)
6203 - change disks on its secondary node to secondary
6204 - wait until disks are fully synchronized
6205 - disconnect from the network
6206 - change disks into single-master mode
6207 - wait again until disks are fully synchronized
6210 instance = self.instance
6211 target_node = self.target_node
6212 source_node = self.source_node
6214 # check running on only one node
6215 self.feedback_fn("* checking where the instance actually runs"
6216 " (if this hangs, the hypervisor might be in"
6218 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6219 for node, result in ins_l.items():
6220 result.Raise("Can't contact node %s" % node)
6222 runningon_source = instance.name in ins_l[source_node].payload
6223 runningon_target = instance.name in ins_l[target_node].payload
6225 if runningon_source and runningon_target:
6226 raise errors.OpExecError("Instance seems to be running on two nodes,"
6227 " or the hypervisor is confused. You will have"
6228 " to ensure manually that it runs only on one"
6229 " and restart this operation.")
6231 if not (runningon_source or runningon_target):
6232 raise errors.OpExecError("Instance does not seem to be running at all."
6233 " In this case, it's safer to repair by"
6234 " running 'gnt-instance stop' to ensure disk"
6235 " shutdown, and then restarting it.")
6237 if runningon_target:
6238 # the migration has actually succeeded, we need to update the config
6239 self.feedback_fn("* instance running on secondary node (%s),"
6240 " updating config" % target_node)
6241 instance.primary_node = target_node
6242 self.cfg.Update(instance, self.feedback_fn)
6243 demoted_node = source_node
6245 self.feedback_fn("* instance confirmed to be running on its"
6246 " primary node (%s)" % source_node)
6247 demoted_node = target_node
6249 self._EnsureSecondary(demoted_node)
6251 self._WaitUntilSync()
6252 except errors.OpExecError:
6253 # we ignore errors here, since if the device is standalone, it
6254 # won't be able to sync
6256 self._GoStandalone()
6257 self._GoReconnect(False)
6258 self._WaitUntilSync()
6260 self.feedback_fn("* done")
6262 def _RevertDiskStatus(self):
6263 """Try to revert the disk status after a failed migration.
6266 target_node = self.target_node
6268 self._EnsureSecondary(target_node)
6269 self._GoStandalone()
6270 self._GoReconnect(False)
6271 self._WaitUntilSync()
6272 except errors.OpExecError, err:
6273 self.lu.LogWarning("Migration failed and I can't reconnect the"
6274 " drives: error '%s'\n"
6275 "Please look and recover the instance status" %
6278 def _AbortMigration(self):
6279 """Call the hypervisor code to abort a started migration.
6282 instance = self.instance
6283 target_node = self.target_node
6284 migration_info = self.migration_info
6286 abort_result = self.rpc.call_finalize_migration(target_node,
6290 abort_msg = abort_result.fail_msg
6292 logging.error("Aborting migration failed on target node %s: %s",
6293 target_node, abort_msg)
6294 # Don't raise an exception here, as we still have to try to revert the
6295 # disk status, even if this step failed.
6297 def _ExecMigration(self):
6298 """Migrate an instance.
6300 The migrate is done by:
6301 - change the disks into dual-master mode
6302 - wait until disks are fully synchronized again
6303 - migrate the instance
6304 - change disks on the new secondary node (the old primary) to secondary
6305 - wait until disks are fully synchronized
6306 - change disks into single-master mode
6309 instance = self.instance
6310 target_node = self.target_node
6311 source_node = self.source_node
6313 self.feedback_fn("* checking disk consistency between source and target")
6314 for dev in instance.disks:
6315 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6316 raise errors.OpExecError("Disk %s is degraded or not fully"
6317 " synchronized on target node,"
6318 " aborting migrate." % dev.iv_name)
6320 # First get the migration information from the remote node
6321 result = self.rpc.call_migration_info(source_node, instance)
6322 msg = result.fail_msg
6324 log_err = ("Failed fetching source migration information from %s: %s" %
6326 logging.error(log_err)
6327 raise errors.OpExecError(log_err)
6329 self.migration_info = migration_info = result.payload
6331 # Then switch the disks to master/master mode
6332 self._EnsureSecondary(target_node)
6333 self._GoStandalone()
6334 self._GoReconnect(True)
6335 self._WaitUntilSync()
6337 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6338 result = self.rpc.call_accept_instance(target_node,
6341 self.nodes_ip[target_node])
6343 msg = result.fail_msg
6345 logging.error("Instance pre-migration failed, trying to revert"
6346 " disk status: %s", msg)
6347 self.feedback_fn("Pre-migration failed, aborting")
6348 self._AbortMigration()
6349 self._RevertDiskStatus()
6350 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6351 (instance.name, msg))
6353 self.feedback_fn("* migrating instance to %s" % target_node)
6355 result = self.rpc.call_instance_migrate(source_node, instance,
6356 self.nodes_ip[target_node],
6358 msg = result.fail_msg
6360 logging.error("Instance migration failed, trying to revert"
6361 " disk status: %s", msg)
6362 self.feedback_fn("Migration failed, aborting")
6363 self._AbortMigration()
6364 self._RevertDiskStatus()
6365 raise errors.OpExecError("Could not migrate instance %s: %s" %
6366 (instance.name, msg))
6369 instance.primary_node = target_node
6370 # distribute new instance config to the other nodes
6371 self.cfg.Update(instance, self.feedback_fn)
6373 result = self.rpc.call_finalize_migration(target_node,
6377 msg = result.fail_msg
6379 logging.error("Instance migration succeeded, but finalization failed:"
6381 raise errors.OpExecError("Could not finalize instance migration: %s" %
6384 self._EnsureSecondary(source_node)
6385 self._WaitUntilSync()
6386 self._GoStandalone()
6387 self._GoReconnect(False)
6388 self._WaitUntilSync()
6390 self.feedback_fn("* done")
6392 def Exec(self, feedback_fn):
6393 """Perform the migration.
6396 feedback_fn("Migrating instance %s" % self.instance.name)
6398 self.feedback_fn = feedback_fn
6400 self.source_node = self.instance.primary_node
6401 self.target_node = self.instance.secondary_nodes[0]
6402 self.all_nodes = [self.source_node, self.target_node]
6404 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6405 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6409 return self._ExecCleanup()
6411 return self._ExecMigration()
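# Illustrative usage sketch (hypothetical instance name): a logical unit
# registers this tasklet from its ExpandNames, and the processor then drives
# its CheckPrereq()/Exec() methods, e.g.:
#   self._migrater = TLMigrateInstance(self, "inst1.example.com",
#                                      cleanup=False)
#   self.tasklets = [self._migrater]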
6414 def _CreateBlockDev(lu, node, instance, device, force_create,
6416 """Create a tree of block devices on a given node.
6418 If this device type has to be created on secondaries, create it and all its children.
6421 If not, just recurse to children keeping the same 'force' value.
6423 @param lu: the lu on whose behalf we execute
6424 @param node: the node on which to create the device
6425 @type instance: L{objects.Instance}
6426 @param instance: the instance which owns the device
6427 @type device: L{objects.Disk}
6428 @param device: the device to create
6429 @type force_create: boolean
6430 @param force_create: whether to force creation of this device; this
6431 will be changed to True whenever we find a device which has the
6432 CreateOnSecondary() attribute
6433 @param info: the extra 'metadata' we should attach to the device
6434 (this will be represented as a LVM tag)
6435 @type force_open: boolean
6436 @param force_open: this parameter will be passed to the
6437 L{backend.BlockdevCreate} function where it specifies
6438 whether we run on primary or not, and it affects both
6439 the child assembly and the device's own Open() execution
6442 if device.CreateOnSecondary():
6446 for child in device.children:
6447 _CreateBlockDev(lu, node, instance, child, force_create,
6450 if not force_create:
6453 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6456 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6457 """Create a single block device on a given node.
6459 This will not recurse over children of the device, so they must be created in advance.
6462 @param lu: the lu on whose behalf we execute
6463 @param node: the node on which to create the device
6464 @type instance: L{objects.Instance}
6465 @param instance: the instance which owns the device
6466 @type device: L{objects.Disk}
6467 @param device: the device to create
6468 @param info: the extra 'metadata' we should attach to the device
6469 (this will be represented as a LVM tag)
6470 @type force_open: boolean
6471 @param force_open: this parameter will be passed to the
6472 L{backend.BlockdevCreate} function where it specifies
6473 whether we run on primary or not, and it affects both
6474 the child assembly and the device's own Open() execution
6477 lu.cfg.SetDiskID(device, node)
6478 result = lu.rpc.call_blockdev_create(node, device, device.size,
6479 instance.name, force_open, info)
6480 result.Raise("Can't create block device %s on"
6481 " node %s for instance %s" % (device, node, instance.name))
6482 if device.physical_id is None:
6483 device.physical_id = result.payload
6486 def _GenerateUniqueNames(lu, exts):
6487 """Generate a suitable LV name.
6489 This will generate a unique logical volume name for each of the given suffixes.
6494 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6495 results.append("%s%s" % (new_id, val))
6499 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6501 """Generate a drbd8 device complete with its children.
6504 port = lu.cfg.AllocatePort()
6505 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6506 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6507 logical_id=(vgname, names[0]))
6508 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6509 logical_id=(vgname, names[1]))
6510 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6511 logical_id=(primary, secondary, port,
6514 children=[dev_data, dev_meta],
6519 def _GenerateDiskTemplate(lu, template_name,
6520 instance_name, primary_node,
6521 secondary_nodes, disk_info,
6522 file_storage_dir, file_driver,
6523 base_index, feedback_fn):
6524 """Generate the entire disk layout for a given template type.
6527 #TODO: compute space requirements
6529 vgname = lu.cfg.GetVGName()
6530 disk_count = len(disk_info)
6532 if template_name == constants.DT_DISKLESS:
6534 elif template_name == constants.DT_PLAIN:
6535 if len(secondary_nodes) != 0:
6536 raise errors.ProgrammerError("Wrong template configuration")
6538 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6539 for i in range(disk_count)])
6540 for idx, disk in enumerate(disk_info):
6541 disk_index = idx + base_index
6542 vg = disk.get("vg", vgname)
6543 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6544 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6545 logical_id=(vg, names[idx]),
6546 iv_name="disk/%d" % disk_index,
6548 disks.append(disk_dev)
6549 elif template_name == constants.DT_DRBD8:
6550 if len(secondary_nodes) != 1:
6551 raise errors.ProgrammerError("Wrong template configuration")
6552 remote_node = secondary_nodes[0]
6553 minors = lu.cfg.AllocateDRBDMinor(
6554 [primary_node, remote_node] * len(disk_info), instance_name)
6557 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6558 for i in range(disk_count)]):
6559 names.append(lv_prefix + "_data")
6560 names.append(lv_prefix + "_meta")
6561 for idx, disk in enumerate(disk_info):
6562 disk_index = idx + base_index
6563 vg = disk.get("vg", vgname)
6564 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6565 disk["size"], vg, names[idx*2:idx*2+2],
6566 "disk/%d" % disk_index,
6567 minors[idx*2], minors[idx*2+1])
6568 disk_dev.mode = disk["mode"]
6569 disks.append(disk_dev)
6570 elif template_name == constants.DT_FILE:
6571 if len(secondary_nodes) != 0:
6572 raise errors.ProgrammerError("Wrong template configuration")
6574 opcodes.RequireFileStorage()
6576 for idx, disk in enumerate(disk_info):
6577 disk_index = idx + base_index
6578 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6579 iv_name="disk/%d" % disk_index,
6580 logical_id=(file_driver,
6581 "%s/disk%d" % (file_storage_dir,
6584 disks.append(disk_dev)
6586 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
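# Illustrative example (a sketch with made-up values): for
# template_name=constants.DT_PLAIN, disk_info=[{"size": 1024, "mode": "rw"}]
# and base_index=0, the DT_PLAIN branch above produces a single objects.Disk
# of type LD_LV, 1024 MiB in size, named "<unique-id>.disk0" in the default
# volume group and exposed to the instance as iv_name "disk/0".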
6590 def _GetInstanceInfoText(instance):
6591 """Compute that text that should be added to the disk's metadata.
6594 return "originstname+%s" % instance.name
6597 def _CalcEta(time_taken, written, total_size):
6598 """Calculates the ETA based on size written and total size.
6600 @param time_taken: The time taken so far
6601 @param written: amount written so far
6602 @param total_size: The total size of data to be written
6603 @return: The remaining time in seconds
6606 avg_time = time_taken / float(written)
6607 return (total_size - written) * avg_time
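# Illustrative example (sketch): with 2048 MiB written in 120 seconds out of
# a 10240 MiB total, the average is 120/2048 seconds per MiB, so the
# remaining 8192 MiB are estimated at 8192 * 120/2048 = 480 seconds:
#   _CalcEta(120.0, 2048, 10240)  # -> 480.0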
6610 def _WipeDisks(lu, instance):
6611 """Wipes instance disks.
6613 @type lu: L{LogicalUnit}
6614 @param lu: the logical unit on whose behalf we execute
6615 @type instance: L{objects.Instance}
6616 @param instance: the instance whose disks we should wipe
6617 @return: the success of the wipe
6620 node = instance.primary_node
6621 logging.info("Pause sync of instance %s disks", instance.name)
6622 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6624 for idx, success in enumerate(result.payload):
6626 logging.warn("pause-sync of instance %s for disk %d failed",
6630 for idx, device in enumerate(instance.disks):
6631 lu.LogInfo("* Wiping disk %d", idx)
6632 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6634 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
6635 # but at most MAX_WIPE_CHUNK
6636 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6637 constants.MIN_WIPE_CHUNK_PERCENT)
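# Illustrative example (a sketch; assumes for illustration that
# MIN_WIPE_CHUNK_PERCENT is 10 and MAX_WIPE_CHUNK is 1024 MiB): a 5120 MiB
# disk would be wiped in chunks of min(1024, 5120/100.0*10) = 512 MiB, while
# a 20480 MiB disk would be capped at 1024 MiB per chunk.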
6642 start_time = time.time()
6644 while offset < size:
6645 wipe_size = min(wipe_chunk_size, size - offset)
6646 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6647 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6648 (idx, offset, wipe_size))
6651 if now - last_output >= 60:
6652 eta = _CalcEta(now - start_time, offset, size)
6653 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6654 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6657 logging.info("Resume sync of instance %s disks", instance.name)
6659 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6661 for idx, success in enumerate(result.payload):
6663 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6664 " look at the status and troubleshoot the issue.", idx)
6665 logging.warn("resume-sync of instance %s for disk %d failed",
6669 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6670 """Create all disks for an instance.
6672 This abstracts away some work from AddInstance.
6674 @type lu: L{LogicalUnit}
6675 @param lu: the logical unit on whose behalf we execute
6676 @type instance: L{objects.Instance}
6677 @param instance: the instance whose disks we should create
6679 @param to_skip: list of indices to skip
6680 @type target_node: string
6681 @param target_node: if passed, overrides the target node for creation
6683 @return: the success of the creation
6686 info = _GetInstanceInfoText(instance)
6687 if target_node is None:
6688 pnode = instance.primary_node
6689 all_nodes = instance.all_nodes
6694 if instance.disk_template == constants.DT_FILE:
6695 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6696 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6698 result.Raise("Failed to create directory '%s' on"
6699 " node %s" % (file_storage_dir, pnode))
6701 # Note: this needs to be kept in sync with adding of disks in
6702 # LUInstanceSetParams
6703 for idx, device in enumerate(instance.disks):
6704 if to_skip and idx in to_skip:
6706 logging.info("Creating volume %s for instance %s",
6707 device.iv_name, instance.name)
6709 for node in all_nodes:
6710 f_create = node == pnode
6711 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6714 def _RemoveDisks(lu, instance, target_node=None):
6715 """Remove all disks for an instance.
6717 This abstracts away some work from `AddInstance()` and
6718 `RemoveInstance()`. Note that in case some of the devices couldn't
6719 be removed, the removal will continue with the other ones (compare
6720 with `_CreateDisks()`).
6722 @type lu: L{LogicalUnit}
6723 @param lu: the logical unit on whose behalf we execute
6724 @type instance: L{objects.Instance}
6725 @param instance: the instance whose disks we should remove
6726 @type target_node: string
6727 @param target_node: used to override the node on which to remove the disks
6729 @return: the success of the removal
6732 logging.info("Removing block devices for instance %s", instance.name)
6735 for device in instance.disks:
6737 edata = [(target_node, device)]
6739 edata = device.ComputeNodeTree(instance.primary_node)
6740 for node, disk in edata:
6741 lu.cfg.SetDiskID(disk, node)
6742 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6744 lu.LogWarning("Could not remove block device %s on node %s,"
6745 " continuing anyway: %s", device.iv_name, node, msg)
6748 if instance.disk_template == constants.DT_FILE:
6749 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6753 tgt = instance.primary_node
6754 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6756 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6757 file_storage_dir, instance.primary_node, result.fail_msg)
6763 def _ComputeDiskSizePerVG(disk_template, disks):
6764 """Compute disk size requirements in the volume group
6767 def _compute(disks, payload):
6768 """Universal algorithm
6773 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6777 # Required free disk space as a function of disk and swap space
6779 constants.DT_DISKLESS: {},
6780 constants.DT_PLAIN: _compute(disks, 0),
6781 # 128 MB are added for drbd metadata for each disk
6782 constants.DT_DRBD8: _compute(disks, 128),
6783 constants.DT_FILE: {},
6786 if disk_template not in req_size_dict:
6787 raise errors.ProgrammerError("Disk template '%s' size requirement"
6788 " is unknown" % disk_template)
6790 return req_size_dict[disk_template]
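# Illustrative example (a sketch with made-up values): for
#   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) would return
# {"xenvg": 1024 + 128 + 2048 + 128} = {"xenvg": 3328}, i.e. 128 MiB of DRBD
# metadata is accounted per disk and the totals are aggregated per volume
# group.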
6793 def _ComputeDiskSize(disk_template, disks):
6794 """Compute disk size requirements in the volume group
6797 # Required free disk space as a function of disk and swap space
6799 constants.DT_DISKLESS: None,
6800 constants.DT_PLAIN: sum(d["size"] for d in disks),
6801 # 128 MB are added for drbd metadata for each disk
6802 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6803 constants.DT_FILE: None,
6806 if disk_template not in req_size_dict:
6807 raise errors.ProgrammerError("Disk template '%s' size requirement"
6808 " is unknown" % disk_template)
6810 return req_size_dict[disk_template]
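# Illustrative example (sketch): for disks = [{"size": 1024}, {"size": 2048}],
# _ComputeDiskSize(constants.DT_PLAIN, disks) returns 3072, while
# _ComputeDiskSize(constants.DT_DRBD8, disks) returns 3328 (128 MiB of DRBD
# metadata per disk); DT_DISKLESS and DT_FILE return None.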
6813 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6814 """Hypervisor parameter validation.
6816 This function abstracts the hypervisor parameter validation to be
6817 used in both instance create and instance modify.
6819 @type lu: L{LogicalUnit}
6820 @param lu: the logical unit for which we check
6821 @type nodenames: list
6822 @param nodenames: the list of nodes on which we should check
6823 @type hvname: string
6824 @param hvname: the name of the hypervisor we should use
6825 @type hvparams: dict
6826 @param hvparams: the parameters which we need to check
6827 @raise errors.OpPrereqError: if the parameters are not valid
6830 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6833 for node in nodenames:
6837 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6840 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6841 """OS parameters validation.
6843 @type lu: L{LogicalUnit}
6844 @param lu: the logical unit for which we check
6845 @type required: boolean
6846 @param required: whether the validation should fail if the OS is not found
6848 @type nodenames: list
6849 @param nodenames: the list of nodes on which we should check
6850 @type osname: string
6851 @param osname: the name of the OS we should use
6852 @type osparams: dict
6853 @param osparams: the parameters which we need to check
6854 @raise errors.OpPrereqError: if the parameters are not valid
6857 result = lu.rpc.call_os_validate(required, nodenames, osname,
6858 [constants.OS_VALIDATE_PARAMETERS],
6860 for node, nres in result.items():
6861 # we don't check for offline cases since this should be run only
6862 # against the master node and/or an instance's nodes
6863 nres.Raise("OS Parameters validation failed on node %s" % node)
6864 if not nres.payload:
6865 lu.LogInfo("OS %s not found on node %s, validation skipped",
6869 class LUInstanceCreate(LogicalUnit):
6870 """Create an instance.
6873 HPATH = "instance-add"
6874 HTYPE = constants.HTYPE_INSTANCE
6877 def CheckArguments(self):
6881 # do not require name_check to ease forward/backward compatibility
6883 if self.op.no_install and self.op.start:
6884 self.LogInfo("No-installation mode selected, disabling startup")
6885 self.op.start = False
6886 # validate/normalize the instance name
6887 self.op.instance_name = \
6888 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6890 if self.op.ip_check and not self.op.name_check:
6891 # TODO: make the ip check more flexible and not depend on the name check
6892 raise errors.OpPrereqError("Cannot do ip check without a name check",
6895 # check nics' parameter names
6896 for nic in self.op.nics:
6897 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6899 # check disks' parameter names and consistent adopt/no-adopt strategy
6900 has_adopt = has_no_adopt = False
6901 for disk in self.op.disks:
6902 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6907 if has_adopt and has_no_adopt:
6908 raise errors.OpPrereqError("Either all disks are adopted or none is",
6911 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6912 raise errors.OpPrereqError("Disk adoption is not supported for the"
6913 " '%s' disk template" %
6914 self.op.disk_template,
6916 if self.op.iallocator is not None:
6917 raise errors.OpPrereqError("Disk adoption not allowed with an"
6918 " iallocator script", errors.ECODE_INVAL)
6919 if self.op.mode == constants.INSTANCE_IMPORT:
6920 raise errors.OpPrereqError("Disk adoption not allowed for"
6921 " instance import", errors.ECODE_INVAL)
6923 self.adopt_disks = has_adopt
6925 # instance name verification
6926 if self.op.name_check:
6927 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6928 self.op.instance_name = self.hostname1.name
6929 # used in CheckPrereq for ip ping check
6930 self.check_ip = self.hostname1.ip
6932 self.check_ip = None
6934 # file storage checks
6935 if (self.op.file_driver and
6936 not self.op.file_driver in constants.FILE_DRIVER):
6937 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6938 self.op.file_driver, errors.ECODE_INVAL)
6940 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6941 raise errors.OpPrereqError("File storage directory path not absolute",
6944 ### Node/iallocator related checks
6945 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6947 if self.op.pnode is not None:
6948 if self.op.disk_template in constants.DTS_NET_MIRROR:
6949 if self.op.snode is None:
6950 raise errors.OpPrereqError("The networked disk templates need"
6951 " a mirror node", errors.ECODE_INVAL)
6953 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6955 self.op.snode = None
6957 self._cds = _GetClusterDomainSecret()
6959 if self.op.mode == constants.INSTANCE_IMPORT:
6960 # On import force_variant must be True, because if we forced it at
6961 # initial install, our only chance when importing it back is that it
6963 self.op.force_variant = True
6965 if self.op.no_install:
6966 self.LogInfo("No-installation mode has no effect during import")
6968 elif self.op.mode == constants.INSTANCE_CREATE:
6969 if self.op.os_type is None:
6970 raise errors.OpPrereqError("No guest OS specified",
6972 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6973 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6974 " installation" % self.op.os_type,
6976 if self.op.disk_template is None:
6977 raise errors.OpPrereqError("No disk template specified",
6980 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6981 # Check handshake to ensure both clusters have the same domain secret
6982 src_handshake = self.op.source_handshake
6983 if not src_handshake:
6984 raise errors.OpPrereqError("Missing source handshake",
6987 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6990 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6993 # Load and check source CA
6994 self.source_x509_ca_pem = self.op.source_x509_ca
6995 if not self.source_x509_ca_pem:
6996 raise errors.OpPrereqError("Missing source X509 CA",
7000 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7002 except OpenSSL.crypto.Error, err:
7003 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7004 (err, ), errors.ECODE_INVAL)
7006 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7007 if errcode is not None:
7008 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7011 self.source_x509_ca = cert
7013 src_instance_name = self.op.source_instance_name
7014 if not src_instance_name:
7015 raise errors.OpPrereqError("Missing source instance name",
7018 self.source_instance_name = \
7019 netutils.GetHostname(name=src_instance_name).name
7022 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7023 self.op.mode, errors.ECODE_INVAL)
7025 def ExpandNames(self):
7026 """ExpandNames for CreateInstance.
7028 Figure out the right locks for instance creation.
7031 self.needed_locks = {}
7033 instance_name = self.op.instance_name
7034 # this is just a preventive check, but someone might still add this
7035 # instance in the meantime, and creation will fail at lock-add time
7036 if instance_name in self.cfg.GetInstanceList():
7037 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7038 instance_name, errors.ECODE_EXISTS)
7040 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7042 if self.op.iallocator:
7043 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7045 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7046 nodelist = [self.op.pnode]
7047 if self.op.snode is not None:
7048 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7049 nodelist.append(self.op.snode)
7050 self.needed_locks[locking.LEVEL_NODE] = nodelist
7052 # in case of import lock the source node too
7053 if self.op.mode == constants.INSTANCE_IMPORT:
7054 src_node = self.op.src_node
7055 src_path = self.op.src_path
7057 if src_path is None:
7058 self.op.src_path = src_path = self.op.instance_name
7060 if src_node is None:
7061 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7062 self.op.src_node = None
7063 if os.path.isabs(src_path):
7064 raise errors.OpPrereqError("Importing an instance from an absolute"
7065 " path requires a source node option.",
7068 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7069 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7070 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7071 if not os.path.isabs(src_path):
7072 self.op.src_path = src_path = \
7073 utils.PathJoin(constants.EXPORT_DIR, src_path)
7075 def _RunAllocator(self):
7076 """Run the allocator based on input opcode.
7079 nics = [n.ToDict() for n in self.nics]
7080 ial = IAllocator(self.cfg, self.rpc,
7081 mode=constants.IALLOCATOR_MODE_ALLOC,
7082 name=self.op.instance_name,
7083 disk_template=self.op.disk_template,
7086 vcpus=self.be_full[constants.BE_VCPUS],
7087 mem_size=self.be_full[constants.BE_MEMORY],
7090 hypervisor=self.op.hypervisor,
7093 ial.Run(self.op.iallocator)
7096 raise errors.OpPrereqError("Can't compute nodes using"
7097 " iallocator '%s': %s" %
7098 (self.op.iallocator, ial.info),
7100 if len(ial.result) != ial.required_nodes:
7101 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7102 " of nodes (%s), required %s" %
7103 (self.op.iallocator, len(ial.result),
7104 ial.required_nodes), errors.ECODE_FAULT)
7105 self.op.pnode = ial.result[0]
7106 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7107 self.op.instance_name, self.op.iallocator,
7108 utils.CommaJoin(ial.result))
7109 if ial.required_nodes == 2:
7110 self.op.snode = ial.result[1]
7112 def BuildHooksEnv(self):
7115 This runs on master, primary and secondary nodes of the instance.
7119 "ADD_MODE": self.op.mode,
7121 if self.op.mode == constants.INSTANCE_IMPORT:
7122 env["SRC_NODE"] = self.op.src_node
7123 env["SRC_PATH"] = self.op.src_path
7124 env["SRC_IMAGES"] = self.src_images
7126 env.update(_BuildInstanceHookEnv(
7127 name=self.op.instance_name,
7128 primary_node=self.op.pnode,
7129 secondary_nodes=self.secondaries,
7130 status=self.op.start,
7131 os_type=self.op.os_type,
7132 memory=self.be_full[constants.BE_MEMORY],
7133 vcpus=self.be_full[constants.BE_VCPUS],
7134 nics=_NICListToTuple(self, self.nics),
7135 disk_template=self.op.disk_template,
7136 disks=[(d["size"], d["mode"]) for d in self.disks],
7139 hypervisor_name=self.op.hypervisor,
7142 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7146 def _ReadExportInfo(self):
7147 """Reads the export information from disk.
7149 It will override the opcode source node and path with the actual
7150 information, if these two were not specified before.
7152 @return: the export information
7155 assert self.op.mode == constants.INSTANCE_IMPORT
7157 src_node = self.op.src_node
7158 src_path = self.op.src_path
7160 if src_node is None:
7161 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7162 exp_list = self.rpc.call_export_list(locked_nodes)
7164 for node in exp_list:
7165 if exp_list[node].fail_msg:
7167 if src_path in exp_list[node].payload:
7169 self.op.src_node = src_node = node
7170 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7174 raise errors.OpPrereqError("No export found for relative path %s" %
7175 src_path, errors.ECODE_INVAL)
7177 _CheckNodeOnline(self, src_node)
7178 result = self.rpc.call_export_info(src_node, src_path)
7179 result.Raise("No export or invalid export found in dir %s" % src_path)
7181 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7182 if not export_info.has_section(constants.INISECT_EXP):
7183 raise errors.ProgrammerError("Corrupted export config",
7184 errors.ECODE_ENVIRON)
7186 ei_version = export_info.get(constants.INISECT_EXP, "version")
7187 if (int(ei_version) != constants.EXPORT_VERSION):
7188 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7189 (ei_version, constants.EXPORT_VERSION),
7190 errors.ECODE_ENVIRON)
7193 def _ReadExportParams(self, einfo):
7194 """Use export parameters as defaults.
7196 In case the opcode doesn't specify (i.e. override) some instance
7197 parameters, try to use them from the export information, if available.
7201 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7203 if self.op.disk_template is None:
7204 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7205 self.op.disk_template = einfo.get(constants.INISECT_INS,
7208 raise errors.OpPrereqError("No disk template specified and the export"
7209 " is missing the disk_template information",
7212 if not self.op.disks:
7213 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7215 # TODO: import the disk iv_name too
7216 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7217 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7218 disks.append({"size": disk_sz})
7219 self.op.disks = disks
7221 raise errors.OpPrereqError("No disk info specified and the export"
7222 " is missing the disk information",
7225 if (not self.op.nics and
7226 einfo.has_option(constants.INISECT_INS, "nic_count")):
7228 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7230 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7231 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7236 if (self.op.hypervisor is None and
7237 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7238 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7239 if einfo.has_section(constants.INISECT_HYP):
7240 # use the export parameters but do not override the ones
7241 # specified by the user
7242 for name, value in einfo.items(constants.INISECT_HYP):
7243 if name not in self.op.hvparams:
7244 self.op.hvparams[name] = value
7246 if einfo.has_section(constants.INISECT_BEP):
7247 # use the parameters, without overriding
7248 for name, value in einfo.items(constants.INISECT_BEP):
7249 if name not in self.op.beparams:
7250 self.op.beparams[name] = value
7252 # try to read the parameters old style, from the main section
7253 for name in constants.BES_PARAMETERS:
7254 if (name not in self.op.beparams and
7255 einfo.has_option(constants.INISECT_INS, name)):
7256 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7258 if einfo.has_section(constants.INISECT_OSP):
7259 # use the parameters, without overriding
7260 for name, value in einfo.items(constants.INISECT_OSP):
7261 if name not in self.op.osparams:
7262 self.op.osparams[name] = value
7264 def _RevertToDefaults(self, cluster):
7265 """Revert the instance parameters to the default values.
7269 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7270 for name in self.op.hvparams.keys():
7271 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7272 del self.op.hvparams[name]
7274 be_defs = cluster.SimpleFillBE({})
7275 for name in self.op.beparams.keys():
7276 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7277 del self.op.beparams[name]
7279 nic_defs = cluster.SimpleFillNIC({})
7280 for nic in self.op.nics:
7281 for name in constants.NICS_PARAMETERS:
7282 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7285 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7286 for name in self.op.osparams.keys():
7287 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7288 del self.op.osparams[name]
7290 def CheckPrereq(self):
7291 """Check prerequisites.
7294 if self.op.mode == constants.INSTANCE_IMPORT:
7295 export_info = self._ReadExportInfo()
7296 self._ReadExportParams(export_info)
7298 if (not self.cfg.GetVGName() and
7299 self.op.disk_template not in constants.DTS_NOT_LVM):
7300 raise errors.OpPrereqError("Cluster does not support lvm-based"
7301 " instances", errors.ECODE_STATE)
7303 if self.op.hypervisor is None:
7304 self.op.hypervisor = self.cfg.GetHypervisorType()
7306 cluster = self.cfg.GetClusterInfo()
7307 enabled_hvs = cluster.enabled_hypervisors
7308 if self.op.hypervisor not in enabled_hvs:
7309 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7310 " cluster (%s)" % (self.op.hypervisor,
7311 ",".join(enabled_hvs)),
7314 # check hypervisor parameter syntax (locally)
7315 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7316 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7318 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7319 hv_type.CheckParameterSyntax(filled_hvp)
7320 self.hv_full = filled_hvp
7321 # check that we don't specify global parameters on an instance
7322 _CheckGlobalHvParams(self.op.hvparams)
7324 # fill and remember the beparams dict
7325 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7326 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7328 # build os parameters
7329 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7331 # now that hvp/bep are in final format, let's reset to defaults, if asked to
7333 if self.op.identify_defaults:
7334 self._RevertToDefaults(cluster)
7338 for idx, nic in enumerate(self.op.nics):
7339 nic_mode_req = nic.get("mode", None)
7340 nic_mode = nic_mode_req
7341 if nic_mode is None:
7342 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7344 # in routed mode, for the first nic, the default ip is 'auto'
7345 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7346 default_ip_mode = constants.VALUE_AUTO
7348 default_ip_mode = constants.VALUE_NONE
7350 # ip validity checks
7351 ip = nic.get("ip", default_ip_mode)
7352 if ip is None or ip.lower() == constants.VALUE_NONE:
7354 elif ip.lower() == constants.VALUE_AUTO:
7355 if not self.op.name_check:
7356 raise errors.OpPrereqError("IP address set to auto but name checks"
7357 " have been skipped",
7359 nic_ip = self.hostname1.ip
7361 if not netutils.IPAddress.IsValid(ip):
7362 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7366 # TODO: check the ip address for uniqueness
7367 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7368 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7371 # MAC address verification
7372 mac = nic.get("mac", constants.VALUE_AUTO)
7373 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7374 mac = utils.NormalizeAndValidateMac(mac)
7377 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7378 except errors.ReservationError:
7379 raise errors.OpPrereqError("MAC address %s already in use"
7380 " in cluster" % mac,
7381 errors.ECODE_NOTUNIQUE)
7383 # bridge verification
7384 bridge = nic.get("bridge", None)
7385 link = nic.get("link", None)
7387 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7388 " at the same time", errors.ECODE_INVAL)
7389 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7390 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7397 nicparams[constants.NIC_MODE] = nic_mode_req
7399 nicparams[constants.NIC_LINK] = link
7401 check_params = cluster.SimpleFillNIC(nicparams)
7402 objects.NIC.CheckParameterSyntax(check_params)
7403 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7405 # disk checks/pre-build
7407 for disk in self.op.disks:
7408 mode = disk.get("mode", constants.DISK_RDWR)
7409 if mode not in constants.DISK_ACCESS_SET:
7410 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7411 mode, errors.ECODE_INVAL)
7412 size = disk.get("size", None)
7414 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7417 except (TypeError, ValueError):
7418 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7420 vg = disk.get("vg", self.cfg.GetVGName())
7421 new_disk = {"size": size, "mode": mode, "vg": vg}
7423 new_disk["adopt"] = disk["adopt"]
7424 self.disks.append(new_disk)
7426 if self.op.mode == constants.INSTANCE_IMPORT:
7428 # Check that the new instance doesn't have fewer disks than the export
7429 instance_disks = len(self.disks)
7430 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7431 if instance_disks < export_disks:
7432 raise errors.OpPrereqError("Not enough disks to import."
7433 " (instance: %d, export: %d)" %
7434 (instance_disks, export_disks),
7438 for idx in range(export_disks):
7439 option = 'disk%d_dump' % idx
7440 if export_info.has_option(constants.INISECT_INS, option):
7441 # FIXME: are the old os-es, disk sizes, etc. useful?
7442 export_name = export_info.get(constants.INISECT_INS, option)
7443 image = utils.PathJoin(self.op.src_path, export_name)
7444 disk_images.append(image)
7446 disk_images.append(False)
7448 self.src_images = disk_images
7450 old_name = export_info.get(constants.INISECT_INS, 'name')
7452 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7453 except (TypeError, ValueError), err:
7454 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7455 " an integer: %s" % str(err),
7457 if self.op.instance_name == old_name:
7458 for idx, nic in enumerate(self.nics):
7459 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7460 nic_mac_ini = 'nic%d_mac' % idx
7461 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7463 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7465 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7466 if self.op.ip_check:
7467 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7468 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7469 (self.check_ip, self.op.instance_name),
7470 errors.ECODE_NOTUNIQUE)
7472 #### mac address generation
7473 # By generating here the mac address both the allocator and the hooks get
7474 # the real final mac address rather than the 'auto' or 'generate' value.
7475 # There is a race condition between the generation and the instance object
7476 # creation, which means that we know the mac is valid now, but we're not
7477 # sure it will be when we actually add the instance. If things go bad
7478 # adding the instance will abort because of a duplicate mac, and the
7479 # creation job will fail.
7480 for nic in self.nics:
7481 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7482 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
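# The MACs are generated under this job's execution context id (the same id
# used for the ReserveLV calls in the adoption checks below), so the values
# are already reserved when the allocator and the hooks see them.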
7486 if self.op.iallocator is not None:
7487 self._RunAllocator()
7489 #### node related checks
7491 # check primary node
7492 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7493 assert self.pnode is not None, \
7494 "Cannot retrieve locked node %s" % self.op.pnode
7496 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7497 pnode.name, errors.ECODE_STATE)
7499 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7500 pnode.name, errors.ECODE_STATE)
7501 if not pnode.vm_capable:
7502 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7503 " '%s'" % pnode.name, errors.ECODE_STATE)
7505 self.secondaries = []
7507 # mirror node verification
7508 if self.op.disk_template in constants.DTS_NET_MIRROR:
7509 if self.op.snode == pnode.name:
7510 raise errors.OpPrereqError("The secondary node cannot be the"
7511 " primary node.", errors.ECODE_INVAL)
7512 _CheckNodeOnline(self, self.op.snode)
7513 _CheckNodeNotDrained(self, self.op.snode)
7514 _CheckNodeVmCapable(self, self.op.snode)
7515 self.secondaries.append(self.op.snode)
7517 nodenames = [pnode.name] + self.secondaries
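# nodenames (primary first, then any secondaries) is the node set used for
# the disk space, hypervisor parameter and OS parameter checks below.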
7519 if not self.adopt_disks:
7520 # Check lv size requirements, if not adopting
7521 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7522 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7524 else: # instead, we must check the adoption data
7525 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7526 if len(all_lvs) != len(self.disks):
7527 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7529 for lv_name in all_lvs:
7531 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
7532 # to ReserveLV use the same syntax
7533 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7534 except errors.ReservationError:
7535 raise errors.OpPrereqError("LV named %s used by another instance" %
7536 lv_name, errors.ECODE_NOTUNIQUE)
7538 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7539 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7541 node_lvs = self.rpc.call_lv_list([pnode.name],
7542 vg_names.payload.keys())[pnode.name]
7543 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7544 node_lvs = node_lvs.payload
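# node_lvs maps "vg/lv_name" to a tuple of LV attributes; index 0 is the size
# and index 2 is the "online" (in use) flag, which is how they are used below.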
7546 delta = all_lvs.difference(node_lvs.keys())
7548 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7549 utils.CommaJoin(delta),
7551 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7553 raise errors.OpPrereqError("Online logical volumes found, cannot"
7554 " adopt: %s" % utils.CommaJoin(online_lvs),
7556 # update the size of disk based on what is found
7557 for dsk in self.disks:
7558 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7560 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7562 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7563 # check OS parameters (remotely)
7564 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7566 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7568 # memory check on primary node
7570 _CheckNodeFreeMemory(self, self.pnode.name,
7571 "creating instance %s" % self.op.instance_name,
7572 self.be_full[constants.BE_MEMORY],
7575 self.dry_run_result = list(nodenames)
7577 def Exec(self, feedback_fn):
7578 """Create and add the instance to the cluster.
7581 instance = self.op.instance_name
7582 pnode_name = self.pnode.name
7584 ht_kind = self.op.hypervisor
7585 if ht_kind in constants.HTS_REQ_PORT:
7586 network_port = self.cfg.AllocatePort()
7590 if constants.ENABLE_FILE_STORAGE:
7591 # this is needed because os.path.join does not accept None arguments
7592 if self.op.file_storage_dir is None:
7593 string_file_storage_dir = ""
7595 string_file_storage_dir = self.op.file_storage_dir
7597 # build the full file storage dir path
7598 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7599 string_file_storage_dir, instance)
7601 file_storage_dir = ""
7603 disks = _GenerateDiskTemplate(self,
7604 self.op.disk_template,
7605 instance, pnode_name,
7609 self.op.file_driver,
7613 iobj = objects.Instance(name=instance, os=self.op.os_type,
7614 primary_node=pnode_name,
7615 nics=self.nics, disks=disks,
7616 disk_template=self.op.disk_template,
7618 network_port=network_port,
7619 beparams=self.op.beparams,
7620 hvparams=self.op.hvparams,
7621 hypervisor=self.op.hypervisor,
7622 osparams=self.op.osparams,
7625 if self.adopt_disks:
7626 # rename LVs to the newly-generated names; we need to construct
7627 # 'fake' LV disks with the old data, plus the new unique_id
7628 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7630 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7631 rename_to.append(t_dsk.logical_id)
7632 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7633 self.cfg.SetDiskID(t_dsk, pnode_name)
7634 result = self.rpc.call_blockdev_rename(pnode_name,
7635 zip(tmp_disks, rename_to))
7636 result.Raise("Failed to rename adopted LVs")
7638 feedback_fn("* creating instance disks...")
7640 _CreateDisks(self, iobj)
7641 except errors.OpExecError:
7642 self.LogWarning("Device creation failed, reverting...")
7644 _RemoveDisks(self, iobj)
7646 self.cfg.ReleaseDRBDMinors(instance)
7649 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7650 feedback_fn("* wiping instance disks...")
7652 _WipeDisks(self, iobj)
7653 except errors.OpExecError:
7654 self.LogWarning("Device wiping failed, reverting...")
7656 _RemoveDisks(self, iobj)
7658 self.cfg.ReleaseDRBDMinors(instance)
7661 feedback_fn("adding instance %s to cluster config" % instance)
7663 self.cfg.AddInstance(iobj, self.proc.GetECId())
7665 # Declare that we don't want to remove the instance lock anymore, as we've
7666 # added the instance to the config
7667 del self.remove_locks[locking.LEVEL_INSTANCE]
7668 # Unlock all the nodes
7669 if self.op.mode == constants.INSTANCE_IMPORT:
7670 nodes_keep = [self.op.src_node]
7671 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7672 if node != self.op.src_node]
7673 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7674 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7676 self.context.glm.release(locking.LEVEL_NODE)
7677 del self.acquired_locks[locking.LEVEL_NODE]
7679 if self.op.wait_for_sync:
7680 disk_abort = not _WaitForSync(self, iobj)
7681 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7682 # make sure the disks are not degraded (still sync-ing is ok)
7684 feedback_fn("* checking mirrors status")
7685 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7690 _RemoveDisks(self, iobj)
7691 self.cfg.RemoveInstance(iobj.name)
7692 # Make sure the instance lock gets removed
7693 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7694 raise errors.OpExecError("There are some degraded disks for"
7697 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7698 if self.op.mode == constants.INSTANCE_CREATE:
7699 if not self.op.no_install:
7700 feedback_fn("* running the instance OS create scripts...")
7701 # FIXME: pass debug option from opcode to backend
7702 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7703 self.op.debug_level)
7704 result.Raise("Could not add os for instance %s"
7705 " on node %s" % (instance, pnode_name))
7707 elif self.op.mode == constants.INSTANCE_IMPORT:
7708 feedback_fn("* running the instance OS import scripts...")
7712 for idx, image in enumerate(self.src_images):
7716 # FIXME: pass debug option from opcode to backend
7717 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7718 constants.IEIO_FILE, (image, ),
7719 constants.IEIO_SCRIPT,
7720 (iobj.disks[idx], idx),
7722 transfers.append(dt)
7725 masterd.instance.TransferInstanceData(self, feedback_fn,
7726 self.op.src_node, pnode_name,
7727 self.pnode.secondary_ip,
7729 if not compat.all(import_result):
7730 self.LogWarning("Some disks for instance %s on node %s were not"
7731 " imported successfully" % (instance, pnode_name))
7733 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7734 feedback_fn("* preparing remote import...")
7735 # The source cluster will stop the instance before attempting to make a
7736 # connection. In some cases stopping an instance can take a long time,
7737 # hence the shutdown timeout is added to the connection timeout.
7738 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7739 self.op.source_shutdown_timeout)
7740 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7742 assert iobj.primary_node == self.pnode.name
7744 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7745 self.source_x509_ca,
7746 self._cds, timeouts)
7747 if not compat.all(disk_results):
7748 # TODO: Should the instance still be started, even if some disks
7749 # failed to import (valid for local imports, too)?
7750 self.LogWarning("Some disks for instance %s on node %s were not"
7751 " imported successfully" % (instance, pnode_name))
7753 # Run rename script on newly imported instance
7754 assert iobj.name == instance
7755 feedback_fn("Running rename script for %s" % instance)
7756 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7757 self.source_instance_name,
7758 self.op.debug_level)
7760 self.LogWarning("Failed to run rename script for %s on node"
7761 " %s: %s" % (instance, pnode_name, result.fail_msg))
7764 # also checked in the prereq part
7765 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7769 iobj.admin_up = True
7770 self.cfg.Update(iobj, feedback_fn)
7771 logging.info("Starting instance %s on node %s", instance, pnode_name)
7772 feedback_fn("* starting instance...")
7773 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7774 result.Raise("Could not start instance")
7776 return list(iobj.all_nodes)
7779 class LUInstanceConsole(NoHooksLU):
7780 """Connect to an instance's console.
7782 This is somewhat special in that it returns the command line that
7783 you need to run on the master node in order to connect to the console.
7789 def ExpandNames(self):
7790 self._ExpandAndLockInstance()
7792 def CheckPrereq(self):
7793 """Check prerequisites.
7795 This checks that the instance is in the cluster.
7798 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7799 assert self.instance is not None, \
7800 "Cannot retrieve locked instance %s" % self.op.instance_name
7801 _CheckNodeOnline(self, self.instance.primary_node)
7803 def Exec(self, feedback_fn):
7804 """Connect to the console of an instance
7807 instance = self.instance
7808 node = instance.primary_node
7810 node_insts = self.rpc.call_instance_list([node],
7811 [instance.hypervisor])[node]
7812 node_insts.Raise("Can't get node information from %s" % node)
7814 if instance.name not in node_insts.payload:
7815 if instance.admin_up:
7816 state = "ERROR_down"
7818 state = "ADMIN_down"
7819 raise errors.OpExecError("Instance %s is not running (state %s)" %
7820 (instance.name, state))
7822 logging.debug("Connecting to console of %s on %s", instance.name, node)
7824 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7825 cluster = self.cfg.GetClusterInfo()
7826 # beparams and hvparams are passed separately, to avoid editing the
7827 # instance and then saving the defaults in the instance itself.
7828 hvparams = cluster.FillHV(instance)
7829 beparams = cluster.FillBE(instance)
7830 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7832 assert console.instance == instance.name
7833 assert console.Validate()
7835 return console.ToDict()
7838 class LUInstanceReplaceDisks(LogicalUnit):
7839 """Replace the disks of an instance.
7842 HPATH = "mirrors-replace"
7843 HTYPE = constants.HTYPE_INSTANCE
7846 def CheckArguments(self):
7847 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7850 def ExpandNames(self):
7851 self._ExpandAndLockInstance()
7853 if self.op.iallocator is not None:
7854 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7856 elif self.op.remote_node is not None:
7857 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7858 self.op.remote_node = remote_node
7860 # Warning: do not remove the locking of the new secondary here
7861 # unless DRBD8.AddChildren is changed to work in parallel;
7862 # currently it doesn't since parallel invocations of
7863 # FindUnusedMinor will conflict
7864 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7865 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7868 self.needed_locks[locking.LEVEL_NODE] = []
7869 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7871 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7872 self.op.iallocator, self.op.remote_node,
7873 self.op.disks, False, self.op.early_release)
7875 self.tasklets = [self.replacer]
7877 def DeclareLocks(self, level):
7878 # If we're not already locking all nodes in the set we have to declare the
7879 # instance's primary/secondary nodes.
7880 if (level == locking.LEVEL_NODE and
7881 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7882 self._LockInstancesNodes()
7884 def BuildHooksEnv(self):
7887 This runs on the master, the primary and all the secondaries.
7890 instance = self.replacer.instance
7892 "MODE": self.op.mode,
7893 "NEW_SECONDARY": self.op.remote_node,
7894 "OLD_SECONDARY": instance.secondary_nodes[0],
7896 env.update(_BuildInstanceHookEnvByObject(self, instance))
7898 self.cfg.GetMasterNode(),
7899 instance.primary_node,
7901 if self.op.remote_node is not None:
7902 nl.append(self.op.remote_node)
7906 class TLReplaceDisks(Tasklet):
7907 """Replaces disks for an instance.
7909 Note: Locking is not within the scope of this class.
7912 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7913 disks, delay_iallocator, early_release):
7914 """Initializes this class.
7917 Tasklet.__init__(self, lu)
7920 self.instance_name = instance_name
7922 self.iallocator_name = iallocator_name
7923 self.remote_node = remote_node
7925 self.delay_iallocator = delay_iallocator
7926 self.early_release = early_release
7929 self.instance = None
7930 self.new_node = None
7931 self.target_node = None
7932 self.other_node = None
7933 self.remote_node_info = None
7934 self.node_secondary_ip = None
7937 def CheckArguments(mode, remote_node, iallocator):
7938 """Helper function for users of this class.
7941 # check for valid parameter combination
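# Accepted combinations (summary of the checks below):
#   - REPLACE_DISK_CHG: exactly one of remote_node or iallocator is required
#   - any other mode:   neither remote_node nor iallocator may be given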
7942 if mode == constants.REPLACE_DISK_CHG:
7943 if remote_node is None and iallocator is None:
7944 raise errors.OpPrereqError("When changing the secondary either an"
7945 " iallocator script must be used or the"
7946 " new node given", errors.ECODE_INVAL)
7948 if remote_node is not None and iallocator is not None:
7949 raise errors.OpPrereqError("Give either the iallocator or the new"
7950 " secondary, not both", errors.ECODE_INVAL)
7952 elif remote_node is not None or iallocator is not None:
7953 # Not replacing the secondary
7954 raise errors.OpPrereqError("The iallocator and new node options can"
7955 " only be used when changing the"
7956 " secondary node", errors.ECODE_INVAL)
7959 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7960 """Compute a new secondary node using an IAllocator.
7963 ial = IAllocator(lu.cfg, lu.rpc,
7964 mode=constants.IALLOCATOR_MODE_RELOC,
7966 relocate_from=relocate_from)
7968 ial.Run(iallocator_name)
7971 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7972 " %s" % (iallocator_name, ial.info),
7975 if len(ial.result) != ial.required_nodes:
7976 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7977 " of nodes (%s), required %s" %
7979 len(ial.result), ial.required_nodes),
7982 remote_node_name = ial.result[0]
7984 lu.LogInfo("Selected new secondary for instance '%s': %s",
7985 instance_name, remote_node_name)
7987 return remote_node_name
7989 def _FindFaultyDisks(self, node_name):
7990 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7993 def CheckPrereq(self):
7994 """Check prerequisites.
7996 This checks that the instance is in the cluster.
7999 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8000 assert instance is not None, \
8001 "Cannot retrieve locked instance %s" % self.instance_name
8003 if instance.disk_template != constants.DT_DRBD8:
8004 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8005 " instances", errors.ECODE_INVAL)
8007 if len(instance.secondary_nodes) != 1:
8008 raise errors.OpPrereqError("The instance has a strange layout,"
8009 " expected one secondary but found %d" %
8010 len(instance.secondary_nodes),
8013 if not self.delay_iallocator:
8014 self._CheckPrereq2()
8016 def _CheckPrereq2(self):
8017 """Check prerequisites, second part.
8019 This function should always be part of CheckPrereq. It was separated and is
8020 now called from Exec because during node evacuation iallocator was only
8021 called with an unmodified cluster model, not taking planned changes into account.
8025 instance = self.instance
8026 secondary_node = instance.secondary_nodes[0]
8028 if self.iallocator_name is None:
8029 remote_node = self.remote_node
8031 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8032 instance.name, instance.secondary_nodes)
8034 if remote_node is not None:
8035 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8036 assert self.remote_node_info is not None, \
8037 "Cannot retrieve locked node %s" % remote_node
8039 self.remote_node_info = None
8041 if remote_node == self.instance.primary_node:
8042 raise errors.OpPrereqError("The specified node is the primary node of"
8043 " the instance.", errors.ECODE_INVAL)
8045 if remote_node == secondary_node:
8046 raise errors.OpPrereqError("The specified node is already the"
8047 " secondary node of the instance.",
8050 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8051 constants.REPLACE_DISK_CHG):
8052 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8055 if self.mode == constants.REPLACE_DISK_AUTO:
8056 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8057 faulty_secondary = self._FindFaultyDisks(secondary_node)
8059 if faulty_primary and faulty_secondary:
8060 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8061 " one node and can not be repaired"
8062 " automatically" % self.instance_name,
8066 self.disks = faulty_primary
8067 self.target_node = instance.primary_node
8068 self.other_node = secondary_node
8069 check_nodes = [self.target_node, self.other_node]
8070 elif faulty_secondary:
8071 self.disks = faulty_secondary
8072 self.target_node = secondary_node
8073 self.other_node = instance.primary_node
8074 check_nodes = [self.target_node, self.other_node]
8080 # Non-automatic modes
8081 if self.mode == constants.REPLACE_DISK_PRI:
8082 self.target_node = instance.primary_node
8083 self.other_node = secondary_node
8084 check_nodes = [self.target_node, self.other_node]
8086 elif self.mode == constants.REPLACE_DISK_SEC:
8087 self.target_node = secondary_node
8088 self.other_node = instance.primary_node
8089 check_nodes = [self.target_node, self.other_node]
8091 elif self.mode == constants.REPLACE_DISK_CHG:
8092 self.new_node = remote_node
8093 self.other_node = instance.primary_node
8094 self.target_node = secondary_node
8095 check_nodes = [self.new_node, self.other_node]
8097 _CheckNodeNotDrained(self.lu, remote_node)
8098 _CheckNodeVmCapable(self.lu, remote_node)
8100 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8101 assert old_node_info is not None
8102 if old_node_info.offline and not self.early_release:
8103 # doesn't make sense to delay the release
8104 self.early_release = True
8105 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8106 " early-release mode", secondary_node)
8109 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8112 # If not specified all disks should be replaced
8114 self.disks = range(len(self.instance.disks))
8116 for node in check_nodes:
8117 _CheckNodeOnline(self.lu, node)
8119 # Check whether disks are valid
8120 for disk_idx in self.disks:
8121 instance.FindDisk(disk_idx)
8123 # Get secondary node IP addresses
8126 for node_name in [self.target_node, self.other_node, self.new_node]:
8127 if node_name is not None:
8128 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8130 self.node_secondary_ip = node_2nd_ip
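# At this point the roles are fixed: target_node is the node whose disks are
# being replaced (for REPLACE_DISK_CHG, the old secondary), other_node is its
# peer, and new_node is only set when the secondary itself is being changed.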
8132 def Exec(self, feedback_fn):
8133 """Execute disk replacement.
8135 This dispatches the disk replacement to the appropriate handler.
8138 if self.delay_iallocator:
8139 self._CheckPrereq2()
8142 feedback_fn("No disks need replacement")
8145 feedback_fn("Replacing disk(s) %s for %s" %
8146 (utils.CommaJoin(self.disks), self.instance.name))
8148 activate_disks = (not self.instance.admin_up)
8150 # Activate the instance disks if we're replacing them on a down instance
8152 _StartInstanceDisks(self.lu, self.instance, True)
8155 # Should we replace the secondary node?
8156 if self.new_node is not None:
8157 fn = self._ExecDrbd8Secondary
8159 fn = self._ExecDrbd8DiskOnly
8161 return fn(feedback_fn)
8164 # Deactivate the instance disks if we're replacing them on a
8167 _SafeShutdownInstanceDisks(self.lu, self.instance)
8169 def _CheckVolumeGroup(self, nodes):
8170 self.lu.LogInfo("Checking volume groups")
8172 vgname = self.cfg.GetVGName()
8174 # Make sure volume group exists on all involved nodes
8175 results = self.rpc.call_vg_list(nodes)
8177 raise errors.OpExecError("Can't list volume groups on the nodes")
8181 res.Raise("Error checking node %s" % node)
8182 if vgname not in res.payload:
8183 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8186 def _CheckDisksExistence(self, nodes):
8187 # Check disk existence
8188 for idx, dev in enumerate(self.instance.disks):
8189 if idx not in self.disks:
8193 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8194 self.cfg.SetDiskID(dev, node)
8196 result = self.rpc.call_blockdev_find(node, dev)
8198 msg = result.fail_msg
8199 if msg or not result.payload:
8201 msg = "disk not found"
8202 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8205 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8206 for idx, dev in enumerate(self.instance.disks):
8207 if idx not in self.disks:
8210 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8213 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8215 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8216 " replace disks for instance %s" %
8217 (node_name, self.instance.name))
8219 def _CreateNewStorage(self, node_name):
8220 vgname = self.cfg.GetVGName()
8223 for idx, dev in enumerate(self.instance.disks):
8224 if idx not in self.disks:
8227 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8229 self.cfg.SetDiskID(dev, node_name)
8231 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8232 names = _GenerateUniqueNames(self.lu, lv_names)
8234 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8235 logical_id=(vgname, names[0]))
8236 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8237 logical_id=(vgname, names[1]))
8239 new_lvs = [lv_data, lv_meta]
8240 old_lvs = dev.children
8241 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
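# iv_names maps each affected disk's iv_name to (drbd_dev, old_lvs, new_lvs);
# it drives the detach/rename/attach loop below and the final old-LV cleanup.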
8243 # we pass force_create=True to force the LVM creation
8244 for new_lv in new_lvs:
8245 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8246 _GetInstanceInfoText(self.instance), False)
8250 def _CheckDevices(self, node_name, iv_names):
8251 for name, (dev, _, _) in iv_names.iteritems():
8252 self.cfg.SetDiskID(dev, node_name)
8254 result = self.rpc.call_blockdev_find(node_name, dev)
8256 msg = result.fail_msg
8257 if msg or not result.payload:
8259 msg = "disk not found"
8260 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8263 if result.payload.is_degraded:
8264 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8266 def _RemoveOldStorage(self, node_name, iv_names):
8267 for name, (_, old_lvs, _) in iv_names.iteritems():
8268 self.lu.LogInfo("Remove logical volumes for %s" % name)
8271 self.cfg.SetDiskID(lv, node_name)
8273 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8275 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8276 hint="remove unused LVs manually")
8278 def _ReleaseNodeLock(self, node_name):
8279 """Releases the lock for a given node."""
8280 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8282 def _ExecDrbd8DiskOnly(self, feedback_fn):
8283 """Replace a disk on the primary or secondary for DRBD 8.
8285 The algorithm for replace is quite complicated:
8287 1. for each disk to be replaced:
8289 1. create new LVs on the target node with unique names
8290 1. detach old LVs from the drbd device
8291 1. rename old LVs to name_replaced.<time_t>
8292 1. rename new LVs to old LVs
8293 1. attach the new LVs (with the old names now) to the drbd device
8295 1. wait for sync across all devices
8297 1. for each modified disk:
8299 1. remove old LVs (which have the name name_replaced.<time_t>)
8301 Failures are not very well handled.
8306 # Step: check device activation
8307 self.lu.LogStep(1, steps_total, "Check device existence")
8308 self._CheckDisksExistence([self.other_node, self.target_node])
8309 self._CheckVolumeGroup([self.target_node, self.other_node])
8311 # Step: check other node consistency
8312 self.lu.LogStep(2, steps_total, "Check peer consistency")
8313 self._CheckDisksConsistency(self.other_node,
8314 self.other_node == self.instance.primary_node,
8317 # Step: create new storage
8318 self.lu.LogStep(3, steps_total, "Allocate new storage")
8319 iv_names = self._CreateNewStorage(self.target_node)
8321 # Step: for each lv, detach+rename*2+attach
8322 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8323 for dev, old_lvs, new_lvs in iv_names.itervalues():
8324 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8326 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8328 result.Raise("Can't detach drbd from local storage on node"
8329 " %s for device %s" % (self.target_node, dev.iv_name))
8331 #cfg.Update(instance)
8333 # ok, we created the new LVs, so now we know we have the needed
8334 # storage; as such, we proceed on the target node to rename
8335 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8336 # using the assumption that logical_id == physical_id (which in
8337 # turn is the unique_id on that node)
8339 # FIXME(iustin): use a better name for the replaced LVs
8340 temp_suffix = int(time.time())
8341 ren_fn = lambda d, suff: (d.physical_id[0],
8342 d.physical_id[1] + "_replaced-%s" % suff)
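# E.g. (illustrative) an LV with physical_id ("xenvg", ".disk0_data") would be
# renamed to ("xenvg", ".disk0_data_replaced-1361000000"), keeping it around
# until the old storage is removed in the last step.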
8344 # Build the rename list based on what LVs exist on the node
8345 rename_old_to_new = []
8346 for to_ren in old_lvs:
8347 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8348 if not result.fail_msg and result.payload:
8350 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8352 self.lu.LogInfo("Renaming the old LVs on the target node")
8353 result = self.rpc.call_blockdev_rename(self.target_node,
8355 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8357 # Now we rename the new LVs to the old LVs
8358 self.lu.LogInfo("Renaming the new LVs on the target node")
8359 rename_new_to_old = [(new, old.physical_id)
8360 for old, new in zip(old_lvs, new_lvs)]
8361 result = self.rpc.call_blockdev_rename(self.target_node,
8363 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8365 for old, new in zip(old_lvs, new_lvs):
8366 new.logical_id = old.logical_id
8367 self.cfg.SetDiskID(new, self.target_node)
8369 for disk in old_lvs:
8370 disk.logical_id = ren_fn(disk, temp_suffix)
8371 self.cfg.SetDiskID(disk, self.target_node)
8373 # Now that the new lvs have the old name, we can add them to the device
8374 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8375 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8377 msg = result.fail_msg
8379 for new_lv in new_lvs:
8380 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8383 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8384 hint=("cleanup manually the unused logical"
8386 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8388 dev.children = new_lvs
8390 self.cfg.Update(self.instance, feedback_fn)
8393 if self.early_release:
8394 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8396 self._RemoveOldStorage(self.target_node, iv_names)
8397 # WARNING: we release both node locks here, do not do other RPCs
8398 # than WaitForSync to the primary node
8399 self._ReleaseNodeLock([self.target_node, self.other_node])
8402 # This can fail as the old devices are degraded and _WaitForSync
8403 # does a combined result over all disks, so we don't check its return value
8404 self.lu.LogStep(cstep, steps_total, "Sync devices")
8406 _WaitForSync(self.lu, self.instance)
8408 # Check all devices manually
8409 self._CheckDevices(self.instance.primary_node, iv_names)
8411 # Step: remove old storage
8412 if not self.early_release:
8413 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8415 self._RemoveOldStorage(self.target_node, iv_names)
8417 def _ExecDrbd8Secondary(self, feedback_fn):
8418 """Replace the secondary node for DRBD 8.
8420 The algorithm for replace is quite complicated:
8421 - for all disks of the instance:
8422 - create new LVs on the new node with same names
8423 - shutdown the drbd device on the old secondary
8424 - disconnect the drbd network on the primary
8425 - create the drbd device on the new secondary
8426 - network attach the drbd on the primary, using an artifice:
8427 the drbd code for Attach() will connect to the network if it
8428 finds a device which is connected to the good local disks but
8430 - wait for sync across all devices
8431 - remove all disks from the old secondary
8433 Failures are not very well handled.
8438 # Step: check device activation
8439 self.lu.LogStep(1, steps_total, "Check device existence")
8440 self._CheckDisksExistence([self.instance.primary_node])
8441 self._CheckVolumeGroup([self.instance.primary_node])
8443 # Step: check other node consistency
8444 self.lu.LogStep(2, steps_total, "Check peer consistency")
8445 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8447 # Step: create new storage
8448 self.lu.LogStep(3, steps_total, "Allocate new storage")
8449 for idx, dev in enumerate(self.instance.disks):
8450 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8451 (self.new_node, idx))
8452 # we pass force_create=True to force LVM creation
8453 for new_lv in dev.children:
8454 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8455 _GetInstanceInfoText(self.instance), False)
8457 # Step 4: drbd minors and drbd setup changes
8458 # after this, we must manually remove the drbd minors on both the
8459 # error and the success paths
8460 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8461 minors = self.cfg.AllocateDRBDMinor([self.new_node
8462 for dev in self.instance.disks],
8464 logging.debug("Allocated minors %r", minors)
8467 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8468 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
8469 (self.new_node, idx))
8470 # create new devices on new_node; note that we create two IDs:
8471 # one without port, so the drbd will be activated without
8472 # networking information on the new node at this stage, and one
8473 # with network, for the later activation in step 4
8474 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8475 if self.instance.primary_node == o_node1:
8478 assert self.instance.primary_node == o_node2, "Three-node instance?"
8481 new_alone_id = (self.instance.primary_node, self.new_node, None,
8482 p_minor, new_minor, o_secret)
8483 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8484 p_minor, new_minor, o_secret)
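# Both tuples follow the DRBD8 logical_id layout unpacked above:
# (node_A, node_B, port, minor_A, minor_B, shared_secret); new_alone_id has
# no port, so the device first comes up without networking on the new node.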
8486 iv_names[idx] = (dev, dev.children, new_net_id)
8487 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8489 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8490 logical_id=new_alone_id,
8491 children=dev.children,
8494 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8495 _GetInstanceInfoText(self.instance), False)
8496 except errors.GenericError:
8497 self.cfg.ReleaseDRBDMinors(self.instance.name)
8500 # We have new devices, shutdown the drbd on the old secondary
8501 for idx, dev in enumerate(self.instance.disks):
8502 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8503 self.cfg.SetDiskID(dev, self.target_node)
8504 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8506 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8507 "node: %s" % (idx, msg),
8508 hint=("Please cleanup this device manually as"
8509 " soon as possible"))
8511 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8512 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8513 self.node_secondary_ip,
8514 self.instance.disks)\
8515 [self.instance.primary_node]
8517 msg = result.fail_msg
8519 # detaches didn't succeed (unlikely)
8520 self.cfg.ReleaseDRBDMinors(self.instance.name)
8521 raise errors.OpExecError("Can't detach the disks from the network on"
8522 " old node: %s" % (msg,))
8524 # if we managed to detach at least one, we update all the disks of
8525 # the instance to point to the new secondary
8526 self.lu.LogInfo("Updating instance configuration")
8527 for dev, _, new_logical_id in iv_names.itervalues():
8528 dev.logical_id = new_logical_id
8529 self.cfg.SetDiskID(dev, self.instance.primary_node)
8531 self.cfg.Update(self.instance, feedback_fn)
8533 # and now perform the drbd attach
8534 self.lu.LogInfo("Attaching primary drbds to new secondary"
8535 " (standalone => connected)")
8536 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8538 self.node_secondary_ip,
8539 self.instance.disks,
8542 for to_node, to_result in result.items():
8543 msg = to_result.fail_msg
8545 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8547 hint=("please do a gnt-instance info to see the"
8548 " status of disks"))
8550 if self.early_release:
8551 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8553 self._RemoveOldStorage(self.target_node, iv_names)
8554 # WARNING: we release all node locks here, do not do other RPCs
8555 # than WaitForSync to the primary node
8556 self._ReleaseNodeLock([self.instance.primary_node,
8561 # This can fail as the old devices are degraded and _WaitForSync
8562 # does a combined result over all disks, so we don't check its return value
8563 self.lu.LogStep(cstep, steps_total, "Sync devices")
8565 _WaitForSync(self.lu, self.instance)
8567 # Check all devices manually
8568 self._CheckDevices(self.instance.primary_node, iv_names)
8570 # Step: remove old storage
8571 if not self.early_release:
8572 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8573 self._RemoveOldStorage(self.target_node, iv_names)
8576 class LURepairNodeStorage(NoHooksLU):
8577 """Repairs the volume group on a node.
8582 def CheckArguments(self):
8583 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8585 storage_type = self.op.storage_type
8587 if (constants.SO_FIX_CONSISTENCY not in
8588 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8589 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8590 " repaired" % storage_type,
8593 def ExpandNames(self):
8594 self.needed_locks = {
8595 locking.LEVEL_NODE: [self.op.node_name],
8598 def _CheckFaultyDisks(self, instance, node_name):
8599 """Ensure faulty disks abort the opcode or at least warn."""
8601 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8603 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8604 " node '%s'" % (instance.name, node_name),
8606 except errors.OpPrereqError, err:
8607 if self.op.ignore_consistency:
8608 self.proc.LogWarning(str(err.args[0]))
8612 def CheckPrereq(self):
8613 """Check prerequisites.
8616 # Check whether any instance on this node has faulty disks
8617 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8618 if not inst.admin_up:
8620 check_nodes = set(inst.all_nodes)
8621 check_nodes.discard(self.op.node_name)
8622 for inst_node_name in check_nodes:
8623 self._CheckFaultyDisks(inst, inst_node_name)
8625 def Exec(self, feedback_fn):
8626 feedback_fn("Repairing storage unit '%s' on %s ..." %
8627 (self.op.name, self.op.node_name))
8629 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8630 result = self.rpc.call_storage_execute(self.op.node_name,
8631 self.op.storage_type, st_args,
8633 constants.SO_FIX_CONSISTENCY)
8634 result.Raise("Failed to repair storage unit '%s' on %s" %
8635 (self.op.name, self.op.node_name))
8638 class LUNodeEvacStrategy(NoHooksLU):
8639 """Computes the node evacuation strategy.
8644 def CheckArguments(self):
8645 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8647 def ExpandNames(self):
8648 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8649 self.needed_locks = locks = {}
8650 if self.op.remote_node is None:
8651 locks[locking.LEVEL_NODE] = locking.ALL_SET
8653 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8654 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8656 def Exec(self, feedback_fn):
8657 if self.op.remote_node is not None:
8659 for node in self.op.nodes:
8660 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8663 if i.primary_node == self.op.remote_node:
8664 raise errors.OpPrereqError("Node %s is the primary node of"
8665 " instance %s, cannot use it as"
8667 (self.op.remote_node, i.name),
8669 result.append([i.name, self.op.remote_node])
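# With an explicit remote node, the result is simply a list of
# [instance_name, target_node] pairs covering all secondary instances of the
# given nodes.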
8671 ial = IAllocator(self.cfg, self.rpc,
8672 mode=constants.IALLOCATOR_MODE_MEVAC,
8673 evac_nodes=self.op.nodes)
8674 ial.Run(self.op.iallocator, validate=True)
8676 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8682 class LUInstanceGrowDisk(LogicalUnit):
8683 """Grow a disk of an instance.
8687 HTYPE = constants.HTYPE_INSTANCE
8690 def ExpandNames(self):
8691 self._ExpandAndLockInstance()
8692 self.needed_locks[locking.LEVEL_NODE] = []
8693 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8695 def DeclareLocks(self, level):
8696 if level == locking.LEVEL_NODE:
8697 self._LockInstancesNodes()
8699 def BuildHooksEnv(self):
8702 This runs on the master, the primary and all the secondaries.
8706 "DISK": self.op.disk,
8707 "AMOUNT": self.op.amount,
8709 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8710 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8713 def CheckPrereq(self):
8714 """Check prerequisites.
8716 This checks that the instance is in the cluster.
8719 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8720 assert instance is not None, \
8721 "Cannot retrieve locked instance %s" % self.op.instance_name
8722 nodenames = list(instance.all_nodes)
8723 for node in nodenames:
8724 _CheckNodeOnline(self, node)
8726 self.instance = instance
8728 if instance.disk_template not in constants.DTS_GROWABLE:
8729 raise errors.OpPrereqError("Instance's disk layout does not support"
8730 " growing.", errors.ECODE_INVAL)
8732 self.disk = instance.FindDisk(self.op.disk)
8734 if instance.disk_template != constants.DT_FILE:
8735 # TODO: check the free disk space for file, when that feature
8737 _CheckNodesFreeDiskPerVG(self, nodenames,
8738 self.disk.ComputeGrowth(self.op.amount))
8740 def Exec(self, feedback_fn):
8741 """Execute disk grow.
8744 instance = self.instance
8747 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8749 raise errors.OpExecError("Cannot activate block device to grow")
8751 for node in instance.all_nodes:
8752 self.cfg.SetDiskID(disk, node)
8753 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8754 result.Raise("Grow request failed to node %s" % node)
8756 # TODO: Rewrite code to work properly
8757 # DRBD goes into sync mode for a short amount of time after executing the
8758 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8759 # calling "resize" in sync mode fails. Sleeping for a short amount of
8760 # time is a work-around.
8763 disk.RecordGrow(self.op.amount)
8764 self.cfg.Update(instance, feedback_fn)
8765 if self.op.wait_for_sync:
8766 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8768 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8769 " status.\nPlease check the instance.")
8770 if not instance.admin_up:
8771 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8772 elif not instance.admin_up:
8773 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8774 " not supposed to be running, because wait_for_sync"
8775 " was not requested.")
8778 class LUInstanceQueryData(NoHooksLU):
8779 """Query runtime instance data.
8784 def ExpandNames(self):
8785 self.needed_locks = {}
8786 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8788 if self.op.instances:
8789 self.wanted_names = []
8790 for name in self.op.instances:
8791 full_name = _ExpandInstanceName(self.cfg, name)
8792 self.wanted_names.append(full_name)
8793 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8795 self.wanted_names = None
8796 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8798 self.needed_locks[locking.LEVEL_NODE] = []
8799 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8801 def DeclareLocks(self, level):
8802 if level == locking.LEVEL_NODE:
8803 self._LockInstancesNodes()
8805 def CheckPrereq(self):
8806 """Check prerequisites.
8808 This only checks the optional instance list against the existing names.
8811 if self.wanted_names is None:
8812 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8814 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8815 in self.wanted_names]
8817 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8818 """Returns the status of a block device
8821 if self.op.static or not node:
8824 self.cfg.SetDiskID(dev, node)
8826 result = self.rpc.call_blockdev_find(node, dev)
8830 result.Raise("Can't compute disk status for %s" % instance_name)
8832 status = result.payload
8836 return (status.dev_path, status.major, status.minor,
8837 status.sync_percent, status.estimated_time,
8838 status.is_degraded, status.ldisk_status)
8840 def _ComputeDiskStatus(self, instance, snode, dev):
8841 """Compute block device status.
8844 if dev.dev_type in constants.LDS_DRBD:
8845 # we change the snode then (otherwise we use the one passed in)
8846 if dev.logical_id[0] == instance.primary_node:
8847 snode = dev.logical_id[1]
8849 snode = dev.logical_id[0]
8851 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8853 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8856 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8857 for child in dev.children]
8862 "iv_name": dev.iv_name,
8863 "dev_type": dev.dev_type,
8864 "logical_id": dev.logical_id,
8865 "physical_id": dev.physical_id,
8866 "pstatus": dev_pstatus,
8867 "sstatus": dev_sstatus,
8868 "children": dev_children,
8875 def Exec(self, feedback_fn):
8876 """Gather and return data"""
8879 cluster = self.cfg.GetClusterInfo()
8881 for instance in self.wanted_instances:
8882 if not self.op.static:
8883 remote_info = self.rpc.call_instance_info(instance.primary_node,
8885 instance.hypervisor)
8886 remote_info.Raise("Error checking node %s" % instance.primary_node)
8887 remote_info = remote_info.payload
8888 if remote_info and "state" in remote_info:
8891 remote_state = "down"
8894 if instance.admin_up:
8897 config_state = "down"
8899 disks = [self._ComputeDiskStatus(instance, None, device)
8900 for device in instance.disks]
8903 "name": instance.name,
8904 "config_state": config_state,
8905 "run_state": remote_state,
8906 "pnode": instance.primary_node,
8907 "snodes": instance.secondary_nodes,
8909 # this happens to be the same format used for hooks
8910 "nics": _NICListToTuple(self, instance.nics),
8911 "disk_template": instance.disk_template,
8913 "hypervisor": instance.hypervisor,
8914 "network_port": instance.network_port,
8915 "hv_instance": instance.hvparams,
8916 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8917 "be_instance": instance.beparams,
8918 "be_actual": cluster.FillBE(instance),
8919 "os_instance": instance.osparams,
8920 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8921 "serial_no": instance.serial_no,
8922 "mtime": instance.mtime,
8923 "ctime": instance.ctime,
8924 "uuid": instance.uuid,
8927 result[instance.name] = idict
8932 class LUInstanceSetParams(LogicalUnit):
8933 """Modifies an instances's parameters.
8936 HPATH = "instance-modify"
8937 HTYPE = constants.HTYPE_INSTANCE
8940 def CheckArguments(self):
8941 if not (self.op.nics or self.op.disks or self.op.disk_template or
8942 self.op.hvparams or self.op.beparams or self.op.os_name):
8943 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8945 if self.op.hvparams:
8946 _CheckGlobalHvParams(self.op.hvparams)
8950 for disk_op, disk_dict in self.op.disks:
8951 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8952 if disk_op == constants.DDM_REMOVE:
8955 elif disk_op == constants.DDM_ADD:
8958 if not isinstance(disk_op, int):
8959 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8960 if not isinstance(disk_dict, dict):
8961 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8962 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8964 if disk_op == constants.DDM_ADD:
8965 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8966 if mode not in constants.DISK_ACCESS_SET:
8967 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8969 size = disk_dict.get('size', None)
8971 raise errors.OpPrereqError("Required disk parameter size missing",
8975 except (TypeError, ValueError), err:
8976 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8977 str(err), errors.ECODE_INVAL)
8978 disk_dict['size'] = size
8980 # modification of disk
8981 if 'size' in disk_dict:
8982 raise errors.OpPrereqError("Disk size change not possible, use"
8983 " grow-disk", errors.ECODE_INVAL)
8985 if disk_addremove > 1:
8986 raise errors.OpPrereqError("Only one disk add or remove operation"
8987 " supported at a time", errors.ECODE_INVAL)
8989 if self.op.disks and self.op.disk_template is not None:
8990 raise errors.OpPrereqError("Disk template conversion and other disk"
8991 " changes not supported at the same time",
8994 if (self.op.disk_template and
8995 self.op.disk_template in constants.DTS_NET_MIRROR and
8996 self.op.remote_node is None):
8997 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8998 " one requires specifying a secondary node",
9003 for nic_op, nic_dict in self.op.nics:
9004 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9005 if nic_op == constants.DDM_REMOVE:
9008 elif nic_op == constants.DDM_ADD:
9011 if not isinstance(nic_op, int):
9012 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9013 if not isinstance(nic_dict, dict):
9014 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9015 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9017 # nic_dict should be a dict
9018 nic_ip = nic_dict.get('ip', None)
9019 if nic_ip is not None:
9020 if nic_ip.lower() == constants.VALUE_NONE:
9021 nic_dict['ip'] = None
9023 if not netutils.IPAddress.IsValid(nic_ip):
9024 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9027 nic_bridge = nic_dict.get('bridge', None)
9028 nic_link = nic_dict.get('link', None)
9029 if nic_bridge and nic_link:
9030 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9031 " at the same time", errors.ECODE_INVAL)
9032 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9033 nic_dict['bridge'] = None
9034 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9035 nic_dict['link'] = None
9037 if nic_op == constants.DDM_ADD:
9038 nic_mac = nic_dict.get('mac', None)
9040 nic_dict['mac'] = constants.VALUE_AUTO
9042 if 'mac' in nic_dict:
9043 nic_mac = nic_dict['mac']
9044 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9045 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9047 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9048 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9049 " modifying an existing nic",
9052 if nic_addremove > 1:
9053 raise errors.OpPrereqError("Only one NIC add or remove operation"
9054 " supported at a time", errors.ECODE_INVAL)
9056 def ExpandNames(self):
9057 self._ExpandAndLockInstance()
9058 self.needed_locks[locking.LEVEL_NODE] = []
9059 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9061 def DeclareLocks(self, level):
9062 if level == locking.LEVEL_NODE:
9063 self._LockInstancesNodes()
9064 if self.op.disk_template and self.op.remote_node:
9065 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9066 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9068 def BuildHooksEnv(self):
9071 This runs on the master, primary and secondaries.
9075 if constants.BE_MEMORY in self.be_new:
9076 args['memory'] = self.be_new[constants.BE_MEMORY]
9077 if constants.BE_VCPUS in self.be_new:
9078 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9079 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9080 # information at all.
9083 nic_override = dict(self.op.nics)
9084 for idx, nic in enumerate(self.instance.nics):
9085 if idx in nic_override:
9086 this_nic_override = nic_override[idx]
9088 this_nic_override = {}
9089 if 'ip' in this_nic_override:
9090 ip = this_nic_override['ip']
9093 if 'mac' in this_nic_override:
9094 mac = this_nic_override['mac']
9097 if idx in self.nic_pnew:
9098 nicparams = self.nic_pnew[idx]
9100 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9101 mode = nicparams[constants.NIC_MODE]
9102 link = nicparams[constants.NIC_LINK]
9103 args['nics'].append((ip, mac, mode, link))
9104 if constants.DDM_ADD in nic_override:
9105 ip = nic_override[constants.DDM_ADD].get('ip', None)
9106 mac = nic_override[constants.DDM_ADD]['mac']
9107 nicparams = self.nic_pnew[constants.DDM_ADD]
9108 mode = nicparams[constants.NIC_MODE]
9109 link = nicparams[constants.NIC_LINK]
9110 args['nics'].append((ip, mac, mode, link))
9111 elif constants.DDM_REMOVE in nic_override:
9112 del args['nics'][-1]
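# args['nics'] now mirrors what the hooks environment expects: one
# (ip, mac, mode, link) tuple per NIC, with the pending add/remove applied.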
9114 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9115 if self.op.disk_template:
9116 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9120 def CheckPrereq(self):
9121 """Check prerequisites.
9123 This only checks the instance list against the existing names.
9126 # checking the new params on the primary/secondary nodes
9128 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9129 cluster = self.cluster = self.cfg.GetClusterInfo()
9130 assert self.instance is not None, \
9131 "Cannot retrieve locked instance %s" % self.op.instance_name
9132 pnode = instance.primary_node
9133 nodelist = list(instance.all_nodes)
9136 if self.op.os_name and not self.op.force:
9137 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9138 self.op.force_variant)
9139 instance_os = self.op.os_name
9141 instance_os = instance.os
9143 if self.op.disk_template:
9144 if instance.disk_template == self.op.disk_template:
9145 raise errors.OpPrereqError("Instance already has disk template %s" %
9146 instance.disk_template, errors.ECODE_INVAL)
9148 if (instance.disk_template,
9149 self.op.disk_template) not in self._DISK_CONVERSIONS:
9150 raise errors.OpPrereqError("Unsupported disk template conversion from"
9151 " %s to %s" % (instance.disk_template,
9152 self.op.disk_template),
9154 _CheckInstanceDown(self, instance, "cannot change disk template")
9155 if self.op.disk_template in constants.DTS_NET_MIRROR:
9156 if self.op.remote_node == pnode:
9157 raise errors.OpPrereqError("Given new secondary node %s is the same"
9158 " as the primary node of the instance" %
9159 self.op.remote_node, errors.ECODE_STATE)
9160 _CheckNodeOnline(self, self.op.remote_node)
9161 _CheckNodeNotDrained(self, self.op.remote_node)
9162 # FIXME: here we assume that the old instance type is DT_PLAIN
9163 assert instance.disk_template == constants.DT_PLAIN
9164 disks = [{"size": d.size, "vg": d.logical_id[0]}
9165 for d in instance.disks]
9166 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9167 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9169 # hvparams processing
9170 if self.op.hvparams:
9171 hv_type = instance.hypervisor
9172 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9173 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9174 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9177 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9178 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9179 self.hv_new = hv_new # the new actual values
9180 self.hv_inst = i_hvdict # the new dict (without defaults)
9182 self.hv_new = self.hv_inst = {}
9184 # beparams processing
9185 if self.op.beparams:
9186 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9188 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9189 be_new = cluster.SimpleFillBE(i_bedict)
9190 self.be_new = be_new # the new actual values
9191 self.be_inst = i_bedict # the new dict (without defaults)
9193 self.be_new = self.be_inst = {}
9195 # osparams processing
9196 if self.op.osparams:
9197 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9198 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9199 self.os_inst = i_osdict # the new dict (without defaults)
9205 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9206 mem_check_list = [pnode]
9207 if be_new[constants.BE_AUTO_BALANCE]:
9208 # either we changed auto_balance to yes or it was from before
9209 mem_check_list.extend(instance.secondary_nodes)
9210 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9211 instance.hypervisor)
9212 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9213 instance.hypervisor)
9214 pninfo = nodeinfo[pnode]
9215 msg = pninfo.fail_msg
9217 # Assume the primary node is unreachable and go ahead
9218 self.warn.append("Can't get info from primary node %s: %s" %
9220 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9221 self.warn.append("Node data from primary node %s doesn't contain"
9222 " free memory information" % pnode)
9223 elif instance_info.fail_msg:
9224 self.warn.append("Can't get instance runtime information: %s" %
9225 instance_info.fail_msg)
9227 if instance_info.payload:
9228 current_mem = int(instance_info.payload['memory'])
9230 # Assume instance not running
9231 # (there is a slight race condition here, but it's not very probable,
9232 # and we have no other way to check)
9234 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9235 pninfo.payload['memory_free'])
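# Illustrative arithmetic: asking for 2048 MB when the instance currently
# uses 512 MB and the primary node reports 1024 MB free gives
# miss_mem = 2048 - 512 - 1024 = 512 MB missing, which aborts the change.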
9237 raise errors.OpPrereqError("This change will prevent the instance"
9238 " from starting, due to %d MB of memory"
9239 " missing on its primary node" % miss_mem,
9242 if be_new[constants.BE_AUTO_BALANCE]:
9243 for node, nres in nodeinfo.items():
9244 if node not in instance.secondary_nodes:
9248 self.warn.append("Can't get info from secondary node %s: %s" %
9250 elif not isinstance(nres.payload.get('memory_free', None), int):
9251 self.warn.append("Secondary node %s didn't return free"
9252 " memory information" % node)
9253 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9254 self.warn.append("Not enough memory to failover instance to"
9255 " secondary node %s" % node)
9260 for nic_op, nic_dict in self.op.nics:
9261 if nic_op == constants.DDM_REMOVE:
9262 if not instance.nics:
9263 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9266 if nic_op != constants.DDM_ADD:
9268 if not instance.nics:
9269 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9270 " no NICs" % nic_op,
9272 if nic_op < 0 or nic_op >= len(instance.nics):
9273 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9275 (nic_op, len(instance.nics) - 1),
9277 old_nic_params = instance.nics[nic_op].nicparams
9278 old_nic_ip = instance.nics[nic_op].ip
9283 update_params_dict = dict([(key, nic_dict[key])
9284 for key in constants.NICS_PARAMETERS
9285 if key in nic_dict])
9287 if 'bridge' in nic_dict:
9288 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
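# 'bridge' is the legacy name for the link parameter; map it onto NIC_LINK
# for backwards compatibility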
9290 new_nic_params = _GetUpdatedParams(old_nic_params,
9292 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9293 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9294 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9295 self.nic_pinst[nic_op] = new_nic_params
9296 self.nic_pnew[nic_op] = new_filled_nic_params
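# nic_pinst keeps the per-instance (unfilled) parameters that get stored on
# the instance in Exec; nic_pnew keeps the cluster-filled values used for
# validation and reporting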
9297 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9299 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9300 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9301 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9303 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9305 self.warn.append(msg)
9307 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9308 if new_nic_mode == constants.NIC_MODE_ROUTED:
9309 if 'ip' in nic_dict:
9310 nic_ip = nic_dict['ip']
9314 raise errors.OpPrereqError('Cannot set the nic ip to None'
9315 ' on a routed nic', errors.ECODE_INVAL)
9316 if 'mac' in nic_dict:
9317 nic_mac = nic_dict['mac']
9319 raise errors.OpPrereqError('Cannot set the nic mac to None',
9321 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9322 # otherwise generate the mac
9323 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9325 # or validate/reserve the current one
9327 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9328 except errors.ReservationError:
9329 raise errors.OpPrereqError("MAC address %s already in use"
9330 " in cluster" % nic_mac,
9331 errors.ECODE_NOTUNIQUE)
9334 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9335 raise errors.OpPrereqError("Disk operations not supported for"
9336 " diskless instances",
9338 for disk_op, _ in self.op.disks:
9339 if disk_op == constants.DDM_REMOVE:
9340 if len(instance.disks) == 1:
9341 raise errors.OpPrereqError("Cannot remove the last disk of"
9342 " an instance", errors.ECODE_INVAL)
9343 _CheckInstanceDown(self, instance, "cannot remove disks")
9345 if (disk_op == constants.DDM_ADD and
9346 len(instance.disks) >= constants.MAX_DISKS):
9347 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9348 " add more" % constants.MAX_DISKS,
9350 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9352 if disk_op < 0 or disk_op >= len(instance.disks):
9353 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9355 (disk_op, len(instance.disks)),
9360 def _ConvertPlainToDrbd(self, feedback_fn):
9361 """Converts an instance from plain to drbd.
9364 feedback_fn("Converting template to drbd")
9365 instance = self.instance
9366 pnode = instance.primary_node
9367 snode = self.op.remote_node
9369 # create a fake disk info for _GenerateDiskTemplate
9370 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9371 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9372 instance.name, pnode, [snode],
9373 disk_info, None, None, 0, feedback_fn)
9374 info = _GetInstanceInfoText(instance)
9375 feedback_fn("Creating additional volumes...")
9376 # first, create the missing data and meta devices
9377 for disk in new_disks:
9378 # unfortunately this is... not too nice
9379 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9381 for child in disk.children:
9382 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9383 # at this stage, all new LVs have been created, we can rename the
9385 feedback_fn("Renaming original volumes...")
9386 rename_list = [(o, n.children[0].logical_id)
9387 for (o, n) in zip(instance.disks, new_disks)]
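# pair each original (plain) LV with the logical id of the new DRBD data
# child, so the existing data volume is renamed into the DRBD backing device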
9388 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9389 result.Raise("Failed to rename original LVs")
9391 feedback_fn("Initializing DRBD devices...")
9392 # all child devices are in place, we can now create the DRBD devices
9393 for disk in new_disks:
9394 for node in [pnode, snode]:
9395 f_create = node == pnode
9396 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9398 # at this point, the instance has been modified
9399 instance.disk_template = constants.DT_DRBD8
9400 instance.disks = new_disks
9401 self.cfg.Update(instance, feedback_fn)
9403 # disks are created, waiting for sync
9404 disk_abort = not _WaitForSync(self, instance)
9406 raise errors.OpExecError("There are some degraded disks for"
9407 " this instance, please cleanup manually")
9409 def _ConvertDrbdToPlain(self, feedback_fn):
9410 """Converts an instance from drbd to plain.
9413 instance = self.instance
9414 assert len(instance.secondary_nodes) == 1
9415 pnode = instance.primary_node
9416 snode = instance.secondary_nodes[0]
9417 feedback_fn("Converting template to plain")
9419 old_disks = instance.disks
9420 new_disks = [d.children[0] for d in old_disks]
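# for DRBD8 disks, children[0] is the data LV and children[1] the metadata
# LV; the data LV simply becomes the plain disk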
9422 # copy over size and mode
9423 for parent, child in zip(old_disks, new_disks):
9424 child.size = parent.size
9425 child.mode = parent.mode
9427 # update instance structure
9428 instance.disks = new_disks
9429 instance.disk_template = constants.DT_PLAIN
9430 self.cfg.Update(instance, feedback_fn)
9432 feedback_fn("Removing volumes on the secondary node...")
9433 for disk in old_disks:
9434 self.cfg.SetDiskID(disk, snode)
9435 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9437 self.LogWarning("Could not remove block device %s on node %s,"
9438 " continuing anyway: %s", disk.iv_name, snode, msg)
9440 feedback_fn("Removing unneeded volumes on the primary node...")
9441 for idx, disk in enumerate(old_disks):
9442 meta = disk.children[1]
9443 self.cfg.SetDiskID(meta, pnode)
9444 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9446 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9447 " continuing anyway: %s", idx, pnode, msg)
9449 def Exec(self, feedback_fn):
9450 """Modifies an instance.
9452 All parameters take effect only at the next restart of the instance.
9455 # Process here the warnings from CheckPrereq, as we don't have a
9456 # feedback_fn there.
9457 for warn in self.warn:
9458 feedback_fn("WARNING: %s" % warn)
9461 instance = self.instance
9463 for disk_op, disk_dict in self.op.disks:
9464 if disk_op == constants.DDM_REMOVE:
9465 # remove the last disk
9466 device = instance.disks.pop()
9467 device_idx = len(instance.disks)
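# ComputeNodeTree yields (node, device) pairs for the whole device tree, so
# every component (e.g. DRBD data and meta LVs) is removed on the node that
# hosts it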
9468 for node, disk in device.ComputeNodeTree(instance.primary_node):
9469 self.cfg.SetDiskID(disk, node)
9470 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9472 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9473 " continuing anyway", device_idx, node, msg)
9474 result.append(("disk/%d" % device_idx, "remove"))
9475 elif disk_op == constants.DDM_ADD:
9477 if instance.disk_template == constants.DT_FILE:
9478 file_driver, file_path = instance.disks[0].logical_id
9479 file_path = os.path.dirname(file_path)
9481 file_driver = file_path = None
9482 disk_idx_base = len(instance.disks)
9483 new_disk = _GenerateDiskTemplate(self,
9484 instance.disk_template,
9485 instance.name, instance.primary_node,
9486 instance.secondary_nodes,
9490 disk_idx_base, feedback_fn)[0]
9491 instance.disks.append(new_disk)
9492 info = _GetInstanceInfoText(instance)
9494 logging.info("Creating volume %s for instance %s",
9495 new_disk.iv_name, instance.name)
9496 # Note: this needs to be kept in sync with _CreateDisks
9498 for node in instance.all_nodes:
9499 f_create = node == instance.primary_node
9501 _CreateBlockDev(self, node, instance, new_disk,
9502 f_create, info, f_create)
9503 except errors.OpExecError, err:
9504 self.LogWarning("Failed to create volume %s (%s) on"
9506 new_disk.iv_name, new_disk, node, err)
9507 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9508 (new_disk.size, new_disk.mode)))
9510 # change a given disk
9511 instance.disks[disk_op].mode = disk_dict['mode']
9512 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9514 if self.op.disk_template:
9515 r_shut = _ShutdownInstanceDisks(self, instance)
9517 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9518 " proceed with disk template conversion")
9519 mode = (instance.disk_template, self.op.disk_template)
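# dispatch to the conversion helper keyed by (current template, new
# template); see the _DISK_CONVERSIONS map at the end of the class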
9521 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9523 self.cfg.ReleaseDRBDMinors(instance.name)
9525 result.append(("disk_template", self.op.disk_template))
9528 for nic_op, nic_dict in self.op.nics:
9529 if nic_op == constants.DDM_REMOVE:
9530 # remove the last nic
9531 del instance.nics[-1]
9532 result.append(("nic.%d" % len(instance.nics), "remove"))
9533 elif nic_op == constants.DDM_ADD:
9534 # mac and bridge should be set by now
9535 mac = nic_dict['mac']
9536 ip = nic_dict.get('ip', None)
9537 nicparams = self.nic_pinst[constants.DDM_ADD]
9538 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9539 instance.nics.append(new_nic)
9540 result.append(("nic.%d" % (len(instance.nics) - 1),
9541 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9542 (new_nic.mac, new_nic.ip,
9543 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9544 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9547 for key in 'mac', 'ip':
9549 setattr(instance.nics[nic_op], key, nic_dict[key])
9550 if nic_op in self.nic_pinst:
9551 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9552 for key, val in nic_dict.iteritems():
9553 result.append(("nic.%s/%d" % (key, nic_op), val))
9556 if self.op.hvparams:
9557 instance.hvparams = self.hv_inst
9558 for key, val in self.op.hvparams.iteritems():
9559 result.append(("hv/%s" % key, val))
9562 if self.op.beparams:
9563 instance.beparams = self.be_inst
9564 for key, val in self.op.beparams.iteritems():
9565 result.append(("be/%s" % key, val))
9569 instance.os = self.op.os_name
9572 if self.op.osparams:
9573 instance.osparams = self.os_inst
9574 for key, val in self.op.osparams.iteritems():
9575 result.append(("os/%s" % key, val))
9577 self.cfg.Update(instance, feedback_fn)
9581 _DISK_CONVERSIONS = {
9582 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9583 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9587 class LUBackupQuery(NoHooksLU):
9588 """Query the exports list
9593 def ExpandNames(self):
9594 self.needed_locks = {}
9595 self.share_locks[locking.LEVEL_NODE] = 1
9596 if not self.op.nodes:
9597 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9599 self.needed_locks[locking.LEVEL_NODE] = \
9600 _GetWantedNodes(self, self.op.nodes)
9602 def Exec(self, feedback_fn):
9603 """Compute the list of all the exported system images.
9606 @return: a dictionary with the structure node->(export-list)
9607 where export-list is a list of the instances exported on
9611 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9612 rpcresult = self.rpc.call_export_list(self.nodes)
9614 for node in rpcresult:
9615 if rpcresult[node].fail_msg:
9616 result[node] = False
9618 result[node] = rpcresult[node].payload
9623 class LUBackupPrepare(NoHooksLU):
9624 """Prepares an instance for an export and returns useful information.
9629 def ExpandNames(self):
9630 self._ExpandAndLockInstance()
9632 def CheckPrereq(self):
9633 """Check prerequisites.
9636 instance_name = self.op.instance_name
9638 self.instance = self.cfg.GetInstanceInfo(instance_name)
9639 assert self.instance is not None, \
9640 "Cannot retrieve locked instance %s" % self.op.instance_name
9641 _CheckNodeOnline(self, self.instance.primary_node)
9643 self._cds = _GetClusterDomainSecret()
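# the cluster domain secret is used to authenticate the export handshake,
# the X509 key name and the CA certificate returned to the caller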
9645 def Exec(self, feedback_fn):
9646 """Prepares an instance for an export.
9649 instance = self.instance
9651 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9652 salt = utils.GenerateSecret(8)
9654 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9655 result = self.rpc.call_x509_cert_create(instance.primary_node,
9656 constants.RIE_CERT_VALIDITY)
9657 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9659 (name, cert_pem) = result.payload
9661 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9665 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9666 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9668 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9674 class LUBackupExport(LogicalUnit):
9675 """Export an instance to an image in the cluster.
9678 HPATH = "instance-export"
9679 HTYPE = constants.HTYPE_INSTANCE
9682 def CheckArguments(self):
9683 """Check the arguments.
9686 self.x509_key_name = self.op.x509_key_name
9687 self.dest_x509_ca_pem = self.op.destination_x509_ca
9689 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9690 if not self.x509_key_name:
9691 raise errors.OpPrereqError("Missing X509 key name for encryption",
9694 if not self.dest_x509_ca_pem:
9695 raise errors.OpPrereqError("Missing destination X509 CA",
9698 def ExpandNames(self):
9699 self._ExpandAndLockInstance()
9701 # Lock all nodes for local exports
9702 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9703 # FIXME: lock only instance primary and destination node
9705 # Sad but true, for now we have to lock all nodes, as we don't know where
9706 # the previous export might be, and in this LU we search for it and
9707 # remove it from its current node. In the future we could fix this by:
9708 # - making a tasklet to search (share-lock all), then create the
9709 # new one, then one to remove, after
9710 # - removing the removal operation altogether
9711 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9713 def DeclareLocks(self, level):
9714 """Last minute lock declaration."""
9715 # All nodes are locked anyway, so nothing to do here.
9717 def BuildHooksEnv(self):
9720 This will run on the master, primary node and target node.
9724 "EXPORT_MODE": self.op.mode,
9725 "EXPORT_NODE": self.op.target_node,
9726 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9727 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9728 # TODO: Generic function for boolean env variables
9729 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9732 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9734 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9736 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9737 nl.append(self.op.target_node)
9741 def CheckPrereq(self):
9742 """Check prerequisites.
9744 This checks that the instance and node names are valid.
9747 instance_name = self.op.instance_name
9749 self.instance = self.cfg.GetInstanceInfo(instance_name)
9750 assert self.instance is not None, \
9751 "Cannot retrieve locked instance %s" % self.op.instance_name
9752 _CheckNodeOnline(self, self.instance.primary_node)
9754 if (self.op.remove_instance and self.instance.admin_up and
9755 not self.op.shutdown):
9756 raise errors.OpPrereqError("Can not remove instance without shutting it"
9759 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9760 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9761 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9762 assert self.dst_node is not None
9764 _CheckNodeOnline(self, self.dst_node.name)
9765 _CheckNodeNotDrained(self, self.dst_node.name)
9768 self.dest_disk_info = None
9769 self.dest_x509_ca = None
9771 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9772 self.dst_node = None
9774 if len(self.op.target_node) != len(self.instance.disks):
9775 raise errors.OpPrereqError(("Received destination information for %s"
9776 " disks, but instance %s has %s disks") %
9777 (len(self.op.target_node), instance_name,
9778 len(self.instance.disks)),
9781 cds = _GetClusterDomainSecret()
9783 # Check X509 key name
9785 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9786 except (TypeError, ValueError), err:
9787 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9789 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9790 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9793 # Load and verify CA
9795 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9796 except OpenSSL.crypto.Error, err:
9797 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9798 (err, ), errors.ECODE_INVAL)
9800 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9801 if errcode is not None:
9802 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9803 (msg, ), errors.ECODE_INVAL)
9805 self.dest_x509_ca = cert
9807 # Verify target information
9809 for idx, disk_data in enumerate(self.op.target_node):
9811 (host, port, magic) = \
9812 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9813 except errors.GenericError, err:
9814 raise errors.OpPrereqError("Target info for disk %s: %s" %
9815 (idx, err), errors.ECODE_INVAL)
9817 disk_info.append((host, port, magic))
9819 assert len(disk_info) == len(self.op.target_node)
9820 self.dest_disk_info = disk_info
9823 raise errors.ProgrammerError("Unhandled export mode %r" %
9826 # instance disk type verification
9827 # TODO: Implement export support for file-based disks
9828 for disk in self.instance.disks:
9829 if disk.dev_type == constants.LD_FILE:
9830 raise errors.OpPrereqError("Export not supported for instances with"
9831 " file-based disks", errors.ECODE_INVAL)
9833 def _CleanupExports(self, feedback_fn):
9834 """Removes exports of current instance from all other nodes.
9836 If an instance in a cluster with nodes A..D was exported to node C, its
9837 exports will be removed from the nodes A, B and D.
9840 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9842 nodelist = self.cfg.GetNodeList()
9843 nodelist.remove(self.dst_node.name)
9845 # on one-node clusters nodelist will be empty after the removal
9846 # if we proceed, the backup would be removed because OpBackupQuery
9847 # substitutes an empty list with the full cluster node list.
9848 iname = self.instance.name
9850 feedback_fn("Removing old exports for instance %s" % iname)
9851 exportlist = self.rpc.call_export_list(nodelist)
9852 for node in exportlist:
9853 if exportlist[node].fail_msg:
9855 if iname in exportlist[node].payload:
9856 msg = self.rpc.call_export_remove(node, iname).fail_msg
9858 self.LogWarning("Could not remove older export for instance %s"
9859 " on node %s: %s", iname, node, msg)
9861 def Exec(self, feedback_fn):
9862 """Export an instance to an image in the cluster.
9865 assert self.op.mode in constants.EXPORT_MODES
9867 instance = self.instance
9868 src_node = instance.primary_node
9870 if self.op.shutdown:
9871 # shutdown the instance, but not the disks
9872 feedback_fn("Shutting down instance %s" % instance.name)
9873 result = self.rpc.call_instance_shutdown(src_node, instance,
9874 self.op.shutdown_timeout)
9875 # TODO: Maybe ignore failures if ignore_remove_failures is set
9876 result.Raise("Could not shutdown instance %s on"
9877 " node %s" % (instance.name, src_node))
9879 # set the disks ID correctly since call_instance_start needs the
9880 # correct drbd minor to create the symlinks
9881 for disk in instance.disks:
9882 self.cfg.SetDiskID(disk, src_node)
9884 activate_disks = (not instance.admin_up)
9887 # Activate the instance disks if we're exporting a stopped instance
9888 feedback_fn("Activating disks for %s" % instance.name)
9889 _StartInstanceDisks(self, instance, None)
9892 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9895 helper.CreateSnapshots()
9897 if (self.op.shutdown and instance.admin_up and
9898 not self.op.remove_instance):
9899 assert not activate_disks
9900 feedback_fn("Starting instance %s" % instance.name)
9901 result = self.rpc.call_instance_start(src_node, instance, None, None)
9902 msg = result.fail_msg
9904 feedback_fn("Failed to start instance: %s" % msg)
9905 _ShutdownInstanceDisks(self, instance)
9906 raise errors.OpExecError("Could not start instance: %s" % msg)
9908 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9909 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9910 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9911 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9912 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9914 (key_name, _, _) = self.x509_key_name
9917 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9920 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9921 key_name, dest_ca_pem,
9926 # Check for backwards compatibility
9927 assert len(dresults) == len(instance.disks)
9928 assert compat.all(isinstance(i, bool) for i in dresults), \
9929 "Not all results are boolean: %r" % dresults
9933 feedback_fn("Deactivating disks for %s" % instance.name)
9934 _ShutdownInstanceDisks(self, instance)
9936 if not (compat.all(dresults) and fin_resu):
9939 failures.append("export finalization")
9940 if not compat.all(dresults):
9941 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9943 failures.append("disk export: disk(s) %s" % fdsk)
9945 raise errors.OpExecError("Export failed, errors in %s" %
9946 utils.CommaJoin(failures))
9948 # At this point, the export was successful, we can cleanup/finish
9950 # Remove instance if requested
9951 if self.op.remove_instance:
9952 feedback_fn("Removing instance %s" % instance.name)
9953 _RemoveInstance(self, feedback_fn, instance,
9954 self.op.ignore_remove_failures)
9956 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9957 self._CleanupExports(feedback_fn)
9959 return fin_resu, dresults
9962 class LUBackupRemove(NoHooksLU):
9963 """Remove exports related to the named instance.
9968 def ExpandNames(self):
9969 self.needed_locks = {}
9970 # We need all nodes to be locked in order for RemoveExport to work, but we
9971 # don't need to lock the instance itself, as nothing will happen to it (and
9972 # we can remove exports also for a removed instance)
9973 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9975 def Exec(self, feedback_fn):
9976 """Remove any export.
9979 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9980 # If the instance was not found we'll try with the name that was passed in.
9981 # This will only work if it was an FQDN, though.
9983 if not instance_name:
9985 instance_name = self.op.instance_name
9987 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9988 exportlist = self.rpc.call_export_list(locked_nodes)
9990 for node in exportlist:
9991 msg = exportlist[node].fail_msg
9993 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9995 if instance_name in exportlist[node].payload:
9997 result = self.rpc.call_export_remove(node, instance_name)
9998 msg = result.fail_msg
10000 logging.error("Could not remove export for instance %s"
10001 " on node %s: %s", instance_name, node, msg)
10003 if fqdn_warn and not found:
10004 feedback_fn("Export not found. If trying to remove an export belonging"
10005 " to a deleted instance please use its Fully Qualified"
10009 class LUGroupAdd(LogicalUnit):
10010 """Logical unit for creating node groups.
10013 HPATH = "group-add"
10014 HTYPE = constants.HTYPE_GROUP
10017 def ExpandNames(self):
10018 # We need the new group's UUID here so that we can create and acquire the
10019 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10020 # that it should not check whether the UUID exists in the configuration.
10021 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10022 self.needed_locks = {}
10023 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10025 def CheckPrereq(self):
10026 """Check prerequisites.
10028 This checks that the given group name is not an existing node group
10033 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10034 except errors.OpPrereqError:
10037 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10038 " node group (UUID: %s)" %
10039 (self.op.group_name, existing_uuid),
10040 errors.ECODE_EXISTS)
10042 if self.op.ndparams:
10043 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10045 def BuildHooksEnv(self):
10046 """Build hooks env.
10050 "GROUP_NAME": self.op.group_name,
10052 mn = self.cfg.GetMasterNode()
10053 return env, [mn], [mn]
10055 def Exec(self, feedback_fn):
10056 """Add the node group to the cluster.
10059 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10060 uuid=self.group_uuid,
10061 alloc_policy=self.op.alloc_policy,
10062 ndparams=self.op.ndparams)
10064 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10065 del self.remove_locks[locking.LEVEL_NODEGROUP]
10068 class LUGroupAssignNodes(NoHooksLU):
10069 """Logical unit for assigning nodes to groups.
10074 def ExpandNames(self):
10075 # These raise errors.OpPrereqError on their own:
10076 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10077 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10079 # We want to lock all the affected nodes and groups. We have readily
10080 # available the list of nodes, and the *destination* group. To gather the
10081 # list of "source" groups, we need to fetch node information.
10082 self.node_data = self.cfg.GetAllNodesInfo()
10083 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10084 affected_groups.add(self.group_uuid)
10086 self.needed_locks = {
10087 locking.LEVEL_NODEGROUP: list(affected_groups),
10088 locking.LEVEL_NODE: self.op.nodes,
10091 def CheckPrereq(self):
10092 """Check prerequisites.
10095 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10096 instance_data = self.cfg.GetAllInstancesInfo()
10098 if self.group is None:
10099 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10100 (self.op.group_name, self.group_uuid))
10102 (new_splits, previous_splits) = \
10103 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10104 for node in self.op.nodes],
10105 self.node_data, instance_data)
10108 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10110 if not self.op.force:
10111 raise errors.OpExecError("The following instances get split by this"
10112 " change and --force was not given: %s" %
10115 self.LogWarning("This operation will split the following instances: %s",
10118 if previous_splits:
10119 self.LogWarning("In addition, these already-split instances continue"
10120 " to be spit across groups: %s",
10121 utils.CommaJoin(utils.NiceSort(previous_splits)))
10123 def Exec(self, feedback_fn):
10124 """Assign nodes to a new group.
10127 for node in self.op.nodes:
10128 self.node_data[node].group = self.group_uuid
10130 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10133 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10134 """Check for split instances after a node assignment.
10136 This method considers a series of node assignments as an atomic operation,
10137 and returns information about split instances after applying the set of
10140 In particular, it returns information about newly split instances, and
10141 instances that were already split, and remain so after the change.
10143 Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10146 @type changes: list of (node_name, new_group_uuid) pairs.
10147 @param changes: list of node assignments to consider.
10148 @param node_data: a dict with data for all nodes
10149 @param instance_data: a dict with all instances to consider
10150 @rtype: a two-tuple
10151 @return: a list of instances that were previously okay and end up split as a
10152 consequence of this change, and a list of instances that were previously
10153 split and this change does not fix.
10156 changed_nodes = dict((node, group) for node, group in changes
10157 if node_data[node].group != group)
10159 all_split_instances = set()
10160 previously_split_instances = set()
10162 def InstanceNodes(instance):
10163 return [instance.primary_node] + list(instance.secondary_nodes)
10165 for inst in instance_data.values():
10166 if inst.disk_template not in constants.DTS_NET_MIRROR:
10169 instance_nodes = InstanceNodes(inst)
10171 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10172 previously_split_instances.add(inst.name)
10174 if len(set(changed_nodes.get(node, node_data[node].group)
10175 for node in instance_nodes)) > 1:
10176 all_split_instances.add(inst.name)
10178 return (list(all_split_instances - previously_split_instances),
10179 list(previously_split_instances & all_split_instances))
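# Hypothetical example: moving node n2 into group g2 while an instance has
# its primary on n1 (group g1) and its secondary on n2 makes that instance
# newly split; an instance already spanning g1 and g3 and untouched by the
# change shows up in the previously-split list instead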
10182 class _GroupQuery(_QueryBase):
10184 FIELDS = query.GROUP_FIELDS
10186 def ExpandNames(self, lu):
10187 lu.needed_locks = {}
10189 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10190 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10193 self.wanted = [name_to_uuid[name]
10194 for name in utils.NiceSort(name_to_uuid.keys())]
10196 # Accept names to be either names or UUIDs.
10199 all_uuid = frozenset(self._all_groups.keys())
10201 for name in self.names:
10202 if name in all_uuid:
10203 self.wanted.append(name)
10204 elif name in name_to_uuid:
10205 self.wanted.append(name_to_uuid[name])
10207 missing.append(name)
10210 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10211 errors.ECODE_NOENT)
10213 def DeclareLocks(self, lu, level):
10216 def _GetQueryData(self, lu):
10217 """Computes the list of node groups and their attributes.
10220 do_nodes = query.GQ_NODE in self.requested_data
10221 do_instances = query.GQ_INST in self.requested_data
10223 group_to_nodes = None
10224 group_to_instances = None
10226 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10227 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10228 # latter GetAllInstancesInfo() is not enough, for we have to go through
10229 # instance->node. Hence, we will need to process nodes even if we only need
10230 # instance information.
10231 if do_nodes or do_instances:
10232 all_nodes = lu.cfg.GetAllNodesInfo()
10233 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10236 for node in all_nodes.values():
10237 if node.group in group_to_nodes:
10238 group_to_nodes[node.group].append(node.name)
10239 node_to_group[node.name] = node.group
10242 all_instances = lu.cfg.GetAllInstancesInfo()
10243 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10245 for instance in all_instances.values():
10246 node = instance.primary_node
10247 if node in node_to_group:
10248 group_to_instances[node_to_group[node]].append(instance.name)
10251 # Do not pass on node information if it was not requested.
10252 group_to_nodes = None
10254 return query.GroupQueryData([self._all_groups[uuid]
10255 for uuid in self.wanted],
10256 group_to_nodes, group_to_instances)
10259 class LUGroupQuery(NoHooksLU):
10260 """Logical unit for querying node groups.
10265 def CheckArguments(self):
10266 self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10268 def ExpandNames(self):
10269 self.gq.ExpandNames(self)
10271 def Exec(self, feedback_fn):
10272 return self.gq.OldStyleQuery(self)
10275 class LUGroupSetParams(LogicalUnit):
10276 """Modifies the parameters of a node group.
10279 HPATH = "group-modify"
10280 HTYPE = constants.HTYPE_GROUP
10283 def CheckArguments(self):
10286 self.op.alloc_policy,
10289 if all_changes.count(None) == len(all_changes):
10290 raise errors.OpPrereqError("Please pass at least one modification",
10291 errors.ECODE_INVAL)
10293 def ExpandNames(self):
10294 # This raises errors.OpPrereqError on its own:
10295 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10297 self.needed_locks = {
10298 locking.LEVEL_NODEGROUP: [self.group_uuid],
10301 def CheckPrereq(self):
10302 """Check prerequisites.
10305 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10307 if self.group is None:
10308 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10309 (self.op.group_name, self.group_uuid))
10311 if self.op.ndparams:
10312 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10313 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10314 self.new_ndparams = new_ndparams
10316 def BuildHooksEnv(self):
10317 """Build hooks env.
10321 "GROUP_NAME": self.op.group_name,
10322 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10324 mn = self.cfg.GetMasterNode()
10325 return env, [mn], [mn]
10327 def Exec(self, feedback_fn):
10328 """Modifies the node group.
10333 if self.op.ndparams:
10334 self.group.ndparams = self.new_ndparams
10335 result.append(("ndparams", str(self.group.ndparams)))
10337 if self.op.alloc_policy:
10338 self.group.alloc_policy = self.op.alloc_policy
10340 self.cfg.Update(self.group, feedback_fn)
10345 class LUGroupRemove(LogicalUnit):
10346 HPATH = "group-remove"
10347 HTYPE = constants.HTYPE_GROUP
10350 def ExpandNames(self):
10351 # This raises errors.OpPrereqError on its own:
10352 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10353 self.needed_locks = {
10354 locking.LEVEL_NODEGROUP: [self.group_uuid],
10357 def CheckPrereq(self):
10358 """Check prerequisites.
10360 This checks that the given group name exists as a node group, that it is
10361 empty (i.e., contains no nodes), and that it is not the last group of the
10365 # Verify that the group is empty.
10366 group_nodes = [node.name
10367 for node in self.cfg.GetAllNodesInfo().values()
10368 if node.group == self.group_uuid]
10371 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10373 (self.op.group_name,
10374 utils.CommaJoin(utils.NiceSort(group_nodes))),
10375 errors.ECODE_STATE)
10377 # Verify the cluster would not be left group-less.
10378 if len(self.cfg.GetNodeGroupList()) == 1:
10379 raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10380 " which cannot be left without at least one"
10381 " group" % self.op.group_name,
10382 errors.ECODE_STATE)
10384 def BuildHooksEnv(self):
10385 """Build hooks env.
10389 "GROUP_NAME": self.op.group_name,
10391 mn = self.cfg.GetMasterNode()
10392 return env, [mn], [mn]
10394 def Exec(self, feedback_fn):
10395 """Remove the node group.
10399 self.cfg.RemoveNodeGroup(self.group_uuid)
10400 except errors.ConfigurationError:
10401 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10402 (self.op.group_name, self.group_uuid))
10404 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10407 class LUGroupRename(LogicalUnit):
10408 HPATH = "group-rename"
10409 HTYPE = constants.HTYPE_GROUP
10412 def ExpandNames(self):
10413 # This raises errors.OpPrereqError on its own:
10414 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10416 self.needed_locks = {
10417 locking.LEVEL_NODEGROUP: [self.group_uuid],
10420 def CheckPrereq(self):
10421 """Check prerequisites.
10423 This checks that the given old_name exists as a node group, and that
10428 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10429 except errors.OpPrereqError:
10432 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10433 " node group (UUID: %s)" %
10434 (self.op.new_name, new_name_uuid),
10435 errors.ECODE_EXISTS)
10437 def BuildHooksEnv(self):
10438 """Build hooks env.
10442 "OLD_NAME": self.op.old_name,
10443 "NEW_NAME": self.op.new_name,
10446 mn = self.cfg.GetMasterNode()
10447 all_nodes = self.cfg.GetAllNodesInfo()
10449 all_nodes.pop(mn, None)
10451 for node in all_nodes.values():
10452 if node.group == self.group_uuid:
10453 run_nodes.append(node.name)
10455 return env, run_nodes, run_nodes
10457 def Exec(self, feedback_fn):
10458 """Rename the node group.
10461 group = self.cfg.GetNodeGroup(self.group_uuid)
10464 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10465 (self.op.old_name, self.group_uuid))
10467 group.name = self.op.new_name
10468 self.cfg.Update(group, feedback_fn)
10470 return self.op.new_name
10473 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10474 """Generic tags LU.
10476 This is an abstract class which is the parent of all the other tags LUs.
10480 def ExpandNames(self):
10481 self.needed_locks = {}
10482 if self.op.kind == constants.TAG_NODE:
10483 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10484 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10485 elif self.op.kind == constants.TAG_INSTANCE:
10486 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10487 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10489 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10490 # not possible to acquire the BGL based on opcode parameters)
10492 def CheckPrereq(self):
10493 """Check prerequisites.
10496 if self.op.kind == constants.TAG_CLUSTER:
10497 self.target = self.cfg.GetClusterInfo()
10498 elif self.op.kind == constants.TAG_NODE:
10499 self.target = self.cfg.GetNodeInfo(self.op.name)
10500 elif self.op.kind == constants.TAG_INSTANCE:
10501 self.target = self.cfg.GetInstanceInfo(self.op.name)
10503 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10504 str(self.op.kind), errors.ECODE_INVAL)
10507 class LUTagsGet(TagsLU):
10508 """Returns the tags of a given object.
10513 def ExpandNames(self):
10514 TagsLU.ExpandNames(self)
10516 # Share locks as this is only a read operation
10517 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10519 def Exec(self, feedback_fn):
10520 """Returns the tag list.
10523 return list(self.target.GetTags())
10526 class LUTagsSearch(NoHooksLU):
10527 """Searches the tags for a given pattern.
10532 def ExpandNames(self):
10533 self.needed_locks = {}
10535 def CheckPrereq(self):
10536 """Check prerequisites.
10538 This checks the pattern passed for validity by compiling it.
10542 self.re = re.compile(self.op.pattern)
10543 except re.error, err:
10544 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10545 (self.op.pattern, err), errors.ECODE_INVAL)
10547 def Exec(self, feedback_fn):
10548 """Returns the tag list.
10552 tgts = [("/cluster", cfg.GetClusterInfo())]
10553 ilist = cfg.GetAllInstancesInfo().values()
10554 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10555 nlist = cfg.GetAllNodesInfo().values()
10556 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10558 for path, target in tgts:
10559 for tag in target.GetTags():
10560 if self.re.search(tag):
10561 results.append((path, tag))
10565 class LUTagsSet(TagsLU):
10566 """Sets a tag on a given object.
10571 def CheckPrereq(self):
10572 """Check prerequisites.
10574 This checks the type and length of the tag name and value.
10577 TagsLU.CheckPrereq(self)
10578 for tag in self.op.tags:
10579 objects.TaggableObject.ValidateTag(tag)
10581 def Exec(self, feedback_fn):
10586 for tag in self.op.tags:
10587 self.target.AddTag(tag)
10588 except errors.TagError, err:
10589 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10590 self.cfg.Update(self.target, feedback_fn)
10593 class LUTagsDel(TagsLU):
10594 """Delete a list of tags from a given object.
10599 def CheckPrereq(self):
10600 """Check prerequisites.
10602 This checks that we have the given tag.
10605 TagsLU.CheckPrereq(self)
10606 for tag in self.op.tags:
10607 objects.TaggableObject.ValidateTag(tag)
10608 del_tags = frozenset(self.op.tags)
10609 cur_tags = self.target.GetTags()
10611 diff_tags = del_tags - cur_tags
10613 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10614 raise errors.OpPrereqError("Tag(s) %s not found" %
10615 (utils.CommaJoin(diff_names), ),
10616 errors.ECODE_NOENT)
10618 def Exec(self, feedback_fn):
10619 """Remove the tag from the object.
10622 for tag in self.op.tags:
10623 self.target.RemoveTag(tag)
10624 self.cfg.Update(self.target, feedback_fn)
10627 class LUTestDelay(NoHooksLU):
10628 """Sleep for a specified amount of time.
10630 This LU sleeps on the master and/or nodes for a specified amount of
10636 def ExpandNames(self):
10637 """Expand names and set required locks.
10639 This expands the node list, if any.
10642 self.needed_locks = {}
10643 if self.op.on_nodes:
10644 # _GetWantedNodes can be used here, but is not always appropriate to use
10645 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10646 # more information.
10647 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10648 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10650 def _TestDelay(self):
10651 """Do the actual sleep.
10654 if self.op.on_master:
10655 if not utils.TestDelay(self.op.duration):
10656 raise errors.OpExecError("Error during master delay test")
10657 if self.op.on_nodes:
10658 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10659 for node, node_result in result.items():
10660 node_result.Raise("Failure during rpc call to node %s" % node)
10662 def Exec(self, feedback_fn):
10663 """Execute the test delay opcode, with the wanted repetitions.
10666 if self.op.repeat == 0:
10669 top_value = self.op.repeat - 1
10670 for i in range(self.op.repeat):
10671 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10675 class LUTestJqueue(NoHooksLU):
10676 """Utility LU to test some aspects of the job queue.
10681 # Must be lower than default timeout for WaitForJobChange to see whether it
10682 # notices changed jobs
10683 _CLIENT_CONNECT_TIMEOUT = 20.0
10684 _CLIENT_CONFIRM_TIMEOUT = 60.0
10687 def _NotifyUsingSocket(cls, cb, errcls):
10688 """Opens a Unix socket and waits for another program to connect.
10691 @param cb: Callback to send socket name to client
10692 @type errcls: class
10693 @param errcls: Exception class to use for errors
10696 # Using a temporary directory as there's no easy way to create temporary
10697 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
10699 tmpdir = tempfile.mkdtemp()
10701 tmpsock = utils.PathJoin(tmpdir, "sock")
10703 logging.debug("Creating temporary socket at %s", tmpsock)
10704 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10709 # Send details to client
10712 # Wait for client to connect before continuing
10713 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10715 (conn, _) = sock.accept()
10716 except socket.error, err:
10717 raise errcls("Client didn't connect in time (%s)" % err)
10721 # Remove as soon as client is connected
10722 shutil.rmtree(tmpdir)
10724 # Wait for client to close
10727 # pylint: disable-msg=E1101
10728 # Instance of '_socketobject' has no ... member
10729 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10731 except socket.error, err:
10732 raise errcls("Client failed to confirm notification (%s)" % err)
10736 def _SendNotification(self, test, arg, sockname):
10737 """Sends a notification to the client.
10740 @param test: Test name
10741 @param arg: Test argument (depends on test)
10742 @type sockname: string
10743 @param sockname: Socket path
10746 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10748 def _Notify(self, prereq, test, arg):
10749 """Notifies the client of a test.
10752 @param prereq: Whether this is a prereq-phase test
10754 @param test: Test name
10755 @param arg: Test argument (depends on test)
10759 errcls = errors.OpPrereqError
10761 errcls = errors.OpExecError
10763 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10767 def CheckArguments(self):
10768 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10769 self.expandnames_calls = 0
10771 def ExpandNames(self):
10772 checkargs_calls = getattr(self, "checkargs_calls", 0)
10773 if checkargs_calls < 1:
10774 raise errors.ProgrammerError("CheckArguments was not called")
10776 self.expandnames_calls += 1
10778 if self.op.notify_waitlock:
10779 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10781 self.LogInfo("Expanding names")
10783 # Get lock on master node (just to get a lock, not for a particular reason)
10784 self.needed_locks = {
10785 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10788 def Exec(self, feedback_fn):
10789 if self.expandnames_calls < 1:
10790 raise errors.ProgrammerError("ExpandNames was not called")
10792 if self.op.notify_exec:
10793 self._Notify(False, constants.JQT_EXEC, None)
10795 self.LogInfo("Executing")
10797 if self.op.log_messages:
10798 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10799 for idx, msg in enumerate(self.op.log_messages):
10800 self.LogInfo("Sending log message %s", idx + 1)
10801 feedback_fn(constants.JQT_MSGPREFIX + msg)
10802 # Report how many test messages have been sent
10803 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10806 raise errors.OpExecError("Opcode failure was requested")
10811 class IAllocator(object):
10812 """IAllocator framework.
10814 An IAllocator instance has four sets of attributes:
10815 - cfg that is needed to query the cluster
10816 - input data (all members of the _KEYS class attribute are required)
10817 - four buffer attributes (in|out_data|text), that represent the
10818 input (to the external script) in text and data structure format,
10819 and the output from it, again in two formats
10820 - the result variables from the script (success, info, nodes) for
10824 # pylint: disable-msg=R0902
10825 # lots of instance attributes
10827 "name", "mem_size", "disks", "disk_template",
10828 "os", "tags", "nics", "vcpus", "hypervisor",
10831 "name", "relocate_from",
10837 def __init__(self, cfg, rpc, mode, **kwargs):
10840 # init buffer variables
10841 self.in_text = self.out_text = self.in_data = self.out_data = None
10842 # init all input fields so that pylint is happy
10844 self.mem_size = self.disks = self.disk_template = None
10845 self.os = self.tags = self.nics = self.vcpus = None
10846 self.hypervisor = None
10847 self.relocate_from = None
10849 self.evac_nodes = None
10851 self.required_nodes = None
10852 # init result fields
10853 self.success = self.info = self.result = None
10854 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10855 keyset = self._ALLO_KEYS
10856 fn = self._AddNewInstance
10857 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10858 keyset = self._RELO_KEYS
10859 fn = self._AddRelocateInstance
10860 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10861 keyset = self._EVAC_KEYS
10862 fn = self._AddEvacuateNodes
10864 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10865 " IAllocator" % self.mode)
10867 if key not in keyset:
10868 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10869 " IAllocator" % key)
10870 setattr(self, key, kwargs[key])
10873 if key not in kwargs:
10874 raise errors.ProgrammerError("Missing input parameter '%s' to"
10875 " IAllocator" % key)
10876 self._BuildInputData(fn)
10878 def _ComputeClusterData(self):
10879 """Compute the generic allocator input data.
10881 This is the data that is independent of the actual operation.
10885 cluster_info = cfg.GetClusterInfo()
10888 "version": constants.IALLOCATOR_VERSION,
10889 "cluster_name": cfg.GetClusterName(),
10890 "cluster_tags": list(cluster_info.GetTags()),
10891 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10892 # we don't have job IDs
10894 ninfo = cfg.GetAllNodesInfo()
10895 iinfo = cfg.GetAllInstancesInfo().values()
10896 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10899 node_list = [n.name for n in ninfo.values() if n.vm_capable]
10901 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10902 hypervisor_name = self.hypervisor
10903 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10904 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10905 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10906 hypervisor_name = cluster_info.enabled_hypervisors[0]
10908 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10911 self.rpc.call_all_instances_info(node_list,
10912 cluster_info.enabled_hypervisors)
10914 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10916 config_ndata = self._ComputeBasicNodeData(ninfo)
10917 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10918 i_list, config_ndata)
10919 assert len(data["nodes"]) == len(ninfo), \
10920 "Incomplete node data computed"
10922 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10924 self.in_data = data
10927 def _ComputeNodeGroupData(cfg):
10928 """Compute node groups data.
10932 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10934 "name": gdata.name,
10935 "alloc_policy": gdata.alloc_policy,
10940 def _ComputeBasicNodeData(node_cfg):
10941 """Compute global node data.
10944 @returns: a dict of name: (node dict, node config)
10948 for ninfo in node_cfg.values():
10949 # fill in static (config-based) values
10951 "tags": list(ninfo.GetTags()),
10952 "primary_ip": ninfo.primary_ip,
10953 "secondary_ip": ninfo.secondary_ip,
10954 "offline": ninfo.offline,
10955 "drained": ninfo.drained,
10956 "master_candidate": ninfo.master_candidate,
10957 "group": ninfo.group,
10958 "master_capable": ninfo.master_capable,
10959 "vm_capable": ninfo.vm_capable,
10962 node_results[ninfo.name] = pnr
10964 return node_results
10967 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10969 """Compute global node data.
10971 @param node_results: the basic node structures as filled from the config
10974 # make a copy of the current dict
10975 node_results = dict(node_results)
10976 for nname, nresult in node_data.items():
10977 assert nname in node_results, "Missing basic data for node %s" % nname
10978 ninfo = node_cfg[nname]
10980 if not (ninfo.offline or ninfo.drained):
10981 nresult.Raise("Can't get data for node %s" % nname)
10982 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10984 remote_info = nresult.payload
10986 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10987 'vg_size', 'vg_free', 'cpu_total']:
10988 if attr not in remote_info:
10989 raise errors.OpExecError("Node '%s' didn't return attribute"
10990 " '%s'" % (nname, attr))
10991 if not isinstance(remote_info[attr], int):
10992 raise errors.OpExecError("Node '%s' returned invalid value"
10994 (nname, attr, remote_info[attr]))
10995 # compute memory used by primary instances
10996 i_p_mem = i_p_up_mem = 0
10997 for iinfo, beinfo in i_list:
10998 if iinfo.primary_node == nname:
10999 i_p_mem += beinfo[constants.BE_MEMORY]
11000 if iinfo.name not in node_iinfo[nname].payload:
11003 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11004 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11005 remote_info['memory_free'] -= max(0, i_mem_diff)
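# the instance is running with less memory than configured, so reserve the
# difference from the node's free memory; free memory must still accommodate
# the full configured size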
11008 i_p_up_mem += beinfo[constants.BE_MEMORY]
11010 # compute memory used by instances
11012 "total_memory": remote_info['memory_total'],
11013 "reserved_memory": remote_info['memory_dom0'],
11014 "free_memory": remote_info['memory_free'],
11015 "total_disk": remote_info['vg_size'],
11016 "free_disk": remote_info['vg_free'],
11017 "total_cpus": remote_info['cpu_total'],
11018 "i_pri_memory": i_p_mem,
11019 "i_pri_up_memory": i_p_up_mem,
11021 pnr_dyn.update(node_results[nname])
11023 node_results[nname] = pnr_dyn
11025 return node_results
11028 def _ComputeInstanceData(cluster_info, i_list):
11029 """Compute global instance data.
11033 for iinfo, beinfo in i_list:
11035 for nic in iinfo.nics:
11036 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11037 nic_dict = {"mac": nic.mac,
11039 "mode": filled_params[constants.NIC_MODE],
11040 "link": filled_params[constants.NIC_LINK],
11042 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11043 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11044 nic_data.append(nic_dict)
11046 "tags": list(iinfo.GetTags()),
11047 "admin_up": iinfo.admin_up,
11048 "vcpus": beinfo[constants.BE_VCPUS],
11049 "memory": beinfo[constants.BE_MEMORY],
11051 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11053 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11054 "disk_template": iinfo.disk_template,
11055 "hypervisor": iinfo.hypervisor,
11057 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11059 instance_data[iinfo.name] = pir
11061 return instance_data
11063 def _AddNewInstance(self):
11064 """Add new instance data to allocator structure.
11066 This in combination with _ComputeClusterData will create the
11067 correct structure needed as input for the allocator.
11069 The checks for the completeness of the opcode must have already been
11073 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11075 if self.disk_template in constants.DTS_NET_MIRROR:
11076 self.required_nodes = 2
11078 self.required_nodes = 1
11081 "disk_template": self.disk_template,
11084 "vcpus": self.vcpus,
11085 "memory": self.mem_size,
11086 "disks": self.disks,
11087 "disk_space_total": disk_space,
11089 "required_nodes": self.required_nodes,
11093 def _AddRelocateInstance(self):
11094 """Add relocate instance data to allocator structure.
11096 This in combination with _ComputeClusterData will create the
11097 correct structure needed as input for the allocator.
11099 The checks for the completeness of the opcode must have already been
11103 instance = self.cfg.GetInstanceInfo(self.name)
11104 if instance is None:
11105 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11106 " IAllocator" % self.name)
11108 if instance.disk_template not in constants.DTS_NET_MIRROR:
11109 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11110 errors.ECODE_INVAL)
11112 if len(instance.secondary_nodes) != 1:
11113 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
11114 errors.ECODE_STATE)
11116 self.required_nodes = 1
11117 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11118 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11122 "disk_space_total": disk_space,
11123 "required_nodes": self.required_nodes,
11124 "relocate_from": self.relocate_from,
11128 def _AddEvacuateNodes(self):
11129 """Add evacuate nodes data to allocator structure.
11133 "evac_nodes": self.evac_nodes
11137 def _BuildInputData(self, fn):
11138 """Build input data structures.
11141 self._ComputeClusterData()
11144 request["type"] = self.mode
11145 self.in_data["request"] = request
11147 self.in_text = serializer.Dump(self.in_data)
11149 def Run(self, name, validate=True, call_fn=None):
11150 """Run an instance allocator and return the results.
11153 if call_fn is None:
11154 call_fn = self.rpc.call_iallocator_runner
11156 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11157 result.Raise("Failure while running the iallocator script")
11159 self.out_text = result.payload
11161 self._ValidateResult()
11163 def _ValidateResult(self):
11164 """Process the allocator results.
11166 This will process and if successful save the result in
11167 self.out_data and the other parameters.
11171 rdict = serializer.Load(self.out_text)
11172 except Exception, err:
11173 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11175 if not isinstance(rdict, dict):
11176 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11178 # TODO: remove backwards compatibility in later versions
11179 if "nodes" in rdict and "result" not in rdict:
11180 rdict["result"] = rdict["nodes"]
11183 for key in "success", "info", "result":
11184 if key not in rdict:
11185 raise errors.OpExecError("Can't parse iallocator results:"
11186 " missing key '%s'" % key)
11187 setattr(self, key, rdict[key])
11189 if not isinstance(rdict["result"], list):
11190 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11192 self.out_data = rdict
11195 class LUTestAllocator(NoHooksLU):
11196 """Run allocator tests.
11198 This LU runs the allocator tests
11201 def CheckPrereq(self):
11202 """Check prerequisites.
11204 This checks the opcode parameters depending on the direction and mode test.
11207 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11208 for attr in ["mem_size", "disks", "disk_template",
11209 "os", "tags", "nics", "vcpus"]:
11210 if not hasattr(self.op, attr):
11211 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11212 attr, errors.ECODE_INVAL)
11213 iname = self.cfg.ExpandInstanceName(self.op.name)
11214 if iname is not None:
11215 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11216 iname, errors.ECODE_EXISTS)
11217 if not isinstance(self.op.nics, list):
11218 raise errors.OpPrereqError("Invalid parameter 'nics'",
11219 errors.ECODE_INVAL)
11220 if not isinstance(self.op.disks, list):
11221 raise errors.OpPrereqError("Invalid parameter 'disks'",
11222 errors.ECODE_INVAL)
11223 for row in self.op.disks:
11224 if (not isinstance(row, dict) or
11225 "size" not in row or
11226 not isinstance(row["size"], int) or
11227 "mode" not in row or
11228 row["mode"] not in ['r', 'w']):
11229 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11230 " parameter", errors.ECODE_INVAL)
11231 if self.op.hypervisor is None:
11232 self.op.hypervisor = self.cfg.GetHypervisorType()
11233 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11234 fname = _ExpandInstanceName(self.cfg, self.op.name)
11235 self.op.name = fname
11236 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11237 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11238 if not hasattr(self.op, "evac_nodes"):
11239 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11240 " opcode input", errors.ECODE_INVAL)
11242 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11243 self.op.mode, errors.ECODE_INVAL)
11245 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11246 if self.op.allocator is None:
11247 raise errors.OpPrereqError("Missing allocator name",
11248 errors.ECODE_INVAL)
11249 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11250 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11251 self.op.direction, errors.ECODE_INVAL)
11253 def Exec(self, feedback_fn):
11254 """Run the allocator test.
11257 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11258 ial = IAllocator(self.cfg, self.rpc,
11261 mem_size=self.op.mem_size,
11262 disks=self.op.disks,
11263 disk_template=self.op.disk_template,
11267 vcpus=self.op.vcpus,
11268 hypervisor=self.op.hypervisor,
11270 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11271 ial = IAllocator(self.cfg, self.rpc,
11274 relocate_from=list(self.relocate_from),
11276 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11277 ial = IAllocator(self.cfg, self.rpc,
11279 evac_nodes=self.op.evac_nodes)
11281 raise errors.ProgrammerError("Unhandled mode %s in"
11282 " LUTestAllocator.Exec", self.op.mode)
11284 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11285 result = ial.in_text
11287 ial.Run(self.op.allocator, validate=False)
11288 result = ial.out_text
11292 #: Query type implementations
11294 constants.QR_INSTANCE: _InstanceQuery,
11295 constants.QR_NODE: _NodeQuery,
11296 constants.QR_GROUP: _GroupQuery,
11300 def _GetQueryImplementation(name):
11301 """Returns the implemtnation for a query type.
11303 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11307 return _QUERY_IMPL[name]
11309 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11310 errors.ECODE_INVAL)