# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
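
# Illustrative usage sketch (not part of the original module): the return
# value doubles as a boolean, since an empty string is falsy while a
# configured OOB program path is not, so callers can write e.g.:
#
#   oob_program = _SupportsOob(self.cfg, node)
#   if not oob_program:
#     raise errors.OpPrereqError("OOB is not supported for node %s" %
#                                node.name, errors.ECODE_STATE)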


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
142 """Returns the SshRunner object
146 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
149 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hooks results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
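
  # Illustrative usage sketch (assumed pattern, not part of the original
  # module): a typical instance-level LU calls this from ExpandNames, e.g.:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  # after which DeclareLocks can call self._LockInstancesNodes() to pick up
  # the instance's nodes once the instance lock is held.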

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
404 """Tasklet base class.
406 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407 they can mix legacy code with tasklets. Locking needs to be done in the LU,
408 tasklets know nothing about locks.
410 Subclasses must follow these rules:
411 - Implement CheckPrereq
415 def __init__(self, lu):
422 def CheckPrereq(self):
423 """Check prerequisites for this tasklets.
425 This method should check whether the prerequisites for the execution of
426 this tasklet are fulfilled. It can do internode communication, but it
427 should be idempotent - no cluster or system changes are allowed.
429 The method should raise errors.OpPrereqError in case something is not
430 fulfilled. Its return value is ignored.
432 This method should also update all parameters to their canonical form if it
433 hasn't been done before.
438 def Exec(self, feedback_fn):
439 """Execute the tasklet.
441 This method should implement the actual work. It should raise
442 errors.OpExecError for failures that are somewhat dealt with in code, or
446 raise NotImplementedError
450 """Base for query utility classes.
453 #: Attribute holding field definitions
456 def __init__(self, names, fields, use_locking):
457 """Initializes this class.
461 self.use_locking = use_locking
463 self.query = query.Query(self.FIELDS, fields)
464 self.requested_data = self.query.RequestedData()
466 self.do_locking = None
469 def _GetNames(self, lu, all_names, lock_level):
470 """Helper function to determine names asked for in the query.
474 names = lu.acquired_locks[lock_level]
478 if self.wanted == locking.ALL_SET:
479 assert not self.names
480 # caller didn't specify names, so ordering is not important
481 return utils.NiceSort(names)
483 # caller specified names and we must keep the same order
485 assert not self.do_locking or lu.acquired_locks[lock_level]
487 missing = set(self.wanted).difference(names)
489 raise errors.OpExecError("Some items were removed before retrieving"
490 " their data: %s" % missing)
492 # Return expanded names
496 def FieldsQuery(cls, fields):
497 """Returns list of available fields.
499 @return: List of L{objects.QueryFieldDefinition}
502 return query.QueryFields(cls.FIELDS, fields)
504 def ExpandNames(self, lu):
505 """Expand names for this query.
507 See L{LogicalUnit.ExpandNames}.
510 raise NotImplementedError()
512 def DeclareLocks(self, lu, level):
513 """Declare locks for this query.
515 See L{LogicalUnit.DeclareLocks}.
518 raise NotImplementedError()
520 def _GetQueryData(self, lu):
521 """Collects all data for this query.
523 @return: Query data object
526 raise NotImplementedError()
528 def NewStyleQuery(self, lu):
529 """Collect data and execute query.
532 return query.GetQueryResponse(self.query, self._GetQueryData(lu))
534 def OldStyleQuery(self, lu):
535 """Collect data and execute query.
538 return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
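
# Worked example (not part of the original module), with the default flags:
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"}
#   _GetUpdatedParams(old, upd) == {"root_path": "/dev/vda1"}
#
# VALUE_DEFAULT removes the key entirely so the cluster-level default applies
# again, while ordinary values simply overwrite the old ones.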


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
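
# Worked example (not part of the original module): with candidate_pool_size
# of 10 and GetMasterCandidateStats reporting mc_now = 3 and mc_should = 3,
# adding the new node bumps the target to min(3 + 1, 10) = 4; since 3 < 4 the
# function returns True and the node should promote itself.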


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # file missing on the node
      test1 = file_name not in remote_cksum
      # file present but with a wrong checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # file present and with the correct checksum
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = helper_result is None
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue

      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1882 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1883 """Verifies and updates the node instance list.
1885 If the listing was successful, then updates this node's instance
1886 list. Otherwise, it marks the RPC call as failed for the instance list key.
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the remote results for the node
1892 @param nimg: the node image object
1895 idata = nresult.get(constants.NV_INSTANCELIST, None)
1896 test = not isinstance(idata, list)
1897 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1898 " (instancelist): %s", utils.SafeEncode(str(idata)))
1900 nimg.hyp_fail = True
1902 nimg.instances = idata
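# Illustrative note (not part of the original code): on success, idata is
# the plain list of instance names the hypervisor reports on this node,
# e.g. ["inst1.example.com", "inst2.example.com"]; nimg.hyp_fail is only
# set when the RPC did not return such a list.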
1904 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1905 """Verifies and computes a node information map
1907 @type ninfo: L{objects.Node}
1908 @param ninfo: the node to check
1909 @param nresult: the remote results for the node
1910 @param nimg: the node image object
1911 @param vg_name: the configured VG name
1915 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1917 # try to read free memory (from the hypervisor)
1918 hv_info = nresult.get(constants.NV_HVINFO, None)
1919 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1920 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1923 nimg.mfree = int(hv_info["memory_free"])
1924 except (ValueError, TypeError):
1925 _ErrorIf(True, self.ENODERPC, node,
1926 "node returned invalid nodeinfo, check hypervisor")
1928 # FIXME: devise a free space model for file based instances as well
1929 if vg_name is not None:
1930 test = (constants.NV_VGLIST not in nresult or
1931 vg_name not in nresult[constants.NV_VGLIST])
1932 _ErrorIf(test, self.ENODELVM, node,
1933 "node didn't return data for the volume group '%s'"
1934 " - it is either missing or broken", vg_name)
1937 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1938 except (ValueError, TypeError):
1939 _ErrorIf(True, self.ENODERPC, node,
1940 "node returned invalid LVM info, check LVM status")
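# Illustrative sketch (assumed payload shapes, not part of the original
# code): the two values read above would look roughly like
#
#   nresult[constants.NV_HVINFO] == {"memory_free": 2048, ...}
#   nresult[constants.NV_VGLIST] == {"xenvg": 102400}
#
# with nimg.mfree and nimg.dfree ending up as the respective integers.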
1942 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1943 """Gets per-disk status information for all instances.
1945 @type nodelist: list of strings
1946 @param nodelist: Node names
1947 @type node_image: dict of (name, L{objects.Node})
1948 @param node_image: Node objects
1949 @type instanceinfo: dict of (name, L{objects.Instance})
1950 @param instanceinfo: Instance objects
1951 @rtype: {instance: {node: [(success, payload)]}}
1952 @return: a dictionary of per-instance dictionaries with nodes as
1953 keys and disk information as values; the disk information is a
1954 list of tuples (success, payload)
1957 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1960 node_disks_devonly = {}
1961 diskless_instances = set()
1962 diskless = constants.DT_DISKLESS
1964 for nname in nodelist:
1965 node_instances = list(itertools.chain(node_image[nname].pinst,
1966 node_image[nname].sinst))
1967 diskless_instances.update(inst for inst in node_instances
1968 if instanceinfo[inst].disk_template == diskless)
1969 disks = [(inst, disk)
1970 for inst in node_instances
1971 for disk in instanceinfo[inst].disks]
1974 # No need to collect data
1977 node_disks[nname] = disks
1979 # Creating copies as SetDiskID below will modify the objects and that can
1980 # lead to incorrect data returned from nodes
1981 devonly = [dev.Copy() for (_, dev) in disks]
1984 self.cfg.SetDiskID(dev, nname)
1986 node_disks_devonly[nname] = devonly
1988 assert len(node_disks) == len(node_disks_devonly)
1990 # Collect data from all nodes with disks
1991 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1994 assert len(result) == len(node_disks)
1998 for (nname, nres) in result.items():
1999 disks = node_disks[nname]
2002 # No data from this node
2003 data = len(disks) * [(False, "node offline")]
2006 _ErrorIf(msg, self.ENODERPC, nname,
2007 "while getting disk information: %s", msg)
2009 # No data from this node
2010 data = len(disks) * [(False, msg)]
2013 for idx, i in enumerate(nres.payload):
2014 if isinstance(i, (tuple, list)) and len(i) == 2:
2017 logging.warning("Invalid result from node %s, entry %d: %s",
2019 data.append((False, "Invalid result from the remote node"))
2021 for ((inst, _), status) in zip(disks, data):
2022 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2024 # Add empty entries for diskless instances.
2025 for inst in diskless_instances:
2026 assert inst not in instdisk
2029 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2030 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2031 compat.all(isinstance(s, (tuple, list)) and
2032 len(s) == 2 for s in statuses)
2033 for inst, nnames in instdisk.items()
2034 for nname, statuses in nnames.items())
2035 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
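# Illustrative sketch (not part of the original code): given the
# {instance: {node: [(success, payload)]}} structure asserted above, a
# hypothetical consumer could report broken disks like this:
#
#   for inst, per_node in instdisk.items():
#     for nname, statuses in per_node.items():
#       for idx, (success, payload) in enumerate(statuses):
#         if not success:
#           logging.warning("%s/disk%d on %s: %s", inst, idx, nname, payload)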
2039 def _VerifyHVP(self, hvp_data):
2040 """Verifies locally the syntax of the hypervisor parameters.
2043 for item, hv_name, hv_params in hvp_data:
2044 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2047 hv_class = hypervisor.GetHypervisor(hv_name)
2048 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2049 hv_class.CheckParameterSyntax(hv_params)
2050 except errors.GenericError, err:
2051 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
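# Illustrative usage sketch (not part of the original code; the hypervisor
# name and the "kernel_path" parameter are examples only): hvp_data is a
# list of (source, hypervisor, params) triples, e.g.
#
#   self._VerifyHVP([
#     ("cluster", constants.HT_XEN_PVM, {"kernel_path": "/boot/vmlinuz"}),
#   ])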
2054 def BuildHooksEnv(self):
2057 Cluster-Verify hooks are only run in the post phase; if they fail, their
2058 output is logged in the verify output and the verification fails.
2061 all_nodes = self.cfg.GetNodeList()
2063 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2065 for node in self.cfg.GetAllNodesInfo().values():
2066 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2068 return env, [], all_nodes
2070 def Exec(self, feedback_fn):
2071 """Verify integrity of cluster, performing various test on nodes.
2074 # This method has too many local variables. pylint: disable-msg=R0914
2076 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2077 verbose = self.op.verbose
2078 self._feedback_fn = feedback_fn
2079 feedback_fn("* Verifying global settings")
2080 for msg in self.cfg.VerifyConfig():
2081 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2083 # Check the cluster certificates
2084 for cert_filename in constants.ALL_CERT_FILES:
2085 (errcode, msg) = _VerifyCertificate(cert_filename)
2086 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2088 vg_name = self.cfg.GetVGName()
2089 drbd_helper = self.cfg.GetDRBDHelper()
2090 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2091 cluster = self.cfg.GetClusterInfo()
2092 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2093 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2094 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2095 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2096 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2097 for iname in instancelist)
2098 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2099 i_non_redundant = [] # Non redundant instances
2100 i_non_a_balanced = [] # Non auto-balanced instances
2101 n_offline = 0 # Count of offline nodes
2102 n_drained = 0 # Count of nodes being drained
2103 node_vol_should = {}
2105 # FIXME: verify OS list
2106 # do local checksums
2107 master_files = [constants.CLUSTER_CONF_FILE]
2108 master_node = self.master_node = self.cfg.GetMasterNode()
2109 master_ip = self.cfg.GetMasterIP()
2111 file_names = ssconf.SimpleStore().GetFileList()
2112 file_names.extend(constants.ALL_CERT_FILES)
2113 file_names.extend(master_files)
2114 if cluster.modify_etc_hosts:
2115 file_names.append(constants.ETC_HOSTS)
2117 local_checksums = utils.FingerprintFiles(file_names)
2119 # Compute the set of hypervisor parameters
2121 for hv_name in hypervisors:
2122 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2123 for os_name, os_hvp in cluster.os_hvp.items():
2124 for hv_name, hv_params in os_hvp.items():
2127 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2128 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2129 # TODO: collapse identical parameter values in a single one
2130 for instance in instanceinfo.values():
2131 if not instance.hvparams:
2133 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2134 cluster.FillHV(instance)))
2135 # and verify them locally
2136 self._VerifyHVP(hvp_data)
2138 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2139 node_verify_param = {
2140 constants.NV_FILELIST: file_names,
2141 constants.NV_NODELIST: [node.name for node in nodeinfo
2142 if not node.offline],
2143 constants.NV_HYPERVISOR: hypervisors,
2144 constants.NV_HVPARAMS: hvp_data,
2145 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2146 node.secondary_ip) for node in nodeinfo
2147 if not node.offline],
2148 constants.NV_INSTANCELIST: hypervisors,
2149 constants.NV_VERSION: None,
2150 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2151 constants.NV_NODESETUP: None,
2152 constants.NV_TIME: None,
2153 constants.NV_MASTERIP: (master_node, master_ip),
2154 constants.NV_OSLIST: None,
2155 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2158 if vg_name is not None:
2159 node_verify_param[constants.NV_VGLIST] = None
2160 node_verify_param[constants.NV_LVLIST] = vg_name
2161 node_verify_param[constants.NV_PVLIST] = [vg_name]
2162 node_verify_param[constants.NV_DRBDLIST] = None
2165 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2167 # Build our expected cluster state
2168 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2170 vm_capable=node.vm_capable))
2171 for node in nodeinfo)
2175 for node in nodeinfo:
2176 path = _SupportsOob(self.cfg, node)
2177 if path and path not in oob_paths:
2178 oob_paths.append(path)
2181 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2183 for instance in instancelist:
2184 inst_config = instanceinfo[instance]
2186 for nname in inst_config.all_nodes:
2187 if nname not in node_image:
2189 gnode = self.NodeImage(name=nname)
2191 node_image[nname] = gnode
2193 inst_config.MapLVsByNode(node_vol_should)
2195 pnode = inst_config.primary_node
2196 node_image[pnode].pinst.append(instance)
2198 for snode in inst_config.secondary_nodes:
2199 nimg = node_image[snode]
2200 nimg.sinst.append(instance)
2201 if pnode not in nimg.sbp:
2202 nimg.sbp[pnode] = []
2203 nimg.sbp[pnode].append(instance)
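# Illustrative note (not part of the original code): after this loop,
# nimg.sbp maps a primary node name to the instances for which this node
# acts as secondary, e.g. node_image["node2"].sbp == {"node1": ["inst1"]}.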
2205 # At this point, we have the in-memory data structures complete,
2206 # except for the runtime information, which we'll gather next
2208 # Due to the way our RPC system works, exact response times cannot be
2209 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2210 # time before and after executing the request, we can at least have a time window.
2212 nvinfo_starttime = time.time()
2213 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2214 self.cfg.GetClusterName())
2215 nvinfo_endtime = time.time()
2217 all_drbd_map = self.cfg.ComputeDRBDMap()
2219 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2220 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2222 feedback_fn("* Verifying node status")
2226 for node_i in nodeinfo:
2228 nimg = node_image[node]
2232 feedback_fn("* Skipping offline node %s" % (node,))
2236 if node == master_node:
2238 elif node_i.master_candidate:
2239 ntype = "master candidate"
2240 elif node_i.drained:
2246 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2248 msg = all_nvinfo[node].fail_msg
2249 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2251 nimg.rpc_fail = True
2254 nresult = all_nvinfo[node].payload
2256 nimg.call_ok = self._VerifyNode(node_i, nresult)
2257 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2258 self._VerifyNodeNetwork(node_i, nresult)
2259 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2262 self._VerifyOob(node_i, nresult)
2265 self._VerifyNodeLVM(node_i, nresult, vg_name)
2266 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2269 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2270 self._UpdateNodeInstances(node_i, nresult, nimg)
2271 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2272 self._UpdateNodeOS(node_i, nresult, nimg)
2273 if not nimg.os_fail:
2274 if refos_img is None:
2276 self._VerifyNodeOS(node_i, nimg, refos_img)
2278 feedback_fn("* Verifying instance status")
2279 for instance in instancelist:
2281 feedback_fn("* Verifying instance %s" % instance)
2282 inst_config = instanceinfo[instance]
2283 self._VerifyInstance(instance, inst_config, node_image,
2285 inst_nodes_offline = []
2287 pnode = inst_config.primary_node
2288 pnode_img = node_image[pnode]
2289 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2290 self.ENODERPC, pnode, "instance %s, connection to"
2291 " primary node failed", instance)
2293 if pnode_img.offline:
2294 inst_nodes_offline.append(pnode)
2296 # If the instance is non-redundant we cannot survive losing its primary
2297 # node, so we are not N+1 compliant. On the other hand we have no disk
2298 # templates with more than one secondary, so that situation is not well handled.
2300 # FIXME: does not support file-backed instances
2301 if not inst_config.secondary_nodes:
2302 i_non_redundant.append(instance)
2304 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2305 instance, "instance has multiple secondary nodes: %s",
2306 utils.CommaJoin(inst_config.secondary_nodes),
2307 code=self.ETYPE_WARNING)
2309 if inst_config.disk_template in constants.DTS_NET_MIRROR:
2310 pnode = inst_config.primary_node
2311 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2312 instance_groups = {}
2314 for node in instance_nodes:
2315 instance_groups.setdefault(nodeinfo_byname[node].group,
2319 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2320 # Sort so that we always list the primary node first.
2321 for group, nodes in sorted(instance_groups.items(),
2322 key=lambda (_, nodes): pnode in nodes,
2325 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2326 instance, "instance has primary and secondary nodes in"
2327 " different groups: %s", utils.CommaJoin(pretty_list),
2328 code=self.ETYPE_WARNING)
2330 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2331 i_non_a_balanced.append(instance)
2333 for snode in inst_config.secondary_nodes:
2334 s_img = node_image[snode]
2335 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2336 "instance %s, connection to secondary node failed", instance)
2339 inst_nodes_offline.append(snode)
2341 # warn that the instance lives on offline nodes
2342 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2343 "instance lives on offline node(s) %s",
2344 utils.CommaJoin(inst_nodes_offline))
2345 # ... or ghost/non-vm_capable nodes
2346 for node in inst_config.all_nodes:
2347 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2348 "instance lives on ghost node %s", node)
2349 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2350 instance, "instance lives on non-vm_capable node %s", node)
2352 feedback_fn("* Verifying orphan volumes")
2353 reserved = utils.FieldSet(*cluster.reserved_lvs)
2354 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2356 feedback_fn("* Verifying orphan instances")
2357 self._VerifyOrphanInstances(instancelist, node_image)
2359 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2360 feedback_fn("* Verifying N+1 Memory redundancy")
2361 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2363 feedback_fn("* Other Notes")
2365 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2366 % len(i_non_redundant))
2368 if i_non_a_balanced:
2369 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2370 % len(i_non_a_balanced))
2373 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2376 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2380 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2381 """Analyze the post-hooks' result
2383 This method analyses the hook result, handles it, and sends some
2384 nicely-formatted feedback back to the user.
2386 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2387 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2388 @param hooks_results: the results of the multi-node hooks rpc call
2389 @param feedback_fn: function used to send feedback back to the caller
2390 @param lu_result: previous Exec result
2391 @return: the new Exec result, based on the previous result
2395 # We only really run POST phase hooks, and are only interested in their results
2397 if phase == constants.HOOKS_PHASE_POST:
2398 # Used to change hooks' output to proper indentation
2399 feedback_fn("* Hooks Results")
2400 assert hooks_results, "invalid result from hooks"
2402 for node_name in hooks_results:
2403 res = hooks_results[node_name]
2405 test = msg and not res.offline
2406 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2407 "Communication failure in hooks execution: %s", msg)
2408 if res.offline or msg:
2409 # No need to investigate payload if node is offline or gave an error.
2410 # manually override lu_result here, as _ErrorIf only
2411 # overrides self.bad
2414 for script, hkr, output in res.payload:
2415 test = hkr == constants.HKR_FAIL
2416 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2417 "Script %s failed, output:", script)
2419 output = self._HOOKS_INDENT_RE.sub(' ', output)
2420 feedback_fn("%s" % output)
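# Illustrative sketch (not part of the original code; the script name is
# made up): a successful per-node hooks payload is a list of
# (script, status, output) tuples, e.g.
#
#   hooks_results["node1"].payload == [
#     ("50ganeti-check", constants.HKR_SUCCESS, ""),
#   ]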
2426 class LUClusterVerifyDisks(NoHooksLU):
2427 """Verifies the cluster disks status.
2432 def ExpandNames(self):
2433 self.needed_locks = {
2434 locking.LEVEL_NODE: locking.ALL_SET,
2435 locking.LEVEL_INSTANCE: locking.ALL_SET,
2437 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2439 def Exec(self, feedback_fn):
2440 """Verify integrity of cluster disks.
2442 @rtype: tuple of three items
2443 @return: a tuple of (dict of node-to-node_error, list of instances
2444 which need activate-disks, dict of instance: (node, volume) for
2448 result = res_nodes, res_instances, res_missing = {}, [], {}
2450 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2451 instances = self.cfg.GetAllInstancesInfo().values()
2454 for inst in instances:
2456 if not inst.admin_up:
2458 inst.MapLVsByNode(inst_lvs)
2459 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2460 for node, vol_list in inst_lvs.iteritems():
2461 for vol in vol_list:
2462 nv_dict[(node, vol)] = inst
2467 node_lvs = self.rpc.call_lv_list(nodes, [])
2468 for node, node_res in node_lvs.items():
2469 if node_res.offline:
2471 msg = node_res.fail_msg
2473 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2474 res_nodes[node] = msg
2477 lvs = node_res.payload
2478 for lv_name, (_, _, lv_online) in lvs.items():
2479 inst = nv_dict.pop((node, lv_name), None)
2480 if (not lv_online and inst is not None
2481 and inst.name not in res_instances):
2482 res_instances.append(inst.name)
2484 # any leftover items in nv_dict are missing LVs; group them per instance
2486 for key, inst in nv_dict.iteritems():
2487 if inst.name not in res_missing:
2488 res_missing[inst.name] = []
2489 res_missing[inst.name].append(key)
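# Illustrative usage sketch (not part of the original code; "feedback" is a
# stand-in for the caller's output function): the result tuple documented
# above can be unpacked as
#
#   nodes_err, need_activate, missing = lu_result
#   for iname in need_activate:
#     feedback("instance %s needs activate-disks" % iname)
#   for iname, keys in missing.items():
#     feedback("%s is missing %s" % (iname, ", ".join(
#         "%s on %s" % (vol, node) for (node, vol) in keys)))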
2494 class LUClusterRepairDiskSizes(NoHooksLU):
2495 """Verifies the cluster disks sizes.
2500 def ExpandNames(self):
2501 if self.op.instances:
2502 self.wanted_names = []
2503 for name in self.op.instances:
2504 full_name = _ExpandInstanceName(self.cfg, name)
2505 self.wanted_names.append(full_name)
2506 self.needed_locks = {
2507 locking.LEVEL_NODE: [],
2508 locking.LEVEL_INSTANCE: self.wanted_names,
2510 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2512 self.wanted_names = None
2513 self.needed_locks = {
2514 locking.LEVEL_NODE: locking.ALL_SET,
2515 locking.LEVEL_INSTANCE: locking.ALL_SET,
2517 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2519 def DeclareLocks(self, level):
2520 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2521 self._LockInstancesNodes(primary_only=True)
2523 def CheckPrereq(self):
2524 """Check prerequisites.
2526 This only checks the optional instance list against the existing names.
2529 if self.wanted_names is None:
2530 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2532 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2533 in self.wanted_names]
2535 def _EnsureChildSizes(self, disk):
2536 """Ensure children of the disk have the needed disk size.
2538 This is valid mainly for DRBD8 and fixes an issue where the
2539 children have a smaller disk size than the parent.
2541 @param disk: an L{ganeti.objects.Disk} object
2544 if disk.dev_type == constants.LD_DRBD8:
2545 assert disk.children, "Empty children for DRBD8?"
2546 fchild = disk.children[0]
2547 mismatch = fchild.size < disk.size
2549 self.LogInfo("Child disk has size %d, parent %d, fixing",
2550 fchild.size, disk.size)
2551 fchild.size = disk.size
2553 # and we recurse on this child only, not on the metadev
2554 return self._EnsureChildSizes(fchild) or mismatch
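# Illustrative sketch (not part of the original code): for a DRBD8 disk
# with, say, drbd.size == 1024 and drbd.children[0].size == 512, the helper
# above grows the data child to 1024, recurses into it, and returns True so
# the caller knows the configuration needs to be written back.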
2558 def Exec(self, feedback_fn):
2559 """Verify the size of cluster disks.
2562 # TODO: check child disks too
2563 # TODO: check differences in size between primary/secondary nodes
2565 for instance in self.wanted_instances:
2566 pnode = instance.primary_node
2567 if pnode not in per_node_disks:
2568 per_node_disks[pnode] = []
2569 for idx, disk in enumerate(instance.disks):
2570 per_node_disks[pnode].append((instance, idx, disk))
2573 for node, dskl in per_node_disks.items():
2574 newl = [v[2].Copy() for v in dskl]
2576 self.cfg.SetDiskID(dsk, node)
2577 result = self.rpc.call_blockdev_getsizes(node, newl)
2579 self.LogWarning("Failure in blockdev_getsizes call to node"
2580 " %s, ignoring", node)
2582 if len(result.data) != len(dskl):
2583 self.LogWarning("Invalid result from node %s, ignoring node results",
2586 for ((instance, idx, disk), size) in zip(dskl, result.data):
2588 self.LogWarning("Disk %d of instance %s did not return size"
2589 " information, ignoring", idx, instance.name)
2591 if not isinstance(size, (int, long)):
2592 self.LogWarning("Disk %d of instance %s did not return valid"
2593 " size information, ignoring", idx, instance.name)
2596 if size != disk.size:
2597 self.LogInfo("Disk %d of instance %s has mismatched size,"
2598 " correcting: recorded %d, actual %d", idx,
2599 instance.name, disk.size, size)
2601 self.cfg.Update(instance, feedback_fn)
2602 changed.append((instance.name, idx, size))
2603 if self._EnsureChildSizes(disk):
2604 self.cfg.Update(instance, feedback_fn)
2605 changed.append((instance.name, idx, disk.size))
2609 class LUClusterRename(LogicalUnit):
2610 """Rename the cluster.
2613 HPATH = "cluster-rename"
2614 HTYPE = constants.HTYPE_CLUSTER
2616 def BuildHooksEnv(self):
2621 "OP_TARGET": self.cfg.GetClusterName(),
2622 "NEW_NAME": self.op.name,
2624 mn = self.cfg.GetMasterNode()
2625 all_nodes = self.cfg.GetNodeList()
2626 return env, [mn], all_nodes
2628 def CheckPrereq(self):
2629 """Verify that the passed name is a valid one.
2632 hostname = netutils.GetHostname(name=self.op.name,
2633 family=self.cfg.GetPrimaryIPFamily())
2635 new_name = hostname.name
2636 self.ip = new_ip = hostname.ip
2637 old_name = self.cfg.GetClusterName()
2638 old_ip = self.cfg.GetMasterIP()
2639 if new_name == old_name and new_ip == old_ip:
2640 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2641 " cluster has changed",
2643 if new_ip != old_ip:
2644 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2645 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2646 " reachable on the network" %
2647 new_ip, errors.ECODE_NOTUNIQUE)
2649 self.op.name = new_name
2651 def Exec(self, feedback_fn):
2652 """Rename the cluster.
2655 clustername = self.op.name
2658 # shutdown the master IP
2659 master = self.cfg.GetMasterNode()
2660 result = self.rpc.call_node_stop_master(master, False)
2661 result.Raise("Could not disable the master role")
2664 cluster = self.cfg.GetClusterInfo()
2665 cluster.cluster_name = clustername
2666 cluster.master_ip = ip
2667 self.cfg.Update(cluster, feedback_fn)
2669 # update the known hosts file
2670 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2671 node_list = self.cfg.GetOnlineNodeList()
2673 node_list.remove(master)
2676 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2678 result = self.rpc.call_node_start_master(master, False, False)
2679 msg = result.fail_msg
2681 self.LogWarning("Could not re-enable the master role on"
2682 " the master, please restart manually: %s", msg)
2687 class LUClusterSetParams(LogicalUnit):
2688 """Change the parameters of the cluster.
2691 HPATH = "cluster-modify"
2692 HTYPE = constants.HTYPE_CLUSTER
2695 def CheckArguments(self):
2699 if self.op.uid_pool:
2700 uidpool.CheckUidPool(self.op.uid_pool)
2702 if self.op.add_uids:
2703 uidpool.CheckUidPool(self.op.add_uids)
2705 if self.op.remove_uids:
2706 uidpool.CheckUidPool(self.op.remove_uids)
2708 def ExpandNames(self):
2709 # FIXME: in the future maybe other cluster params won't require checking on
2710 # all nodes to be modified.
2711 self.needed_locks = {
2712 locking.LEVEL_NODE: locking.ALL_SET,
2714 self.share_locks[locking.LEVEL_NODE] = 1
2716 def BuildHooksEnv(self):
2721 "OP_TARGET": self.cfg.GetClusterName(),
2722 "NEW_VG_NAME": self.op.vg_name,
2724 mn = self.cfg.GetMasterNode()
2725 return env, [mn], [mn]
2727 def CheckPrereq(self):
2728 """Check prerequisites.
2730 This checks whether the given parameters don't conflict and
2731 whether the given volume group is valid.
2734 if self.op.vg_name is not None and not self.op.vg_name:
2735 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2736 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2737 " instances exist", errors.ECODE_INVAL)
2739 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2740 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2741 raise errors.OpPrereqError("Cannot disable drbd helper while"
2742 " drbd-based instances exist",
2745 node_list = self.acquired_locks[locking.LEVEL_NODE]
2747 # if vg_name not None, checks given volume group on all nodes
2749 vglist = self.rpc.call_vg_list(node_list)
2750 for node in node_list:
2751 msg = vglist[node].fail_msg
2753 # ignoring down node
2754 self.LogWarning("Error while gathering data on node %s"
2755 " (ignoring node): %s", node, msg)
2757 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2759 constants.MIN_VG_SIZE)
2761 raise errors.OpPrereqError("Error on node '%s': %s" %
2762 (node, vgstatus), errors.ECODE_ENVIRON)
2764 if self.op.drbd_helper:
2765 # checks given drbd helper on all nodes
2766 helpers = self.rpc.call_drbd_helper(node_list)
2767 for node in node_list:
2768 ninfo = self.cfg.GetNodeInfo(node)
2770 self.LogInfo("Not checking drbd helper on offline node %s", node)
2772 msg = helpers[node].fail_msg
2774 raise errors.OpPrereqError("Error checking drbd helper on node"
2775 " '%s': %s" % (node, msg),
2776 errors.ECODE_ENVIRON)
2777 node_helper = helpers[node].payload
2778 if node_helper != self.op.drbd_helper:
2779 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2780 (node, node_helper), errors.ECODE_ENVIRON)
2782 self.cluster = cluster = self.cfg.GetClusterInfo()
2783 # validate params changes
2784 if self.op.beparams:
2785 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2786 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2788 if self.op.ndparams:
2789 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2790 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2792 if self.op.nicparams:
2793 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2794 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2795 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2798 # check all instances for consistency
2799 for instance in self.cfg.GetAllInstancesInfo().values():
2800 for nic_idx, nic in enumerate(instance.nics):
2801 params_copy = copy.deepcopy(nic.nicparams)
2802 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2804 # check parameter syntax
2806 objects.NIC.CheckParameterSyntax(params_filled)
2807 except errors.ConfigurationError, err:
2808 nic_errors.append("Instance %s, nic/%d: %s" %
2809 (instance.name, nic_idx, err))
2811 # if we're moving instances to routed, check that they have an IP
2812 target_mode = params_filled[constants.NIC_MODE]
2813 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2814 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2815 (instance.name, nic_idx))
2817 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2818 "\n".join(nic_errors))
2820 # hypervisor list/parameters
2821 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2822 if self.op.hvparams:
2823 for hv_name, hv_dict in self.op.hvparams.items():
2824 if hv_name not in self.new_hvparams:
2825 self.new_hvparams[hv_name] = hv_dict
2827 self.new_hvparams[hv_name].update(hv_dict)
2829 # os hypervisor parameters
2830 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2832 for os_name, hvs in self.op.os_hvp.items():
2833 if os_name not in self.new_os_hvp:
2834 self.new_os_hvp[os_name] = hvs
2836 for hv_name, hv_dict in hvs.items():
2837 if hv_name not in self.new_os_hvp[os_name]:
2838 self.new_os_hvp[os_name][hv_name] = hv_dict
2840 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2843 self.new_osp = objects.FillDict(cluster.osparams, {})
2844 if self.op.osparams:
2845 for os_name, osp in self.op.osparams.items():
2846 if os_name not in self.new_osp:
2847 self.new_osp[os_name] = {}
2849 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2852 if not self.new_osp[os_name]:
2853 # we removed all parameters
2854 del self.new_osp[os_name]
2856 # check the parameter validity (remote check)
2857 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2858 os_name, self.new_osp[os_name])
2860 # changes to the hypervisor list
2861 if self.op.enabled_hypervisors is not None:
2862 self.hv_list = self.op.enabled_hypervisors
2863 for hv in self.hv_list:
2864 # if the hypervisor doesn't already exist in the cluster
2865 # hvparams, we initialize it to empty, and then (in both
2866 # cases) we make sure to fill the defaults, as we might not
2867 # have a complete defaults list if the hypervisor wasn't enabled before
2869 if hv not in new_hvp:
2871 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2872 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2874 self.hv_list = cluster.enabled_hypervisors
2876 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2877 # either the enabled list has changed, or the parameters have, validate
2878 for hv_name, hv_params in self.new_hvparams.items():
2879 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2880 (self.op.enabled_hypervisors and
2881 hv_name in self.op.enabled_hypervisors)):
2882 # either this is a new hypervisor, or its parameters have changed
2883 hv_class = hypervisor.GetHypervisor(hv_name)
2884 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2885 hv_class.CheckParameterSyntax(hv_params)
2886 _CheckHVParams(self, node_list, hv_name, hv_params)
2889 # no need to check any newly-enabled hypervisors, since the
2890 # defaults have already been checked in the above code-block
2891 for os_name, os_hvp in self.new_os_hvp.items():
2892 for hv_name, hv_params in os_hvp.items():
2893 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2894 # we need to fill in the new os_hvp on top of the actual hv_p
2895 cluster_defaults = self.new_hvparams.get(hv_name, {})
2896 new_osp = objects.FillDict(cluster_defaults, hv_params)
2897 hv_class = hypervisor.GetHypervisor(hv_name)
2898 hv_class.CheckParameterSyntax(new_osp)
2899 _CheckHVParams(self, node_list, hv_name, new_osp)
2901 if self.op.default_iallocator:
2902 alloc_script = utils.FindFile(self.op.default_iallocator,
2903 constants.IALLOCATOR_SEARCH_PATH,
2905 if alloc_script is None:
2906 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2907 " specified" % self.op.default_iallocator,
2910 def Exec(self, feedback_fn):
2911 """Change the parameters of the cluster.
2914 if self.op.vg_name is not None:
2915 new_volume = self.op.vg_name
2918 if new_volume != self.cfg.GetVGName():
2919 self.cfg.SetVGName(new_volume)
2921 feedback_fn("Cluster LVM configuration already in desired"
2922 " state, not changing")
2923 if self.op.drbd_helper is not None:
2924 new_helper = self.op.drbd_helper
2927 if new_helper != self.cfg.GetDRBDHelper():
2928 self.cfg.SetDRBDHelper(new_helper)
2930 feedback_fn("Cluster DRBD helper already in desired state,"
2932 if self.op.hvparams:
2933 self.cluster.hvparams = self.new_hvparams
2935 self.cluster.os_hvp = self.new_os_hvp
2936 if self.op.enabled_hypervisors is not None:
2937 self.cluster.hvparams = self.new_hvparams
2938 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2939 if self.op.beparams:
2940 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2941 if self.op.nicparams:
2942 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2943 if self.op.osparams:
2944 self.cluster.osparams = self.new_osp
2945 if self.op.ndparams:
2946 self.cluster.ndparams = self.new_ndparams
2948 if self.op.candidate_pool_size is not None:
2949 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2950 # we need to update the pool size here, otherwise the save will fail
2951 _AdjustCandidatePool(self, [])
2953 if self.op.maintain_node_health is not None:
2954 self.cluster.maintain_node_health = self.op.maintain_node_health
2956 if self.op.prealloc_wipe_disks is not None:
2957 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2959 if self.op.add_uids is not None:
2960 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2962 if self.op.remove_uids is not None:
2963 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2965 if self.op.uid_pool is not None:
2966 self.cluster.uid_pool = self.op.uid_pool
2968 if self.op.default_iallocator is not None:
2969 self.cluster.default_iallocator = self.op.default_iallocator
2971 if self.op.reserved_lvs is not None:
2972 self.cluster.reserved_lvs = self.op.reserved_lvs
2974 def helper_os(aname, mods, desc):
2976 lst = getattr(self.cluster, aname)
2977 for key, val in mods:
2978 if key == constants.DDM_ADD:
2980 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2983 elif key == constants.DDM_REMOVE:
2987 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2989 raise errors.ProgrammerError("Invalid modification '%s'" % key)
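# Illustrative usage sketch (not part of the original code; the OS names
# are made up): "mods" is a list of (DDM_ADD|DDM_REMOVE, os_name) pairs,
# e.g.
#
#   helper_os("hidden_os",
#             [(constants.DDM_ADD, "debian-image"),
#              (constants.DDM_REMOVE, "lenny-image")],
#             "hidden")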
2991 if self.op.hidden_os:
2992 helper_os("hidden_os", self.op.hidden_os, "hidden")
2994 if self.op.blacklisted_os:
2995 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2997 if self.op.master_netdev:
2998 master = self.cfg.GetMasterNode()
2999 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3000 self.cluster.master_netdev)
3001 result = self.rpc.call_node_stop_master(master, False)
3002 result.Raise("Could not disable the master ip")
3003 feedback_fn("Changing master_netdev from %s to %s" %
3004 (self.cluster.master_netdev, self.op.master_netdev))
3005 self.cluster.master_netdev = self.op.master_netdev
3007 self.cfg.Update(self.cluster, feedback_fn)
3009 if self.op.master_netdev:
3010 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3011 self.op.master_netdev)
3012 result = self.rpc.call_node_start_master(master, False, False)
3014 self.LogWarning("Could not re-enable the master ip on"
3015 " the master, please restart manually: %s",
3019 def _UploadHelper(lu, nodes, fname):
3020 """Helper for uploading a file and showing warnings.
3023 if os.path.exists(fname):
3024 result = lu.rpc.call_upload_file(nodes, fname)
3025 for to_node, to_result in result.items():
3026 msg = to_result.fail_msg
3028 msg = ("Copy of file %s to node %s failed: %s" %
3029 (fname, to_node, msg))
3030 lu.proc.LogWarning(msg)
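# Illustrative usage sketch (not part of the original code): the helper is
# fire-and-forget, failures are only reported as warnings:
#
#   _UploadHelper(lu, lu.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)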
3033 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3034 """Distribute additional files which are part of the cluster configuration.
3036 ConfigWriter takes care of distributing the config and ssconf files, but
3037 there are more files which should be distributed to all nodes. This function
3038 makes sure those are copied.
3040 @param lu: calling logical unit
3041 @param additional_nodes: list of nodes not in the config to distribute to
3042 @type additional_vm: boolean
3043 @param additional_vm: whether the additional nodes are vm-capable or not
3046 # 1. Gather target nodes
3047 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3048 dist_nodes = lu.cfg.GetOnlineNodeList()
3049 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3050 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3051 if additional_nodes is not None:
3052 dist_nodes.extend(additional_nodes)
3054 vm_nodes.extend(additional_nodes)
3055 if myself.name in dist_nodes:
3056 dist_nodes.remove(myself.name)
3057 if myself.name in vm_nodes:
3058 vm_nodes.remove(myself.name)
3060 # 2. Gather files to distribute
3061 dist_files = set([constants.ETC_HOSTS,
3062 constants.SSH_KNOWN_HOSTS_FILE,
3063 constants.RAPI_CERT_FILE,
3064 constants.RAPI_USERS_FILE,
3065 constants.CONFD_HMAC_KEY,
3066 constants.CLUSTER_DOMAIN_SECRET_FILE,
3070 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3071 for hv_name in enabled_hypervisors:
3072 hv_class = hypervisor.GetHypervisor(hv_name)
3073 vm_files.update(hv_class.GetAncillaryFiles())
3075 # 3. Perform the files upload
3076 for fname in dist_files:
3077 _UploadHelper(lu, dist_nodes, fname)
3078 for fname in vm_files:
3079 _UploadHelper(lu, vm_nodes, fname)
3082 class LUClusterRedistConf(NoHooksLU):
3083 """Force the redistribution of cluster configuration.
3085 This is a very simple LU.
3090 def ExpandNames(self):
3091 self.needed_locks = {
3092 locking.LEVEL_NODE: locking.ALL_SET,
3094 self.share_locks[locking.LEVEL_NODE] = 1
3096 def Exec(self, feedback_fn):
3097 """Redistribute the configuration.
3100 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3101 _RedistributeAncillaryFiles(self)
3104 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3105 """Sleep and poll for an instance's disk to sync.
3108 if not instance.disks or disks is not None and not disks:
3111 disks = _ExpandCheckDisks(instance, disks)
3114 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3116 node = instance.primary_node
3119 lu.cfg.SetDiskID(dev, node)
3121 # TODO: Convert to utils.Retry
3124 degr_retries = 10 # in seconds, as we sleep 1 second each time
3128 cumul_degraded = False
3129 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3130 msg = rstats.fail_msg
3132 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3135 raise errors.RemoteError("Can't contact node %s for mirror data,"
3136 " aborting." % node)
3139 rstats = rstats.payload
3141 for i, mstat in enumerate(rstats):
3143 lu.LogWarning("Can't compute data for node %s/%s",
3144 node, disks[i].iv_name)
3147 cumul_degraded = (cumul_degraded or
3148 (mstat.is_degraded and mstat.sync_percent is None))
3149 if mstat.sync_percent is not None:
3151 if mstat.estimated_time is not None:
3152 rem_time = ("%s remaining (estimated)" %
3153 utils.FormatSeconds(mstat.estimated_time))
3154 max_time = mstat.estimated_time
3156 rem_time = "no time estimate"
3157 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3158 (disks[i].iv_name, mstat.sync_percent, rem_time))
3160 # if we're done but degraded, let's do a few small retries, to
3161 # make sure we see a stable and not transient situation; therefore
3162 # we force restart of the loop
3163 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3164 logging.info("Degraded disks found, %d retries left", degr_retries)
3172 time.sleep(min(60, max_time))
3175 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3176 return not cumul_degraded
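# Illustrative usage sketch (not part of the original code): callers
# typically treat a False return value as "still degraded" and abort:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)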
3179 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3180 """Check that mirrors are not degraded.
3182 The ldisk parameter, if True, will change the test from the
3183 is_degraded attribute (which represents overall non-ok status for
3184 the device(s)) to the ldisk (representing the local storage status).
3187 lu.cfg.SetDiskID(dev, node)
3191 if on_primary or dev.AssembleOnSecondary():
3192 rstats = lu.rpc.call_blockdev_find(node, dev)
3193 msg = rstats.fail_msg
3195 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3197 elif not rstats.payload:
3198 lu.LogWarning("Can't find disk on node %s", node)
3202 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3204 result = result and not rstats.payload.is_degraded
3207 for child in dev.children:
3208 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3213 class LUOobCommand(NoHooksLU):
3214 """Logical unit for OOB handling.
3219 def CheckPrereq(self):
3220 """Check prerequisites.
3223 - the node exists in the configuration
3226 Any errors are signaled by raising errors.OpPrereqError.
3230 for node_name in self.op.node_names:
3231 node = self.cfg.GetNodeInfo(node_name)
3234 raise errors.OpPrereqError("Node %s not found" % node_name,
3237 self.nodes.append(node)
3239 if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3240 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3241 " not marked offline") % node_name,
3244 def ExpandNames(self):
3245 """Gather locks we need.
3248 if self.op.node_names:
3249 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3250 for name in self.op.node_names]
3252 self.op.node_names = self.cfg.GetNodeList()
3254 self.needed_locks = {
3255 locking.LEVEL_NODE: self.op.node_names,
3258 def Exec(self, feedback_fn):
3259 """Execute OOB and return result if we expect any.
3262 master_node = self.cfg.GetMasterNode()
3265 for node in self.nodes:
3266 node_entry = [(constants.RS_NORMAL, node.name)]
3267 ret.append(node_entry)
3269 oob_program = _SupportsOob(self.cfg, node)
3272 node_entry.append((constants.RS_UNAVAIL, None))
3275 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3276 self.op.command, oob_program, node.name)
3277 result = self.rpc.call_run_oob(master_node, oob_program,
3278 self.op.command, node.name,
3282 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3283 node.name, result.fail_msg)
3284 node_entry.append((constants.RS_NODATA, None))
3287 self._CheckPayload(result)
3288 except errors.OpExecError, err:
3289 self.LogWarning("The payload returned by '%s' is not valid: %s",
3291 node_entry.append((constants.RS_NODATA, None))
3293 if self.op.command == constants.OOB_HEALTH:
3294 # For health we should log important events
3295 for item, status in result.payload:
3296 if status in [constants.OOB_STATUS_WARNING,
3297 constants.OOB_STATUS_CRITICAL]:
3298 self.LogWarning("On node '%s' item '%s' has status '%s'",
3299 node.name, item, status)
3301 if self.op.command == constants.OOB_POWER_ON:
3303 elif self.op.command == constants.OOB_POWER_OFF:
3304 node.powered = False
3305 elif self.op.command == constants.OOB_POWER_STATUS:
3306 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3307 if powered != node.powered:
3308 logging.warning(("Recorded power state (%s) of node '%s' does not"
3309 " match actual power state (%s)"), node.powered,
3312 # For configuration changing commands we should update the node
3313 if self.op.command in (constants.OOB_POWER_ON,
3314 constants.OOB_POWER_OFF):
3315 self.cfg.Update(node, feedback_fn)
3317 node_entry.append((constants.RS_NORMAL, result.payload))
3321 def _CheckPayload(self, result):
3322 """Checks if the payload is valid.
3324 @param result: RPC result
3325 @raises errors.OpExecError: If payload is not valid
3329 if self.op.command == constants.OOB_HEALTH:
3330 if not isinstance(result.payload, list):
3331 errs.append("command 'health' is expected to return a list but got %s" %
3332 type(result.payload))
3334 for item, status in result.payload:
3335 if status not in constants.OOB_STATUSES:
3336 errs.append("health item '%s' has invalid status '%s'" %
3339 if self.op.command == constants.OOB_POWER_STATUS:
3340 if not isinstance(result.payload, dict):
3341 errs.append("power-status is expected to return a dict but got %s" %
3342 type(result.payload))
3344 if self.op.command in [
3345 constants.OOB_POWER_ON,
3346 constants.OOB_POWER_OFF,
3347 constants.OOB_POWER_CYCLE,
3349 if result.payload is not None:
3350 errs.append("%s is expected to not return payload but got '%s'" %
3351 (self.op.command, result.payload))
3354 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3355 utils.CommaJoin(errs))
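# Illustrative sketch (not part of the original code; "disk0" is a made-up
# item name): payload shapes that pass the checks above, per command:
#
#   OOB_HEALTH        -> [("disk0", constants.OOB_STATUS_OK), ...]
#   OOB_POWER_STATUS  -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/_OFF -> None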
3359 class LUOsDiagnose(NoHooksLU):
3360 """Logical unit for OS diagnose/query.
3365 _BLK = "blacklisted"
3367 _FIELDS_STATIC = utils.FieldSet()
3368 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3369 "parameters", "api_versions", _HID, _BLK)
3371 def CheckArguments(self):
3373 raise errors.OpPrereqError("Selective OS query not supported",
3376 _CheckOutputFields(static=self._FIELDS_STATIC,
3377 dynamic=self._FIELDS_DYNAMIC,
3378 selected=self.op.output_fields)
3380 def ExpandNames(self):
3381 # Lock all nodes, in shared mode
3382 # Temporary removal of locks, should be reverted later
3383 # TODO: reintroduce locks when they are lighter-weight
3384 self.needed_locks = {}
3385 #self.share_locks[locking.LEVEL_NODE] = 1
3386 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3389 def _DiagnoseByOS(rlist):
3390 """Remaps a per-node return list into an a per-os per-node dictionary
3392 @param rlist: a map with node names as keys and OS objects as values
3395 @return: a dictionary with osnames as keys and as value another
3396 map, with nodes as keys and tuples of (path, status, diagnose,
3397 variants, parameters, api_versions) as values, eg::
3399 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3400 (/srv/..., False, "invalid api")],
3401 "node2": [(/srv/..., True, "", [], [])]}
3406 # we build here the list of nodes that didn't fail the RPC (at RPC
3407 # level), so that nodes with a non-responding node daemon don't
3408 # make all OSes invalid
3409 good_nodes = [node_name for node_name in rlist
3410 if not rlist[node_name].fail_msg]
3411 for node_name, nr in rlist.items():
3412 if nr.fail_msg or not nr.payload:
3414 for (name, path, status, diagnose, variants,
3415 params, api_versions) in nr.payload:
3416 if name not in all_os:
3417 # build a list of nodes for this os containing empty lists
3418 # for each node in node_list
3420 for nname in good_nodes:
3421 all_os[name][nname] = []
3422 # convert params from [name, help] to (name, help)
3423 params = [tuple(v) for v in params]
3424 all_os[name][node_name].append((path, status, diagnose,
3425 variants, params, api_versions))
3428 def Exec(self, feedback_fn):
3429 """Compute the list of OSes.
3432 valid_nodes = [node.name
3433 for node in self.cfg.GetAllNodesInfo().values()
3434 if not node.offline and node.vm_capable]
3435 node_data = self.rpc.call_os_diagnose(valid_nodes)
3436 pol = self._DiagnoseByOS(node_data)
3438 cluster = self.cfg.GetClusterInfo()
3440 for os_name in utils.NiceSort(pol.keys()):
3441 os_data = pol[os_name]
3444 (variants, params, api_versions) = null_state = (set(), set(), set())
3445 for idx, osl in enumerate(os_data.values()):
3446 valid = bool(valid and osl and osl[0][1])
3448 (variants, params, api_versions) = null_state
3450 node_variants, node_params, node_api = osl[0][3:6]
3451 if idx == 0: # first entry
3452 variants = set(node_variants)
3453 params = set(node_params)
3454 api_versions = set(node_api)
3455 else: # keep consistency
3456 variants.intersection_update(node_variants)
3457 params.intersection_update(node_params)
3458 api_versions.intersection_update(node_api)
3460 is_hid = os_name in cluster.hidden_os
3461 is_blk = os_name in cluster.blacklisted_os
3462 if ((self._HID not in self.op.output_fields and is_hid) or
3463 (self._BLK not in self.op.output_fields and is_blk) or
3464 (self._VLD not in self.op.output_fields and not valid)):
3467 for field in self.op.output_fields:
3470 elif field == self._VLD:
3472 elif field == "node_status":
3473 # this is just a copy of the dict
3475 for node_name, nos_list in os_data.items():
3476 val[node_name] = nos_list
3477 elif field == "variants":
3478 val = utils.NiceSort(list(variants))
3479 elif field == "parameters":
3481 elif field == "api_versions":
3482 val = list(api_versions)
3483 elif field == self._HID:
3485 elif field == self._BLK:
3488 raise errors.ParameterError(field)
3495 class LUNodeRemove(LogicalUnit):
3496 """Logical unit for removing a node.
3499 HPATH = "node-remove"
3500 HTYPE = constants.HTYPE_NODE
3502 def BuildHooksEnv(self):
3505 This doesn't run on the target node in the pre phase as a failed
3506 node would then be impossible to remove.
3510 "OP_TARGET": self.op.node_name,
3511 "NODE_NAME": self.op.node_name,
3513 all_nodes = self.cfg.GetNodeList()
3515 all_nodes.remove(self.op.node_name)
3517 logging.warning("Node %s which is about to be removed was not found"
3518 " in the list of all nodes", self.op.node_name)
3519 return env, all_nodes, all_nodes
3521 def CheckPrereq(self):
3522 """Check prerequisites.
3525 - the node exists in the configuration
3526 - it does not have primary or secondary instances
3527 - it's not the master
3529 Any errors are signaled by raising errors.OpPrereqError.
3532 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3533 node = self.cfg.GetNodeInfo(self.op.node_name)
3534 assert node is not None
3536 instance_list = self.cfg.GetInstanceList()
3538 masternode = self.cfg.GetMasterNode()
3539 if node.name == masternode:
3540 raise errors.OpPrereqError("Node is the master node,"
3541 " you need to failover first.",
3544 for instance_name in instance_list:
3545 instance = self.cfg.GetInstanceInfo(instance_name)
3546 if node.name in instance.all_nodes:
3547 raise errors.OpPrereqError("Instance %s is still running on the node,"
3548 " please remove first." % instance_name,
3550 self.op.node_name = node.name
3553 def Exec(self, feedback_fn):
3554 """Removes the node from the cluster.
3558 logging.info("Stopping the node daemon and removing configs from node %s",
3561 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3563 # Promote nodes to master candidate as needed
3564 _AdjustCandidatePool(self, exceptions=[node.name])
3565 self.context.RemoveNode(node.name)
3567 # Run post hooks on the node before it's removed
3568 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3570 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3572 # pylint: disable-msg=W0702
3573 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3575 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3576 msg = result.fail_msg
3578 self.LogWarning("Errors encountered on the remote node while leaving"
3579 " the cluster: %s", msg)
3581 # Remove node from our /etc/hosts
3582 if self.cfg.GetClusterInfo().modify_etc_hosts:
3583 master_node = self.cfg.GetMasterNode()
3584 result = self.rpc.call_etc_hosts_modify(master_node,
3585 constants.ETC_HOSTS_REMOVE,
3587 result.Raise("Can't update hosts file with new host data")
3588 _RedistributeAncillaryFiles(self)
3591 class _NodeQuery(_QueryBase):
3592 FIELDS = query.NODE_FIELDS
3594 def ExpandNames(self, lu):
3595 lu.needed_locks = {}
3596 lu.share_locks[locking.LEVEL_NODE] = 1
3599 self.wanted = _GetWantedNodes(lu, self.names)
3601 self.wanted = locking.ALL_SET
3603 self.do_locking = (self.use_locking and
3604 query.NQ_LIVE in self.requested_data)
3607 # if we don't request only static fields, we need to lock the nodes
3608 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3610 def DeclareLocks(self, lu, level):
3613 def _GetQueryData(self, lu):
3614 """Computes the list of nodes and their attributes.
3617 all_info = lu.cfg.GetAllNodesInfo()
3619 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3621 # Gather data as requested
3622 if query.NQ_LIVE in self.requested_data:
3623 node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3624 lu.cfg.GetHypervisorType())
3625 live_data = dict((name, nresult.payload)
3626 for (name, nresult) in node_data.items()
3627 if not nresult.fail_msg and nresult.payload)
3631 if query.NQ_INST in self.requested_data:
3632 node_to_primary = dict([(name, set()) for name in nodenames])
3633 node_to_secondary = dict([(name, set()) for name in nodenames])
3635 inst_data = lu.cfg.GetAllInstancesInfo()
3637 for inst in inst_data.values():
3638 if inst.primary_node in node_to_primary:
3639 node_to_primary[inst.primary_node].add(inst.name)
3640 for secnode in inst.secondary_nodes:
3641 if secnode in node_to_secondary:
3642 node_to_secondary[secnode].add(inst.name)
3644 node_to_primary = None
3645 node_to_secondary = None
3647 if query.NQ_OOB in self.requested_data:
3648 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3649 for name, node in all_info.iteritems())
3653 if query.NQ_GROUP in self.requested_data:
3654 groups = lu.cfg.GetAllNodeGroupsInfo()
3658 return query.NodeQueryData([all_info[name] for name in nodenames],
3659 live_data, lu.cfg.GetMasterNode(),
3660 node_to_primary, node_to_secondary, groups,
3661 oob_support, lu.cfg.GetClusterInfo())
3664 class LUNodeQuery(NoHooksLU):
3665 """Logical unit for querying nodes.
3668 # pylint: disable-msg=W0142
3671 def CheckArguments(self):
3672 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3673 self.op.use_locking)
3675 def ExpandNames(self):
3676 self.nq.ExpandNames(self)
3678 def Exec(self, feedback_fn):
3679 return self.nq.OldStyleQuery(self)
3682 class LUNodeQueryvols(NoHooksLU):
3683 """Logical unit for getting volumes on node(s).
3687 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3688 _FIELDS_STATIC = utils.FieldSet("node")
3690 def CheckArguments(self):
3691 _CheckOutputFields(static=self._FIELDS_STATIC,
3692 dynamic=self._FIELDS_DYNAMIC,
3693 selected=self.op.output_fields)
3695 def ExpandNames(self):
3696 self.needed_locks = {}
3697 self.share_locks[locking.LEVEL_NODE] = 1
3698 if not self.op.nodes:
3699 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3701 self.needed_locks[locking.LEVEL_NODE] = \
3702 _GetWantedNodes(self, self.op.nodes)
3704 def Exec(self, feedback_fn):
3705 """Computes the list of nodes and their attributes.
3708 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3709 volumes = self.rpc.call_node_volumes(nodenames)
3711 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3712 in self.cfg.GetInstanceList()]
3714 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3717 for node in nodenames:
3718 nresult = volumes[node]
3721 msg = nresult.fail_msg
3723 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3726 node_vols = nresult.payload[:]
3727 node_vols.sort(key=lambda vol: vol['dev'])
3729 for vol in node_vols:
3731 for field in self.op.output_fields:
3734 elif field == "phys":
3738 elif field == "name":
3740 elif field == "size":
3741 val = int(float(vol['size']))
3742 elif field == "instance":
3744 if node not in lv_by_node[inst]:
3746 if vol['name'] in lv_by_node[inst][node]:
3752 raise errors.ParameterError(field)
3753 node_output.append(str(val))
3755 output.append(node_output)
3760 class LUNodeQueryStorage(NoHooksLU):
3761 """Logical unit for getting information on storage units on node(s).
3764 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3767 def CheckArguments(self):
3768 _CheckOutputFields(static=self._FIELDS_STATIC,
3769 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3770 selected=self.op.output_fields)
3772 def ExpandNames(self):
3773 self.needed_locks = {}
3774 self.share_locks[locking.LEVEL_NODE] = 1
3777 self.needed_locks[locking.LEVEL_NODE] = \
3778 _GetWantedNodes(self, self.op.nodes)
3780 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3782 def Exec(self, feedback_fn):
3783 """Computes the list of nodes and their attributes.
3786 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3788 # Always get name to sort by
3789 if constants.SF_NAME in self.op.output_fields:
3790 fields = self.op.output_fields[:]
3792 fields = [constants.SF_NAME] + self.op.output_fields
3794 # Never ask for node or type, as they are only known to the LU
3795 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3796 while extra in fields:
3797 fields.remove(extra)
3799 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3800 name_idx = field_idx[constants.SF_NAME]
3802 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3803 data = self.rpc.call_storage_list(self.nodes,
3804 self.op.storage_type, st_args,
3805 self.op.name, fields)
3809 for node in utils.NiceSort(self.nodes):
3810 nresult = data[node]
3814 msg = nresult.fail_msg
3816 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3819 rows = dict([(row[name_idx], row) for row in nresult.payload])
3821 for name in utils.NiceSort(rows.keys()):
3826 for field in self.op.output_fields:
3827 if field == constants.SF_NODE:
3829 elif field == constants.SF_TYPE:
3830 val = self.op.storage_type
3831 elif field in field_idx:
3832 val = row[field_idx[field]]
3834 raise errors.ParameterError(field)
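
# Illustrative note: field_idx above is a plain name -> column-index map over
# the RPC result rows; e.g. with fields == ["name", "size", "used"] it would
# be {"name": 0, "size": 1, "used": 2}, and name_idx locates the column that
# is used as the sort key.
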
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if all_info[inst].primary_node == name:
              live_data.update(result.payload)
            else:
              wrongnode_inst.add(inst)
        # else no instance is alive
    else:
      live_data = None

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
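
# Illustrative note: requested_data above is the set of query.IQ_* flags
# derived from the selected fields; a query that needs both run-time state
# and console information, for example, ends up with IQ_LIVE and IQ_CONSOLE
# in the set, which is why the live-data RPC is issued for either of them.
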
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for the available fields of resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
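
# Usage sketch (hypothetical values): for an LVM physical volume one could
# submit changes={constants.SF_ALLOCATABLE: False} to mark the PV as
# non-allocatable; CheckArguments above rejects any field that is not listed
# in constants.MODIFIABLE_STORAGE_FIELDS for the given storage type.
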
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)
4144 # checks reachability
4145 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4146 raise errors.OpPrereqError("Node not reachable by ping",
4147 errors.ECODE_ENVIRON)
4149 if not newbie_singlehomed:
4150 # check reachability from my secondary ip to newbie's secondary ip
4151 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4152 source=myself.secondary_ip):
4153 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4154 " based ping to node daemon port",
4155 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
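  # For example, _F2R[(True, False, False)] is _ROLE_CANDIDATE and
  # _R2F[_ROLE_DRAINED] is (False, True, False); inverting the dict is safe
  # because each flag tuple maps to a distinct role.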
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
        self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)
4413 self.old_flags = old_flags = (node.master_candidate,
4414 node.drained, node.offline)
4415 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4416 self.old_role = old_role = self._F2R[old_flags]
4418 # Check for ineffective changes
4419 for attr in self._FLAGS:
4420 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4421 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4422 setattr(self.op, attr, None)
4424 # Past this point, any flag change to False means a transition
4425 # away from the respective state, as only real changes are kept
    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)
4439 # If we're being deofflined/drained, we'll MC ourself if needed
4440 if (self.op.drained == False or self.op.offline == False or
4441 (self.op.master_capable and not node.master_capable)):
4442 if _DecideSelfPromotion(self):
4443 self.op.master_candidate = True
4444 self.LogInfo("Auto-promoting node to master candidate")
4446 # If we're no longer master capable, we'll demote ourselves from MC
4447 if self.op.master_capable == False and node.master_candidate:
4448 self.LogInfo("Demoting from master candidate")
4449 self.op.master_candidate = False
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)
4510 if self.op.ndparams:
4511 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4512 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4513 self.new_ndparams = new_ndparams
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
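    # Example (hypothetical values): with os_hvp == {"debian": {"kvm": {...},
    # "xen-pvm": {...}}} and enabled_hypervisors == ["kvm"], only the "kvm"
    # entry for "debian" survives the filtering above.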
4624 # Convert ip_family to ip_version
4625 primary_ip_version = constants.IP4_VERSION
4626 if cluster.primary_ip_family == netutils.IP6Address.family:
4627 primary_ip_version = constants.IP6_VERSION
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
4644 "osparams": cluster.osparams,
4645 "nicparams": cluster.nicparams,
4646 "ndparams": cluster.ndparams,
4647 "candidate_pool_size": cluster.candidate_pool_size,
4648 "master_netdev": cluster.master_netdev,
4649 "volume_group_name": cluster.volume_group_name,
4650 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4651 "file_storage_dir": cluster.file_storage_dir,
4652 "maintain_node_health": cluster.maintain_node_health,
4653 "ctime": cluster.ctime,
4654 "mtime": cluster.mtime,
4655 "uuid": cluster.uuid,
4656 "tags": list(cluster.GetTags()),
4657 "uid_pool": cluster.uid_pool,
4658 "default_iallocator": cluster.default_iallocator,
4659 "reserved_lvs": cluster.reserved_lvs,
4660 "primary_ip_version": primary_ip_version,
4661 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4662 "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")
4678 def CheckArguments(self):
4679 _CheckOutputFields(static=self._FIELDS_STATIC,
4680 dynamic=self._FIELDS_DYNAMIC,
4681 selected=self.op.output_fields)
4683 def ExpandNames(self):
4684 self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []

    for field in self.op.output_fields:
4692 if field == "cluster_name":
4693 entry = self.cfg.GetClusterName()
4694 elif field == "master_node":
4695 entry = self.cfg.GetMasterNode()
4696 elif field == "drain_flag":
4697 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4698 elif field == "watcher_pause":
4699 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4700 elif field == "volume_group_name":
4701 entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
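
# Usage sketch: callers typically assemble all disks and inspect the returned
# mapping, e.g. (as _StartInstanceDisks below does, minus the error handling):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
#                                                  ignore_secondaries=True)
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s/%s exposed at %s" % (node, iv_name, dev_path))
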
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
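
# Note: with force=True the disks are shut down even if the instance is still
# marked as running; the safe variant below first verifies via
# _CheckInstanceDown that the instance is not running.
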
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are taken
  into account; otherwise they are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
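
# Usage sketch: callers derive the requirement from the filled beparams,
# e.g. (as LUInstanceStartup.CheckPrereq does):
#   bep = lu.cfg.GetClusterInfo().FillBE(instance)
#   _CheckNodeFreeMemory(lu, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
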
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
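
# Illustrative shape of req_sizes (hypothetical VG names): passing
# {"xenvg": 10240, "ssdvg": 2048} checks for 10 GiB free on "xenvg" and
# 2 GiB free on "ssdvg" on every node in nodenames.
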
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)
  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)
  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)
  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance
  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance
  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)

    _CreateDisks(self, self.instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)
  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template == constants.DT_FILE and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post
5671 def CheckPrereq(self):
5672 """Check prerequisites.
5674 This checks that the instance is in the cluster.
5677 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5678 assert self.instance is not None, \
5679 "Cannot retrieve locked instance %s" % self.op.instance_name
5681 bep = self.cfg.GetClusterInfo().FillBE(instance)
5682 if instance.disk_template not in constants.DTS_NET_MIRROR:
5683 raise errors.OpPrereqError("Instance's disk layout is not"
5684 " network mirrored, cannot failover.",
5687 secondary_nodes = instance.secondary_nodes
5688 if not secondary_nodes:
5689 raise errors.ProgrammerError("no secondary node but using "
5690 "a mirrored disk template")
5692 target_node = secondary_nodes[0]
5693 _CheckNodeOnline(self, target_node)
5694 _CheckNodeNotDrained(self, target_node)
5695 if instance.admin_up:
5696 # check memory requirements on the secondary node
5697 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5698 instance.name, bep[constants.BE_MEMORY],
5699 instance.hypervisor)
5700 else:
5701 self.LogInfo("Not checking memory on the secondary node as"
5702 " instance will not be started")
5704 # check bridge existence
5705 _CheckInstanceBridgesExist(self, instance, node=target_node)
5707 def Exec(self, feedback_fn):
5708 """Failover an instance.
5710 The failover is done by shutting it down on its present node and
5711 starting it on the secondary.
5714 instance = self.instance
5715 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5717 source_node = instance.primary_node
5718 target_node = instance.secondary_nodes[0]
5720 if instance.admin_up:
5721 feedback_fn("* checking disk consistency between source and target")
5722 for dev in instance.disks:
5723 # for drbd, these are drbd over lvm
5724 if not _CheckDiskConsistency(self, dev, target_node, False):
5725 if not self.op.ignore_consistency:
5726 raise errors.OpExecError("Disk %s is degraded on target node,"
5727 " aborting failover." % dev.iv_name)
5729 feedback_fn("* not checking disk consistency as instance is not running")
5731 feedback_fn("* shutting down instance on source node")
5732 logging.info("Shutting down instance %s on node %s",
5733 instance.name, source_node)
5735 result = self.rpc.call_instance_shutdown(source_node, instance,
5736 self.op.shutdown_timeout)
5737 msg = result.fail_msg
5738 if msg:
5739 if self.op.ignore_consistency or primary_node.offline:
5740 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5741 " Proceeding anyway. Please make sure node"
5742 " %s is down. Error details: %s",
5743 instance.name, source_node, source_node, msg)
5744 else:
5745 raise errors.OpExecError("Could not shutdown instance %s on"
5746 " node %s: %s" %
5747 (instance.name, source_node, msg))
5749 feedback_fn("* deactivating the instance's disks on source node")
5750 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5751 raise errors.OpExecError("Can't shut down the instance's disks.")
5753 instance.primary_node = target_node
5754 # distribute new instance config to the other nodes
5755 self.cfg.Update(instance, feedback_fn)
5757 # Only start the instance if it's marked as up
5758 if instance.admin_up:
5759 feedback_fn("* activating the instance's disks on target node")
5760 logging.info("Starting instance %s on node %s",
5761 instance.name, target_node)
5763 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5764 ignore_secondaries=True)
5765 if not disks_ok:
5766 _ShutdownInstanceDisks(self, instance)
5767 raise errors.OpExecError("Can't activate the instance's disks")
5769 feedback_fn("* starting the instance on the target node")
5770 result = self.rpc.call_instance_start(target_node, instance, None, None)
5771 msg = result.fail_msg
5772 if msg:
5773 _ShutdownInstanceDisks(self, instance)
5774 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5775 (instance.name, target_node, msg))
5778 class LUInstanceMigrate(LogicalUnit):
5779 """Migrate an instance.
5781 This is migration without shutting the instance down, as opposed to
5782 failover, which is done with a shutdown.
5785 HPATH = "instance-migrate"
5786 HTYPE = constants.HTYPE_INSTANCE
5789 def ExpandNames(self):
5790 self._ExpandAndLockInstance()
5792 self.needed_locks[locking.LEVEL_NODE] = []
5793 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5795 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5796 self.op.cleanup)
5797 self.tasklets = [self._migrater]
5799 def DeclareLocks(self, level):
5800 if level == locking.LEVEL_NODE:
5801 self._LockInstancesNodes()
5803 def BuildHooksEnv(self):
5806 This runs on master, primary and secondary nodes of the instance.
5809 instance = self._migrater.instance
5810 source_node = instance.primary_node
5811 target_node = instance.secondary_nodes[0]
5812 env = _BuildInstanceHookEnvByObject(self, instance)
5813 env["MIGRATE_LIVE"] = self._migrater.live
5814 env["MIGRATE_CLEANUP"] = self.op.cleanup
5816 "OLD_PRIMARY": source_node,
5817 "OLD_SECONDARY": target_node,
5818 "NEW_PRIMARY": target_node,
5819 "NEW_SECONDARY": source_node,
5821 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5823 nl_post.append(source_node)
5824 return env, nl, nl_post
5827 class LUInstanceMove(LogicalUnit):
5828 """Move an instance by data-copying.
5831 HPATH = "instance-move"
5832 HTYPE = constants.HTYPE_INSTANCE
5835 def ExpandNames(self):
5836 self._ExpandAndLockInstance()
5837 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5838 self.op.target_node = target_node
5839 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5840 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5842 def DeclareLocks(self, level):
5843 if level == locking.LEVEL_NODE:
5844 self._LockInstancesNodes(primary_only=True)
5846 def BuildHooksEnv(self):
5849 This runs on master, primary and secondary nodes of the instance.
5853 "TARGET_NODE": self.op.target_node,
5854 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5856 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5857 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5858 self.op.target_node]
5861 def CheckPrereq(self):
5862 """Check prerequisites.
5864 This checks that the instance is in the cluster.
5867 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5868 assert self.instance is not None, \
5869 "Cannot retrieve locked instance %s" % self.op.instance_name
5871 node = self.cfg.GetNodeInfo(self.op.target_node)
5872 assert node is not None, \
5873 "Cannot retrieve locked node %s" % self.op.target_node
5875 self.target_node = target_node = node.name
5877 if target_node == instance.primary_node:
5878 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5879 (instance.name, target_node),
5882 bep = self.cfg.GetClusterInfo().FillBE(instance)
5884 for idx, dsk in enumerate(instance.disks):
5885 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5886 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5887 " cannot copy" % idx, errors.ECODE_STATE)
5889 _CheckNodeOnline(self, target_node)
5890 _CheckNodeNotDrained(self, target_node)
5891 _CheckNodeVmCapable(self, target_node)
5893 if instance.admin_up:
5894 # check memory requirements on the target node
5895 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5896 instance.name, bep[constants.BE_MEMORY],
5897 instance.hypervisor)
5898 else:
5899 self.LogInfo("Not checking memory on the target node as"
5900 " instance will not be started")
5902 # check bridge existence
5903 _CheckInstanceBridgesExist(self, instance, node=target_node)
5905 def Exec(self, feedback_fn):
5906 """Move an instance.
5908 The move is done by shutting it down on its present node, copying
5909 the data over (slow) and starting it on the new node.
5912 instance = self.instance
5914 source_node = instance.primary_node
5915 target_node = self.target_node
5917 self.LogInfo("Shutting down instance %s on source node %s",
5918 instance.name, source_node)
5920 result = self.rpc.call_instance_shutdown(source_node, instance,
5921 self.op.shutdown_timeout)
5922 msg = result.fail_msg
5923 if msg:
5924 if self.op.ignore_consistency:
5925 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5926 " Proceeding anyway. Please make sure node"
5927 " %s is down. Error details: %s",
5928 instance.name, source_node, source_node, msg)
5929 else:
5930 raise errors.OpExecError("Could not shutdown instance %s on"
5931 " node %s: %s" %
5932 (instance.name, source_node, msg))
5934 # create the target disks
5935 try:
5936 _CreateDisks(self, instance, target_node=target_node)
5937 except errors.OpExecError:
5938 self.LogWarning("Device creation failed, reverting...")
5939 try:
5940 _RemoveDisks(self, instance, target_node=target_node)
5941 finally:
5942 self.cfg.ReleaseDRBDMinors(instance.name)
5943 raise
5945 cluster_name = self.cfg.GetClusterInfo().cluster_name
5947 errs = []
5948 # activate, get path, copy the data over
5949 for idx, disk in enumerate(instance.disks):
5950 self.LogInfo("Copying data for disk %d", idx)
5951 result = self.rpc.call_blockdev_assemble(target_node, disk,
5952 instance.name, True, idx)
5953 if result.fail_msg:
5954 self.LogWarning("Can't assemble newly created disk %d: %s",
5955 idx, result.fail_msg)
5956 errs.append(result.fail_msg)
5957 break
5958 dev_path = result.payload
5959 result = self.rpc.call_blockdev_export(source_node, disk,
5960 target_node, dev_path,
5961 cluster_name)
5962 if result.fail_msg:
5963 self.LogWarning("Can't copy data over for disk %d: %s",
5964 idx, result.fail_msg)
5965 errs.append(result.fail_msg)
5966 break
5968 if errs:
5969 self.LogWarning("Some disks failed to copy, aborting")
5970 try:
5971 _RemoveDisks(self, instance, target_node=target_node)
5972 finally:
5973 self.cfg.ReleaseDRBDMinors(instance.name)
5974 raise errors.OpExecError("Errors during disk copy: %s" %
5975 ",".join(errs))
5977 instance.primary_node = target_node
5978 self.cfg.Update(instance, feedback_fn)
5980 self.LogInfo("Removing the disks on the original node")
5981 _RemoveDisks(self, instance, target_node=source_node)
5983 # Only start the instance if it's marked as up
5984 if instance.admin_up:
5985 self.LogInfo("Starting instance %s on node %s",
5986 instance.name, target_node)
5988 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5989 ignore_secondaries=True)
5990 if not disks_ok:
5991 _ShutdownInstanceDisks(self, instance)
5992 raise errors.OpExecError("Can't activate the instance's disks")
5994 result = self.rpc.call_instance_start(target_node, instance, None, None)
5995 msg = result.fail_msg
5996 if msg:
5997 _ShutdownInstanceDisks(self, instance)
5998 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5999 (instance.name, target_node, msg))
6002 class LUNodeMigrate(LogicalUnit):
6003 """Migrate all instances from a node.
6006 HPATH = "node-migrate"
6007 HTYPE = constants.HTYPE_NODE
6010 def ExpandNames(self):
6011 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6013 self.needed_locks = {
6014 locking.LEVEL_NODE: [self.op.node_name],
6015 }
6017 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6019 # Create tasklets for migrating all instances on this node
6020 names = []
6021 tasklets = []
6023 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6024 logging.debug("Migrating instance %s", inst.name)
6025 names.append(inst.name)
6027 tasklets.append(TLMigrateInstance(self, inst.name, False))
6029 self.tasklets = tasklets
6031 # Declare instance locks
6032 self.needed_locks[locking.LEVEL_INSTANCE] = names
6034 def DeclareLocks(self, level):
6035 if level == locking.LEVEL_NODE:
6036 self._LockInstancesNodes()
6038 def BuildHooksEnv(self):
6041 This runs on the master, the primary and all the secondaries.
6045 "NODE_NAME": self.op.node_name,
6048 nl = [self.cfg.GetMasterNode()]
6050 return (env, nl, nl)
6053 class TLMigrateInstance(Tasklet):
6054 """Tasklet class for instance migration.
6057 @ivar live: whether the migration will be done live or non-live;
6058 this variable is initialized only after CheckPrereq has run
6061 def __init__(self, lu, instance_name, cleanup):
6062 """Initializes this class.
6065 Tasklet.__init__(self, lu)
6068 self.instance_name = instance_name
6069 self.cleanup = cleanup
6070 self.live = False # will be overridden later
6072 def CheckPrereq(self):
6073 """Check prerequisites.
6075 This checks that the instance is in the cluster.
6078 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6079 instance = self.cfg.GetInstanceInfo(instance_name)
6080 assert instance is not None
6082 if instance.disk_template != constants.DT_DRBD8:
6083 raise errors.OpPrereqError("Instance's disk layout is not"
6084 " drbd8, cannot migrate.", errors.ECODE_STATE)
6086 secondary_nodes = instance.secondary_nodes
6087 if not secondary_nodes:
6088 raise errors.ConfigurationError("No secondary node but using"
6089 " drbd8 disk template")
6091 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6093 target_node = secondary_nodes[0]
6094 # check memory requirements on the secondary node
6095 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6096 instance.name, i_be[constants.BE_MEMORY],
6097 instance.hypervisor)
6100 # check bridge existence
6100 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6102 if not self.cleanup:
6103 _CheckNodeNotDrained(self.lu, target_node)
6104 result = self.rpc.call_instance_migratable(instance.primary_node,
6105 instance)
6106 result.Raise("Can't migrate, please use failover",
6107 prereq=True, ecode=errors.ECODE_STATE)
6109 self.instance = instance
6111 if self.lu.op.live is not None and self.lu.op.mode is not None:
6112 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6113 " parameters are accepted",
6115 if self.lu.op.live is not None:
6117 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6119 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6120 # reset the 'live' parameter to None so that repeated
6121 # invocations of CheckPrereq do not raise an exception
6122 self.lu.op.live = None
6123 elif self.lu.op.mode is None:
6124 # read the default value from the hypervisor
6125 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6126 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6128 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
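# Editor's note, an illustrative summary of the resolution above: passing
# live=True selects HT_MIGRATION_LIVE and live=False selects
# HT_MIGRATION_NONLIVE; passing neither falls back to the hypervisor's
# HV_MIGRATION_MODE default; passing both 'live' and 'mode' is rejected
# during CheckPrereq. self.live then simply records whether the chosen
# mode is HT_MIGRATION_LIVE.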
6130 def _WaitUntilSync(self):
6131 """Poll with custom rpc for disk sync.
6133 This uses our own step-based rpc call.
6136 self.feedback_fn("* wait until resync is done")
6140 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6142 self.instance.disks)
6144 for node, nres in result.items():
6145 nres.Raise("Cannot resync disks on node %s" % node)
6146 node_done, node_percent = nres.payload
6147 all_done = all_done and node_done
6148 if node_percent is not None:
6149 min_percent = min(min_percent, node_percent)
6151 if min_percent < 100:
6152 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6155 def _EnsureSecondary(self, node):
6156 """Demote a node to secondary.
6159 self.feedback_fn("* switching node %s to secondary mode" % node)
6161 for dev in self.instance.disks:
6162 self.cfg.SetDiskID(dev, node)
6164 result = self.rpc.call_blockdev_close(node, self.instance.name,
6165 self.instance.disks)
6166 result.Raise("Cannot change disk to secondary on node %s" % node)
6168 def _GoStandalone(self):
6169 """Disconnect from the network.
6172 self.feedback_fn("* changing into standalone mode")
6173 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6174 self.instance.disks)
6175 for node, nres in result.items():
6176 nres.Raise("Cannot disconnect disks on node %s" % node)
6178 def _GoReconnect(self, multimaster):
6179 """Reconnect to the network.
6185 msg = "single-master"
6186 self.feedback_fn("* changing disks into %s mode" % msg)
6187 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6188 self.instance.disks,
6189 self.instance.name, multimaster)
6190 for node, nres in result.items():
6191 nres.Raise("Cannot change disks config on node %s" % node)
6193 def _ExecCleanup(self):
6194 """Try to cleanup after a failed migration.
6196 The cleanup is done by:
6197 - check that the instance is running only on one node
6198 (and update the config if needed)
6199 - change disks on its secondary node to secondary
6200 - wait until disks are fully synchronized
6201 - disconnect from the network
6202 - change disks into single-master mode
6203 - wait again until disks are fully synchronized
6206 instance = self.instance
6207 target_node = self.target_node
6208 source_node = self.source_node
6210 # check running on only one node
6211 self.feedback_fn("* checking where the instance actually runs"
6212 " (if this hangs, the hypervisor might be in"
6214 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6215 for node, result in ins_l.items():
6216 result.Raise("Can't contact node %s" % node)
6218 runningon_source = instance.name in ins_l[source_node].payload
6219 runningon_target = instance.name in ins_l[target_node].payload
6221 if runningon_source and runningon_target:
6222 raise errors.OpExecError("Instance seems to be running on two nodes,"
6223 " or the hypervisor is confused. You will have"
6224 " to ensure manually that it runs only on one"
6225 " and restart this operation.")
6227 if not (runningon_source or runningon_target):
6228 raise errors.OpExecError("Instance does not seem to be running at all."
6229 " In this case, it's safer to repair by"
6230 " running 'gnt-instance stop' to ensure disk"
6231 " shutdown, and then restarting it.")
6233 if runningon_target:
6234 # the migration has actually succeeded, we need to update the config
6235 self.feedback_fn("* instance running on secondary node (%s),"
6236 " updating config" % target_node)
6237 instance.primary_node = target_node
6238 self.cfg.Update(instance, self.feedback_fn)
6239 demoted_node = source_node
6240 else:
6241 self.feedback_fn("* instance confirmed to be running on its"
6242 " primary node (%s)" % source_node)
6243 demoted_node = target_node
6245 self._EnsureSecondary(demoted_node)
6246 try:
6247 self._WaitUntilSync()
6248 except errors.OpExecError:
6249 # we ignore errors here, since if the device is standalone, it
6250 # won't be able to sync
6251 pass
6252 self._GoStandalone()
6253 self._GoReconnect(False)
6254 self._WaitUntilSync()
6256 self.feedback_fn("* done")
6258 def _RevertDiskStatus(self):
6259 """Try to revert the disk status after a failed migration.
6262 target_node = self.target_node
6263 try:
6264 self._EnsureSecondary(target_node)
6265 self._GoStandalone()
6266 self._GoReconnect(False)
6267 self._WaitUntilSync()
6268 except errors.OpExecError, err:
6269 self.lu.LogWarning("Migration failed and I can't reconnect the"
6270 " drives: error '%s'\n"
6271 "Please look and recover the instance status" %
6272 str(err))
6274 def _AbortMigration(self):
6275 """Call the hypervisor code to abort a started migration.
6278 instance = self.instance
6279 target_node = self.target_node
6280 migration_info = self.migration_info
6282 abort_result = self.rpc.call_finalize_migration(target_node,
6283 instance,
6284 migration_info,
6285 False)
6286 abort_msg = abort_result.fail_msg
6287 if abort_msg:
6288 logging.error("Aborting migration failed on target node %s: %s",
6289 target_node, abort_msg)
6290 # Don't raise an exception here, as we still have to try to revert the
6291 # disk status, even if this step failed.
6293 def _ExecMigration(self):
6294 """Migrate an instance.
6296 The migrate is done by:
6297 - change the disks into dual-master mode
6298 - wait until disks are fully synchronized again
6299 - migrate the instance
6300 - change disks on the new secondary node (the old primary) to secondary
6301 - wait until disks are fully synchronized
6302 - change disks into single-master mode
6305 instance = self.instance
6306 target_node = self.target_node
6307 source_node = self.source_node
6309 self.feedback_fn("* checking disk consistency between source and target")
6310 for dev in instance.disks:
6311 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6312 raise errors.OpExecError("Disk %s is degraded or not fully"
6313 " synchronized on target node,"
6314 " aborting migrate." % dev.iv_name)
6316 # First get the migration information from the remote node
6317 result = self.rpc.call_migration_info(source_node, instance)
6318 msg = result.fail_msg
6319 if msg:
6320 log_err = ("Failed fetching source migration information from %s: %s" %
6321 (source_node, msg))
6322 logging.error(log_err)
6323 raise errors.OpExecError(log_err)
6325 self.migration_info = migration_info = result.payload
6327 # Then switch the disks to master/master mode
6328 self._EnsureSecondary(target_node)
6329 self._GoStandalone()
6330 self._GoReconnect(True)
6331 self._WaitUntilSync()
6333 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6334 result = self.rpc.call_accept_instance(target_node,
6335 instance,
6336 migration_info,
6337 self.nodes_ip[target_node])
6339 msg = result.fail_msg
6340 if msg:
6341 logging.error("Instance pre-migration failed, trying to revert"
6342 " disk status: %s", msg)
6343 self.feedback_fn("Pre-migration failed, aborting")
6344 self._AbortMigration()
6345 self._RevertDiskStatus()
6346 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6347 (instance.name, msg))
6349 self.feedback_fn("* migrating instance to %s" % target_node)
6351 result = self.rpc.call_instance_migrate(source_node, instance,
6352 self.nodes_ip[target_node],
6353 self.live)
6354 msg = result.fail_msg
6355 if msg:
6356 logging.error("Instance migration failed, trying to revert"
6357 " disk status: %s", msg)
6358 self.feedback_fn("Migration failed, aborting")
6359 self._AbortMigration()
6360 self._RevertDiskStatus()
6361 raise errors.OpExecError("Could not migrate instance %s: %s" %
6362 (instance.name, msg))
6365 instance.primary_node = target_node
6366 # distribute new instance config to the other nodes
6367 self.cfg.Update(instance, self.feedback_fn)
6369 result = self.rpc.call_finalize_migration(target_node,
6370 instance,
6371 migration_info,
6372 True)
6373 msg = result.fail_msg
6374 if msg:
6375 logging.error("Instance migration succeeded, but finalization failed:"
6376 " %s", msg)
6377 raise errors.OpExecError("Could not finalize instance migration: %s" %
6378 msg)
6380 self._EnsureSecondary(source_node)
6381 self._WaitUntilSync()
6382 self._GoStandalone()
6383 self._GoReconnect(False)
6384 self._WaitUntilSync()
6386 self.feedback_fn("* done")
6388 def Exec(self, feedback_fn):
6389 """Perform the migration.
6392 feedback_fn("Migrating instance %s" % self.instance.name)
6394 self.feedback_fn = feedback_fn
6396 self.source_node = self.instance.primary_node
6397 self.target_node = self.instance.secondary_nodes[0]
6398 self.all_nodes = [self.source_node, self.target_node]
6399 self.nodes_ip = {
6400 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6401 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6402 }
6404 if self.cleanup:
6405 return self._ExecCleanup()
6406 else:
6407 return self._ExecMigration()
6410 def _CreateBlockDev(lu, node, instance, device, force_create,
6411 info, force_open):
6412 """Create a tree of block devices on a given node.
6414 If this device type has to be created on secondaries, create it and
6415 all its children.
6417 If not, just recurse to children keeping the same 'force' value.
6419 @param lu: the lu on whose behalf we execute
6420 @param node: the node on which to create the device
6421 @type instance: L{objects.Instance}
6422 @param instance: the instance which owns the device
6423 @type device: L{objects.Disk}
6424 @param device: the device to create
6425 @type force_create: boolean
6426 @param force_create: whether to force creation of this device; this
6427 will be changed to True whenever we find a device which has the
6428 CreateOnSecondary() attribute
6429 @param info: the extra 'metadata' we should attach to the device
6430 (this will be represented as a LVM tag)
6431 @type force_open: boolean
6432 @param force_open: this parameter will be passed to the
6433 L{backend.BlockdevCreate} function where it specifies
6434 whether we run on primary or not, and it affects both
6435 the child assembly and the device's own Open() execution
6438 if device.CreateOnSecondary():
6439 force_create = True
6441 if device.children:
6442 for child in device.children:
6443 _CreateBlockDev(lu, node, instance, child, force_create,
6444 info, force_open)
6446 if not force_create:
6447 return
6449 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
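# Editor's note, an illustrative trace (hypothetical DRBD8 disk with two LV
# children): CreateOnSecondary() is true for the DRBD8 device, so the
# recursion above flips force_create to True and the two LV children are
# created on the node before the DRBD8 device itself; for a plain LV disk
# the caller's force_create value is kept unchanged.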
6452 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6453 """Create a single block device on a given node.
6455 This will not recurse over children of the device, so they must be
6456 created in advance.
6458 @param lu: the lu on whose behalf we execute
6459 @param node: the node on which to create the device
6460 @type instance: L{objects.Instance}
6461 @param instance: the instance which owns the device
6462 @type device: L{objects.Disk}
6463 @param device: the device to create
6464 @param info: the extra 'metadata' we should attach to the device
6465 (this will be represented as a LVM tag)
6466 @type force_open: boolean
6467 @param force_open: this parameter will be passed to the
6468 L{backend.BlockdevCreate} function where it specifies
6469 whether we run on primary or not, and it affects both
6470 the child assembly and the device's own Open() execution
6473 lu.cfg.SetDiskID(device, node)
6474 result = lu.rpc.call_blockdev_create(node, device, device.size,
6475 instance.name, force_open, info)
6476 result.Raise("Can't create block device %s on"
6477 " node %s for instance %s" % (device, node, instance.name))
6478 if device.physical_id is None:
6479 device.physical_id = result.payload
6482 def _GenerateUniqueNames(lu, exts):
6483 """Generate a suitable LV name.
6485 This will generate a logical volume name for the given instance.
6488 results = []
6489 for val in exts:
6490 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6491 results.append("%s%s" % (new_id, val))
6492 return results
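# Editor's note, an illustrative example (the ids below are hypothetical):
# _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"]) returns names of
# the form "<unique-id><ext>", e.g. ["aaaa-....disk0_data",
# "bbbb-....disk0_meta"]; each extension gets its own unique id.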
6495 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6496 p_minor, s_minor):
6497 """Generate a drbd8 device complete with its children.
6500 port = lu.cfg.AllocatePort()
6501 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6502 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6503 logical_id=(vgname, names[0]))
6504 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6505 logical_id=(vgname, names[1]))
6506 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6507 logical_id=(primary, secondary, port,
6508 p_minor, s_minor,
6509 shared_secret),
6510 children=[dev_data, dev_meta],
6511 iv_name=iv_name)
6512 return drbd_dev
6515 def _GenerateDiskTemplate(lu, template_name,
6516 instance_name, primary_node,
6517 secondary_nodes, disk_info,
6518 file_storage_dir, file_driver,
6519 base_index, feedback_fn):
6520 """Generate the entire disk layout for a given template type.
6523 #TODO: compute space requirements
6525 vgname = lu.cfg.GetVGName()
6526 disk_count = len(disk_info)
6527 disks = []
6528 if template_name == constants.DT_DISKLESS:
6529 pass
6530 elif template_name == constants.DT_PLAIN:
6531 if len(secondary_nodes) != 0:
6532 raise errors.ProgrammerError("Wrong template configuration")
6534 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6535 for i in range(disk_count)])
6536 for idx, disk in enumerate(disk_info):
6537 disk_index = idx + base_index
6538 vg = disk.get("vg", vgname)
6539 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6540 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6541 logical_id=(vg, names[idx]),
6542 iv_name="disk/%d" % disk_index,
6544 disks.append(disk_dev)
6545 elif template_name == constants.DT_DRBD8:
6546 if len(secondary_nodes) != 1:
6547 raise errors.ProgrammerError("Wrong template configuration")
6548 remote_node = secondary_nodes[0]
6549 minors = lu.cfg.AllocateDRBDMinor(
6550 [primary_node, remote_node] * len(disk_info), instance_name)
6552 names = []
6553 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6554 for i in range(disk_count)]):
6555 names.append(lv_prefix + "_data")
6556 names.append(lv_prefix + "_meta")
6557 for idx, disk in enumerate(disk_info):
6558 disk_index = idx + base_index
6559 vg = disk.get("vg", vgname)
6560 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6561 disk["size"], vg, names[idx*2:idx*2+2],
6562 "disk/%d" % disk_index,
6563 minors[idx*2], minors[idx*2+1])
6564 disk_dev.mode = disk["mode"]
6565 disks.append(disk_dev)
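# Editor's note, an illustrative walkthrough for two DRBD8 disks: names
# holds four LV names ordered [disk0_data, disk0_meta, disk1_data,
# disk1_meta] and minors holds four allocated minors (primary/secondary
# alternating), so disk idx consumes names[idx*2:idx*2+2] and the minor
# pair (minors[idx*2], minors[idx*2+1]).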
6566 elif template_name == constants.DT_FILE:
6567 if len(secondary_nodes) != 0:
6568 raise errors.ProgrammerError("Wrong template configuration")
6570 opcodes.RequireFileStorage()
6572 for idx, disk in enumerate(disk_info):
6573 disk_index = idx + base_index
6574 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6575 iv_name="disk/%d" % disk_index,
6576 logical_id=(file_driver,
6577 "%s/disk%d" % (file_storage_dir,
6580 disks.append(disk_dev)
6581 else:
6582 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6583 return disks
6586 def _GetInstanceInfoText(instance):
6587 """Compute that text that should be added to the disk's metadata.
6590 return "originstname+%s" % instance.name
6593 def _CalcEta(time_taken, written, total_size):
6594 """Calculates the ETA based on size written and total size.
6596 @param time_taken: The time taken so far
6597 @param written: amount written so far
6598 @param total_size: The total size of data to be written
6599 @return: The remaining time in seconds
6602 avg_time = time_taken / float(written)
6603 return (total_size - written) * avg_time
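# Editor's note, a worked example with hypothetical values: with
# time_taken=30.0 (seconds), written=256 and total_size=1024 (MiB), the
# average time per unit is 30.0 / 256, so the ETA is
# (1024 - 256) * (30.0 / 256) = 90.0 seconds.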
6606 def _WipeDisks(lu, instance):
6607 """Wipes instance disks.
6609 @type lu: L{LogicalUnit}
6610 @param lu: the logical unit on whose behalf we execute
6611 @type instance: L{objects.Instance}
6612 @param instance: the instance whose disks we should wipe
6613 @return: the success of the wipe
6616 node = instance.primary_node
6617 logging.info("Pause sync of instance %s disks", instance.name)
6618 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6620 for idx, success in enumerate(result.payload):
6621 if not success:
6622 logging.warn("pause-sync of instance %s for disk %d failed",
6623 instance.name, idx)
6625 try:
6626 for idx, device in enumerate(instance.disks):
6627 lu.LogInfo("* Wiping disk %d", idx)
6628 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6630 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6631 # MAX_WIPE_CHUNK at max
6632 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6633 constants.MIN_WIPE_CHUNK_PERCENT)
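# Editor's note, a worked example with hypothetical constants: assuming
# MIN_WIPE_CHUNK_PERCENT=10 and MAX_WIPE_CHUNK=1024, a 20480 MiB disk gives
# a percentage-based chunk of 2048 MiB, capped to min(1024, 2048) = 1024.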
6635 offset = 0
6636 size = device.size
6637 last_output = 0
6638 start_time = time.time()
6640 while offset < size:
6641 wipe_size = min(wipe_chunk_size, size - offset)
6642 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6643 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6644 (idx, offset, wipe_size))
6645 now = time.time()
6646 offset += wipe_size
6647 if now - last_output >= 60:
6648 eta = _CalcEta(now - start_time, offset, size)
6649 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6650 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6651 last_output = now
6652 finally:
6653 logging.info("Resume sync of instance %s disks", instance.name)
6655 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6657 for idx, success in enumerate(result.payload):
6658 if not success:
6659 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6660 " look at the status and troubleshoot the issue.", idx)
6661 logging.warn("resume-sync of instance %s for disk %d failed",
6662 instance.name, idx)
6665 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6666 """Create all disks for an instance.
6668 This abstracts away some work from AddInstance.
6670 @type lu: L{LogicalUnit}
6671 @param lu: the logical unit on whose behalf we execute
6672 @type instance: L{objects.Instance}
6673 @param instance: the instance whose disks we should create
6675 @param to_skip: list of indices to skip
6676 @type target_node: string
6677 @param target_node: if passed, overrides the target node for creation
6679 @return: the success of the creation
6682 info = _GetInstanceInfoText(instance)
6683 if target_node is None:
6684 pnode = instance.primary_node
6685 all_nodes = instance.all_nodes
6686 else:
6687 pnode = target_node
6688 all_nodes = [pnode]
6690 if instance.disk_template == constants.DT_FILE:
6691 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6692 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6694 result.Raise("Failed to create directory '%s' on"
6695 " node %s" % (file_storage_dir, pnode))
6697 # Note: this needs to be kept in sync with adding of disks in
6698 # LUInstanceSetParams
6699 for idx, device in enumerate(instance.disks):
6700 if to_skip and idx in to_skip:
6701 continue
6702 logging.info("Creating volume %s for instance %s",
6703 device.iv_name, instance.name)
6705 for node in all_nodes:
6706 f_create = node == pnode
6707 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6710 def _RemoveDisks(lu, instance, target_node=None):
6711 """Remove all disks for an instance.
6713 This abstracts away some work from `AddInstance()` and
6714 `RemoveInstance()`. Note that in case some of the devices couldn't
6715 be removed, the removal will continue with the other ones (compare
6716 with `_CreateDisks()`).
6718 @type lu: L{LogicalUnit}
6719 @param lu: the logical unit on whose behalf we execute
6720 @type instance: L{objects.Instance}
6721 @param instance: the instance whose disks we should remove
6722 @type target_node: string
6723 @param target_node: used to override the node on which to remove the disks
6725 @return: the success of the removal
6728 logging.info("Removing block devices for instance %s", instance.name)
6731 for device in instance.disks:
6733 edata = [(target_node, device)]
6735 edata = device.ComputeNodeTree(instance.primary_node)
6736 for node, disk in edata:
6737 lu.cfg.SetDiskID(disk, node)
6738 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6740 lu.LogWarning("Could not remove block device %s on node %s,"
6741 " continuing anyway: %s", device.iv_name, node, msg)
6744 if instance.disk_template == constants.DT_FILE:
6745 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6746 if target_node:
6747 tgt = target_node
6748 else:
6749 tgt = instance.primary_node
6750 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6751 if result.fail_msg:
6752 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6753 file_storage_dir, instance.primary_node, result.fail_msg)
6754 all_result = False
6756 return all_result
6759 def _ComputeDiskSizePerVG(disk_template, disks):
6760 """Compute disk size requirements in the volume group
6763 def _compute(disks, payload):
6764 """Universal algorithm
6767 vgs = {}
6768 for disk in disks:
6769 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6771 return vgs
6773 # Required free disk space as a function of disk and swap space
6774 req_size_dict = {
6775 constants.DT_DISKLESS: {},
6776 constants.DT_PLAIN: _compute(disks, 0),
6777 # 128 MB are added for drbd metadata for each disk
6778 constants.DT_DRBD8: _compute(disks, 128),
6779 constants.DT_FILE: {},
6780 }
6782 if disk_template not in req_size_dict:
6783 raise errors.ProgrammerError("Disk template '%s' size requirement"
6784 " is unknown" % disk_template)
6786 return req_size_dict[disk_template]
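# Editor's note, an illustrative example (hypothetical input): for
# disks=[{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 512}] and
# disk_template=DT_DRBD8, _compute adds the 128 MB metadata payload per
# disk, so the result is {"xenvg": (1024 + 128) + (512 + 128)} ==
# {"xenvg": 1792}.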
6789 def _ComputeDiskSize(disk_template, disks):
6790 """Compute disk size requirements in the volume group
6793 # Required free disk space as a function of disk and swap space
6794 req_size_dict = {
6795 constants.DT_DISKLESS: None,
6796 constants.DT_PLAIN: sum(d["size"] for d in disks),
6797 # 128 MB are added for drbd metadata for each disk
6798 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6799 constants.DT_FILE: None,
6800 }
6802 if disk_template not in req_size_dict:
6803 raise errors.ProgrammerError("Disk template '%s' size requirement"
6804 " is unknown" % disk_template)
6806 return req_size_dict[disk_template]
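# Editor's note, an illustrative example (hypothetical input): for two
# disks of 1024 and 512 MiB, DT_PLAIN requires 1536 while DT_DRBD8 requires
# (1024 + 128) + (512 + 128) = 1792; DT_DISKLESS and DT_FILE consume no
# volume group space, hence None.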
6809 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6810 """Hypervisor parameter validation.
6812 This function abstracts the hypervisor parameter validation to be
6813 used in both instance create and instance modify.
6815 @type lu: L{LogicalUnit}
6816 @param lu: the logical unit for which we check
6817 @type nodenames: list
6818 @param nodenames: the list of nodes on which we should check
6819 @type hvname: string
6820 @param hvname: the name of the hypervisor we should use
6821 @type hvparams: dict
6822 @param hvparams: the parameters which we need to check
6823 @raise errors.OpPrereqError: if the parameters are not valid
6826 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6827 hvname,
6828 hvparams)
6829 for node in nodenames:
6830 info = hvinfo[node]
6831 if info.offline:
6832 continue
6833 info.Raise("Hypervisor parameter validation failed on node %s" % node)
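# Editor's note, an illustrative call mirroring the instance-creation path
# further below: _CheckHVParams(self, nodenames, self.op.hypervisor,
# self.op.hvparams) validates the filled parameters on every target node
# and lets info.Raise() surface the first failure.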
6836 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6837 """OS parameters validation.
6839 @type lu: L{LogicalUnit}
6840 @param lu: the logical unit for which we check
6841 @type required: boolean
6842 @param required: whether the validation should fail if the OS is not
6843 found
6844 @type nodenames: list
6845 @param nodenames: the list of nodes on which we should check
6846 @type osname: string
6847 @param osname: the name of the OS we should check
6848 @type osparams: dict
6849 @param osparams: the parameters which we need to check
6850 @raise errors.OpPrereqError: if the parameters are not valid
6853 result = lu.rpc.call_os_validate(required, nodenames, osname,
6854 [constants.OS_VALIDATE_PARAMETERS],
6855 osparams)
6856 for node, nres in result.items():
6857 # we don't check for offline cases since this should be run only
6858 # against the master node and/or an instance's nodes
6859 nres.Raise("OS Parameters validation failed on node %s" % node)
6860 if not nres.payload:
6861 lu.LogInfo("OS %s not found on node %s, validation skipped",
6865 class LUInstanceCreate(LogicalUnit):
6866 """Create an instance.
6869 HPATH = "instance-add"
6870 HTYPE = constants.HTYPE_INSTANCE
6873 def CheckArguments(self):
6877 # do not require name_check to ease forward/backward compatibility
6879 if self.op.no_install and self.op.start:
6880 self.LogInfo("No-installation mode selected, disabling startup")
6881 self.op.start = False
6882 # validate/normalize the instance name
6883 self.op.instance_name = \
6884 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6886 if self.op.ip_check and not self.op.name_check:
6887 # TODO: make the ip check more flexible and not depend on the name check
6888 raise errors.OpPrereqError("Cannot do ip check without a name check",
6891 # check nics' parameter names
6892 for nic in self.op.nics:
6893 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6895 # check disks: parameter names and consistent adopt/no-adopt strategy
6896 has_adopt = has_no_adopt = False
6897 for disk in self.op.disks:
6898 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6899 if "adopt" in disk:
6900 has_adopt = True
6901 else:
6902 has_no_adopt = True
6903 if has_adopt and has_no_adopt:
6904 raise errors.OpPrereqError("Either all disks are adopted or none is",
6905 errors.ECODE_INVAL)
6906 if has_adopt:
6907 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6908 raise errors.OpPrereqError("Disk adoption is not supported for the"
6909 " '%s' disk template" %
6910 self.op.disk_template,
6912 if self.op.iallocator is not None:
6913 raise errors.OpPrereqError("Disk adoption not allowed with an"
6914 " iallocator script", errors.ECODE_INVAL)
6915 if self.op.mode == constants.INSTANCE_IMPORT:
6916 raise errors.OpPrereqError("Disk adoption not allowed for"
6917 " instance import", errors.ECODE_INVAL)
6919 self.adopt_disks = has_adopt
6921 # instance name verification
6922 if self.op.name_check:
6923 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6924 self.op.instance_name = self.hostname1.name
6925 # used in CheckPrereq for ip ping check
6926 self.check_ip = self.hostname1.ip
6928 self.check_ip = None
6930 # file storage checks
6931 if (self.op.file_driver and
6932 not self.op.file_driver in constants.FILE_DRIVER):
6933 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6934 self.op.file_driver, errors.ECODE_INVAL)
6936 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6937 raise errors.OpPrereqError("File storage directory path not absolute",
6940 ### Node/iallocator related checks
6941 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6943 if self.op.pnode is not None:
6944 if self.op.disk_template in constants.DTS_NET_MIRROR:
6945 if self.op.snode is None:
6946 raise errors.OpPrereqError("The networked disk templates need"
6947 " a mirror node", errors.ECODE_INVAL)
6949 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6951 self.op.snode = None
6953 self._cds = _GetClusterDomainSecret()
6955 if self.op.mode == constants.INSTANCE_IMPORT:
6956 # On import force_variant must be True, because if we forced it at
6957 # initial install, our only chance when importing it back is that it
6959 self.op.force_variant = True
6961 if self.op.no_install:
6962 self.LogInfo("No-installation mode has no effect during import")
6964 elif self.op.mode == constants.INSTANCE_CREATE:
6965 if self.op.os_type is None:
6966 raise errors.OpPrereqError("No guest OS specified",
6967 errors.ECODE_INVAL)
6968 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6969 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6970 " installation" % self.op.os_type,
6971 errors.ECODE_INVAL)
6972 if self.op.disk_template is None:
6973 raise errors.OpPrereqError("No disk template specified",
6974 errors.ECODE_INVAL)
6976 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6977 # Check handshake to ensure both clusters have the same domain secret
6978 src_handshake = self.op.source_handshake
6979 if not src_handshake:
6980 raise errors.OpPrereqError("Missing source handshake",
6981 errors.ECODE_INVAL)
6983 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6984 src_handshake)
6985 if errmsg:
6986 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6987 errors.ECODE_INVAL)
6989 # Load and check source CA
6990 self.source_x509_ca_pem = self.op.source_x509_ca
6991 if not self.source_x509_ca_pem:
6992 raise errors.OpPrereqError("Missing source X509 CA",
6993 errors.ECODE_INVAL)
6995 try:
6996 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6997 self._cds)
6998 except OpenSSL.crypto.Error, err:
6999 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7000 (err, ), errors.ECODE_INVAL)
7002 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7003 if errcode is not None:
7004 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7007 self.source_x509_ca = cert
7009 src_instance_name = self.op.source_instance_name
7010 if not src_instance_name:
7011 raise errors.OpPrereqError("Missing source instance name",
7014 self.source_instance_name = \
7015 netutils.GetHostname(name=src_instance_name).name
7018 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7019 self.op.mode, errors.ECODE_INVAL)
7021 def ExpandNames(self):
7022 """ExpandNames for CreateInstance.
7024 Figure out the right locks for instance creation.
7027 self.needed_locks = {}
7029 instance_name = self.op.instance_name
7030 # this is just a preventive check, but someone might still add this
7031 # instance in the meantime, and creation will fail at lock-add time
7032 if instance_name in self.cfg.GetInstanceList():
7033 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7034 instance_name, errors.ECODE_EXISTS)
7036 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7038 if self.op.iallocator:
7039 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7040 else:
7041 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7042 nodelist = [self.op.pnode]
7043 if self.op.snode is not None:
7044 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7045 nodelist.append(self.op.snode)
7046 self.needed_locks[locking.LEVEL_NODE] = nodelist
7048 # in case of import lock the source node too
7049 if self.op.mode == constants.INSTANCE_IMPORT:
7050 src_node = self.op.src_node
7051 src_path = self.op.src_path
7053 if src_path is None:
7054 self.op.src_path = src_path = self.op.instance_name
7056 if src_node is None:
7057 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7058 self.op.src_node = None
7059 if os.path.isabs(src_path):
7060 raise errors.OpPrereqError("Importing an instance from an absolute"
7061 " path requires a source node option.",
7064 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7065 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7066 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7067 if not os.path.isabs(src_path):
7068 self.op.src_path = src_path = \
7069 utils.PathJoin(constants.EXPORT_DIR, src_path)
7071 def _RunAllocator(self):
7072 """Run the allocator based on input opcode.
7075 nics = [n.ToDict() for n in self.nics]
7076 ial = IAllocator(self.cfg, self.rpc,
7077 mode=constants.IALLOCATOR_MODE_ALLOC,
7078 name=self.op.instance_name,
7079 disk_template=self.op.disk_template,
7080 tags=[],
7081 os=self.op.os_type,
7082 vcpus=self.be_full[constants.BE_VCPUS],
7083 mem_size=self.be_full[constants.BE_MEMORY],
7084 disks=self.disks,
7085 nics=nics,
7086 hypervisor=self.op.hypervisor,
7087 )
7089 ial.Run(self.op.iallocator)
7091 if not ial.success:
7092 raise errors.OpPrereqError("Can't compute nodes using"
7093 " iallocator '%s': %s" %
7094 (self.op.iallocator, ial.info),
7095 errors.ECODE_NORES)
7096 if len(ial.result) != ial.required_nodes:
7097 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7098 " of nodes (%s), required %s" %
7099 (self.op.iallocator, len(ial.result),
7100 ial.required_nodes), errors.ECODE_FAULT)
7101 self.op.pnode = ial.result[0]
7102 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7103 self.op.instance_name, self.op.iallocator,
7104 utils.CommaJoin(ial.result))
7105 if ial.required_nodes == 2:
7106 self.op.snode = ial.result[1]
7108 def BuildHooksEnv(self):
7111 This runs on master, primary and secondary nodes of the instance.
7115 "ADD_MODE": self.op.mode,
7117 if self.op.mode == constants.INSTANCE_IMPORT:
7118 env["SRC_NODE"] = self.op.src_node
7119 env["SRC_PATH"] = self.op.src_path
7120 env["SRC_IMAGES"] = self.src_images
7122 env.update(_BuildInstanceHookEnv(
7123 name=self.op.instance_name,
7124 primary_node=self.op.pnode,
7125 secondary_nodes=self.secondaries,
7126 status=self.op.start,
7127 os_type=self.op.os_type,
7128 memory=self.be_full[constants.BE_MEMORY],
7129 vcpus=self.be_full[constants.BE_VCPUS],
7130 nics=_NICListToTuple(self, self.nics),
7131 disk_template=self.op.disk_template,
7132 disks=[(d["size"], d["mode"]) for d in self.disks],
7135 hypervisor_name=self.op.hypervisor,
7138 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7142 def _ReadExportInfo(self):
7143 """Reads the export information from disk.
7145 It will override the opcode source node and path with the actual
7146 information, if these two were not specified before.
7148 @return: the export information
7151 assert self.op.mode == constants.INSTANCE_IMPORT
7153 src_node = self.op.src_node
7154 src_path = self.op.src_path
7156 if src_node is None:
7157 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7158 exp_list = self.rpc.call_export_list(locked_nodes)
7159 found = False
7160 for node in exp_list:
7161 if exp_list[node].fail_msg:
7162 continue
7163 if src_path in exp_list[node].payload:
7164 found = True
7165 self.op.src_node = src_node = node
7166 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7167 src_path)
7168 break
7169 if not found:
7170 raise errors.OpPrereqError("No export found for relative path %s" %
7171 src_path, errors.ECODE_INVAL)
7173 _CheckNodeOnline(self, src_node)
7174 result = self.rpc.call_export_info(src_node, src_path)
7175 result.Raise("No export or invalid export found in dir %s" % src_path)
7177 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7178 if not export_info.has_section(constants.INISECT_EXP):
7179 raise errors.ProgrammerError("Corrupted export config",
7180 errors.ECODE_ENVIRON)
7182 ei_version = export_info.get(constants.INISECT_EXP, "version")
7183 if (int(ei_version) != constants.EXPORT_VERSION):
7184 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7185 (ei_version, constants.EXPORT_VERSION),
7186 errors.ECODE_ENVIRON)
7188 return export_info
7189 def _ReadExportParams(self, einfo):
7190 """Use export parameters as defaults.
7192 In case the opcode doesn't specify (as in override) some instance
7193 parameters, then try to use them from the export information, if
7197 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7199 if self.op.disk_template is None:
7200 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7201 self.op.disk_template = einfo.get(constants.INISECT_INS,
7202 "disk_template")
7203 else:
7204 raise errors.OpPrereqError("No disk template specified and the export"
7205 " is missing the disk_template information",
7206 errors.ECODE_INVAL)
7208 if not self.op.disks:
7209 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7210 disks = []
7211 # TODO: import the disk iv_name too
7212 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7213 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7214 disks.append({"size": disk_sz})
7215 self.op.disks = disks
7216 else:
7217 raise errors.OpPrereqError("No disk info specified and the export"
7218 " is missing the disk information",
7219 errors.ECODE_INVAL)
7221 if (not self.op.nics and
7222 einfo.has_option(constants.INISECT_INS, "nic_count")):
7223 nics = []
7224 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7225 ndict = {}
7226 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7227 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7228 ndict[name] = v
7229 nics.append(ndict)
7230 self.op.nics = nics
7232 if (self.op.hypervisor is None and
7233 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7234 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7235 if einfo.has_section(constants.INISECT_HYP):
7236 # use the export parameters but do not override the ones
7237 # specified by the user
7238 for name, value in einfo.items(constants.INISECT_HYP):
7239 if name not in self.op.hvparams:
7240 self.op.hvparams[name] = value
7242 if einfo.has_section(constants.INISECT_BEP):
7243 # use the parameters, without overriding
7244 for name, value in einfo.items(constants.INISECT_BEP):
7245 if name not in self.op.beparams:
7246 self.op.beparams[name] = value
7248 # try to read the parameters old style, from the main section
7249 for name in constants.BES_PARAMETERS:
7250 if (name not in self.op.beparams and
7251 einfo.has_option(constants.INISECT_INS, name)):
7252 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7254 if einfo.has_section(constants.INISECT_OSP):
7255 # use the parameters, without overriding
7256 for name, value in einfo.items(constants.INISECT_OSP):
7257 if name not in self.op.osparams:
7258 self.op.osparams[name] = value
7260 def _RevertToDefaults(self, cluster):
7261 """Revert the instance parameters to the default values.
7265 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7266 for name in self.op.hvparams.keys():
7267 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7268 del self.op.hvparams[name]
7270 be_defs = cluster.SimpleFillBE({})
7271 for name in self.op.beparams.keys():
7272 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7273 del self.op.beparams[name]
7275 nic_defs = cluster.SimpleFillNIC({})
7276 for nic in self.op.nics:
7277 for name in constants.NICS_PARAMETERS:
7278 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7279 del nic[name]
7281 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7282 for name in self.op.osparams.keys():
7283 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7284 del self.op.osparams[name]
7286 def CheckPrereq(self):
7287 """Check prerequisites.
7290 if self.op.mode == constants.INSTANCE_IMPORT:
7291 export_info = self._ReadExportInfo()
7292 self._ReadExportParams(export_info)
7294 if (not self.cfg.GetVGName() and
7295 self.op.disk_template not in constants.DTS_NOT_LVM):
7296 raise errors.OpPrereqError("Cluster does not support lvm-based"
7297 " instances", errors.ECODE_STATE)
7299 if self.op.hypervisor is None:
7300 self.op.hypervisor = self.cfg.GetHypervisorType()
7302 cluster = self.cfg.GetClusterInfo()
7303 enabled_hvs = cluster.enabled_hypervisors
7304 if self.op.hypervisor not in enabled_hvs:
7305 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7306 " cluster (%s)" % (self.op.hypervisor,
7307 ",".join(enabled_hvs)),
7310 # check hypervisor parameter syntax (locally)
7311 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7312 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7314 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7315 hv_type.CheckParameterSyntax(filled_hvp)
7316 self.hv_full = filled_hvp
7317 # check that we don't specify global parameters on an instance
7318 _CheckGlobalHvParams(self.op.hvparams)
7320 # fill and remember the beparams dict
7321 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7322 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7324 # build os parameters
7325 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7327 # now that hvp/bep are in final format, let's reset to defaults,
7328 # if necessary
7329 if self.op.identify_defaults:
7330 self._RevertToDefaults(cluster)
7332 # NIC buildup
7333 self.nics = []
7334 for idx, nic in enumerate(self.op.nics):
7335 nic_mode_req = nic.get("mode", None)
7336 nic_mode = nic_mode_req
7337 if nic_mode is None:
7338 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7340 # in routed mode, for the first nic, the default ip is 'auto'
7341 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7342 default_ip_mode = constants.VALUE_AUTO
7344 default_ip_mode = constants.VALUE_NONE
7346 # ip validity checks
7347 ip = nic.get("ip", default_ip_mode)
7348 if ip is None or ip.lower() == constants.VALUE_NONE:
7349 nic_ip = None
7350 elif ip.lower() == constants.VALUE_AUTO:
7351 if not self.op.name_check:
7352 raise errors.OpPrereqError("IP address set to auto but name checks"
7353 " have been skipped",
7355 nic_ip = self.hostname1.ip
7357 if not netutils.IPAddress.IsValid(ip):
7358 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7362 # TODO: check the ip address for uniqueness
7363 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7364 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7367 # MAC address verification
7368 mac = nic.get("mac", constants.VALUE_AUTO)
7369 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7370 mac = utils.NormalizeAndValidateMac(mac)
7372 try:
7373 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7374 except errors.ReservationError:
7375 raise errors.OpPrereqError("MAC address %s already in use"
7376 " in cluster" % mac,
7377 errors.ECODE_NOTUNIQUE)
7379 # bridge verification
7380 bridge = nic.get("bridge", None)
7381 link = nic.get("link", None)
7383 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7384 " at the same time", errors.ECODE_INVAL)
7385 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7386 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7393 nicparams[constants.NIC_MODE] = nic_mode_req
7395 nicparams[constants.NIC_LINK] = link
7397 check_params = cluster.SimpleFillNIC(nicparams)
7398 objects.NIC.CheckParameterSyntax(check_params)
7399 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7401 # disk checks/pre-build
7402 self.disks = []
7403 for disk in self.op.disks:
7404 mode = disk.get("mode", constants.DISK_RDWR)
7405 if mode not in constants.DISK_ACCESS_SET:
7406 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7407 mode, errors.ECODE_INVAL)
7408 size = disk.get("size", None)
7409 if size is None:
7410 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7411 try:
7412 size = int(size)
7413 except (TypeError, ValueError):
7414 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7415 errors.ECODE_INVAL)
7416 vg = disk.get("vg", self.cfg.GetVGName())
7417 new_disk = {"size": size, "mode": mode, "vg": vg}
7418 if "adopt" in disk:
7419 new_disk["adopt"] = disk["adopt"]
7420 self.disks.append(new_disk)
7422 if self.op.mode == constants.INSTANCE_IMPORT:
7424 # Check that the new instance doesn't have less disks than the export
7425 instance_disks = len(self.disks)
7426 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7427 if instance_disks < export_disks:
7428 raise errors.OpPrereqError("Not enough disks to import."
7429 " (instance: %d, export: %d)" %
7430 (instance_disks, export_disks),
7431 errors.ECODE_INVAL)
7433 disk_images = []
7434 for idx in range(export_disks):
7435 option = 'disk%d_dump' % idx
7436 if export_info.has_option(constants.INISECT_INS, option):
7437 # FIXME: are the old os-es, disk sizes, etc. useful?
7438 export_name = export_info.get(constants.INISECT_INS, option)
7439 image = utils.PathJoin(self.op.src_path, export_name)
7440 disk_images.append(image)
7441 else:
7442 disk_images.append(False)
7444 self.src_images = disk_images
7446 old_name = export_info.get(constants.INISECT_INS, 'name')
7447 try:
7448 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7449 except (TypeError, ValueError), err:
7450 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7451 " an integer: %s" % str(err),
7453 if self.op.instance_name == old_name:
7454 for idx, nic in enumerate(self.nics):
7455 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7456 nic_mac_ini = 'nic%d_mac' % idx
7457 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7459 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7461 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7462 if self.op.ip_check:
7463 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7464 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7465 (self.check_ip, self.op.instance_name),
7466 errors.ECODE_NOTUNIQUE)
7468 #### mac address generation
7469 # By generating here the mac address both the allocator and the hooks get
7470 # the real final mac address rather than the 'auto' or 'generate' value.
7471 # There is a race condition between the generation and the instance object
7472 # creation, which means that we know the mac is valid now, but we're not
7473 # sure it will be when we actually add the instance. If things go bad
7474 # adding the instance will abort because of a duplicate mac, and the
7475 # creation job will fail.
7476 for nic in self.nics:
7477 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7478 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
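# The reserve-then-commit idea described above can be shown standalone. This
# is a minimal sketch, not the real ConfigWriter API: _MacPool and its methods
# are hypothetical stand-ins for cfg.GenerateMAC/cfg.ReserveMAC, keyed by the
# job's execution-context id (ec_id).
class _MacPool(object):
  def __init__(self, used_macs):
    self._used = set(used_macs)  # MACs already committed to the config
    self._reserved = {}          # ec_id -> set of MACs reserved by that job

  def Generate(self, gen_fn, ec_id):
    """Retry gen_fn until the MAC is neither committed nor reserved."""
    while True:
      mac = gen_fn()
      if (mac not in self._used and
          not any(mac in r for r in self._reserved.values())):
        self._reserved.setdefault(ec_id, set()).add(mac)
        return mac

  def Commit(self, ec_id):
    """Instance was added: promote this job's reservations to 'used'."""
    self._used |= self._reserved.pop(ec_id, set())
# A MAC committed by a competing job between Generate and Commit is exactly
# the duplicate that the comment above warns about.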
7482 if self.op.iallocator is not None:
7483 self._RunAllocator()
7485 #### node related checks
7487 # check primary node
7488 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7489 assert self.pnode is not None, \
7490 "Cannot retrieve locked node %s" % self.op.pnode
7492 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7493 pnode.name, errors.ECODE_STATE)
7495 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7496 pnode.name, errors.ECODE_STATE)
7497 if not pnode.vm_capable:
7498 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7499 " '%s'" % pnode.name, errors.ECODE_STATE)
7501 self.secondaries = []
7503 # mirror node verification
7504 if self.op.disk_template in constants.DTS_NET_MIRROR:
7505 if self.op.snode == pnode.name:
7506 raise errors.OpPrereqError("The secondary node cannot be the"
7507 " primary node.", errors.ECODE_INVAL)
7508 _CheckNodeOnline(self, self.op.snode)
7509 _CheckNodeNotDrained(self, self.op.snode)
7510 _CheckNodeVmCapable(self, self.op.snode)
7511 self.secondaries.append(self.op.snode)
7513 nodenames = [pnode.name] + self.secondaries
7515 if not self.adopt_disks:
7516 # Check lv size requirements, if not adopting
7517 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7518 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7520 else: # instead, we must check the adoption data
7521 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7522 if len(all_lvs) != len(self.disks):
7523 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7524 errors.ECODE_INVAL)
7525 for lv_name in all_lvs:
7526 try:
7527 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
7528 # to ReserveLV use the same syntax
7529 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7530 except errors.ReservationError:
7531 raise errors.OpPrereqError("LV named %s used by another instance" %
7532 lv_name, errors.ECODE_NOTUNIQUE)
7534 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7535 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7537 node_lvs = self.rpc.call_lv_list([pnode.name],
7538 vg_names.payload.keys())[pnode.name]
7539 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7540 node_lvs = node_lvs.payload
7542 delta = all_lvs.difference(node_lvs.keys())
7543 if delta:
7544 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7545 utils.CommaJoin(delta),
7546 errors.ECODE_INVAL)
7547 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7548 if online_lvs:
7549 raise errors.OpPrereqError("Online logical volumes found, cannot"
7550 " adopt: %s" % utils.CommaJoin(online_lvs),
7551 errors.ECODE_STATE)
7552 # update the size of disk based on what is found
7553 for dsk in self.disks:
7554 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
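# The adoption checks above are set arithmetic over "vg/lv" names. A
# self-contained sketch; node_lvs mirrors the call_lv_list payload shape used
# here (name -> (size, attr, online)):
def _CheckAdoptableLVs(wanted, node_lvs):
  wanted = set(wanted)
  missing = wanted.difference(node_lvs)
  if missing:
    raise ValueError("missing LVs: %s" % ", ".join(sorted(missing)))
  online = [lv for lv in wanted if node_lvs[lv][2]]
  if online:
    raise ValueError("online LVs, cannot adopt: %s" % ", ".join(sorted(online)))
  # sizes come back as strings like "10240.00", hence int(float(...))
  return dict((lv, int(float(node_lvs[lv][0]))) for lv in wanted)

assert _CheckAdoptableLVs(["xenvg/data"],
                          {"xenvg/data": ("10240.00", None, False)}) == \
       {"xenvg/data": 10240}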
7556 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7558 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7559 # check OS parameters (remotely)
7560 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7562 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7564 # memory check on primary node
7565 if self.op.start:
7566 _CheckNodeFreeMemory(self, self.pnode.name,
7567 "creating instance %s" % self.op.instance_name,
7568 self.be_full[constants.BE_MEMORY],
7569 self.op.hypervisor)
7571 self.dry_run_result = list(nodenames)
7573 def Exec(self, feedback_fn):
7574 """Create and add the instance to the cluster.
7577 instance = self.op.instance_name
7578 pnode_name = self.pnode.name
7580 ht_kind = self.op.hypervisor
7581 if ht_kind in constants.HTS_REQ_PORT:
7582 network_port = self.cfg.AllocatePort()
7583 else:
7584 network_port = None
7586 if constants.ENABLE_FILE_STORAGE:
7587 # this is needed because os.path.join does not accept None arguments
7588 if self.op.file_storage_dir is None:
7589 string_file_storage_dir = ""
7590 else:
7591 string_file_storage_dir = self.op.file_storage_dir
7593 # build the full file storage dir path
7594 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7595 string_file_storage_dir, instance)
7596 else:
7597 file_storage_dir = ""
7599 disks = _GenerateDiskTemplate(self,
7600 self.op.disk_template,
7601 instance, pnode_name,
7602 self.secondaries,
7603 self.disks,
7604 file_storage_dir,
7605 self.op.file_driver,
7606 0,
7607 feedback_fn)
7609 iobj = objects.Instance(name=instance, os=self.op.os_type,
7610 primary_node=pnode_name,
7611 nics=self.nics, disks=disks,
7612 disk_template=self.op.disk_template,
7613 admin_up=False,
7614 network_port=network_port,
7615 beparams=self.op.beparams,
7616 hvparams=self.op.hvparams,
7617 hypervisor=self.op.hypervisor,
7618 osparams=self.op.osparams,
7619 )
7621 if self.adopt_disks:
7622 # rename LVs to the newly-generated names; we need to construct
7623 # 'fake' LV disks with the old data, plus the new unique_id
7624 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7625 rename_to = []
7626 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7627 rename_to.append(t_dsk.logical_id)
7628 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7629 self.cfg.SetDiskID(t_dsk, pnode_name)
7630 result = self.rpc.call_blockdev_rename(pnode_name,
7631 zip(tmp_disks, rename_to))
7632 result.Raise("Failed to rename adopted LVs")
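# The adoption rename pairs each generated name with the LV it takes over;
# the same pairing in isolation (logical_id is a (vg, lv_name) tuple for LVs,
# as above; the concrete names here are illustrative):
def _BuildAdoptionRenames(generated_ids, adopt_names):
  """Returns (current_id, target_id) pairs: adopted LV -> generated name."""
  pairs = []
  for (vg, new_name), adopt in zip(generated_ids, adopt_names):
    pairs.append(((vg, adopt), (vg, new_name)))
  return pairs

assert _BuildAdoptionRenames([("xenvg", "uuid.disk0")], ["mydata"]) == \
       [(("xenvg", "mydata"), ("xenvg", "uuid.disk0"))]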
7634 feedback_fn("* creating instance disks...")
7635 try:
7636 _CreateDisks(self, iobj)
7637 except errors.OpExecError:
7638 self.LogWarning("Device creation failed, reverting...")
7639 try:
7640 _RemoveDisks(self, iobj)
7641 finally:
7642 self.cfg.ReleaseDRBDMinors(instance)
7643 raise
7645 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7646 feedback_fn("* wiping instance disks...")
7647 try:
7648 _WipeDisks(self, iobj)
7649 except errors.OpExecError:
7650 self.LogWarning("Device wiping failed, reverting...")
7651 try:
7652 _RemoveDisks(self, iobj)
7653 finally:
7654 self.cfg.ReleaseDRBDMinors(instance)
7655 raise
7657 feedback_fn("adding instance %s to cluster config" % instance)
7659 self.cfg.AddInstance(iobj, self.proc.GetECId())
7661 # Declare that we don't want to remove the instance lock anymore, as we've
7662 # added the instance to the config
7663 del self.remove_locks[locking.LEVEL_INSTANCE]
7664 # Unlock all the nodes
7665 if self.op.mode == constants.INSTANCE_IMPORT:
7666 nodes_keep = [self.op.src_node]
7667 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7668 if node != self.op.src_node]
7669 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7670 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7671 else:
7672 self.context.glm.release(locking.LEVEL_NODE)
7673 del self.acquired_locks[locking.LEVEL_NODE]
7675 if self.op.wait_for_sync:
7676 disk_abort = not _WaitForSync(self, iobj)
7677 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7678 # make sure the disks are not degraded (still sync-ing is ok)
7679 time.sleep(15)
7680 feedback_fn("* checking mirrors status")
7681 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7682 else:
7683 disk_abort = False
7685 if disk_abort:
7686 _RemoveDisks(self, iobj)
7687 self.cfg.RemoveInstance(iobj.name)
7688 # Make sure the instance lock gets removed
7689 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7690 raise errors.OpExecError("There are some degraded disks for"
7691 " this instance")
7693 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7694 if self.op.mode == constants.INSTANCE_CREATE:
7695 if not self.op.no_install:
7696 feedback_fn("* running the instance OS create scripts...")
7697 # FIXME: pass debug option from opcode to backend
7698 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7699 self.op.debug_level)
7700 result.Raise("Could not add os for instance %s"
7701 " on node %s" % (instance, pnode_name))
7703 elif self.op.mode == constants.INSTANCE_IMPORT:
7704 feedback_fn("* running the instance OS import scripts...")
7706 transfers = []
7708 for idx, image in enumerate(self.src_images):
7709 if not image:
7710 continue
7712 # FIXME: pass debug option from opcode to backend
7713 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7714 constants.IEIO_FILE, (image, ),
7715 constants.IEIO_SCRIPT,
7716 (iobj.disks[idx], idx),
7717 None)
7718 transfers.append(dt)
7720 import_result = \
7721 masterd.instance.TransferInstanceData(self, feedback_fn,
7722 self.op.src_node, pnode_name,
7723 self.pnode.secondary_ip,
7724 iobj, transfers)
7725 if not compat.all(import_result):
7726 self.LogWarning("Some disks for instance %s on node %s were not"
7727 " imported successfully" % (instance, pnode_name))
7729 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7730 feedback_fn("* preparing remote import...")
7731 # The source cluster will stop the instance before attempting to make a
7732 # connection. In some cases stopping an instance can take a long time,
7733 # hence the shutdown timeout is added to the connection timeout.
7734 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7735 self.op.source_shutdown_timeout)
7736 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7738 assert iobj.primary_node == self.pnode.name
7739 disk_results = \
7740 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7741 self.source_x509_ca,
7742 self._cds, timeouts)
7743 if not compat.all(disk_results):
7744 # TODO: Should the instance still be started, even if some disks
7745 # failed to import (valid for local imports, too)?
7746 self.LogWarning("Some disks for instance %s on node %s were not"
7747 " imported successfully" % (instance, pnode_name))
7749 # Run rename script on newly imported instance
7750 assert iobj.name == instance
7751 feedback_fn("Running rename script for %s" % instance)
7752 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7753 self.source_instance_name,
7754 self.op.debug_level)
7755 if result.fail_msg:
7756 self.LogWarning("Failed to run rename script for %s on node"
7757 " %s: %s" % (instance, pnode_name, result.fail_msg))
7759 else:
7760 # also checked in the prereq part
7761 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7762 % self.op.mode)
7764 if self.op.start:
7765 iobj.admin_up = True
7766 self.cfg.Update(iobj, feedback_fn)
7767 logging.info("Starting instance %s on node %s", instance, pnode_name)
7768 feedback_fn("* starting instance...")
7769 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7770 result.Raise("Could not start instance")
7772 return list(iobj.all_nodes)
7775 class LUInstanceConsole(NoHooksLU):
7776 """Connect to an instance's console.
7778 This is somewhat special in that it returns the command line that
7779 you need to run on the master node in order to connect to the
7780 console.
7785 def ExpandNames(self):
7786 self._ExpandAndLockInstance()
7788 def CheckPrereq(self):
7789 """Check prerequisites.
7791 This checks that the instance is in the cluster.
7794 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7795 assert self.instance is not None, \
7796 "Cannot retrieve locked instance %s" % self.op.instance_name
7797 _CheckNodeOnline(self, self.instance.primary_node)
7799 def Exec(self, feedback_fn):
7800 """Connect to the console of an instance
7803 instance = self.instance
7804 node = instance.primary_node
7806 node_insts = self.rpc.call_instance_list([node],
7807 [instance.hypervisor])[node]
7808 node_insts.Raise("Can't get node information from %s" % node)
7810 if instance.name not in node_insts.payload:
7811 if instance.admin_up:
7812 state = "ERROR_down"
7813 else:
7814 state = "ADMIN_down"
7815 raise errors.OpExecError("Instance %s is not running (state %s)" %
7816 (instance.name, state))
7818 logging.debug("Connecting to console of %s on %s", instance.name, node)
7820 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7823 def _GetInstanceConsole(cluster, instance):
7824 """Returns console information for an instance.
7826 @type cluster: L{objects.Cluster}
7827 @type instance: L{objects.Instance}
7831 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7832 # beparams and hvparams are passed separately, to avoid editing the
7833 # instance and then saving the defaults in the instance itself.
7834 hvparams = cluster.FillHV(instance)
7835 beparams = cluster.FillBE(instance)
7836 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7838 assert console.instance == instance.name
7839 assert console.Validate()
7841 return console.ToDict()
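# What the caller gets back is a plain dict, so clients never need the
# console class itself. A minimal sketch of an object satisfying the two
# assertions above; every field except 'instance' is illustrative, the real
# class lives in objects.py:
class _ConsoleSketch(object):
  def __init__(self, instance, kind, command):
    self.instance = instance
    self.kind = kind
    self.command = command

  def Validate(self):
    return bool(self.instance and self.kind and self.command)

  def ToDict(self):
    return {"instance": self.instance, "kind": self.kind,
            "command": self.command}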
7844 class LUInstanceReplaceDisks(LogicalUnit):
7845 """Replace the disks of an instance.
7848 HPATH = "mirrors-replace"
7849 HTYPE = constants.HTYPE_INSTANCE
7852 def CheckArguments(self):
7853 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7854 self.op.iallocator)
7856 def ExpandNames(self):
7857 self._ExpandAndLockInstance()
7859 if self.op.iallocator is not None:
7860 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7862 elif self.op.remote_node is not None:
7863 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7864 self.op.remote_node = remote_node
7866 # Warning: do not remove the locking of the new secondary here
7867 # unless DRBD8.AddChildren is changed to work in parallel;
7868 # currently it doesn't since parallel invocations of
7869 # FindUnusedMinor will conflict
7870 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7871 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7873 else:
7874 self.needed_locks[locking.LEVEL_NODE] = []
7875 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7877 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7878 self.op.iallocator, self.op.remote_node,
7879 self.op.disks, False, self.op.early_release)
7881 self.tasklets = [self.replacer]
7883 def DeclareLocks(self, level):
7884 # If we're not already locking all nodes in the set we have to declare the
7885 # instance's primary/secondary nodes.
7886 if (level == locking.LEVEL_NODE and
7887 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7888 self._LockInstancesNodes()
7890 def BuildHooksEnv(self):
7893 This runs on the master, the primary and all the secondaries.
7896 instance = self.replacer.instance
7897 env = {
7898 "MODE": self.op.mode,
7899 "NEW_SECONDARY": self.op.remote_node,
7900 "OLD_SECONDARY": instance.secondary_nodes[0],
7901 }
7902 env.update(_BuildInstanceHookEnvByObject(self, instance))
7903 nl = [
7904 self.cfg.GetMasterNode(),
7905 instance.primary_node,
7906 ]
7907 if self.op.remote_node is not None:
7908 nl.append(self.op.remote_node)
7909 return env, nl, nl
7912 class TLReplaceDisks(Tasklet):
7913 """Replaces disks for an instance.
7915 Note: Locking is not within the scope of this class.
7918 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7919 disks, delay_iallocator, early_release):
7920 """Initializes this class.
7923 Tasklet.__init__(self, lu)
7926 self.instance_name = instance_name
7927 self.mode = mode
7928 self.iallocator_name = iallocator_name
7929 self.remote_node = remote_node
7930 self.disks = disks
7931 self.delay_iallocator = delay_iallocator
7932 self.early_release = early_release
7935 self.instance = None
7936 self.new_node = None
7937 self.target_node = None
7938 self.other_node = None
7939 self.remote_node_info = None
7940 self.node_secondary_ip = None
7942 @staticmethod
7943 def CheckArguments(mode, remote_node, iallocator):
7944 """Helper function for users of this class.
7947 # check for valid parameter combination
7948 if mode == constants.REPLACE_DISK_CHG:
7949 if remote_node is None and iallocator is None:
7950 raise errors.OpPrereqError("When changing the secondary either an"
7951 " iallocator script must be used or the"
7952 " new node given", errors.ECODE_INVAL)
7954 if remote_node is not None and iallocator is not None:
7955 raise errors.OpPrereqError("Give either the iallocator or the new"
7956 " secondary, not both", errors.ECODE_INVAL)
7958 elif remote_node is not None or iallocator is not None:
7959 # Not replacing the secondary
7960 raise errors.OpPrereqError("The iallocator and new node options can"
7961 " only be used when changing the"
7962 " secondary node", errors.ECODE_INVAL)
7964 @staticmethod
7965 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7966 """Compute a new secondary node using an IAllocator.
7969 ial = IAllocator(lu.cfg, lu.rpc,
7970 mode=constants.IALLOCATOR_MODE_RELOC,
7971 name=instance_name,
7972 relocate_from=relocate_from)
7974 ial.Run(iallocator_name)
7976 if not ial.success:
7977 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7978 " %s" % (iallocator_name, ial.info),
7979 errors.ECODE_NORES)
7981 if len(ial.result) != ial.required_nodes:
7982 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7983 " of nodes (%s), required %s" %
7985 len(ial.result), ial.required_nodes),
7988 remote_node_name = ial.result[0]
7990 lu.LogInfo("Selected new secondary for instance '%s': %s",
7991 instance_name, remote_node_name)
7993 return remote_node_name
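# The two checks in _RunAllocator reduce to: the run must succeed and must
# return exactly the requested number of nodes. A minimal standalone version;
# success/info/result/required_nodes mirror the IAllocator fields used above:
def _PickRelocationTarget(success, info, result, required_nodes):
  if not success:
    raise RuntimeError("allocator failed: %s" % info)
  if len(result) != required_nodes:
    raise RuntimeError("expected %d node(s), got %d" %
                       (required_nodes, len(result)))
  return result[0]

assert _PickRelocationTarget(True, None, ["node3"], 1) == "node3"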
7995 def _FindFaultyDisks(self, node_name):
7996 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7997 node_name, True)
7999 def CheckPrereq(self):
8000 """Check prerequisites.
8002 This checks that the instance is in the cluster.
8005 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8006 assert instance is not None, \
8007 "Cannot retrieve locked instance %s" % self.instance_name
8009 if instance.disk_template != constants.DT_DRBD8:
8010 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8011 " instances", errors.ECODE_INVAL)
8013 if len(instance.secondary_nodes) != 1:
8014 raise errors.OpPrereqError("The instance has a strange layout,"
8015 " expected one secondary but found %d" %
8016 len(instance.secondary_nodes),
8017 errors.ECODE_FAULT)
8019 if not self.delay_iallocator:
8020 self._CheckPrereq2()
8022 def _CheckPrereq2(self):
8023 """Check prerequisites, second part.
8025 This function should always be part of CheckPrereq. It was separated and is
8026 now called from Exec because during node evacuation iallocator was only
8027 called with an unmodified cluster model, not taking planned changes into
8028 account.
8031 instance = self.instance
8032 secondary_node = instance.secondary_nodes[0]
8034 if self.iallocator_name is None:
8035 remote_node = self.remote_node
8036 else:
8037 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8038 instance.name, instance.secondary_nodes)
8040 if remote_node is not None:
8041 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8042 assert self.remote_node_info is not None, \
8043 "Cannot retrieve locked node %s" % remote_node
8045 self.remote_node_info = None
8047 if remote_node == self.instance.primary_node:
8048 raise errors.OpPrereqError("The specified node is the primary node of"
8049 " the instance.", errors.ECODE_INVAL)
8051 if remote_node == secondary_node:
8052 raise errors.OpPrereqError("The specified node is already the"
8053 " secondary node of the instance.",
8056 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8057 constants.REPLACE_DISK_CHG):
8058 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8059 errors.ECODE_INVAL)
8061 if self.mode == constants.REPLACE_DISK_AUTO:
8062 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8063 faulty_secondary = self._FindFaultyDisks(secondary_node)
8065 if faulty_primary and faulty_secondary:
8066 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8067 " one node and can not be repaired"
8068 " automatically" % self.instance_name,
8072 self.disks = faulty_primary
8073 self.target_node = instance.primary_node
8074 self.other_node = secondary_node
8075 check_nodes = [self.target_node, self.other_node]
8076 elif faulty_secondary:
8077 self.disks = faulty_secondary
8078 self.target_node = secondary_node
8079 self.other_node = instance.primary_node
8080 check_nodes = [self.target_node, self.other_node]
8081 else:
8082 self.disks = []
8083 check_nodes = []
8085 else:
8086 # Non-automatic modes
8087 if self.mode == constants.REPLACE_DISK_PRI:
8088 self.target_node = instance.primary_node
8089 self.other_node = secondary_node
8090 check_nodes = [self.target_node, self.other_node]
8092 elif self.mode == constants.REPLACE_DISK_SEC:
8093 self.target_node = secondary_node
8094 self.other_node = instance.primary_node
8095 check_nodes = [self.target_node, self.other_node]
8097 elif self.mode == constants.REPLACE_DISK_CHG:
8098 self.new_node = remote_node
8099 self.other_node = instance.primary_node
8100 self.target_node = secondary_node
8101 check_nodes = [self.new_node, self.other_node]
8103 _CheckNodeNotDrained(self.lu, remote_node)
8104 _CheckNodeVmCapable(self.lu, remote_node)
8106 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8107 assert old_node_info is not None
8108 if old_node_info.offline and not self.early_release:
8109 # doesn't make sense to delay the release
8110 self.early_release = True
8111 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8112 " early-release mode", secondary_node)
8114 else:
8115 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8116 self.mode)
8118 # If not specified all disks should be replaced
8119 if not self.disks:
8120 self.disks = range(len(self.instance.disks))
8122 for node in check_nodes:
8123 _CheckNodeOnline(self.lu, node)
8125 # Check whether disks are valid
8126 for disk_idx in self.disks:
8127 instance.FindDisk(disk_idx)
8129 # Get secondary node IP addresses
8131 node_2nd_ip = {}
8132 for node_name in [self.target_node, self.other_node, self.new_node]:
8133 if node_name is not None:
8134 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8136 self.node_secondary_ip = node_2nd_ip
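# The loop above is a guarded dict build: every non-None node gets mapped to
# its secondary IP. The same thing as one expression (GetNodeInfo is stubbed
# by a plain name -> ip dict in this sketch):
def _SecondaryIpMap(secondary_ips, node_names):
  return dict((name, secondary_ips[name])
              for name in node_names if name is not None)

assert _SecondaryIpMap({"nodeA": "192.0.2.1"}, ["nodeA", None]) == \
       {"nodeA": "192.0.2.1"}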
8138 def Exec(self, feedback_fn):
8139 """Execute disk replacement.
8141 This dispatches the disk replacement to the appropriate handler.
8144 if self.delay_iallocator:
8145 self._CheckPrereq2()
8147 if not self.disks:
8148 feedback_fn("No disks need replacement")
8149 return
8151 feedback_fn("Replacing disk(s) %s for %s" %
8152 (utils.CommaJoin(self.disks), self.instance.name))
8154 activate_disks = (not self.instance.admin_up)
8156 # Activate the instance disks if we're replacing them on a down instance
8157 if activate_disks:
8158 _StartInstanceDisks(self.lu, self.instance, True)
8160 try:
8161 # Should we replace the secondary node?
8162 if self.new_node is not None:
8163 fn = self._ExecDrbd8Secondary
8164 else:
8165 fn = self._ExecDrbd8DiskOnly
8167 return fn(feedback_fn)
8169 finally:
8170 # Deactivate the instance disks if we're replacing them on a
8171 # down instance
8172 if activate_disks:
8173 _SafeShutdownInstanceDisks(self.lu, self.instance)
8175 def _CheckVolumeGroup(self, nodes):
8176 self.lu.LogInfo("Checking volume groups")
8178 vgname = self.cfg.GetVGName()
8180 # Make sure volume group exists on all involved nodes
8181 results = self.rpc.call_vg_list(nodes)
8182 if not results:
8183 raise errors.OpExecError("Can't list volume groups on the nodes")
8185 for node in nodes:
8186 res = results[node]
8187 res.Raise("Error checking node %s" % node)
8188 if vgname not in res.payload:
8189 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8190 (vgname, node))
8192 def _CheckDisksExistence(self, nodes):
8193 # Check disk existence
8194 for idx, dev in enumerate(self.instance.disks):
8195 if idx not in self.disks:
8196 continue
8198 for node in nodes:
8199 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8200 self.cfg.SetDiskID(dev, node)
8202 result = self.rpc.call_blockdev_find(node, dev)
8204 msg = result.fail_msg
8205 if msg or not result.payload:
8206 if not msg:
8207 msg = "disk not found"
8208 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8209 (idx, node, msg))
8211 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8212 for idx, dev in enumerate(self.instance.disks):
8213 if idx not in self.disks:
8214 continue
8216 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8217 (idx, node_name))
8219 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8220 ldisk=ldisk):
8221 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8222 " replace disks for instance %s" %
8223 (node_name, self.instance.name))
8225 def _CreateNewStorage(self, node_name):
8226 vgname = self.cfg.GetVGName()
8228 iv_names = {}
8229 for idx, dev in enumerate(self.instance.disks):
8230 if idx not in self.disks:
8231 continue
8233 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8235 self.cfg.SetDiskID(dev, node_name)
8237 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8238 names = _GenerateUniqueNames(self.lu, lv_names)
8240 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8241 logical_id=(vgname, names[0]))
8242 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8243 logical_id=(vgname, names[1]))
8245 new_lvs = [lv_data, lv_meta]
8246 old_lvs = dev.children
8247 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8249 # we pass force_create=True to force the LVM creation
8250 for new_lv in new_lvs:
8251 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8252 _GetInstanceInfoText(self.instance), False)
8254 return iv_names
8256 def _CheckDevices(self, node_name, iv_names):
8257 for name, (dev, _, _) in iv_names.iteritems():
8258 self.cfg.SetDiskID(dev, node_name)
8260 result = self.rpc.call_blockdev_find(node_name, dev)
8262 msg = result.fail_msg
8263 if msg or not result.payload:
8264 if not msg:
8265 msg = "disk not found"
8266 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8267 (name, msg))
8269 if result.payload.is_degraded:
8270 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8272 def _RemoveOldStorage(self, node_name, iv_names):
8273 for name, (_, old_lvs, _) in iv_names.iteritems():
8274 self.lu.LogInfo("Remove logical volumes for %s" % name)
8276 for lv in old_lvs:
8277 self.cfg.SetDiskID(lv, node_name)
8279 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8280 if msg:
8281 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8282 hint="remove unused LVs manually")
8284 def _ReleaseNodeLock(self, node_name):
8285 """Releases the lock for a given node."""
8286 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8288 def _ExecDrbd8DiskOnly(self, feedback_fn):
8289 """Replace a disk on the primary or secondary for DRBD 8.
8291 The algorithm for replace is quite complicated:
8293 1. for each disk to be replaced:
8295 1. create new LVs on the target node with unique names
8296 1. detach old LVs from the drbd device
8297 1. rename old LVs to name_replaced.<time_t>
8298 1. rename new LVs to old LVs
8299 1. attach the new LVs (with the old names now) to the drbd device
8301 1. wait for sync across all devices
8303 1. for each modified disk:
8305 1. remove old LVs (which have the name name_replaced.<time_t>)
8307 Failures are not very well handled.
8311 steps_total = 6
8312 # Step: check device activation
8313 self.lu.LogStep(1, steps_total, "Check device existence")
8314 self._CheckDisksExistence([self.other_node, self.target_node])
8315 self._CheckVolumeGroup([self.target_node, self.other_node])
8317 # Step: check other node consistency
8318 self.lu.LogStep(2, steps_total, "Check peer consistency")
8319 self._CheckDisksConsistency(self.other_node,
8320 self.other_node == self.instance.primary_node,
8321 False)
8323 # Step: create new storage
8324 self.lu.LogStep(3, steps_total, "Allocate new storage")
8325 iv_names = self._CreateNewStorage(self.target_node)
8327 # Step: for each lv, detach+rename*2+attach
8328 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8329 for dev, old_lvs, new_lvs in iv_names.itervalues():
8330 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8332 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8333 old_lvs)
8334 result.Raise("Can't detach drbd from local storage on node"
8335 " %s for device %s" % (self.target_node, dev.iv_name))
8337 #cfg.Update(instance)
8339 # ok, we created the new LVs, so now we know we have the needed
8340 # storage; as such, we proceed on the target node to rename
8341 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8342 # using the assumption that logical_id == physical_id (which in
8343 # turn is the unique_id on that node)
8345 # FIXME(iustin): use a better name for the replaced LVs
8346 temp_suffix = int(time.time())
8347 ren_fn = lambda d, suff: (d.physical_id[0],
8348 d.physical_id[1] + "_replaced-%s" % suff)
8350 # Build the rename list based on what LVs exist on the node
8351 rename_old_to_new = []
8352 for to_ren in old_lvs:
8353 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8354 if not result.fail_msg and result.payload:
8356 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8358 self.lu.LogInfo("Renaming the old LVs on the target node")
8359 result = self.rpc.call_blockdev_rename(self.target_node,
8360 rename_old_to_new)
8361 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8363 # Now we rename the new LVs to the old LVs
8364 self.lu.LogInfo("Renaming the new LVs on the target node")
8365 rename_new_to_old = [(new, old.physical_id)
8366 for old, new in zip(old_lvs, new_lvs)]
8367 result = self.rpc.call_blockdev_rename(self.target_node,
8368 rename_new_to_old)
8369 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8371 for old, new in zip(old_lvs, new_lvs):
8372 new.logical_id = old.logical_id
8373 self.cfg.SetDiskID(new, self.target_node)
8375 for disk in old_lvs:
8376 disk.logical_id = ren_fn(disk, temp_suffix)
8377 self.cfg.SetDiskID(disk, self.target_node)
8379 # Now that the new lvs have the old name, we can add them to the device
8380 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8381 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8382 new_lvs)
8383 msg = result.fail_msg
8384 if msg:
8385 for new_lv in new_lvs:
8386 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8387 new_lv).fail_msg
8388 if msg2:
8389 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8390 hint=("cleanup manually the unused logical"
8391 " volumes"))
8392 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8394 dev.children = new_lvs
8396 self.cfg.Update(self.instance, feedback_fn)
8398 cstep = 5
8399 if self.early_release:
8400 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8401 cstep += 1
8402 self._RemoveOldStorage(self.target_node, iv_names)
8403 # WARNING: we release both node locks here, do not do other RPCs
8404 # than WaitForSync to the primary node
8405 self._ReleaseNodeLock([self.target_node, self.other_node])
8408 # This can fail as the old devices are degraded and _WaitForSync
8409 # does a combined result over all disks, so we don't check its return value
8410 self.lu.LogStep(cstep, steps_total, "Sync devices")
8411 cstep += 1
8412 _WaitForSync(self.lu, self.instance)
8414 # Check all devices manually
8415 self._CheckDevices(self.instance.primary_node, iv_names)
8417 # Step: remove old storage
8418 if not self.early_release:
8419 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8420 cstep += 1
8421 self._RemoveOldStorage(self.target_node, iv_names)
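# The detach/rename/rename/attach dance above boils down to a name swap that
# leaves the DRBD device pointing at fresh LVs under the old names. A
# pure-data simulation of just the naming steps (no RPCs, names illustrative):
def _SimulateLvSwap(old_names, new_names, suffix):
  renames = []
  # 1. old LVs move out of the way: name -> name_replaced-<suffix>
  for name in old_names:
    renames.append((name, "%s_replaced-%s" % (name, suffix)))
  # 2. new LVs then take over the old names
  for old, new in zip(old_names, new_names):
    renames.append((new, old))
  return renames

assert _SimulateLvSwap(["disk0_data"], ["tmp_data"], "1234") == \
       [("disk0_data", "disk0_data_replaced-1234"), ("tmp_data", "disk0_data")]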
8423 def _ExecDrbd8Secondary(self, feedback_fn):
8424 """Replace the secondary node for DRBD 8.
8426 The algorithm for replace is quite complicated:
8427 - for all disks of the instance:
8428 - create new LVs on the new node with same names
8429 - shutdown the drbd device on the old secondary
8430 - disconnect the drbd network on the primary
8431 - create the drbd device on the new secondary
8432 - network attach the drbd on the primary, using an artifice:
8433 the drbd code for Attach() will connect to the network if it
8434 finds a device which is connected to the good local disks but
8435 not network enabled
8436 - wait for sync across all devices
8437 - remove all disks from the old secondary
8439 Failures are not very well handled.
8443 steps_total = 6
8444 # Step: check device activation
8445 self.lu.LogStep(1, steps_total, "Check device existence")
8446 self._CheckDisksExistence([self.instance.primary_node])
8447 self._CheckVolumeGroup([self.instance.primary_node])
8449 # Step: check other node consistency
8450 self.lu.LogStep(2, steps_total, "Check peer consistency")
8451 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8453 # Step: create new storage
8454 self.lu.LogStep(3, steps_total, "Allocate new storage")
8455 for idx, dev in enumerate(self.instance.disks):
8456 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8457 (self.new_node, idx))
8458 # we pass force_create=True to force LVM creation
8459 for new_lv in dev.children:
8460 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8461 _GetInstanceInfoText(self.instance), False)
8463 # Step 4: drbd minors and drbd setup changes
8464 # after this, we must manually remove the drbd minors on both the
8465 # error and the success paths
8466 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8467 minors = self.cfg.AllocateDRBDMinor([self.new_node
8468 for dev in self.instance.disks],
8469 self.instance.name)
8470 logging.debug("Allocated minors %r", minors)
8472 iv_names = {}
8473 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8474 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8475 (self.new_node, idx))
8476 # create new devices on new_node; note that we create two IDs:
8477 # one without port, so the drbd will be activated without
8478 # networking information on the new node at this stage, and one
8479 # with network, for the latter activation in step 4
8480 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8481 if self.instance.primary_node == o_node1:
8482 p_minor = o_minor1
8483 else:
8484 assert self.instance.primary_node == o_node2, "Three-node instance?"
8485 p_minor = o_minor2
8487 new_alone_id = (self.instance.primary_node, self.new_node, None,
8488 p_minor, new_minor, o_secret)
8489 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8490 p_minor, new_minor, o_secret)
8492 iv_names[idx] = (dev, dev.children, new_net_id)
8493 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8494 new_net_id)
8495 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8496 logical_id=new_alone_id,
8497 children=dev.children,
8498 size=dev.size)
8499 try:
8500 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8501 _GetInstanceInfoText(self.instance), False)
8502 except errors.GenericError:
8503 self.cfg.ReleaseDRBDMinors(self.instance.name)
8504 raise
8506 # We have new devices, shutdown the drbd on the old secondary
8507 for idx, dev in enumerate(self.instance.disks):
8508 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8509 self.cfg.SetDiskID(dev, self.target_node)
8510 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8511 if msg:
8512 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8513 " node: %s" % (idx, msg),
8514 hint=("Please cleanup this device manually as"
8515 " soon as possible"))
8517 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8518 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8519 self.node_secondary_ip,
8520 self.instance.disks)\
8521 [self.instance.primary_node]
8523 msg = result.fail_msg
8524 if msg:
8525 # detaches didn't succeed (unlikely)
8526 self.cfg.ReleaseDRBDMinors(self.instance.name)
8527 raise errors.OpExecError("Can't detach the disks from the network on"
8528 " old node: %s" % (msg,))
8530 # if we managed to detach at least one, we update all the disks of
8531 # the instance to point to the new secondary
8532 self.lu.LogInfo("Updating instance configuration")
8533 for dev, _, new_logical_id in iv_names.itervalues():
8534 dev.logical_id = new_logical_id
8535 self.cfg.SetDiskID(dev, self.instance.primary_node)
8537 self.cfg.Update(self.instance, feedback_fn)
8539 # and now perform the drbd attach
8540 self.lu.LogInfo("Attaching primary drbds to new secondary"
8541 " (standalone => connected)")
8542 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8543 self.new_node],
8544 self.node_secondary_ip,
8545 self.instance.disks,
8546 self.instance.name,
8547 False)
8548 for to_node, to_result in result.items():
8549 msg = to_result.fail_msg
8550 if msg:
8551 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8552 to_node, msg,
8553 hint=("please do a gnt-instance info to see the"
8554 " status of disks"))
8555 cstep = 5
8556 if self.early_release:
8557 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8558 cstep += 1
8559 self._RemoveOldStorage(self.target_node, iv_names)
8560 # WARNING: we release all node locks here, do not do other RPCs
8561 # than WaitForSync to the primary node
8562 self._ReleaseNodeLock([self.instance.primary_node,
8563 self.target_node,
8564 self.new_node])
8567 # This can fail as the old devices are degraded and _WaitForSync
8568 # does a combined result over all disks, so we don't check its return value
8569 self.lu.LogStep(cstep, steps_total, "Sync devices")
8570 cstep += 1
8571 _WaitForSync(self.lu, self.instance)
8573 # Check all devices manually
8574 self._CheckDevices(self.instance.primary_node, iv_names)
8576 # Step: remove old storage
8577 if not self.early_release:
8578 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8579 self._RemoveOldStorage(self.target_node, iv_names)
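# Rebuilding a DRBD logical_id for a new secondary, as in step 4 above: keep
# the primary node, its minor, the port and the shared secret, swap in the
# new node and its freshly allocated minor. A standalone sketch over the same
# 6-tuple layout (node1, node2, port, minor1, minor2, secret):
def _NewDrbdLogicalId(logical_id, primary, new_node, new_minor):
  node1, _, port, minor1, minor2, secret = logical_id
  p_minor = minor1 if primary == node1 else minor2
  return (primary, new_node, port, p_minor, new_minor, secret)

assert _NewDrbdLogicalId(("pri", "oldsec", 11000, 0, 1, "s3cr3t"),
                         "pri", "newsec", 7) == \
       ("pri", "newsec", 11000, 0, 7, "s3cr3t")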
8582 class LURepairNodeStorage(NoHooksLU):
8583 """Repairs the volume group on a node.
8588 def CheckArguments(self):
8589 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8591 storage_type = self.op.storage_type
8593 if (constants.SO_FIX_CONSISTENCY not in
8594 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8595 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8596 " repaired" % storage_type,
8599 def ExpandNames(self):
8600 self.needed_locks = {
8601 locking.LEVEL_NODE: [self.op.node_name],
8602 }
8604 def _CheckFaultyDisks(self, instance, node_name):
8605 """Ensure faulty disks abort the opcode or at least warn."""
8606 try:
8607 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8608 node_name, True):
8609 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8610 " node '%s'" % (instance.name, node_name),
8611 errors.ECODE_STATE)
8612 except errors.OpPrereqError, err:
8613 if self.op.ignore_consistency:
8614 self.proc.LogWarning(str(err.args[0]))
8615 else:
8616 raise
8618 def CheckPrereq(self):
8619 """Check prerequisites.
8622 # Check whether any instance on this node has faulty disks
8623 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8624 if not inst.admin_up:
8625 continue
8626 check_nodes = set(inst.all_nodes)
8627 check_nodes.discard(self.op.node_name)
8628 for inst_node_name in check_nodes:
8629 self._CheckFaultyDisks(inst, inst_node_name)
8631 def Exec(self, feedback_fn):
8632 feedback_fn("Repairing storage unit '%s' on %s ..." %
8633 (self.op.name, self.op.node_name))
8635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8636 result = self.rpc.call_storage_execute(self.op.node_name,
8637 self.op.storage_type, st_args,
8638 self.op.name,
8639 constants.SO_FIX_CONSISTENCY)
8640 result.Raise("Failed to repair storage unit '%s' on %s" %
8641 (self.op.name, self.op.node_name))
8644 class LUNodeEvacStrategy(NoHooksLU):
8645 """Computes the node evacuation strategy.
8650 def CheckArguments(self):
8651 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8653 def ExpandNames(self):
8654 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8655 self.needed_locks = locks = {}
8656 if self.op.remote_node is None:
8657 locks[locking.LEVEL_NODE] = locking.ALL_SET
8658 else:
8659 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8660 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8662 def Exec(self, feedback_fn):
8663 if self.op.remote_node is not None:
8664 instances = []
8665 for node in self.op.nodes:
8666 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8667 result = []
8668 for i in instances:
8669 if i.primary_node == self.op.remote_node:
8670 raise errors.OpPrereqError("Node %s is the primary node of"
8671 " instance %s, cannot use it as"
8672 " secondary" %
8673 (self.op.remote_node, i.name),
8674 errors.ECODE_INVAL)
8675 result.append([i.name, self.op.remote_node])
8676 else:
8677 ial = IAllocator(self.cfg, self.rpc,
8678 mode=constants.IALLOCATOR_MODE_MEVAC,
8679 evac_nodes=self.op.nodes)
8680 ial.Run(self.op.iallocator, validate=True)
8681 if not ial.success:
8682 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8683 errors.ECODE_NORES)
8684 result = ial.result
8685 return result
8688 class LUInstanceGrowDisk(LogicalUnit):
8689 """Grow a disk of an instance.
8692 HPATH = "disk-grow"
8693 HTYPE = constants.HTYPE_INSTANCE
8694 REQ_BGL = False
8696 def ExpandNames(self):
8697 self._ExpandAndLockInstance()
8698 self.needed_locks[locking.LEVEL_NODE] = []
8699 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8701 def DeclareLocks(self, level):
8702 if level == locking.LEVEL_NODE:
8703 self._LockInstancesNodes()
8705 def BuildHooksEnv(self):
8708 This runs on the master, the primary and all the secondaries.
8712 "DISK": self.op.disk,
8713 "AMOUNT": self.op.amount,
8715 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8716 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8719 def CheckPrereq(self):
8720 """Check prerequisites.
8722 This checks that the instance is in the cluster.
8725 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8726 assert instance is not None, \
8727 "Cannot retrieve locked instance %s" % self.op.instance_name
8728 nodenames = list(instance.all_nodes)
8729 for node in nodenames:
8730 _CheckNodeOnline(self, node)
8732 self.instance = instance
8734 if instance.disk_template not in constants.DTS_GROWABLE:
8735 raise errors.OpPrereqError("Instance's disk layout does not support"
8736 " growing.", errors.ECODE_INVAL)
8738 self.disk = instance.FindDisk(self.op.disk)
8740 if instance.disk_template != constants.DT_FILE:
8741 # TODO: check the free disk space for file, when that feature
8742 # will be supported
8743 _CheckNodesFreeDiskPerVG(self, nodenames,
8744 self.disk.ComputeGrowth(self.op.amount))
8746 def Exec(self, feedback_fn):
8747 """Execute disk grow.
8750 instance = self.instance
8751 disk = self.disk
8753 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8754 if not disks_ok:
8755 raise errors.OpExecError("Cannot activate block device to grow")
8757 for node in instance.all_nodes:
8758 self.cfg.SetDiskID(disk, node)
8759 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8760 result.Raise("Grow request failed to node %s" % node)
8762 # TODO: Rewrite code to work properly
8763 # DRBD goes into sync mode for a short amount of time after executing the
8764 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8765 # calling "resize" in sync mode fails. Sleeping for a short amount of
8766 # time is a work-around.
8767 time.sleep(5)
8769 disk.RecordGrow(self.op.amount)
8770 self.cfg.Update(instance, feedback_fn)
8771 if self.op.wait_for_sync:
8772 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8773 if disk_abort:
8774 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8775 " status.\nPlease check the instance.")
8776 if not instance.admin_up:
8777 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8778 elif not instance.admin_up:
8779 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8780 " not supposed to be running because no wait for"
8781 " sync mode was requested.")
8784 class LUInstanceQueryData(NoHooksLU):
8785 """Query runtime instance data.
8790 def ExpandNames(self):
8791 self.needed_locks = {}
8792 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8794 if self.op.instances:
8795 self.wanted_names = []
8796 for name in self.op.instances:
8797 full_name = _ExpandInstanceName(self.cfg, name)
8798 self.wanted_names.append(full_name)
8799 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8801 self.wanted_names = None
8802 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8804 self.needed_locks[locking.LEVEL_NODE] = []
8805 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8807 def DeclareLocks(self, level):
8808 if level == locking.LEVEL_NODE:
8809 self._LockInstancesNodes()
8811 def CheckPrereq(self):
8812 """Check prerequisites.
8814 This only checks the optional instance list against the existing names.
8817 if self.wanted_names is None:
8818 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8820 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8821 in self.wanted_names]
8823 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8824 """Returns the status of a block device
8827 if self.op.static or not node:
8828 return None
8830 self.cfg.SetDiskID(dev, node)
8832 result = self.rpc.call_blockdev_find(node, dev)
8833 if result.offline:
8834 return None
8836 result.Raise("Can't compute disk status for %s" % instance_name)
8838 status = result.payload
8839 if status is None:
8840 return None
8842 return (status.dev_path, status.major, status.minor,
8843 status.sync_percent, status.estimated_time,
8844 status.is_degraded, status.ldisk_status)
8846 def _ComputeDiskStatus(self, instance, snode, dev):
8847 """Compute block device status.
8850 if dev.dev_type in constants.LDS_DRBD:
8851 # we change the snode then (otherwise we use the one passed in)
8852 if dev.logical_id[0] == instance.primary_node:
8853 snode = dev.logical_id[1]
8854 else:
8855 snode = dev.logical_id[0]
8857 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8858 instance.name, dev)
8859 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8861 if dev.children:
8862 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8863 for child in dev.children]
8864 else:
8865 dev_children = []
8867 return {
8868 "iv_name": dev.iv_name,
8869 "dev_type": dev.dev_type,
8870 "logical_id": dev.logical_id,
8871 "physical_id": dev.physical_id,
8872 "pstatus": dev_pstatus,
8873 "sstatus": dev_sstatus,
8874 "children": dev_children,
8881 def Exec(self, feedback_fn):
8882 """Gather and return data"""
8884 result = {}
8885 cluster = self.cfg.GetClusterInfo()
8887 for instance in self.wanted_instances:
8888 if not self.op.static:
8889 remote_info = self.rpc.call_instance_info(instance.primary_node,
8890 instance.name,
8891 instance.hypervisor)
8892 remote_info.Raise("Error checking node %s" % instance.primary_node)
8893 remote_info = remote_info.payload
8894 if remote_info and "state" in remote_info:
8895 remote_state = remote_info["state"]
8896 else:
8897 remote_state = "down"
8898 else:
8899 remote_state = None
8900 if instance.admin_up:
8901 config_state = "up"
8902 else:
8903 config_state = "down"
8905 disks = [self._ComputeDiskStatus(instance, None, device)
8906 for device in instance.disks]
8909 "name": instance.name,
8910 "config_state": config_state,
8911 "run_state": remote_state,
8912 "pnode": instance.primary_node,
8913 "snodes": instance.secondary_nodes,
8915 # this happens to be the same format used for hooks
8916 "nics": _NICListToTuple(self, instance.nics),
8917 "disk_template": instance.disk_template,
8919 "hypervisor": instance.hypervisor,
8920 "network_port": instance.network_port,
8921 "hv_instance": instance.hvparams,
8922 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8923 "be_instance": instance.beparams,
8924 "be_actual": cluster.FillBE(instance),
8925 "os_instance": instance.osparams,
8926 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8927 "serial_no": instance.serial_no,
8928 "mtime": instance.mtime,
8929 "ctime": instance.ctime,
8930 "uuid": instance.uuid,
8933 result[instance.name] = idict
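# Each entry of the returned mapping is a plain dict keyed by stable names,
# so consumers can pick fields without touching config objects. Sketch of
# reading the state pair out of an Exec result:
def _InstanceStates(query_result):
  return dict((name, (idict["config_state"], idict["run_state"]))
              for name, idict in query_result.items())

assert _InstanceStates({"web1": {"config_state": "up", "run_state": "down"}}) \
       == {"web1": ("up", "down")}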
8938 class LUInstanceSetParams(LogicalUnit):
8939 """Modifies an instances's parameters.
8942 HPATH = "instance-modify"
8943 HTYPE = constants.HTYPE_INSTANCE
8946 def CheckArguments(self):
8947 if not (self.op.nics or self.op.disks or self.op.disk_template or
8948 self.op.hvparams or self.op.beparams or self.op.os_name):
8949 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8951 if self.op.hvparams:
8952 _CheckGlobalHvParams(self.op.hvparams)
8954 # Disk validation
8955 disk_addremove = 0
8956 for disk_op, disk_dict in self.op.disks:
8957 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8958 if disk_op == constants.DDM_REMOVE:
8959 disk_addremove += 1
8960 continue
8961 elif disk_op == constants.DDM_ADD:
8962 disk_addremove += 1
8963 else:
8964 if not isinstance(disk_op, int):
8965 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8966 if not isinstance(disk_dict, dict):
8967 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8968 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8970 if disk_op == constants.DDM_ADD:
8971 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8972 if mode not in constants.DISK_ACCESS_SET:
8973 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8975 size = disk_dict.get('size', None)
8976 if size is None:
8977 raise errors.OpPrereqError("Required disk parameter size missing",
8978 errors.ECODE_INVAL)
8979 try:
8980 size = int(size)
8981 except (TypeError, ValueError), err:
8982 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8983 str(err), errors.ECODE_INVAL)
8984 disk_dict['size'] = size
8985 else:
8986 # modification of disk
8987 if 'size' in disk_dict:
8988 raise errors.OpPrereqError("Disk size change not possible, use"
8989 " grow-disk", errors.ECODE_INVAL)
8991 if disk_addremove > 1:
8992 raise errors.OpPrereqError("Only one disk add or remove operation"
8993 " supported at a time", errors.ECODE_INVAL)
8995 if self.op.disks and self.op.disk_template is not None:
8996 raise errors.OpPrereqError("Disk template conversion and other disk"
8997 " changes not supported at the same time",
9000 if (self.op.disk_template and
9001 self.op.disk_template in constants.DTS_NET_MIRROR and
9002 self.op.remote_node is None):
9003 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9004 " one requires specifying a secondary node",
9009 for nic_op, nic_dict in self.op.nics:
9010 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9011 if nic_op == constants.DDM_REMOVE:
9012 nic_addremove += 1
9013 continue
9014 elif nic_op == constants.DDM_ADD:
9015 nic_addremove += 1
9016 else:
9017 if not isinstance(nic_op, int):
9018 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9019 if not isinstance(nic_dict, dict):
9020 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9021 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9023 # nic_dict should be a dict
9024 nic_ip = nic_dict.get('ip', None)
9025 if nic_ip is not None:
9026 if nic_ip.lower() == constants.VALUE_NONE:
9027 nic_dict['ip'] = None
9028 else:
9029 if not netutils.IPAddress.IsValid(nic_ip):
9030 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9031 errors.ECODE_INVAL)
9033 nic_bridge = nic_dict.get('bridge', None)
9034 nic_link = nic_dict.get('link', None)
9035 if nic_bridge and nic_link:
9036 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9037 " at the same time", errors.ECODE_INVAL)
9038 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9039 nic_dict['bridge'] = None
9040 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9041 nic_dict['link'] = None
9043 if nic_op == constants.DDM_ADD:
9044 nic_mac = nic_dict.get('mac', None)
9045 if nic_mac is None:
9046 nic_dict['mac'] = constants.VALUE_AUTO
9048 if 'mac' in nic_dict:
9049 nic_mac = nic_dict['mac']
9050 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9051 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9053 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9054 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9055 " modifying an existing nic",
9058 if nic_addremove > 1:
9059 raise errors.OpPrereqError("Only one NIC add or remove operation"
9060 " supported at a time", errors.ECODE_INVAL)
9062 def ExpandNames(self):
9063 self._ExpandAndLockInstance()
9064 self.needed_locks[locking.LEVEL_NODE] = []
9065 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9067 def DeclareLocks(self, level):
9068 if level == locking.LEVEL_NODE:
9069 self._LockInstancesNodes()
9070 if self.op.disk_template and self.op.remote_node:
9071 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9072 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9074 def BuildHooksEnv(self):
9077 This runs on the master, primary and secondaries.
9080 args = dict()
9081 if constants.BE_MEMORY in self.be_new:
9082 args['memory'] = self.be_new[constants.BE_MEMORY]
9083 if constants.BE_VCPUS in self.be_new:
9084 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9085 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9086 # information at all.
9087 if self.op.nics:
9088 args['nics'] = []
9089 nic_override = dict(self.op.nics)
9090 for idx, nic in enumerate(self.instance.nics):
9091 if idx in nic_override:
9092 this_nic_override = nic_override[idx]
9094 this_nic_override = {}
9095 if 'ip' in this_nic_override:
9096 ip = this_nic_override['ip']
9097 else:
9098 ip = nic.ip
9099 if 'mac' in this_nic_override:
9100 mac = this_nic_override['mac']
9101 else:
9102 mac = nic.mac
9103 if idx in self.nic_pnew:
9104 nicparams = self.nic_pnew[idx]
9106 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9107 mode = nicparams[constants.NIC_MODE]
9108 link = nicparams[constants.NIC_LINK]
9109 args['nics'].append((ip, mac, mode, link))
9110 if constants.DDM_ADD in nic_override:
9111 ip = nic_override[constants.DDM_ADD].get('ip', None)
9112 mac = nic_override[constants.DDM_ADD]['mac']
9113 nicparams = self.nic_pnew[constants.DDM_ADD]
9114 mode = nicparams[constants.NIC_MODE]
9115 link = nicparams[constants.NIC_LINK]
9116 args['nics'].append((ip, mac, mode, link))
9117 elif constants.DDM_REMOVE in nic_override:
9118 del args['nics'][-1]
9120 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9121 if self.op.disk_template:
9122 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9123 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9124 return env, nl, nl
9126 def CheckPrereq(self):
9127 """Check prerequisites.
9129 This only checks the instance list against the existing names.
9132 # checking the new params on the primary/secondary nodes
9134 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9135 cluster = self.cluster = self.cfg.GetClusterInfo()
9136 assert self.instance is not None, \
9137 "Cannot retrieve locked instance %s" % self.op.instance_name
9138 pnode = instance.primary_node
9139 nodelist = list(instance.all_nodes)
9142 if self.op.os_name and not self.op.force:
9143 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9144 self.op.force_variant)
9145 instance_os = self.op.os_name
9146 else:
9147 instance_os = instance.os
9149 if self.op.disk_template:
9150 if instance.disk_template == self.op.disk_template:
9151 raise errors.OpPrereqError("Instance already has disk template %s" %
9152 instance.disk_template, errors.ECODE_INVAL)
9154 if (instance.disk_template,
9155 self.op.disk_template) not in self._DISK_CONVERSIONS:
9156 raise errors.OpPrereqError("Unsupported disk template conversion from"
9157 " %s to %s" % (instance.disk_template,
9158 self.op.disk_template),
9159 errors.ECODE_INVAL)
9160 _CheckInstanceDown(self, instance, "cannot change disk template")
9161 if self.op.disk_template in constants.DTS_NET_MIRROR:
9162 if self.op.remote_node == pnode:
9163 raise errors.OpPrereqError("Given new secondary node %s is the same"
9164 " as the primary node of the instance" %
9165 self.op.remote_node, errors.ECODE_STATE)
9166 _CheckNodeOnline(self, self.op.remote_node)
9167 _CheckNodeNotDrained(self, self.op.remote_node)
9168 # FIXME: here we assume that the old instance type is DT_PLAIN
9169 assert instance.disk_template == constants.DT_PLAIN
9170 disks = [{"size": d.size, "vg": d.logical_id[0]}
9171 for d in instance.disks]
9172 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9173 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9175 # hvparams processing
9176 if self.op.hvparams:
9177 hv_type = instance.hypervisor
9178 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9179 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9180 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9183 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9184 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9185 self.hv_new = hv_new # the new actual values
9186 self.hv_inst = i_hvdict # the new dict (without defaults)
9187 else:
9188 self.hv_new = self.hv_inst = {}
9190 # beparams processing
9191 if self.op.beparams:
9192 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9193 use_none=True)
9194 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9195 be_new = cluster.SimpleFillBE(i_bedict)
9196 self.be_new = be_new # the new actual values
9197 self.be_inst = i_bedict # the new dict (without defaults)
9198 else:
9199 self.be_new = self.be_inst = {}
9201 # osparams processing
9202 if self.op.osparams:
9203 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9204 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9205 self.os_inst = i_osdict # the new dict (without defaults)
9206 else:
9207 self.os_inst = {}
9209 self.warn = []
9211 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9212 mem_check_list = [pnode]
9213 if be_new[constants.BE_AUTO_BALANCE]:
9214 # either we changed auto_balance to yes or it was from before
9215 mem_check_list.extend(instance.secondary_nodes)
9216 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9217 instance.hypervisor)
9218 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9219 instance.hypervisor)
9220 pninfo = nodeinfo[pnode]
9221 msg = pninfo.fail_msg
9222 if msg:
9223 # Assume the primary node is unreachable and go ahead
9224 self.warn.append("Can't get info from primary node %s: %s" %
9225 (pnode, msg))
9226 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9227 self.warn.append("Node data from primary node %s doesn't contain"
9228 " free memory information" % pnode)
9229 elif instance_info.fail_msg:
9230 self.warn.append("Can't get instance runtime information: %s" %
9231 instance_info.fail_msg)
9232 else:
9233 if instance_info.payload:
9234 current_mem = int(instance_info.payload['memory'])
9235 else:
9236 # Assume instance not running
9237 # (there is a slight race condition here, but it's not very probable,
9238 # and we have no other way to check)
9239 current_mem = 0
9240 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9241 pninfo.payload['memory_free'])
9242 if miss_mem > 0:
9243 raise errors.OpPrereqError("This change will prevent the instance"
9244 " from starting, due to %d MB of memory"
9245 " missing on its primary node" % miss_mem,
9248 if be_new[constants.BE_AUTO_BALANCE]:
9249 for node, nres in nodeinfo.items():
9250 if node not in instance.secondary_nodes:
9251 continue
9252 msg = nres.fail_msg
9253 if msg:
9254 self.warn.append("Can't get info from secondary node %s: %s" %
9255 (node, msg))
9256 elif not isinstance(nres.payload.get('memory_free', None), int):
9257 self.warn.append("Secondary node %s didn't return free"
9258 " memory information" % node)
9259 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9260 self.warn.append("Not enough memory to failover instance to"
9261 " secondary node %s" % node)
9263 # NIC processing
9264 self.nic_pnew = {}
9265 self.nic_pinst = {}
9266 for nic_op, nic_dict in self.op.nics:
9267 if nic_op == constants.DDM_REMOVE:
9268 if not instance.nics:
9269 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9270 errors.ECODE_INVAL)
9271 continue
9272 if nic_op != constants.DDM_ADD:
9273 # an existing nic
9274 if not instance.nics:
9275 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9276 " no NICs" % nic_op,
9278 if nic_op < 0 or nic_op >= len(instance.nics):
9279 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9281 (nic_op, len(instance.nics) - 1),
9283 old_nic_params = instance.nics[nic_op].nicparams
9284 old_nic_ip = instance.nics[nic_op].ip
9289 update_params_dict = dict([(key, nic_dict[key])
9290 for key in constants.NICS_PARAMETERS
9291 if key in nic_dict])
9293 if 'bridge' in nic_dict:
9294 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9296 new_nic_params = _GetUpdatedParams(old_nic_params,
9298 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9299 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9300 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9301 self.nic_pinst[nic_op] = new_nic_params
9302 self.nic_pnew[nic_op] = new_filled_nic_params
9303 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9305 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9306 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9307 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9309 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9311 self.warn.append(msg)
9313 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9314 if new_nic_mode == constants.NIC_MODE_ROUTED:
9315 if 'ip' in nic_dict:
9316 nic_ip = nic_dict['ip']
9320 raise errors.OpPrereqError('Cannot set the nic ip to None'
9321 ' on a routed nic', errors.ECODE_INVAL)
9322 if 'mac' in nic_dict:
9323 nic_mac = nic_dict['mac']
9325 raise errors.OpPrereqError('Cannot set the nic mac to None',
9327 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9328 # otherwise generate the mac
9329 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9331 # or validate/reserve the current one
9333 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9334 except errors.ReservationError:
9335 raise errors.OpPrereqError("MAC address %s already in use"
9336 " in cluster" % nic_mac,
9337 errors.ECODE_NOTUNIQUE)
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)

    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %s" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                        )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
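  # Dispatch note: Exec() looks the helper up by (old_template, new_template),
  # e.g. self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
  # resolves to _ConvertPlainToDrbd and is called with (self, feedback_fn).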
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
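    # Result shape sketch (illustrative): {"node1": ["inst1.export"],
    # "node2": False}, where False marks a node that failed to answer.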
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                   instance)

    helper.CreateSnapshots()

    if (self.op.shutdown and instance.admin_up and
        not self.op.remove_instance):
      assert not activate_disks
      feedback_fn("Starting instance %s" % instance.name)
      result = self.rpc.call_instance_start(src_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        feedback_fn("Failed to start instance: %s" % msg)
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      (fin_resu, dresults) = helper.LocalExport(self.dst_node)
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      connect_timeout = constants.RIE_CONNECT_TIMEOUT
      timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

      (key_name, _, _) = self.x509_key_name

      dest_ca_pem = \
        OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                        self.dest_x509_ca)

      (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                 key_name, dest_ca_pem,
                                                 timeouts)

    # Check for backwards compatibility
    assert len(dresults) == len(instance.disks)
    assert compat.all(isinstance(i, bool) for i in dresults), \
           "Not all results are boolean: %r" % dresults

    if activate_disks:
      feedback_fn("Deactivating disks for %s" % instance.name)
      _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
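    # Hook contract as used throughout this module: BuildHooksEnv returns
    # (env, pre-hook node list, post-hook node list); for group addition both
    # hook phases run on the master node only.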
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_NET_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)
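    # Worked example (illustrative only): with nodes n1, n2 in group g1 and
    # n3 in group g2, a DRBD instance on [n1, n2] plus
    # changes == [("n2", g2_uuid)] makes that instance newly split, so its
    # name lands in the first list of the returned tuple.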
    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass
  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for
    # the latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None
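    # Shape sketch (illustrative): group_to_nodes could end up as
    # {"uuid-1": ["node1", "node2"]} and group_to_instances as
    # {"uuid-1": ["inst1"]}; either stays None when not requested.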
    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """

  def CheckArguments(self):
    self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given old_name exists as a node group, and that
    new_name doesn't.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OLD_NAME": self.op.old_name,
      "NEW_NAME": self.op.new_name,
      }

    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()
    run_nodes = [mn]
    all_nodes.pop(mn, None)

    for node in all_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return env, run_nodes, run_nodes

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.old_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
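    # Illustrative sketch: with a tag "prod" on instance "web1", a pattern
    # like "^prod" would produce [("/instances/web1", "prod")] below.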
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))

    return results
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
      # Report how many test messages have been sent
      self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.name = None
    self.relocate_from = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
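    # Usage sketch (illustrative): IAllocator(cfg, rpc,
    #   constants.IALLOCATOR_MODE_RELOC, name="inst1", relocate_from=["node2"])
    # builds the input text immediately; Run() then invokes the script.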
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request
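    # Payload sketch (illustrative): self.in_data now holds the cluster-wide
    # keys ("version", "cluster_name", "nodes", "instances", "nodegroups")
    # plus this "request" dict, e.g. roughly {"type": self.mode, "name": ...,
    # "memory": ..., "disks": [...], "required_nodes": 2} for an allocation.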
    self.in_text = serializer.Dump(self.in_data)
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])
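    # A minimal well-formed reply (sketch): {"success": true, "info": "done",
    # "result": ["node2"]}; "result" must be a list, which is checked next.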
    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and
    mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text

    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
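# Usage sketch (illustrative): _GetQueryImplementation(constants.QR_GROUP)
# returns the _GroupQuery class defined above; an unknown resource name raises
# OpPrereqError rather than a bare KeyError.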