# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""
# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611

def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
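
# A minimal usage sketch (hypothetical caller; "node" is an objects.Node):
# the return value doubles as a truth value, so callers can simply test it.
#
#   oob_program = _SupportsOob(self.cfg, node)
#   if oob_program:
#     # node is OOB-capable; oob_program is the helper to invoke
#     ...
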
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    Empty node lists should be returned as empty lists (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
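
  # A sketch of how a concurrent LU typically wires this up (hypothetical
  # LU, using LOCKS_REPLACE as referenced above):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()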


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Constructor for Tasklet.

    """
    self.lu = lu

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, names, fields, use_locking):
    """Initializes this class.

    """
    self.names = names
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields)
    self.requested_data = self.query.RequestedData()

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  @classmethod
  def FieldsQuery(cls, fields):
    """Returns list of available fields.

    @return: List of L{objects.QueryFieldDefinition}

    """
    return query.QueryFields(cls.FIELDS, fields)

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
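
# A small illustration of the semantics above (hypothetical values, shown
# as a comment so nothing runs at import time): with use_default=True,
# VALUE_DEFAULT entries delete the key so the cluster default applies again.
#
#   old = {"mem": 512, "vcpus": 2}
#   upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 4}
#   _GetUpdatedParams(old, upd) == {"vcpus": 4}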


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
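
# Usage sketch (hypothetical field names): query LUs validate the opcode's
# requested output fields against what they can provide, e.g.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dtotal", "dfree"),
#                      selected=self.op.output_fields)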


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
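
# Expansion sketch (hypothetical names): a short name is resolved to the
# canonical FQDN stored in the configuration, or OpPrereqError is raised.
#
#   _ExpandNodeName(cfg, "node1")    # -> "node1.example.com"
#   _ExpandInstanceName(cfg, "web")  # -> "web.example.com"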


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
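
# For illustration, a hypothetical instance with one bridged NIC and one
# disk would yield (a subset of) the following environment:
#
#   INSTANCE_NAME=web.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=10240
#
# (the hooks runner later prefixes each key with "GANETI_", per the
# BuildHooksEnv docstring above)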


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
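
# Worked example (hypothetical numbers): with candidate_pool_size = 10 and
# currently 3 candidates out of 3 possible, mc_should becomes
# min(3 + 1, 10) = 4, and 3 < 4 means the new node should promote itself.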


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
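
# Variant names follow the "<os>+<variant>" convention, e.g. a hypothetical
# OS name "debootstrap+default": GetVariant returns "default", which must
# then appear in the OS object's supported_variants list.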


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
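
# The result is the list of indices of instance disks whose local disk
# status on the given node is faulty; e.g. a hypothetical return value of
# [1] means only disk/1 is degraded there.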


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")
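
# A typical call from an LU's CheckArguments/CheckPrereq (slot names are
# those of the opcode being processed; a hypothetical sketch):
#
#   _CheckIAllocatorOrNode(self, "iallocator", "pnode")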


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = msg

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
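
  # Usage sketch for the helpers above (hypothetical "test", "node" and
  # "msg" values): the error-code tuple selects the item type, and the
  # severity can be downgraded via the ETYPE_FIELD ("code") keyword:
  #
  #   _ErrorIf(test, self.ENODELVM, node,
  #            "LVM problem on node: %s", msg, code=self.ETYPE_WARNING)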

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return not test

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing on the node
      test1 = file_name not in remote_cksum
      # present, but with a wrong checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # present and with a matching checksum
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
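
  # The assertions above pin down the shape of the return value; for a
  # single-disk DRBD instance on two nodes it looks like (illustrative):
  #   {"inst1.example.com": {"node1": [(True, <payload>)],
  #                          "node2": [(True, <payload>)]}}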

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes
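
  # The resulting hooks environment is flat; with hypothetical tag values:
  #   {"CLUSTER_TAGS": "prod dc1",
  #    "NODE_TAGS_node1.example.com": "rack4 ssd"}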

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # Compute the set of hypervisor parameters
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS: hvp_data,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_NET_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if not inst.admin_up:
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform {iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
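

# A typical (hypothetical) LUClusterVerifyDisks result, matching the
# docstring above:
#   ({"node3": "rpc failure"},                    # per-node enumeration errors
#    ["inst1"],                                   # need activate-disks
#    {"inst2": [("node2", "xenvg/disk0_data")]})  # missing (node, LV) pairs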


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
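
  # Worked example for _EnsureChildSizes: a DRBD8 disk recorded at 10240 MiB
  # whose data child LV is only 10236 MiB gets the child bumped to 10240, and
  # the method returns True, telling Exec below to write the instance back to
  # the configuration.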

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
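

# Note that _UploadHelper silently skips files that do not exist on the
# master; some of the distributed files (e.g. the RAPI users file) are
# optional and may legitimately be absent.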


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                    ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
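

# Typical usage sketch for _WaitForSync: callers treat a False return as
# "disks still degraded", not as an RPC failure (hard failures raise
# errors.RemoteError above), e.g.:
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     raise errors.OpExecError("There are some degraded disks for"
#                              " this instance")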


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
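

# _CheckDiskConsistency usage sketch: probe one side of a mirror, passing
# ldisk=True when only the local storage state matters (e.g. during
# replace-disks), since is_degraded also reflects the network mirror:
#   ok = _CheckDiskConsistency(lu, dev, instance.primary_node, True,
#                              ldisk=True)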


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    for node_name in self.op.node_names:
      node = self.cfg.GetNodeInfo(node_name)

      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
                            for name in self.op.node_names]
    else:
      self.op.node_names = self.cfg.GetNodeList()

    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_names,
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()
    ret = []

    for node in self.nodes:
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("The payload returned by '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("On node '%s' item '%s' has status '%s'",
                                node.name, item, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does"
                               " not match actual power state (%s)"),
                              node.powered, node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s"
                    % type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
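
  # Payload shapes accepted by _CheckPayload, with illustrative values:
  #   health:       [("disk0", "OK"), ("fan1", "WARNING")]
  #   power-status: {constants.OOB_POWER_STATUS_POWERED: True}
  #   power-on, power-off, power-cycle: None (no payload expected)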


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
3760 class LUNodeQueryStorage(NoHooksLU):
3761 """Logical unit for getting information on storage units on node(s).
3764 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3767 def CheckArguments(self):
3768 _CheckOutputFields(static=self._FIELDS_STATIC,
3769 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3770 selected=self.op.output_fields)
3772 def ExpandNames(self):
3773 self.needed_locks = {}
3774 self.share_locks[locking.LEVEL_NODE] = 1
3777 self.needed_locks[locking.LEVEL_NODE] = \
3778 _GetWantedNodes(self, self.op.nodes)
3780 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3782 def Exec(self, feedback_fn):
3783 """Computes the list of nodes and their attributes.
3786 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3788 # Always get name to sort by
3789 if constants.SF_NAME in self.op.output_fields:
3790 fields = self.op.output_fields[:]
3792 fields = [constants.SF_NAME] + self.op.output_fields
3794 # Never ask for node or type as it's only known to the LU
3795 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3796 while extra in fields:
3797 fields.remove(extra)
3799 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3800 name_idx = field_idx[constants.SF_NAME]
3802 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3803 data = self.rpc.call_storage_list(self.nodes,
3804 self.op.storage_type, st_args,
3805 self.op.name, fields)
3809 for node in utils.NiceSort(self.nodes):
3810 nresult = data[node]
3814 msg = nresult.fail_msg
3816 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3819 rows = dict([(row[name_idx], row) for row in nresult.payload])
3821 for name in utils.NiceSort(rows.keys()):
3826 for field in self.op.output_fields:
3827 if field == constants.SF_NODE:
3829 elif field == constants.SF_TYPE:
3830 val = self.op.storage_type
3831 elif field in field_idx:
3832 val = row[field_idx[field]]
3834 raise errors.ParameterError(field)
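# A minimal sketch (illustrative, not upstream code) of the field-list
# normalization in LUNodeQueryStorage.Exec above: the name field is always
# fetched so results can be sorted, while the node and type fields are
# stripped because only the LU can answer them. Plain strings stand in for
# the SF_* constants.
def _ExampleNormalizeStorageFields(output_fields):
  if "name" in output_fields:
    fields = output_fields[:]
  else:
    fields = ["name"] + output_fields
  for extra in ["node", "type"]:
    while extra in fields:
      fields.remove(extra)
  # map each remaining field to its column index in the RPC result rows
  return dict((name, idx) for (idx, name) in enumerate(fields))

# _ExampleNormalizeStorageFields(["node", "size", "name"])
# => {"size": 0, "name": 1}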
3843 class _InstanceQuery(_QueryBase):
3844 FIELDS = query.INSTANCE_FIELDS
3846 def ExpandNames(self, lu):
3847 lu.needed_locks = {}
3848 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3849 lu.share_locks[locking.LEVEL_NODE] = 1
3852 self.wanted = _GetWantedInstances(lu, self.names)
3854 self.wanted = locking.ALL_SET
3856 self.do_locking = (self.use_locking and
3857 query.IQ_LIVE in self.requested_data)
3859 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3860 lu.needed_locks[locking.LEVEL_NODE] = []
3861 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3863 def DeclareLocks(self, lu, level):
3864 if level == locking.LEVEL_NODE and self.do_locking:
3865 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3867 def _GetQueryData(self, lu):
3868 """Computes the list of instances and their attributes.
3871 all_info = lu.cfg.GetAllInstancesInfo()
3873 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3875 instance_list = [all_info[name] for name in instance_names]
3876 nodes = frozenset(itertools.chain(*(inst.all_nodes
3877 for inst in instance_list)))
3878 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3881 wrongnode_inst = set()
3883 # Gather data as requested
3884 if query.IQ_LIVE in self.requested_data:
3886 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3888 result = node_data[name]
3890 # offline nodes will be in both lists
3891 assert result.fail_msg
3892 offline_nodes.append(name)
3894 bad_nodes.append(name)
3895 elif result.payload:
3896 for inst in result.payload:
3897 if all_info[inst].primary_node == name:
3898 live_data.update(result.payload)
3900 wrongnode_inst.add(inst)
3901 # else no instance is alive
3905 if query.IQ_DISKUSAGE in self.requested_data:
3906 disk_usage = dict((inst.name,
3907 _ComputeDiskSize(inst.disk_template,
3908 [{"size": disk.size}
3909 for disk in inst.disks]))
3910 for inst in instance_list)
3914 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3915 disk_usage, offline_nodes, bad_nodes,
3916 live_data, wrongnode_inst)
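# Illustrative sketch (not part of the module) of the per-instance disk
# usage dict built above; a plain sum of disk sizes stands in for
# _ComputeDiskSize, which also accounts for template-specific overhead
# (e.g. DRBD metadata).
def _ExampleDiskUsage(instances):
  """instances: iterable of (name, [disk_size_in_mib, ...]) pairs."""
  return dict((name, sum(sizes)) for (name, sizes) in instances)

# _ExampleDiskUsage([("inst1", [1024, 2048]), ("inst2", [512])])
# => {"inst1": 3072, "inst2": 512}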
3919 class LUQuery(NoHooksLU):
3920 """Query for resources/items of a certain kind.
3923 # pylint: disable-msg=W0142
3926 def CheckArguments(self):
3927 qcls = _GetQueryImplementation(self.op.what)
3928 names = qlang.ReadSimpleFilter("name", self.op.filter)
3930 self.impl = qcls(names, self.op.fields, False)
3932 def ExpandNames(self):
3933 self.impl.ExpandNames(self)
3935 def DeclareLocks(self, level):
3936 self.impl.DeclareLocks(self, level)
3938 def Exec(self, feedback_fn):
3939 return self.impl.NewStyleQuery(self)
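# A small sketch (assumption, not upstream code) of the delegation pattern
# shared by LUQuery and the LU*Query classes: the logical unit only wires
# locking and execution through to a query implementation object.
class _ExampleQueryImpl(object):
  def ExpandNames(self, lu):
    lu.needed_locks = {}           # this toy implementation needs no locks

  def NewStyleQuery(self, lu):
    return [("name", "value")]     # stand-in query result

class _ExampleLU(object):
  def __init__(self, impl):
    self.impl = impl

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)

# _ExampleLU(_ExampleQueryImpl()).Exec(None) == [("name", "value")]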
3942 class LUQueryFields(NoHooksLU):
3943 """Query for resources/items of a certain kind.
3946 # pylint: disable-msg=W0142
3949 def CheckArguments(self):
3950 self.qcls = _GetQueryImplementation(self.op.what)
3952 def ExpandNames(self):
3953 self.needed_locks = {}
3955 def Exec(self, feedback_fn):
3956 return self.qcls.FieldsQuery(self.op.fields)
3959 class LUNodeModifyStorage(NoHooksLU):
3960 """Logical unit for modifying a storage volume on a node.
3965 def CheckArguments(self):
3966 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3968 storage_type = self.op.storage_type
3971 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3973 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3974 " modified" % storage_type,
3977 diff = set(self.op.changes.keys()) - modifiable
3979 raise errors.OpPrereqError("The following fields can not be modified for"
3980 " storage units of type '%s': %r" %
3981 (storage_type, list(diff)),
3984 def ExpandNames(self):
3985 self.needed_locks = {
3986 locking.LEVEL_NODE: self.op.node_name,
3989 def Exec(self, feedback_fn):
3990 """Computes the list of nodes and their attributes.
3993 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3994 result = self.rpc.call_storage_modify(self.op.node_name,
3995 self.op.storage_type, st_args,
3996 self.op.name, self.op.changes)
3997 result.Raise("Failed to modify storage unit '%s' on %s" %
3998 (self.op.name, self.op.node_name))
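# Illustrative sketch (not part of the module) of the validation done in
# LUNodeModifyStorage.CheckArguments: the requested changes are checked
# against the per-type set of modifiable fields with a plain set
# difference; ValueError stands in for errors.OpPrereqError.
def _ExampleCheckModifiable(changes, modifiable):
  """changes: dict field -> new value; modifiable: set of field names."""
  diff = set(changes.keys()) - modifiable
  if diff:
    raise ValueError("The following fields cannot be modified: %r"
                     % list(diff))

# _ExampleCheckModifiable({"allocatable": False}, set(["allocatable"]))  # ok
# _ExampleCheckModifiable({"size": 1}, set(["allocatable"]))      # raises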
4001 class LUNodeAdd(LogicalUnit):
4002 """Logical unit for adding node to the cluster.
4006 HTYPE = constants.HTYPE_NODE
4007 _NFLAGS = ["master_capable", "vm_capable"]
4009 def CheckArguments(self):
4010 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4011 # validate/normalize the node name
4012 self.hostname = netutils.GetHostname(name=self.op.node_name,
4013 family=self.primary_ip_family)
4014 self.op.node_name = self.hostname.name
4015 if self.op.readd and self.op.group:
4016 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4017 " being readded", errors.ECODE_INVAL)
4019 def BuildHooksEnv(self):
4022 This will run on all nodes before, and on all nodes + the new node after.
4026 "OP_TARGET": self.op.node_name,
4027 "NODE_NAME": self.op.node_name,
4028 "NODE_PIP": self.op.primary_ip,
4029 "NODE_SIP": self.op.secondary_ip,
4030 "MASTER_CAPABLE": str(self.op.master_capable),
4031 "VM_CAPABLE": str(self.op.vm_capable),
4033 nodes_0 = self.cfg.GetNodeList()
4034 nodes_1 = nodes_0 + [self.op.node_name, ]
4035 return env, nodes_0, nodes_1
4037 def CheckPrereq(self):
4038 """Check prerequisites.
4041 - the new node is not already in the config
4043 - its parameters (single/dual homed) match the cluster
4045 Any errors are signaled by raising errors.OpPrereqError.
4049 hostname = self.hostname
4050 node = hostname.name
4051 primary_ip = self.op.primary_ip = hostname.ip
4052 if self.op.secondary_ip is None:
4053 if self.primary_ip_family == netutils.IP6Address.family:
4054 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4055 " IPv4 address must be given as secondary",
4057 self.op.secondary_ip = primary_ip
4059 secondary_ip = self.op.secondary_ip
4060 if not netutils.IP4Address.IsValid(secondary_ip):
4061 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4062 " address" % secondary_ip, errors.ECODE_INVAL)
4064 node_list = cfg.GetNodeList()
4065 if not self.op.readd and node in node_list:
4066 raise errors.OpPrereqError("Node %s is already in the configuration" %
4067 node, errors.ECODE_EXISTS)
4068 elif self.op.readd and node not in node_list:
4069 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4072 self.changed_primary_ip = False
4074 for existing_node_name in node_list:
4075 existing_node = cfg.GetNodeInfo(existing_node_name)
4077 if self.op.readd and node == existing_node_name:
4078 if existing_node.secondary_ip != secondary_ip:
4079 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4080 " address configuration as before",
4082 if existing_node.primary_ip != primary_ip:
4083 self.changed_primary_ip = True
4087 if (existing_node.primary_ip == primary_ip or
4088 existing_node.secondary_ip == primary_ip or
4089 existing_node.primary_ip == secondary_ip or
4090 existing_node.secondary_ip == secondary_ip):
4091 raise errors.OpPrereqError("New node ip address(es) conflict with"
4092 " existing node %s" % existing_node.name,
4093 errors.ECODE_NOTUNIQUE)
4095 # After this 'if' block, None is no longer a valid value for the
4096 # _capable op attributes
4098 old_node = self.cfg.GetNodeInfo(node)
4099 assert old_node is not None, "Can't retrieve locked node %s" % node
4100 for attr in self._NFLAGS:
4101 if getattr(self.op, attr) is None:
4102 setattr(self.op, attr, getattr(old_node, attr))
4104 for attr in self._NFLAGS:
4105 if getattr(self.op, attr) is None:
4106 setattr(self.op, attr, True)
4108 if self.op.readd and not self.op.vm_capable:
4109 pri, sec = cfg.GetNodeInstances(node)
4111 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4112 " flag set to false, but it already holds"
4113 " instances" % node,
4116 # check that the type of the node (single versus dual homed) is the
4117 # same as for the master
4118 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4119 master_singlehomed = myself.secondary_ip == myself.primary_ip
4120 newbie_singlehomed = secondary_ip == primary_ip
4121 if master_singlehomed != newbie_singlehomed:
4122 if master_singlehomed:
4123 raise errors.OpPrereqError("The master has no secondary ip but the"
4124 " new node has one",
4127 raise errors.OpPrereqError("The master has a secondary ip but the"
4128 " new node doesn't have one",
4131 # check reachability
4132 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4133 raise errors.OpPrereqError("Node not reachable by ping",
4134 errors.ECODE_ENVIRON)
4136 if not newbie_singlehomed:
4137 # check reachability from my secondary ip to newbie's secondary ip
4138 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4139 source=myself.secondary_ip):
4140 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4141 " based ping to node daemon port",
4142 errors.ECODE_ENVIRON)
4149 if self.op.master_capable:
4150 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4152 self.master_candidate = False
4155 self.new_node = old_node
4157 node_group = cfg.LookupNodeGroup(self.op.group)
4158 self.new_node = objects.Node(name=node,
4159 primary_ip=primary_ip,
4160 secondary_ip=secondary_ip,
4161 master_candidate=self.master_candidate,
4162 offline=False, drained=False,
4165 if self.op.ndparams:
4166 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4168 def Exec(self, feedback_fn):
4169 """Adds the new node to the cluster.
4172 new_node = self.new_node
4173 node = new_node.name
4175 # We are adding a new node, so we assume it's powered
4176 new_node.powered = True
4178 # for re-adds, reset the offline/drained/master-candidate flags;
4179 # we need to reset here, otherwise offline would prevent RPC calls
4180 # later in the procedure; this also means that if the re-add
4181 # fails, we are left with a non-offlined, broken node
4183 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4184 self.LogInfo("Readding a node, the offline/drained flags were reset")
4185 # if we demote the node, we do cleanup later in the procedure
4186 new_node.master_candidate = self.master_candidate
4187 if self.changed_primary_ip:
4188 new_node.primary_ip = self.op.primary_ip
4190 # copy the master/vm_capable flags
4191 for attr in self._NFLAGS:
4192 setattr(new_node, attr, getattr(self.op, attr))
4194 # notify the user about any possible mc promotion
4195 if new_node.master_candidate:
4196 self.LogInfo("Node will be a master candidate")
4198 if self.op.ndparams:
4199 new_node.ndparams = self.op.ndparams
4201 new_node.ndparams = {}
4203 # check connectivity
4204 result = self.rpc.call_version([node])[node]
4205 result.Raise("Can't get version information from node %s" % node)
4206 if constants.PROTOCOL_VERSION == result.payload:
4207 logging.info("Communication to node %s fine, sw version %s match",
4208 node, result.payload)
4210 raise errors.OpExecError("Version mismatch master version %s,"
4211 " node version %s" %
4212 (constants.PROTOCOL_VERSION, result.payload))
4214 # Add node to our /etc/hosts, and add key to known_hosts
4215 if self.cfg.GetClusterInfo().modify_etc_hosts:
4216 master_node = self.cfg.GetMasterNode()
4217 result = self.rpc.call_etc_hosts_modify(master_node,
4218 constants.ETC_HOSTS_ADD,
4221 result.Raise("Can't update hosts file with new host data")
4223 if new_node.secondary_ip != new_node.primary_ip:
4224 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4227 node_verify_list = [self.cfg.GetMasterNode()]
4228 node_verify_param = {
4229 constants.NV_NODELIST: [node],
4230 # TODO: do a node-net-test as well?
4233 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4234 self.cfg.GetClusterName())
4235 for verifier in node_verify_list:
4236 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4237 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4239 for failed in nl_payload:
4240 feedback_fn("ssh/hostname verification failed"
4241 " (checking from %s): %s" %
4242 (verifier, nl_payload[failed]))
4243 raise errors.OpExecError("ssh/hostname verification failed.")
4246 _RedistributeAncillaryFiles(self)
4247 self.context.ReaddNode(new_node)
4248 # make sure we redistribute the config
4249 self.cfg.Update(new_node, feedback_fn)
4250 # and make sure the new node will not have old files around
4251 if not new_node.master_candidate:
4252 result = self.rpc.call_node_demote_from_mc(new_node.name)
4253 msg = result.fail_msg
4255 self.LogWarning("Node failed to demote itself from master"
4256 " candidate status: %s" % msg)
4258 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4259 additional_vm=self.op.vm_capable)
4260 self.context.AddNode(new_node, self.proc.GetECId())
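# A minimal sketch (assumption, not upstream code) of the handshake
# performed while adding a node: the join only proceeds when the remote
# daemon reports exactly the master's protocol version; RuntimeError
# stands in for errors.OpExecError.
def _ExampleVersionHandshake(master_version, node_version):
  if master_version != node_version:
    raise RuntimeError("Version mismatch master version %s, node version %s"
                       % (master_version, node_version))

# _ExampleVersionHandshake(40, 40)  # fine
# _ExampleVersionHandshake(40, 30)  # raises RuntimeError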
4263 class LUNodeSetParams(LogicalUnit):
4264 """Modifies the parameters of a node.
4266 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4267 to the node role (as _ROLE_*)
4268 @cvar _R2F: a dictionary from node role to tuples of flags
4269 @cvar _FLAGS: a list of attribute names corresponding to the flags
4272 HPATH = "node-modify"
4273 HTYPE = constants.HTYPE_NODE
4275 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4277 (True, False, False): _ROLE_CANDIDATE,
4278 (False, True, False): _ROLE_DRAINED,
4279 (False, False, True): _ROLE_OFFLINE,
4280 (False, False, False): _ROLE_REGULAR,
4282 _R2F = dict((v, k) for k, v in _F2R.items())
4283 _FLAGS = ["master_candidate", "drained", "offline"]
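# Illustrative examples (restating the tables above, not new behaviour):
# exactly one of the (master_candidate, drained, offline) flags may be set
# at a time, and _F2R/_R2F translate between flag tuples and roles, e.g.:
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _F2R[(False, False, False)] == _ROLE_REGULAR
#   _R2F[_ROLE_DRAINED] == (False, True, False)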
4285 def CheckArguments(self):
4286 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4287 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4288 self.op.master_capable, self.op.vm_capable,
4289 self.op.secondary_ip, self.op.ndparams]
4290 if all_mods.count(None) == len(all_mods):
4291 raise errors.OpPrereqError("Please pass at least one modification",
4293 if all_mods.count(True) > 1:
4294 raise errors.OpPrereqError("Can't set the node into more than one"
4295 " state at the same time",
4298 # Boolean value that tells us whether we might be demoting from MC
4299 self.might_demote = (self.op.master_candidate == False or
4300 self.op.offline == True or
4301 self.op.drained == True or
4302 self.op.master_capable == False)
4304 if self.op.secondary_ip:
4305 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4306 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4307 " address" % self.op.secondary_ip,
4310 self.lock_all = self.op.auto_promote and self.might_demote
4311 self.lock_instances = self.op.secondary_ip is not None
4313 def ExpandNames(self):
4315 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4317 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4319 if self.lock_instances:
4320 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4322 def DeclareLocks(self, level):
4323 # If we have locked all instances, release the locks of those living on
4324 # nodes unrelated to the current operation before waiting to lock nodes.
4325 if level == locking.LEVEL_NODE and self.lock_instances:
4326 instances_release = []
4328 self.affected_instances = []
4329 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4330 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4331 instance = self.context.cfg.GetInstanceInfo(instance_name)
4332 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4333 if i_mirrored and self.op.node_name in instance.all_nodes:
4334 instances_keep.append(instance_name)
4335 self.affected_instances.append(instance)
4337 instances_release.append(instance_name)
4338 if instances_release:
4339 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4340 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4342 def BuildHooksEnv(self):
4345 This runs on the master node.
4349 "OP_TARGET": self.op.node_name,
4350 "MASTER_CANDIDATE": str(self.op.master_candidate),
4351 "OFFLINE": str(self.op.offline),
4352 "DRAINED": str(self.op.drained),
4353 "MASTER_CAPABLE": str(self.op.master_capable),
4354 "VM_CAPABLE": str(self.op.vm_capable),
4356 nl = [self.cfg.GetMasterNode(),
4360 def CheckPrereq(self):
4361 """Check prerequisites.
4363 This checks the requested flag changes against the node's current state.
4366 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4368 if (self.op.master_candidate is not None or
4369 self.op.drained is not None or
4370 self.op.offline is not None):
4371 # we can't change the master's node flags
4372 if self.op.node_name == self.cfg.GetMasterNode():
4373 raise errors.OpPrereqError("The master role can be changed"
4374 " only via master-failover",
4377 if self.op.master_candidate and not node.master_capable:
4378 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4379 " it a master candidate" % node.name,
4382 if self.op.vm_capable == False:
4383 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4385 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4386 " the vm_capable flag" % node.name,
4389 if node.master_candidate and self.might_demote and not self.lock_all:
4390 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4391 # check if after removing the current node, we're missing master
4393 (mc_remaining, mc_should, _) = \
4394 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4395 if mc_remaining < mc_should:
4396 raise errors.OpPrereqError("Not enough master candidates, please"
4397 " pass auto promote option to allow"
4398 " promotion", errors.ECODE_STATE)
4400 self.old_flags = old_flags = (node.master_candidate,
4401 node.drained, node.offline)
4402 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4403 self.old_role = old_role = self._F2R[old_flags]
4405 # Check for ineffective changes
4406 for attr in self._FLAGS:
4407 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4408 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4409 setattr(self.op, attr, None)
4411 # Past this point, any flag change to False means a transition
4412 # away from the respective state, as only real changes are kept
4414 # TODO: We might query the real power state if it supports OOB
4415 if _SupportsOob(self.cfg, node):
4416 if self.op.offline is False and not (node.powered or
4417 self.op.powered == True):
4418 raise errors.OpPrereqError(("Please power on node %s first before you"
4419 " can reset offline state") %
4421 elif self.op.powered is not None:
4422 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4423 " which does not support out-of-band"
4424 " handling") % self.op.node_name)
4426 # If we're being de-offlined/un-drained, we'll promote to master candidate if needed
4427 if (self.op.drained == False or self.op.offline == False or
4428 (self.op.master_capable and not node.master_capable)):
4429 if _DecideSelfPromotion(self):
4430 self.op.master_candidate = True
4431 self.LogInfo("Auto-promoting node to master candidate")
4433 # If we're no longer master capable, we'll demote ourselves from MC
4434 if self.op.master_capable == False and node.master_candidate:
4435 self.LogInfo("Demoting from master candidate")
4436 self.op.master_candidate = False
4439 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4440 if self.op.master_candidate:
4441 new_role = self._ROLE_CANDIDATE
4442 elif self.op.drained:
4443 new_role = self._ROLE_DRAINED
4444 elif self.op.offline:
4445 new_role = self._ROLE_OFFLINE
4446 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4447 # False is still in new flags, which means we're un-setting (the
4449 new_role = self._ROLE_REGULAR
4450 else: # no new flags, nothing, keep old role
4453 self.new_role = new_role
4455 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4456 # Trying to transition out of offline status
4457 result = self.rpc.call_version([node.name])[node.name]
4459 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4460 " to report its version: %s" %
4461 (node.name, result.fail_msg),
4464 self.LogWarning("Transitioning node from offline to online state"
4465 " without using re-add. Please make sure the node"
4468 if self.op.secondary_ip:
4469 # Ok even without locking, because this can't be changed by any LU
4470 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4471 master_singlehomed = master.secondary_ip == master.primary_ip
4472 if master_singlehomed and self.op.secondary_ip:
4473 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4474 " homed cluster", errors.ECODE_INVAL)
4477 if self.affected_instances:
4478 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4479 " node has instances (%s) configured"
4480 " to use it" % self.affected_instances)
4482 # On online nodes, check that no instances are running, and that
4483 # the node has the new ip and we can reach it.
4484 for instance in self.affected_instances:
4485 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4487 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4488 if master.name != node.name:
4489 # check reachability from master secondary ip to new secondary ip
4490 if not netutils.TcpPing(self.op.secondary_ip,
4491 constants.DEFAULT_NODED_PORT,
4492 source=master.secondary_ip):
4493 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4494 " based ping to node daemon port",
4495 errors.ECODE_ENVIRON)
4497 if self.op.ndparams:
4498 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4499 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4500 self.new_ndparams = new_ndparams
4502 def Exec(self, feedback_fn):
4507 old_role = self.old_role
4508 new_role = self.new_role
4512 if self.op.ndparams:
4513 node.ndparams = self.new_ndparams
4515 if self.op.powered is not None:
4516 node.powered = self.op.powered
4518 for attr in ["master_capable", "vm_capable"]:
4519 val = getattr(self.op, attr)
4521 setattr(node, attr, val)
4522 result.append((attr, str(val)))
4524 if new_role != old_role:
4525 # Tell the node to demote itself, if no longer MC and not offline
4526 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4527 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4529 self.LogWarning("Node failed to demote itself: %s", msg)
4531 new_flags = self._R2F[new_role]
4532 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4534 result.append((desc, str(nf)))
4535 (node.master_candidate, node.drained, node.offline) = new_flags
4537 # we locked all nodes, so we adjust the candidate pool before updating this node
4539 _AdjustCandidatePool(self, [node.name])
4541 if self.op.secondary_ip:
4542 node.secondary_ip = self.op.secondary_ip
4543 result.append(("secondary_ip", self.op.secondary_ip))
4545 # this will trigger configuration file update, if needed
4546 self.cfg.Update(node, feedback_fn)
4548 # this will trigger job queue propagation or cleanup if the mc flag changed
4550 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4551 self.context.ReaddNode(node)
4556 class LUNodePowercycle(NoHooksLU):
4557 """Powercycles a node.
4562 def CheckArguments(self):
4563 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4564 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4565 raise errors.OpPrereqError("The node is the master and the force"
4566 " parameter was not set",
4569 def ExpandNames(self):
4570 """Locking for PowercycleNode.
4572 This is a last-resort option and shouldn't block on other
4573 jobs. Therefore, we grab no locks.
4576 self.needed_locks = {}
4578 def Exec(self, feedback_fn):
4582 result = self.rpc.call_node_powercycle(self.op.node_name,
4583 self.cfg.GetHypervisorType())
4584 result.Raise("Failed to schedule the reboot")
4585 return result.payload
4588 class LUClusterQuery(NoHooksLU):
4589 """Query cluster configuration.
4594 def ExpandNames(self):
4595 self.needed_locks = {}
4597 def Exec(self, feedback_fn):
4598 """Return cluster config.
4601 cluster = self.cfg.GetClusterInfo()
4604 # Filter just for enabled hypervisors
4605 for os_name, hv_dict in cluster.os_hvp.items():
4606 os_hvp[os_name] = {}
4607 for hv_name, hv_params in hv_dict.items():
4608 if hv_name in cluster.enabled_hypervisors:
4609 os_hvp[os_name][hv_name] = hv_params
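# Sketch of the filtering above with plain dicts (illustrative only): with
# enabled_hypervisors == ["kvm"], an os_hvp input of
#   {"os1": {"kvm": {...}, "xen-pvm": {...}}}
# collapses to
#   {"os1": {"kvm": {...}}}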
4611 # Convert ip_family to ip_version
4612 primary_ip_version = constants.IP4_VERSION
4613 if cluster.primary_ip_family == netutils.IP6Address.family:
4614 primary_ip_version = constants.IP6_VERSION
4617 "software_version": constants.RELEASE_VERSION,
4618 "protocol_version": constants.PROTOCOL_VERSION,
4619 "config_version": constants.CONFIG_VERSION,
4620 "os_api_version": max(constants.OS_API_VERSIONS),
4621 "export_version": constants.EXPORT_VERSION,
4622 "architecture": (platform.architecture()[0], platform.machine()),
4623 "name": cluster.cluster_name,
4624 "master": cluster.master_node,
4625 "default_hypervisor": cluster.enabled_hypervisors[0],
4626 "enabled_hypervisors": cluster.enabled_hypervisors,
4627 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4628 for hypervisor_name in cluster.enabled_hypervisors]),
4630 "beparams": cluster.beparams,
4631 "osparams": cluster.osparams,
4632 "nicparams": cluster.nicparams,
4633 "ndparams": cluster.ndparams,
4634 "candidate_pool_size": cluster.candidate_pool_size,
4635 "master_netdev": cluster.master_netdev,
4636 "volume_group_name": cluster.volume_group_name,
4637 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4638 "file_storage_dir": cluster.file_storage_dir,
4639 "maintain_node_health": cluster.maintain_node_health,
4640 "ctime": cluster.ctime,
4641 "mtime": cluster.mtime,
4642 "uuid": cluster.uuid,
4643 "tags": list(cluster.GetTags()),
4644 "uid_pool": cluster.uid_pool,
4645 "default_iallocator": cluster.default_iallocator,
4646 "reserved_lvs": cluster.reserved_lvs,
4647 "primary_ip_version": primary_ip_version,
4648 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4649 "hidden_os": cluster.hidden_os,
4650 "blacklisted_os": cluster.blacklisted_os,
4656 class LUClusterConfigQuery(NoHooksLU):
4657 """Return configuration values.
4661 _FIELDS_DYNAMIC = utils.FieldSet()
4662 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4663 "watcher_pause", "volume_group_name")
4665 def CheckArguments(self):
4666 _CheckOutputFields(static=self._FIELDS_STATIC,
4667 dynamic=self._FIELDS_DYNAMIC,
4668 selected=self.op.output_fields)
4670 def ExpandNames(self):
4671 self.needed_locks = {}
4673 def Exec(self, feedback_fn):
4674 """Dump a representation of the cluster config to the standard output.
4678 for field in self.op.output_fields:
4679 if field == "cluster_name":
4680 entry = self.cfg.GetClusterName()
4681 elif field == "master_node":
4682 entry = self.cfg.GetMasterNode()
4683 elif field == "drain_flag":
4684 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4685 elif field == "watcher_pause":
4686 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4687 elif field == "volume_group_name":
4688 entry = self.cfg.GetVGName()
4690 raise errors.ParameterError(field)
4691 values.append(entry)
4695 class LUInstanceActivateDisks(NoHooksLU):
4696 """Bring up an instance's disks.
4701 def ExpandNames(self):
4702 self._ExpandAndLockInstance()
4703 self.needed_locks[locking.LEVEL_NODE] = []
4704 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4706 def DeclareLocks(self, level):
4707 if level == locking.LEVEL_NODE:
4708 self._LockInstancesNodes()
4710 def CheckPrereq(self):
4711 """Check prerequisites.
4713 This checks that the instance is in the cluster.
4716 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4717 assert self.instance is not None, \
4718 "Cannot retrieve locked instance %s" % self.op.instance_name
4719 _CheckNodeOnline(self, self.instance.primary_node)
4721 def Exec(self, feedback_fn):
4722 """Activate the disks.
4725 disks_ok, disks_info = \
4726 _AssembleInstanceDisks(self, self.instance,
4727 ignore_size=self.op.ignore_size)
4729 raise errors.OpExecError("Cannot activate block devices")
4734 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4736 """Prepare the block devices for an instance.
4738 This sets up the block devices on all nodes.
4740 @type lu: L{LogicalUnit}
4741 @param lu: the logical unit on whose behalf we execute
4742 @type instance: L{objects.Instance}
4743 @param instance: the instance for whose disks we assemble
4744 @type disks: list of L{objects.Disk} or None
4745 @param disks: which disks to assemble (or all, if None)
4746 @type ignore_secondaries: boolean
4747 @param ignore_secondaries: if true, errors on secondary nodes
4748 won't result in an error return from the function
4749 @type ignore_size: boolean
4750 @param ignore_size: if true, the current known size of the disk
4751 will not be used during the disk activation, useful for cases
4752 when the size is wrong
4753 @return: a tuple of (disks_ok, device_info); device_info is a list of
4754 (host, instance_visible_name, node_visible_name) tuples
4755 with the mapping from node devices to instance devices
4760 iname = instance.name
4761 disks = _ExpandCheckDisks(instance, disks)
4763 # With the two-pass mechanism we try to reduce the window of
4764 # opportunity for the race condition of switching DRBD to primary
4765 # before the handshake occurred, but we do not eliminate it
4767 # The proper fix would be to wait (with some limits) until the
4768 # connection has been made and drbd transitions from WFConnection
4769 # into any other network-connected state (Connected, SyncTarget,
4772 # 1st pass, assemble on all nodes in secondary mode
4773 for inst_disk in disks:
4774 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4776 node_disk = node_disk.Copy()
4777 node_disk.UnsetSize()
4778 lu.cfg.SetDiskID(node_disk, node)
4779 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4780 msg = result.fail_msg
4782 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4783 " (is_primary=False, pass=1): %s",
4784 inst_disk.iv_name, node, msg)
4785 if not ignore_secondaries:
4788 # FIXME: race condition on drbd migration to primary
4790 # 2nd pass, do only the primary node
4791 for inst_disk in disks:
4794 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4795 if node != instance.primary_node:
4798 node_disk = node_disk.Copy()
4799 node_disk.UnsetSize()
4800 lu.cfg.SetDiskID(node_disk, node)
4801 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4802 msg = result.fail_msg
4804 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4805 " (is_primary=True, pass=2): %s",
4806 inst_disk.iv_name, node, msg)
4809 dev_path = result.payload
4811 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4813 # leave the disks configured for the primary node
4814 # this is a workaround that would be better fixed by
4815 # improving the logical/physical id handling
4817 lu.cfg.SetDiskID(disk, instance.primary_node)
4819 return disks_ok, device_info
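# Illustrative sketch (not part of the module) of the two-pass assembly
# order used above: every node first assembles its devices in secondary
# mode, and only afterwards is the primary node switched to primary mode,
# which narrows (but does not close) the DRBD handshake race. assemble_fn
# is a stand-in for the blockdev_assemble RPC.
def _ExampleTwoPassAssemble(nodes, primary, assemble_fn):
  for node in nodes:                      # pass 1: everyone as secondary
    assemble_fn(node, as_primary=False)
  assemble_fn(primary, as_primary=True)   # pass 2: only the primary

# _ExampleTwoPassAssemble(["node1", "node2"], "node1",
#                         lambda node, as_primary: None)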
4822 def _StartInstanceDisks(lu, instance, force):
4823 """Start the disks of an instance.
4826 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4827 ignore_secondaries=force)
4829 _ShutdownInstanceDisks(lu, instance)
4830 if force is not None and not force:
4831 lu.proc.LogWarning("", hint="If the message above refers to a"
4833 " you can retry the operation using '--force'.")
4834 raise errors.OpExecError("Disk consistency error")
4837 class LUInstanceDeactivateDisks(NoHooksLU):
4838 """Shutdown an instance's disks.
4843 def ExpandNames(self):
4844 self._ExpandAndLockInstance()
4845 self.needed_locks[locking.LEVEL_NODE] = []
4846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4848 def DeclareLocks(self, level):
4849 if level == locking.LEVEL_NODE:
4850 self._LockInstancesNodes()
4852 def CheckPrereq(self):
4853 """Check prerequisites.
4855 This checks that the instance is in the cluster.
4858 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4859 assert self.instance is not None, \
4860 "Cannot retrieve locked instance %s" % self.op.instance_name
4862 def Exec(self, feedback_fn):
4863 """Deactivate the disks
4866 instance = self.instance
4868 _ShutdownInstanceDisks(self, instance)
4870 _SafeShutdownInstanceDisks(self, instance)
4873 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4874 """Shutdown block devices of an instance.
4876 This function checks that an instance is not running before calling
4877 _ShutdownInstanceDisks.
4880 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4881 _ShutdownInstanceDisks(lu, instance, disks=disks)
4884 def _ExpandCheckDisks(instance, disks):
4885 """Return the instance disks selected by the disks list
4887 @type disks: list of L{objects.Disk} or None
4888 @param disks: selected disks
4889 @rtype: list of L{objects.Disk}
4890 @return: selected instance disks to act on
4894 return instance.disks
4896 if not set(disks).issubset(instance.disks):
4897 raise errors.ProgrammerError("Can only act on disks belonging to the"
4898 " target instance")
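# A short sketch (illustrative) of the selection rule in _ExpandCheckDisks:
# passing None selects all instance disks, anything else must be a subset;
# ValueError stands in for errors.ProgrammerError.
def _ExampleSelectDisks(instance_disks, disks):
  if disks is None:
    return instance_disks
  if not set(disks).issubset(instance_disks):
    raise ValueError("Can only act on disks belonging to the target"
                     " instance")
  return disks

# _ExampleSelectDisks(["disk0", "disk1"], None)      == ["disk0", "disk1"]
# _ExampleSelectDisks(["disk0", "disk1"], ["disk1"]) == ["disk1"]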
4902 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4903 """Shutdown block devices of an instance.
4905 This does the shutdown on all nodes of the instance.
4907 If ignore_primary is false, errors on the primary node cause a failure result.
4912 disks = _ExpandCheckDisks(instance, disks)
4915 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4916 lu.cfg.SetDiskID(top_disk, node)
4917 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4918 msg = result.fail_msg
4920 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4921 disk.iv_name, node, msg)
4922 if ((node == instance.primary_node and not ignore_primary) or
4923 (node != instance.primary_node and not result.offline)):
4928 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4929 """Checks if a node has enough free memory.
4931 This function checks if a given node has the needed amount of free
4932 memory. In case the node has less memory or we cannot get the
4933 information from the node, this function raises an OpPrereqError
4936 @type lu: C{LogicalUnit}
4937 @param lu: a logical unit from which we get configuration data
4939 @param node: the node to check
4940 @type reason: C{str}
4941 @param reason: string to use in the error message
4942 @type requested: C{int}
4943 @param requested: the amount of memory in MiB to check for
4944 @type hypervisor_name: C{str}
4945 @param hypervisor_name: the hypervisor to ask for memory stats
4946 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4947 we cannot check the node
4950 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4951 nodeinfo[node].Raise("Can't get data from node %s" % node,
4952 prereq=True, ecode=errors.ECODE_ENVIRON)
4953 free_mem = nodeinfo[node].payload.get('memory_free', None)
4954 if not isinstance(free_mem, int):
4955 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4956 " was '%s'" % (node, free_mem),
4957 errors.ECODE_ENVIRON)
4958 if requested > free_mem:
4959 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4960 " needed %s MiB, available %s MiB" %
4961 (node, reason, requested, free_mem),
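# Hypothetical usage sketch (the wiring is an assumption): the same gate
# logic as _CheckNodeFreeMemory above, with plain values instead of an RPC
# result and ValueError standing in for errors.OpPrereqError.
def _ExampleMemoryGate(free_mem, requested, node, reason):
  if not isinstance(free_mem, int):
    raise ValueError("Can't compute free memory on node %s, result was %r"
                     % (node, free_mem))
  if requested > free_mem:
    raise ValueError("Not enough memory on node %s for %s: needed %s MiB,"
                     " available %s MiB" % (node, reason, requested, free_mem))

# _ExampleMemoryGate(4096, 2048, "node1", "starting instance inst1")  # ok
# _ExampleMemoryGate(1024, 2048, "node1", "starting instance inst1")  # raises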
4965 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4966 """Checks if nodes have enough free disk space in the all VGs.
4968 This function check if all given nodes have the needed amount of
4969 free disk. In case any node has less disk or we cannot get the
4970 information from the node, this function raise an OpPrereqError
4973 @type lu: C{LogicalUnit}
4974 @param lu: a logical unit from which we get configuration data
4975 @type nodenames: C{list}
4976 @param nodenames: the list of node names to check
4977 @type req_sizes: C{dict}
4978 @param req_sizes: the hash of vg and corresponding amount of disk in MiB
4980 @raise errors.OpPrereqError: if the node doesn't have enough disk,
4981 or we cannot check the node
4984 for vg, req_size in req_sizes.items():
4985 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
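# Illustrative example (not part of the module) of the req_sizes hash the
# helper above expects, mapping VG name to required MiB; each entry is
# verified on every node via _CheckNodesFreeDiskOnVG:
#
#   req_sizes = {"xenvg": 10240, "datavg": 2048}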
4988 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4989 """Checks if nodes have enough free disk space in the specified VG.
4991 This function checks if all given nodes have the needed amount of
4992 free disk. In case any node has less disk or we cannot get the
4993 information from the node, this function raises an OpPrereqError
4996 @type lu: C{LogicalUnit}
4997 @param lu: a logical unit from which we get configuration data
4998 @type nodenames: C{list}
4999 @param nodenames: the list of node names to check
5001 @param vg: the volume group to check
5002 @type requested: C{int}
5003 @param requested: the amount of disk in MiB to check for
5004 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5005 or we cannot check the node
5008 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5009 for node in nodenames:
5010 info = nodeinfo[node]
5011 info.Raise("Cannot get current information from node %s" % node,
5012 prereq=True, ecode=errors.ECODE_ENVIRON)
5013 vg_free = info.payload.get("vg_free", None)
5014 if not isinstance(vg_free, int):
5015 raise errors.OpPrereqError("Can't compute free disk space on node"
5016 " %s for vg %s, result was '%s'" %
5017 (node, vg, vg_free), errors.ECODE_ENVIRON)
5018 if requested > vg_free:
5019 raise errors.OpPrereqError("Not enough disk space on target node %s"
5020 " vg %s: required %d MiB, available %d MiB" %
5021 (node, vg, requested, vg_free),
5025 class LUInstanceStartup(LogicalUnit):
5026 """Starts an instance.
5029 HPATH = "instance-start"
5030 HTYPE = constants.HTYPE_INSTANCE
5033 def CheckArguments(self):
5035 if self.op.beparams:
5036 # fill the beparams dict
5037 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5039 def ExpandNames(self):
5040 self._ExpandAndLockInstance()
5042 def BuildHooksEnv(self):
5045 This runs on master, primary and secondary nodes of the instance.
5049 "FORCE": self.op.force,
5051 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5052 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5055 def CheckPrereq(self):
5056 """Check prerequisites.
5058 This checks that the instance is in the cluster.
5061 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5062 assert self.instance is not None, \
5063 "Cannot retrieve locked instance %s" % self.op.instance_name
5066 if self.op.hvparams:
5067 # check hypervisor parameter syntax (locally)
5068 cluster = self.cfg.GetClusterInfo()
5069 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5070 filled_hvp = cluster.FillHV(instance)
5071 filled_hvp.update(self.op.hvparams)
5072 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5073 hv_type.CheckParameterSyntax(filled_hvp)
5074 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5076 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5078 if self.primary_offline and self.op.ignore_offline_nodes:
5079 self.proc.LogWarning("Ignoring offline primary node")
5081 if self.op.hvparams or self.op.beparams:
5082 self.proc.LogWarning("Overridden parameters are ignored")
5084 _CheckNodeOnline(self, instance.primary_node)
5086 bep = self.cfg.GetClusterInfo().FillBE(instance)
5088 # check bridge existence
5089 _CheckInstanceBridgesExist(self, instance)
5091 remote_info = self.rpc.call_instance_info(instance.primary_node,
5093 instance.hypervisor)
5094 remote_info.Raise("Error checking node %s" % instance.primary_node,
5095 prereq=True, ecode=errors.ECODE_ENVIRON)
5096 if not remote_info.payload: # not running already
5097 _CheckNodeFreeMemory(self, instance.primary_node,
5098 "starting instance %s" % instance.name,
5099 bep[constants.BE_MEMORY], instance.hypervisor)
5101 def Exec(self, feedback_fn):
5102 """Start the instance.
5105 instance = self.instance
5106 force = self.op.force
5108 self.cfg.MarkInstanceUp(instance.name)
5110 if self.primary_offline:
5111 assert self.op.ignore_offline_nodes
5112 self.proc.LogInfo("Primary node offline, marked instance as started")
5114 node_current = instance.primary_node
5116 _StartInstanceDisks(self, instance, force)
5118 result = self.rpc.call_instance_start(node_current, instance,
5119 self.op.hvparams, self.op.beparams)
5120 msg = result.fail_msg
5122 _ShutdownInstanceDisks(self, instance)
5123 raise errors.OpExecError("Could not start instance: %s" % msg)
5126 class LUInstanceReboot(LogicalUnit):
5127 """Reboot an instance.
5130 HPATH = "instance-reboot"
5131 HTYPE = constants.HTYPE_INSTANCE
5134 def ExpandNames(self):
5135 self._ExpandAndLockInstance()
5137 def BuildHooksEnv(self):
5140 This runs on master, primary and secondary nodes of the instance.
5144 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5145 "REBOOT_TYPE": self.op.reboot_type,
5146 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5148 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5149 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5152 def CheckPrereq(self):
5153 """Check prerequisites.
5155 This checks that the instance is in the cluster.
5158 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5159 assert self.instance is not None, \
5160 "Cannot retrieve locked instance %s" % self.op.instance_name
5162 _CheckNodeOnline(self, instance.primary_node)
5164 # check bridges existence
5165 _CheckInstanceBridgesExist(self, instance)
5167 def Exec(self, feedback_fn):
5168 """Reboot the instance.
5171 instance = self.instance
5172 ignore_secondaries = self.op.ignore_secondaries
5173 reboot_type = self.op.reboot_type
5175 node_current = instance.primary_node
5177 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5178 constants.INSTANCE_REBOOT_HARD]:
5179 for disk in instance.disks:
5180 self.cfg.SetDiskID(disk, node_current)
5181 result = self.rpc.call_instance_reboot(node_current, instance,
5183 self.op.shutdown_timeout)
5184 result.Raise("Could not reboot instance")
5186 result = self.rpc.call_instance_shutdown(node_current, instance,
5187 self.op.shutdown_timeout)
5188 result.Raise("Could not shutdown instance for full reboot")
5189 _ShutdownInstanceDisks(self, instance)
5190 _StartInstanceDisks(self, instance, ignore_secondaries)
5191 result = self.rpc.call_instance_start(node_current, instance, None, None)
5192 msg = result.fail_msg
5194 _ShutdownInstanceDisks(self, instance)
5195 raise errors.OpExecError("Could not start instance for"
5196 " full reboot: %s" % msg)
5198 self.cfg.MarkInstanceUp(instance.name)
5201 class LUInstanceShutdown(LogicalUnit):
5202 """Shutdown an instance.
5205 HPATH = "instance-stop"
5206 HTYPE = constants.HTYPE_INSTANCE
5209 def ExpandNames(self):
5210 self._ExpandAndLockInstance()
5212 def BuildHooksEnv(self):
5215 This runs on master, primary and secondary nodes of the instance.
5218 env = _BuildInstanceHookEnvByObject(self, self.instance)
5219 env["TIMEOUT"] = self.op.timeout
5220 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5223 def CheckPrereq(self):
5224 """Check prerequisites.
5226 This checks that the instance is in the cluster.
5229 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5230 assert self.instance is not None, \
5231 "Cannot retrieve locked instance %s" % self.op.instance_name
5233 self.primary_offline = \
5234 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5236 if self.primary_offline and self.op.ignore_offline_nodes:
5237 self.proc.LogWarning("Ignoring offline primary node")
5239 _CheckNodeOnline(self, self.instance.primary_node)
5241 def Exec(self, feedback_fn):
5242 """Shutdown the instance.
5245 instance = self.instance
5246 node_current = instance.primary_node
5247 timeout = self.op.timeout
5249 self.cfg.MarkInstanceDown(instance.name)
5251 if self.primary_offline:
5252 assert self.op.ignore_offline_nodes
5253 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5255 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5256 msg = result.fail_msg
5258 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5260 _ShutdownInstanceDisks(self, instance)
5263 class LUInstanceReinstall(LogicalUnit):
5264 """Reinstall an instance.
5267 HPATH = "instance-reinstall"
5268 HTYPE = constants.HTYPE_INSTANCE
5271 def ExpandNames(self):
5272 self._ExpandAndLockInstance()
5274 def BuildHooksEnv(self):
5277 This runs on master, primary and secondary nodes of the instance.
5280 env = _BuildInstanceHookEnvByObject(self, self.instance)
5281 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5284 def CheckPrereq(self):
5285 """Check prerequisites.
5287 This checks that the instance is in the cluster and is not running.
5290 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5291 assert instance is not None, \
5292 "Cannot retrieve locked instance %s" % self.op.instance_name
5293 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5294 " offline, cannot reinstall")
5295 for node in instance.secondary_nodes:
5296 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5297 " cannot reinstall")
5299 if instance.disk_template == constants.DT_DISKLESS:
5300 raise errors.OpPrereqError("Instance '%s' has no disks" %
5301 self.op.instance_name,
5303 _CheckInstanceDown(self, instance, "cannot reinstall")
5305 if self.op.os_type is not None:
5307 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5308 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5309 instance_os = self.op.os_type
5311 instance_os = instance.os
5313 nodelist = list(instance.all_nodes)
5315 if self.op.osparams:
5316 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5317 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5318 self.os_inst = i_osdict # the new dict (without defaults)
5322 self.instance = instance
5324 def Exec(self, feedback_fn):
5325 """Reinstall the instance.
5328 inst = self.instance
5330 if self.op.os_type is not None:
5331 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5332 inst.os = self.op.os_type
5333 # Write to configuration
5334 self.cfg.Update(inst, feedback_fn)
5336 _StartInstanceDisks(self, inst, None)
5338 feedback_fn("Running the instance OS create scripts...")
5339 # FIXME: pass debug option from opcode to backend
5340 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5341 self.op.debug_level,
5342 osparams=self.os_inst)
5343 result.Raise("Could not install OS for instance %s on node %s" %
5344 (inst.name, inst.primary_node))
5346 _ShutdownInstanceDisks(self, inst)
5349 class LUInstanceRecreateDisks(LogicalUnit):
5350 """Recreate an instance's missing disks.
5353 HPATH = "instance-recreate-disks"
5354 HTYPE = constants.HTYPE_INSTANCE
5357 def ExpandNames(self):
5358 self._ExpandAndLockInstance()
5360 def BuildHooksEnv(self):
5363 This runs on master, primary and secondary nodes of the instance.
5366 env = _BuildInstanceHookEnvByObject(self, self.instance)
5367 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5370 def CheckPrereq(self):
5371 """Check prerequisites.
5373 This checks that the instance is in the cluster and is not running.
5376 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5377 assert instance is not None, \
5378 "Cannot retrieve locked instance %s" % self.op.instance_name
5379 _CheckNodeOnline(self, instance.primary_node)
5381 if instance.disk_template == constants.DT_DISKLESS:
5382 raise errors.OpPrereqError("Instance '%s' has no disks" %
5383 self.op.instance_name, errors.ECODE_INVAL)
5384 _CheckInstanceDown(self, instance, "cannot recreate disks")
5386 if not self.op.disks:
5387 self.op.disks = range(len(instance.disks))
5389 for idx in self.op.disks:
5390 if idx >= len(instance.disks):
5391 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5394 self.instance = instance
5396 def Exec(self, feedback_fn):
5397 """Recreate the disks.
5401 for idx, _ in enumerate(self.instance.disks):
5402 if idx not in self.op.disks: # disk idx has not been passed in
5406 _CreateDisks(self, self.instance, to_skip=to_skip)
5409 class LUInstanceRename(LogicalUnit):
5410 """Rename an instance.
5413 HPATH = "instance-rename"
5414 HTYPE = constants.HTYPE_INSTANCE
5416 def CheckArguments(self):
5420 if self.op.ip_check and not self.op.name_check:
5421 # TODO: make the ip check more flexible and not depend on the name check
5422 raise errors.OpPrereqError("Cannot do ip check without a name check",
5425 def BuildHooksEnv(self):
5428 This runs on master, primary and secondary nodes of the instance.
5431 env = _BuildInstanceHookEnvByObject(self, self.instance)
5432 env["INSTANCE_NEW_NAME"] = self.op.new_name
5433 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5436 def CheckPrereq(self):
5437 """Check prerequisites.
5439 This checks that the instance is in the cluster and is not running.
5442 self.op.instance_name = _ExpandInstanceName(self.cfg,
5443 self.op.instance_name)
5444 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5445 assert instance is not None
5446 _CheckNodeOnline(self, instance.primary_node)
5447 _CheckInstanceDown(self, instance, "cannot rename")
5448 self.instance = instance
5450 new_name = self.op.new_name
5451 if self.op.name_check:
5452 hostname = netutils.GetHostname(name=new_name)
5453 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5455 new_name = self.op.new_name = hostname.name
5456 if (self.op.ip_check and
5457 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5458 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5459 (hostname.ip, new_name),
5460 errors.ECODE_NOTUNIQUE)
5462 instance_list = self.cfg.GetInstanceList()
5463 if new_name in instance_list and new_name != instance.name:
5464 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5465 new_name, errors.ECODE_EXISTS)
5467 def Exec(self, feedback_fn):
5468 """Rename the instance.
5471 inst = self.instance
5472 old_name = inst.name
5474 rename_file_storage = False
5475 if (inst.disk_template == constants.DT_FILE and
5476 self.op.new_name != inst.name):
5477 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5478 rename_file_storage = True
5480 self.cfg.RenameInstance(inst.name, self.op.new_name)
5481 # Change the instance lock. This is definitely safe while we hold the BGL
5482 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5483 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5485 # re-read the instance from the configuration after rename
5486 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5488 if rename_file_storage:
5489 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5490 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5491 old_file_storage_dir,
5492 new_file_storage_dir)
5493 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5494 " (but the instance has been renamed in Ganeti)" %
5495 (inst.primary_node, old_file_storage_dir,
5496 new_file_storage_dir))
5498 _StartInstanceDisks(self, inst, None)
5500 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5501 old_name, self.op.debug_level)
5502 msg = result.fail_msg
5504 msg = ("Could not run OS rename script for instance %s on node %s"
5505 " (but the instance has been renamed in Ganeti): %s" %
5506 (inst.name, inst.primary_node, msg))
5507 self.proc.LogWarning(msg)
5509 _ShutdownInstanceDisks(self, inst)
5514 class LUInstanceRemove(LogicalUnit):
5515 """Remove an instance.
5518 HPATH = "instance-remove"
5519 HTYPE = constants.HTYPE_INSTANCE
5522 def ExpandNames(self):
5523 self._ExpandAndLockInstance()
5524 self.needed_locks[locking.LEVEL_NODE] = []
5525 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5527 def DeclareLocks(self, level):
5528 if level == locking.LEVEL_NODE:
5529 self._LockInstancesNodes()
5531 def BuildHooksEnv(self):
5534 This runs on master, primary and secondary nodes of the instance.
5537 env = _BuildInstanceHookEnvByObject(self, self.instance)
5538 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5539 nl = [self.cfg.GetMasterNode()]
5540 nl_post = list(self.instance.all_nodes) + nl
5541 return env, nl, nl_post
5543 def CheckPrereq(self):
5544 """Check prerequisites.
5546 This checks that the instance is in the cluster.
5549 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5550 assert self.instance is not None, \
5551 "Cannot retrieve locked instance %s" % self.op.instance_name
5553 def Exec(self, feedback_fn):
5554 """Remove the instance.
5557 instance = self.instance
5558 logging.info("Shutting down instance %s on node %s",
5559 instance.name, instance.primary_node)
5561 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5562 self.op.shutdown_timeout)
5563 msg = result.fail_msg
5565 if self.op.ignore_failures:
5566 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5568 raise errors.OpExecError("Could not shutdown instance %s on"
5570 (instance.name, instance.primary_node, msg))
5572 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5575 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5576 """Utility function to remove an instance.
5579 logging.info("Removing block devices for instance %s", instance.name)
5581 if not _RemoveDisks(lu, instance):
5582 if not ignore_failures:
5583 raise errors.OpExecError("Can't remove instance's disks")
5584 feedback_fn("Warning: can't remove instance's disks")
5586 logging.info("Removing instance %s out of cluster config", instance.name)
5588 lu.cfg.RemoveInstance(instance.name)
5590 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5591 "Instance lock removal conflict"
5593 # Remove lock for the instance
5594 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5597 class LUInstanceQuery(NoHooksLU):
5598 """Logical unit for querying instances.
5601 # pylint: disable-msg=W0142
5604 def CheckArguments(self):
5605 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5606 self.op.use_locking)
5608 def ExpandNames(self):
5609 self.iq.ExpandNames(self)
5611 def DeclareLocks(self, level):
5612 self.iq.DeclareLocks(self, level)
5614 def Exec(self, feedback_fn):
5615 return self.iq.OldStyleQuery(self)
5618 class LUInstanceFailover(LogicalUnit):
5619 """Failover an instance.
5622 HPATH = "instance-failover"
5623 HTYPE = constants.HTYPE_INSTANCE
5626 def ExpandNames(self):
5627 self._ExpandAndLockInstance()
5628 self.needed_locks[locking.LEVEL_NODE] = []
5629 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5631 def DeclareLocks(self, level):
5632 if level == locking.LEVEL_NODE:
5633 self._LockInstancesNodes()
5635 def BuildHooksEnv(self):
5638 This runs on master, primary and secondary nodes of the instance.
5641 instance = self.instance
5642 source_node = instance.primary_node
5643 target_node = instance.secondary_nodes[0]
5645 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5646 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5647 "OLD_PRIMARY": source_node,
5648 "OLD_SECONDARY": target_node,
5649 "NEW_PRIMARY": target_node,
5650 "NEW_SECONDARY": source_node,
5652 env.update(_BuildInstanceHookEnvByObject(self, instance))
5653 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5655 nl_post.append(source_node)
5656 return env, nl, nl_post
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency or primary_node.offline:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 ",".join(errs))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters is accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
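    # Summary of the resolution above (comment added for clarity): an
    # explicit live=True maps to HT_MIGRATION_LIVE, live=False to
    # HT_MIGRATION_NONLIVE, and when neither 'live' nor 'mode' is given
    # the hypervisor's HV_MIGRATION_MODE parameter supplies the default.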
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)
  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)
  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))
  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)

    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))

  return results
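# Illustrative example (not part of the original code): calling
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns something like
# ["<uuid0>.disk0", "<uuid1>.disk1"], one fresh unique ID per extension.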
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg, names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
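# Naming note (comment added for clarity): for a DRBD8 template,
# _GenerateUniqueNames() produces one prefix per disk and each prefix is
# expanded into a "<prefix>_data"/"<prefix>_meta" LV pair, which is why
# names[idx*2:idx*2+2] above selects the two volumes belonging to disk idx.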
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
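# Example: an instance named "web1.example.com" gets its disks tagged with
# "originstname+web1.example.com".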
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
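# Worked example (illustrative): if 120 seconds were needed to write 256 out
# of 1024 units, avg_time is 120 / 256.0 == 0.46875 s/unit, so the ETA is
# (1024 - 256) * 0.46875 == 360.0 seconds.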
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s", idx, instance.name)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
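      # Worked example (illustrative; assumes the usual constant values of
      # 1024 MiB for MAX_WIPE_CHUNK and 10 for MIN_WIPE_CHUNK_PERCENT): a
      # 4096 MiB disk yields min(1024, 4096 / 100.0 * 10) == 409.6, so it
      # is wiped in chunks of roughly 410 MiB.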
      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks,
                                                    False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                      " look at the status and troubleshoot the issue.", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
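# Worked example (illustrative): two 512 MiB DRBD8 disks in volume group
# "xenvg" give _compute(disks, 128) == {"xenvg": (512 + 128) * 2} ==
# {"xenvg": 1280}, i.e. 128 MiB of DRBD metadata is budgeted per disk.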
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
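# Worked example (illustrative): _ComputeDiskSize(constants.DT_DRBD8,
# [{"size": 512}, {"size": 512}]) == (512 + 128) * 2 == 1280 MiB, whereas
# the same disks under DT_PLAIN need only 512 + 512 == 1024 MiB.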
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
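# Typical call site (mirrors the use in LUInstanceCreate.CheckPrereq below):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)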
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    they declare them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)
    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      vg = disk.get("vg", self.cfg.GetVGName())
      new_disk = {"size": size, "mode": mode, "vg": vg}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)
    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()
    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries
7502 if not self.adopt_disks:
7503 # Check lv size requirements, if not adopting
7504 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7505 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7507 else: # instead, we must check the adoption data
7508 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7509 if len(all_lvs) != len(self.disks):
7510 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7511 errors.ECODE_INVAL)
7512 for lv_name in all_lvs:
7513 try:
7514 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7515 # to ReserveLV use the same syntax
7516 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7517 except errors.ReservationError:
7518 raise errors.OpPrereqError("LV named %s used by another instance" %
7519 lv_name, errors.ECODE_NOTUNIQUE)
7521 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7522 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7524 node_lvs = self.rpc.call_lv_list([pnode.name],
7525 vg_names.payload.keys())[pnode.name]
7526 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7527 node_lvs = node_lvs.payload
7529 delta = all_lvs.difference(node_lvs.keys())
7530 if delta:
7531 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7532 utils.CommaJoin(delta),
7533 errors.ECODE_INVAL)
7534 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7535 if online_lvs:
7536 raise errors.OpPrereqError("Online logical volumes found, cannot"
7537 " adopt: %s" % utils.CommaJoin(online_lvs),
7538 errors.ECODE_STATE)
7539 # update the size of disk based on what is found
7540 for dsk in self.disks:
7541 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
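# Illustrative sketch (commented out, values invented): call_lv_list returns
# a payload mapping "vg/lv" names to (size, inactive, online) triples, which
# is why node_lvs[lv][2] above selects the "online" flag and element [0]
# refreshes the disk size:
#
#   node_lvs = {"xenvg/disk0": ("10240.00", False, False)}
#   size_mb = int(float(node_lvs["xenvg/disk0"][0]))   # -> 10240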
7543 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7545 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7546 # check OS parameters (remotely)
7547 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7549 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7551 # memory check on primary node
7552 if self.op.start:
7553 _CheckNodeFreeMemory(self, self.pnode.name,
7554 "creating instance %s" % self.op.instance_name,
7555 self.be_full[constants.BE_MEMORY],
7556 self.op.hypervisor)
7558 self.dry_run_result = list(nodenames)
7560 def Exec(self, feedback_fn):
7561 """Create and add the instance to the cluster.
7564 instance = self.op.instance_name
7565 pnode_name = self.pnode.name
7567 ht_kind = self.op.hypervisor
7568 if ht_kind in constants.HTS_REQ_PORT:
7569 network_port = self.cfg.AllocatePort()
7570 else:
7571 network_port = None
7573 if constants.ENABLE_FILE_STORAGE:
7574 # this is needed because os.path.join does not accept None arguments
7575 if self.op.file_storage_dir is None:
7576 string_file_storage_dir = ""
7577 else:
7578 string_file_storage_dir = self.op.file_storage_dir
7580 # build the full file storage dir path
7581 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7582 string_file_storage_dir, instance)
7583 else:
7584 file_storage_dir = ""
7586 disks = _GenerateDiskTemplate(self,
7587 self.op.disk_template,
7588 instance, pnode_name,
7589 self.secondaries,
7590 self.disks,
7591 file_storage_dir,
7592 self.op.file_driver,
7593 0,
7594 feedback_fn)
7596 iobj = objects.Instance(name=instance, os=self.op.os_type,
7597 primary_node=pnode_name,
7598 nics=self.nics, disks=disks,
7599 disk_template=self.op.disk_template,
7600 admin_up=False,
7601 network_port=network_port,
7602 beparams=self.op.beparams,
7603 hvparams=self.op.hvparams,
7604 hypervisor=self.op.hypervisor,
7605 osparams=self.op.osparams,
7606 )
7608 if self.adopt_disks:
7609 # rename LVs to the newly-generated names; we need to construct
7610 # 'fake' LV disks with the old data, plus the new unique_id
7611 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7612 rename_to = []
7613 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7614 rename_to.append(t_dsk.logical_id)
7615 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7616 self.cfg.SetDiskID(t_dsk, pnode_name)
7617 result = self.rpc.call_blockdev_rename(pnode_name,
7618 zip(tmp_disks, rename_to))
7619 result.Raise("Failed to rename adopted LVs")
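# Illustrative sketch (commented out): call_blockdev_rename takes a list of
# (disk, new_unique_id) pairs, so the adoption path above effectively swaps
# the adopted LV names for the freshly generated ones:
#
#   pairs = zip(tmp_disks, rename_to)   # [(disk, (vg, new_lv_name)), ...]
#   self.rpc.call_blockdev_rename(pnode_name, pairs)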
7621 feedback_fn("* creating instance disks...")
7622 try:
7623 _CreateDisks(self, iobj)
7624 except errors.OpExecError:
7625 self.LogWarning("Device creation failed, reverting...")
7626 try:
7627 _RemoveDisks(self, iobj)
7628 finally:
7629 self.cfg.ReleaseDRBDMinors(instance)
7630 raise
7632 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7633 feedback_fn("* wiping instance disks...")
7634 try:
7635 _WipeDisks(self, iobj)
7636 except errors.OpExecError:
7637 self.LogWarning("Device wiping failed, reverting...")
7638 try:
7639 _RemoveDisks(self, iobj)
7640 finally:
7641 self.cfg.ReleaseDRBDMinors(instance)
7642 raise
7644 feedback_fn("adding instance %s to cluster config" % instance)
7646 self.cfg.AddInstance(iobj, self.proc.GetECId())
7648 # Declare that we don't want to remove the instance lock anymore, as we've
7649 # added the instance to the config
7650 del self.remove_locks[locking.LEVEL_INSTANCE]
7651 # Unlock all the nodes
7652 if self.op.mode == constants.INSTANCE_IMPORT:
7653 nodes_keep = [self.op.src_node]
7654 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7655 if node != self.op.src_node]
7656 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7657 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7658 else:
7659 self.context.glm.release(locking.LEVEL_NODE)
7660 del self.acquired_locks[locking.LEVEL_NODE]
7662 if self.op.wait_for_sync:
7663 disk_abort = not _WaitForSync(self, iobj)
7664 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7665 # make sure the disks are not degraded (still sync-ing is ok)
7666 time.sleep(15)
7667 feedback_fn("* checking mirrors status")
7668 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7669 else:
7670 disk_abort = False
7672 if disk_abort:
7673 _RemoveDisks(self, iobj)
7674 self.cfg.RemoveInstance(iobj.name)
7675 # Make sure the instance lock gets removed
7676 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7677 raise errors.OpExecError("There are some degraded disks for"
7678 " this instance")
7680 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7681 if self.op.mode == constants.INSTANCE_CREATE:
7682 if not self.op.no_install:
7683 feedback_fn("* running the instance OS create scripts...")
7684 # FIXME: pass debug option from opcode to backend
7685 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7686 self.op.debug_level)
7687 result.Raise("Could not add os for instance %s"
7688 " on node %s" % (instance, pnode_name))
7690 elif self.op.mode == constants.INSTANCE_IMPORT:
7691 feedback_fn("* running the instance OS import scripts...")
7693 transfers = []
7695 for idx, image in enumerate(self.src_images):
7696 if not image:
7697 continue
7699 # FIXME: pass debug option from opcode to backend
7700 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7701 constants.IEIO_FILE, (image, ),
7702 constants.IEIO_SCRIPT,
7703 (iobj.disks[idx], idx),
7704 None)
7705 transfers.append(dt)
7707 import_result = \
7708 masterd.instance.TransferInstanceData(self, feedback_fn,
7709 self.op.src_node, pnode_name,
7710 self.pnode.secondary_ip,
7711 iobj, transfers)
7712 if not compat.all(import_result):
7713 self.LogWarning("Some disks for instance %s on node %s were not"
7714 " imported successfully" % (instance, pnode_name))
7716 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7717 feedback_fn("* preparing remote import...")
7718 # The source cluster will stop the instance before attempting to make a
7719 # connection. In some cases stopping an instance can take a long time,
7720 # hence the shutdown timeout is added to the connection timeout.
7721 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7722 self.op.source_shutdown_timeout)
7723 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7725 assert iobj.primary_node == self.pnode.name
7726 disk_results = \
7727 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7728 self.source_x509_ca,
7729 self._cds, timeouts)
7730 if not compat.all(disk_results):
7731 # TODO: Should the instance still be started, even if some disks
7732 # failed to import (valid for local imports, too)?
7733 self.LogWarning("Some disks for instance %s on node %s were not"
7734 " imported successfully" % (instance, pnode_name))
7736 # Run rename script on newly imported instance
7737 assert iobj.name == instance
7738 feedback_fn("Running rename script for %s" % instance)
7739 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7740 self.source_instance_name,
7741 self.op.debug_level)
7742 if result.fail_msg:
7743 self.LogWarning("Failed to run rename script for %s on node"
7744 " %s: %s" % (instance, pnode_name, result.fail_msg))
7746 else:
7747 # also checked in the prereq part
7748 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7749 % self.op.mode)
7751 if self.op.start:
7752 iobj.admin_up = True
7753 self.cfg.Update(iobj, feedback_fn)
7754 logging.info("Starting instance %s on node %s", instance, pnode_name)
7755 feedback_fn("* starting instance...")
7756 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7757 result.Raise("Could not start instance")
7759 return list(iobj.all_nodes)
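# Illustrative sketch (commented out): a minimal creation opcode roughly as a
# client would submit it to this LU; the exact opcode attributes live in
# opcodes.py and may differ by version, so treat the field names below as an
# approximation and the values as invented:
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{"size": 10240}], nics=[{}],
#                                 os_type="debootstrap+default",
#                                 pnode="node1", snode="node2",
#                                 mode=constants.INSTANCE_CREATE)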
7762 class LUInstanceConsole(NoHooksLU):
7763 """Connect to an instance's console.
7765 This is somewhat special in that it returns the command line that
7766 you need to run on the master node in order to connect to the
7767 console.
7769 """
7770 REQ_BGL = False
7772 def ExpandNames(self):
7773 self._ExpandAndLockInstance()
7775 def CheckPrereq(self):
7776 """Check prerequisites.
7778 This checks that the instance is in the cluster.
7780 """
7781 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7782 assert self.instance is not None, \
7783 "Cannot retrieve locked instance %s" % self.op.instance_name
7784 _CheckNodeOnline(self, self.instance.primary_node)
7786 def Exec(self, feedback_fn):
7787 """Connect to the console of an instance
7790 instance = self.instance
7791 node = instance.primary_node
7793 node_insts = self.rpc.call_instance_list([node],
7794 [instance.hypervisor])[node]
7795 node_insts.Raise("Can't get node information from %s" % node)
7797 if instance.name not in node_insts.payload:
7798 if instance.admin_up:
7799 state = "ERROR_down"
7800 else:
7801 state = "ADMIN_down"
7802 raise errors.OpExecError("Instance %s is not running (state %s)" %
7803 (instance.name, state))
7805 logging.debug("Connecting to console of %s on %s", instance.name, node)
7807 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7808 cluster = self.cfg.GetClusterInfo()
7809 # beparams and hvparams are passed separately, to avoid editing the
7810 # instance and then saving the defaults in the instance itself.
7811 hvparams = cluster.FillHV(instance)
7812 beparams = cluster.FillBE(instance)
7813 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7815 assert console.instance == instance.name
7816 assert console.Validate()
7818 return console.ToDict()
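# Illustrative note (commented out): the returned dictionary is the
# serialized console object, roughly of the form
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1", "command": [...]}
#
# Field names are recalled from objects.py and may differ by version; the
# client side rebuilds the object from this dict before using it.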
7821 class LUInstanceReplaceDisks(LogicalUnit):
7822 """Replace the disks of an instance.
7825 HPATH = "mirrors-replace"
7826 HTYPE = constants.HTYPE_INSTANCE
7827 REQ_BGL = False
7829 def CheckArguments(self):
7830 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7831 self.op.iallocator)
7833 def ExpandNames(self):
7834 self._ExpandAndLockInstance()
7836 if self.op.iallocator is not None:
7837 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7839 elif self.op.remote_node is not None:
7840 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7841 self.op.remote_node = remote_node
7843 # Warning: do not remove the locking of the new secondary here
7844 # unless DRBD8.AddChildren is changed to work in parallel;
7845 # currently it doesn't since parallel invocations of
7846 # FindUnusedMinor will conflict
7847 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7848 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7850 else:
7851 self.needed_locks[locking.LEVEL_NODE] = []
7852 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7854 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7855 self.op.iallocator, self.op.remote_node,
7856 self.op.disks, False, self.op.early_release)
7858 self.tasklets = [self.replacer]
7860 def DeclareLocks(self, level):
7861 # If we're not already locking all nodes in the set we have to declare the
7862 # instance's primary/secondary nodes.
7863 if (level == locking.LEVEL_NODE and
7864 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7865 self._LockInstancesNodes()
7867 def BuildHooksEnv(self):
7868 """Build hooks env.
7870 This runs on the master, the primary and all the secondaries.
7872 """
7873 instance = self.replacer.instance
7874 env = {
7875 "MODE": self.op.mode,
7876 "NEW_SECONDARY": self.op.remote_node,
7877 "OLD_SECONDARY": instance.secondary_nodes[0],
7879 env.update(_BuildInstanceHookEnvByObject(self, instance))
7881 self.cfg.GetMasterNode(),
7882 instance.primary_node,
7883 ]
7884 if self.op.remote_node is not None:
7885 nl.append(self.op.remote_node)
7887 return env, nl, nl
7889 class TLReplaceDisks(Tasklet):
7890 """Replaces disks for an instance.
7892 Note: Locking is not within the scope of this class.
7894 """
7895 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7896 disks, delay_iallocator, early_release):
7897 """Initializes this class.
7899 """
7900 Tasklet.__init__(self, lu)
7902 # Parameters
7903 self.instance_name = instance_name
7904 self.mode = mode
7905 self.iallocator_name = iallocator_name
7906 self.remote_node = remote_node
7907 self.disks = disks
7908 self.delay_iallocator = delay_iallocator
7909 self.early_release = early_release
7911 # Runtime data
7912 self.instance = None
7913 self.new_node = None
7914 self.target_node = None
7915 self.other_node = None
7916 self.remote_node_info = None
7917 self.node_secondary_ip = None
7919 @staticmethod
7920 def CheckArguments(mode, remote_node, iallocator):
7921 """Helper function for users of this class.
7924 # check for valid parameter combination
7925 if mode == constants.REPLACE_DISK_CHG:
7926 if remote_node is None and iallocator is None:
7927 raise errors.OpPrereqError("When changing the secondary either an"
7928 " iallocator script must be used or the"
7929 " new node given", errors.ECODE_INVAL)
7931 if remote_node is not None and iallocator is not None:
7932 raise errors.OpPrereqError("Give either the iallocator or the new"
7933 " secondary, not both", errors.ECODE_INVAL)
7935 elif remote_node is not None or iallocator is not None:
7936 # Not replacing the secondary
7937 raise errors.OpPrereqError("The iallocator and new node options can"
7938 " only be used when changing the"
7939 " secondary node", errors.ECODE_INVAL)
7941 @staticmethod
7942 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7943 """Compute a new secondary node using an IAllocator.
7946 ial = IAllocator(lu.cfg, lu.rpc,
7947 mode=constants.IALLOCATOR_MODE_RELOC,
7948 name=instance_name,
7949 relocate_from=relocate_from)
7951 ial.Run(iallocator_name)
7953 if not ial.success:
7954 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7955 " %s" % (iallocator_name, ial.info),
7956 errors.ECODE_NORES)
7958 if len(ial.result) != ial.required_nodes:
7959 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7960 " of nodes (%s), required %s" %
7961 (iallocator_name,
7962 len(ial.result), ial.required_nodes),
7963 errors.ECODE_FAULT)
7965 remote_node_name = ial.result[0]
7967 lu.LogInfo("Selected new secondary for instance '%s': %s",
7968 instance_name, remote_node_name)
7970 return remote_node_name
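# Illustrative note (commented out): in IALLOCATOR_MODE_RELOC the allocator
# answers with a list of candidate node names and required_nodes == 1, so
# ial.result[0] above is the chosen secondary, e.g.
#
#   ial.success == True
#   ial.required_nodes == 1
#   ial.result == ["node4.example.com"]   # hostname invented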
7972 def _FindFaultyDisks(self, node_name):
7973 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7974 node_name, True)
7976 def CheckPrereq(self):
7977 """Check prerequisites.
7979 This checks that the instance is in the cluster.
7981 """
7982 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7983 assert instance is not None, \
7984 "Cannot retrieve locked instance %s" % self.instance_name
7986 if instance.disk_template != constants.DT_DRBD8:
7987 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7988 " instances", errors.ECODE_INVAL)
7990 if len(instance.secondary_nodes) != 1:
7991 raise errors.OpPrereqError("The instance has a strange layout,"
7992 " expected one secondary but found %d" %
7993 len(instance.secondary_nodes),
7994 errors.ECODE_FAULT)
7996 if not self.delay_iallocator:
7997 self._CheckPrereq2()
7999 def _CheckPrereq2(self):
8000 """Check prerequisites, second part.
8002 This function should always be part of CheckPrereq. It was separated and is
8003 now called from Exec because during node evacuation iallocator was only
8004 called with an unmodified cluster model, not taking planned changes into
8005 account.
8007 """
8008 instance = self.instance
8009 secondary_node = instance.secondary_nodes[0]
8011 if self.iallocator_name is None:
8012 remote_node = self.remote_node
8013 else:
8014 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8015 instance.name, instance.secondary_nodes)
8017 if remote_node is not None:
8018 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8019 assert self.remote_node_info is not None, \
8020 "Cannot retrieve locked node %s" % remote_node
8021 else:
8022 self.remote_node_info = None
8024 if remote_node == self.instance.primary_node:
8025 raise errors.OpPrereqError("The specified node is the primary node of"
8026 " the instance.", errors.ECODE_INVAL)
8028 if remote_node == secondary_node:
8029 raise errors.OpPrereqError("The specified node is already the"
8030 " secondary node of the instance.",
8033 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8034 constants.REPLACE_DISK_CHG):
8035 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8036 errors.ECODE_INVAL)
8038 if self.mode == constants.REPLACE_DISK_AUTO:
8039 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8040 faulty_secondary = self._FindFaultyDisks(secondary_node)
8042 if faulty_primary and faulty_secondary:
8043 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8044 " one node and can not be repaired"
8045 " automatically" % self.instance_name,
8046 errors.ECODE_STATE)
8048 if faulty_primary:
8049 self.disks = faulty_primary
8050 self.target_node = instance.primary_node
8051 self.other_node = secondary_node
8052 check_nodes = [self.target_node, self.other_node]
8053 elif faulty_secondary:
8054 self.disks = faulty_secondary
8055 self.target_node = secondary_node
8056 self.other_node = instance.primary_node
8057 check_nodes = [self.target_node, self.other_node]
8058 else:
8059 self.disks = []
8060 check_nodes = []
8062 else:
8063 # Non-automatic modes
8064 if self.mode == constants.REPLACE_DISK_PRI:
8065 self.target_node = instance.primary_node
8066 self.other_node = secondary_node
8067 check_nodes = [self.target_node, self.other_node]
8069 elif self.mode == constants.REPLACE_DISK_SEC:
8070 self.target_node = secondary_node
8071 self.other_node = instance.primary_node
8072 check_nodes = [self.target_node, self.other_node]
8074 elif self.mode == constants.REPLACE_DISK_CHG:
8075 self.new_node = remote_node
8076 self.other_node = instance.primary_node
8077 self.target_node = secondary_node
8078 check_nodes = [self.new_node, self.other_node]
8080 _CheckNodeNotDrained(self.lu, remote_node)
8081 _CheckNodeVmCapable(self.lu, remote_node)
8083 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8084 assert old_node_info is not None
8085 if old_node_info.offline and not self.early_release:
8086 # doesn't make sense to delay the release
8087 self.early_release = True
8088 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8089 " early-release mode", secondary_node)
8091 else:
8092 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8093 self.mode)
8095 # If not specified all disks should be replaced
8096 if not self.disks:
8097 self.disks = range(len(self.instance.disks))
8099 for node in check_nodes:
8100 _CheckNodeOnline(self.lu, node)
8102 # Check whether disks are valid
8103 for disk_idx in self.disks:
8104 instance.FindDisk(disk_idx)
8106 # Get secondary node IP addresses
8108 node_2nd_ip = {}
8109 for node_name in [self.target_node, self.other_node, self.new_node]:
8110 if node_name is not None:
8111 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8113 self.node_secondary_ip = node_2nd_ip
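# Illustrative sketch (commented out, addresses invented): the resulting map
# is keyed by node name and feeds the drbd disconnect/attach RPCs later on:
#
#   self.node_secondary_ip == {"node1": "192.0.2.11", "node2": "192.0.2.12"}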
8115 def Exec(self, feedback_fn):
8116 """Execute disk replacement.
8118 This dispatches the disk replacement to the appropriate handler.
8120 """
8121 if self.delay_iallocator:
8122 self._CheckPrereq2()
8124 if not self.disks:
8125 feedback_fn("No disks need replacement")
8126 return
8128 feedback_fn("Replacing disk(s) %s for %s" %
8129 (utils.CommaJoin(self.disks), self.instance.name))
8131 activate_disks = (not self.instance.admin_up)
8133 # Activate the instance disks if we're replacing them on a down instance
8134 if activate_disks:
8135 _StartInstanceDisks(self.lu, self.instance, True)
8137 try:
8138 # Should we replace the secondary node?
8139 if self.new_node is not None:
8140 fn = self._ExecDrbd8Secondary
8141 else:
8142 fn = self._ExecDrbd8DiskOnly
8144 return fn(feedback_fn)
8146 finally:
8147 # Deactivate the instance disks if we're replacing them on a
8148 # down instance
8149 if activate_disks:
8150 _SafeShutdownInstanceDisks(self.lu, self.instance)
8152 def _CheckVolumeGroup(self, nodes):
8153 self.lu.LogInfo("Checking volume groups")
8155 vgname = self.cfg.GetVGName()
8157 # Make sure volume group exists on all involved nodes
8158 results = self.rpc.call_vg_list(nodes)
8159 if not results:
8160 raise errors.OpExecError("Can't list volume groups on the nodes")
8162 for node in nodes:
8163 res = results[node]
8164 res.Raise("Error checking node %s" % node)
8165 if vgname not in res.payload:
8166 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8167 (vgname, node))
8169 def _CheckDisksExistence(self, nodes):
8170 # Check disk existence
8171 for idx, dev in enumerate(self.instance.disks):
8172 if idx not in self.disks:
8173 continue
8175 for node in nodes:
8176 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8177 self.cfg.SetDiskID(dev, node)
8179 result = self.rpc.call_blockdev_find(node, dev)
8181 msg = result.fail_msg
8182 if msg or not result.payload:
8183 if not msg:
8184 msg = "disk not found"
8185 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8186 (idx, node, msg))
8188 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8189 for idx, dev in enumerate(self.instance.disks):
8190 if idx not in self.disks:
8191 continue
8193 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8194 (idx, node_name))
8196 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8197 ldisk=ldisk):
8198 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8199 " replace disks for instance %s" %
8200 (node_name, self.instance.name))
8202 def _CreateNewStorage(self, node_name):
8203 vgname = self.cfg.GetVGName()
8205 iv_names = {}
8206 for idx, dev in enumerate(self.instance.disks):
8207 if idx not in self.disks:
8208 continue
8210 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8212 self.cfg.SetDiskID(dev, node_name)
8214 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8215 names = _GenerateUniqueNames(self.lu, lv_names)
8217 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8218 logical_id=(vgname, names[0]))
8219 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8220 logical_id=(vgname, names[1]))
8222 new_lvs = [lv_data, lv_meta]
8223 old_lvs = dev.children
8224 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8226 # we pass force_create=True to force the LVM creation
8227 for new_lv in new_lvs:
8228 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8229 _GetInstanceInfoText(self.instance), False)
8231 return iv_names
8233 def _CheckDevices(self, node_name, iv_names):
8234 for name, (dev, _, _) in iv_names.iteritems():
8235 self.cfg.SetDiskID(dev, node_name)
8237 result = self.rpc.call_blockdev_find(node_name, dev)
8239 msg = result.fail_msg
8240 if msg or not result.payload:
8241 if not msg:
8242 msg = "disk not found"
8243 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8244 (name, msg))
8246 if result.payload.is_degraded:
8247 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8249 def _RemoveOldStorage(self, node_name, iv_names):
8250 for name, (_, old_lvs, _) in iv_names.iteritems():
8251 self.lu.LogInfo("Remove logical volumes for %s" % name)
8253 for lv in old_lvs:
8254 self.cfg.SetDiskID(lv, node_name)
8256 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8257 if msg:
8258 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8259 hint="remove unused LVs manually")
8261 def _ReleaseNodeLock(self, node_name):
8262 """Releases the lock for a given node."""
8263 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8265 def _ExecDrbd8DiskOnly(self, feedback_fn):
8266 """Replace a disk on the primary or secondary for DRBD 8.
8268 The algorithm for replace is quite complicated:
8270 1. for each disk to be replaced:
8272 1. create new LVs on the target node with unique names
8273 1. detach old LVs from the drbd device
8274 1. rename old LVs to name_replaced.<time_t>
8275 1. rename new LVs to old LVs
8276 1. attach the new LVs (with the old names now) to the drbd device
8278 1. wait for sync across all devices
8280 1. for each modified disk:
8282 1. remove old LVs (which have the name name_replaced.<time_t>)
8284 Failures are not very well handled.
8286 """
8288 steps_total = 6
8289 # Step: check device activation
8290 self.lu.LogStep(1, steps_total, "Check device existence")
8291 self._CheckDisksExistence([self.other_node, self.target_node])
8292 self._CheckVolumeGroup([self.target_node, self.other_node])
8294 # Step: check other node consistency
8295 self.lu.LogStep(2, steps_total, "Check peer consistency")
8296 self._CheckDisksConsistency(self.other_node,
8297 self.other_node == self.instance.primary_node,
8298 False)
8300 # Step: create new storage
8301 self.lu.LogStep(3, steps_total, "Allocate new storage")
8302 iv_names = self._CreateNewStorage(self.target_node)
8304 # Step: for each lv, detach+rename*2+attach
8305 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8306 for dev, old_lvs, new_lvs in iv_names.itervalues():
8307 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8309 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8310 old_lvs)
8311 result.Raise("Can't detach drbd from local storage on node"
8312 " %s for device %s" % (self.target_node, dev.iv_name))
8314 #cfg.Update(instance)
8316 # ok, we created the new LVs, so now we know we have the needed
8317 # storage; as such, we proceed on the target node to rename
8318 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8319 # using the assumption that logical_id == physical_id (which in
8320 # turn is the unique_id on that node)
8322 # FIXME(iustin): use a better name for the replaced LVs
8323 temp_suffix = int(time.time())
8324 ren_fn = lambda d, suff: (d.physical_id[0],
8325 d.physical_id[1] + "_replaced-%s" % suff)
8327 # Build the rename list based on what LVs exist on the node
8328 rename_old_to_new = []
8329 for to_ren in old_lvs:
8330 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8331 if not result.fail_msg and result.payload:
8332 # device exists
8333 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8335 self.lu.LogInfo("Renaming the old LVs on the target node")
8336 result = self.rpc.call_blockdev_rename(self.target_node,
8337 rename_old_to_new)
8338 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8340 # Now we rename the new LVs to the old LVs
8341 self.lu.LogInfo("Renaming the new LVs on the target node")
8342 rename_new_to_old = [(new, old.physical_id)
8343 for old, new in zip(old_lvs, new_lvs)]
8344 result = self.rpc.call_blockdev_rename(self.target_node,
8345 rename_new_to_old)
8346 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8348 for old, new in zip(old_lvs, new_lvs):
8349 new.logical_id = old.logical_id
8350 self.cfg.SetDiskID(new, self.target_node)
8352 for disk in old_lvs:
8353 disk.logical_id = ren_fn(disk, temp_suffix)
8354 self.cfg.SetDiskID(disk, self.target_node)
8356 # Now that the new lvs have the old name, we can add them to the device
8357 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8358 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8359 new_lvs)
8360 msg = result.fail_msg
8361 if msg:
8362 for new_lv in new_lvs:
8363 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8364 new_lv).fail_msg
8365 if msg2:
8366 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8367 hint=("cleanup manually the unused logical"
8368 " volumes"))
8369 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8371 dev.children = new_lvs
8373 self.cfg.Update(self.instance, feedback_fn)
8375 cstep = 5
8376 if self.early_release:
8377 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8378 cstep += 1
8379 self._RemoveOldStorage(self.target_node, iv_names)
8380 # WARNING: we release both node locks here, do not do other RPCs
8381 # than WaitForSync to the primary node
8382 self._ReleaseNodeLock([self.target_node, self.other_node])
8384 # Wait for sync
8385 # This can fail as the old devices are degraded and _WaitForSync
8386 # does a combined result over all disks, so we don't check its return value
8387 self.lu.LogStep(cstep, steps_total, "Sync devices")
8388 cstep += 1
8389 _WaitForSync(self.lu, self.instance)
8391 # Check all devices manually
8392 self._CheckDevices(self.instance.primary_node, iv_names)
8394 # Step: remove old storage
8395 if not self.early_release:
8396 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8397 cstep += 1
8398 self._RemoveOldStorage(self.target_node, iv_names)
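# Illustrative recap (commented out, names invented) of the rename dance
# performed above for one disk whose data LV is "xenvg/abc.disk0_data":
#
#   old LV  -> "xenvg/abc.disk0_data_replaced-1234567890"  (rename old away)
#   new LV  -> "xenvg/abc.disk0_data"                      (rename new into place)
#
# after which call_blockdev_addchildren reattaches the (renamed) new LVs to
# the unchanged DRBD device.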
8400 def _ExecDrbd8Secondary(self, feedback_fn):
8401 """Replace the secondary node for DRBD 8.
8403 The algorithm for replace is quite complicated:
8404 - for all disks of the instance:
8405 - create new LVs on the new node with same names
8406 - shutdown the drbd device on the old secondary
8407 - disconnect the drbd network on the primary
8408 - create the drbd device on the new secondary
8409 - network attach the drbd on the primary, using an artifice:
8410 the drbd code for Attach() will connect to the network if it
8411 finds a device which is connected to the good local disks but
8412 not network enabled
8413 - wait for sync across all devices
8414 - remove all disks from the old secondary
8416 Failures are not very well handled.
8418 """
8420 steps_total = 6
8421 # Step: check device activation
8422 self.lu.LogStep(1, steps_total, "Check device existence")
8423 self._CheckDisksExistence([self.instance.primary_node])
8424 self._CheckVolumeGroup([self.instance.primary_node])
8426 # Step: check other node consistency
8427 self.lu.LogStep(2, steps_total, "Check peer consistency")
8428 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8430 # Step: create new storage
8431 self.lu.LogStep(3, steps_total, "Allocate new storage")
8432 for idx, dev in enumerate(self.instance.disks):
8433 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8434 (self.new_node, idx))
8435 # we pass force_create=True to force LVM creation
8436 for new_lv in dev.children:
8437 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8438 _GetInstanceInfoText(self.instance), False)
8440 # Step 4: drbd minors and drbd setups changes
8441 # after this, we must manually remove the drbd minors on both the
8442 # error and the success paths
8443 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8444 minors = self.cfg.AllocateDRBDMinor([self.new_node
8445 for dev in self.instance.disks],
8446 self.instance.name)
8447 logging.debug("Allocated minors %r", minors)
8449 iv_names = {}
8450 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8451 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8452 (self.new_node, idx))
8453 # create new devices on new_node; note that we create two IDs:
8454 # one without port, so the drbd will be activated without
8455 # networking information on the new node at this stage, and one
8456 # with network, for the latter activation in step 4
8457 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8458 if self.instance.primary_node == o_node1:
8459 p_minor = o_minor1
8460 else:
8461 assert self.instance.primary_node == o_node2, "Three-node instance?"
8462 p_minor = o_minor2
8464 new_alone_id = (self.instance.primary_node, self.new_node, None,
8465 p_minor, new_minor, o_secret)
8466 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8467 p_minor, new_minor, o_secret)
8469 iv_names[idx] = (dev, dev.children, new_net_id)
8470 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8471 new_net_id)
8472 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8473 logical_id=new_alone_id,
8474 children=dev.children,
8475 size=dev.size)
8476 try:
8477 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8478 _GetInstanceInfoText(self.instance), False)
8479 except errors.GenericError:
8480 self.cfg.ReleaseDRBDMinors(self.instance.name)
8481 raise
8483 # We have new devices, shutdown the drbd on the old secondary
8484 for idx, dev in enumerate(self.instance.disks):
8485 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8486 self.cfg.SetDiskID(dev, self.target_node)
8487 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8488 if msg:
8489 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8490 " node: %s" % (idx, msg),
8491 hint=("Please cleanup this device manually as"
8492 " soon as possible"))
8494 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8495 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8496 self.node_secondary_ip,
8497 self.instance.disks)\
8498 [self.instance.primary_node]
8500 msg = result.fail_msg
8501 if msg:
8502 # detaches didn't succeed (unlikely)
8503 self.cfg.ReleaseDRBDMinors(self.instance.name)
8504 raise errors.OpExecError("Can't detach the disks from the network on"
8505 " old node: %s" % (msg,))
8507 # if we managed to detach at least one, we update all the disks of
8508 # the instance to point to the new secondary
8509 self.lu.LogInfo("Updating instance configuration")
8510 for dev, _, new_logical_id in iv_names.itervalues():
8511 dev.logical_id = new_logical_id
8512 self.cfg.SetDiskID(dev, self.instance.primary_node)
8514 self.cfg.Update(self.instance, feedback_fn)
8516 # and now perform the drbd attach
8517 self.lu.LogInfo("Attaching primary drbds to new secondary"
8518 " (standalone => connected)")
8519 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8520 self.new_node],
8521 self.node_secondary_ip,
8522 self.instance.disks,
8523 self.instance.name,
8524 False)
8525 for to_node, to_result in result.items():
8526 msg = to_result.fail_msg
8527 if msg:
8528 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8529 to_node, msg,
8530 hint=("please do a gnt-instance info to see the"
8531 " status of disks"))
8532 cstep = 5
8533 if self.early_release:
8534 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8535 cstep += 1
8536 self._RemoveOldStorage(self.target_node, iv_names)
8537 # WARNING: we release all node locks here, do not do other RPCs
8538 # than WaitForSync to the primary node
8539 self._ReleaseNodeLock([self.instance.primary_node,
8540 self.target_node,
8541 self.new_node])
8543 # Wait for sync
8544 # This can fail as the old devices are degraded and _WaitForSync
8545 # does a combined result over all disks, so we don't check its return value
8546 self.lu.LogStep(cstep, steps_total, "Sync devices")
8547 cstep += 1
8548 _WaitForSync(self.lu, self.instance)
8550 # Check all devices manually
8551 self._CheckDevices(self.instance.primary_node, iv_names)
8553 # Step: remove old storage
8554 if not self.early_release:
8555 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8556 self._RemoveOldStorage(self.target_node, iv_names)
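# Illustrative note (commented out, values invented): a DRBD8 disk's
# logical_id is the 6-tuple unpacked above, e.g.
#
#   (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret)
#   ("node1", "node2", 11001,  0,        0,        "d41d8cd9...")
#
# and new_alone_id simply drops the port (None) so the new secondary's drbd
# comes up without networking until the attach step.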
8559 class LURepairNodeStorage(NoHooksLU):
8560 """Repairs the volume group on a node.
8565 def CheckArguments(self):
8566 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8568 storage_type = self.op.storage_type
8570 if (constants.SO_FIX_CONSISTENCY not in
8571 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8572 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8573 " repaired" % storage_type,
8576 def ExpandNames(self):
8577 self.needed_locks = {
8578 locking.LEVEL_NODE: [self.op.node_name],
8579 }
8581 def _CheckFaultyDisks(self, instance, node_name):
8582 """Ensure faulty disks abort the opcode or at least warn."""
8583 try:
8584 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8585 node_name, True):
8586 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8587 " node '%s'" % (instance.name, node_name),
8589 except errors.OpPrereqError, err:
8590 if self.op.ignore_consistency:
8591 self.proc.LogWarning(str(err.args[0]))
8592 else:
8593 raise
8595 def CheckPrereq(self):
8596 """Check prerequisites.
8598 """
8599 # Check whether any instance on this node has faulty disks
8600 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8601 if not inst.admin_up:
8602 continue
8603 check_nodes = set(inst.all_nodes)
8604 check_nodes.discard(self.op.node_name)
8605 for inst_node_name in check_nodes:
8606 self._CheckFaultyDisks(inst, inst_node_name)
8608 def Exec(self, feedback_fn):
8609 feedback_fn("Repairing storage unit '%s' on %s ..." %
8610 (self.op.name, self.op.node_name))
8612 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8613 result = self.rpc.call_storage_execute(self.op.node_name,
8614 self.op.storage_type, st_args,
8615 self.op.name,
8616 constants.SO_FIX_CONSISTENCY)
8617 result.Raise("Failed to repair storage unit '%s' on %s" %
8618 (self.op.name, self.op.node_name))
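# Illustrative usage (commented out): this LU backs the CLI command
#
#   gnt-node repair-storage node2.example.com lvm-vg xenvg
#
# which submits the corresponding repair opcode; the command syntax is
# recalled from the Ganeti manpages and may differ slightly by version.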
8621 class LUNodeEvacStrategy(NoHooksLU):
8622 """Computes the node evacuation strategy.
8627 def CheckArguments(self):
8628 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8630 def ExpandNames(self):
8631 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8632 self.needed_locks = locks = {}
8633 if self.op.remote_node is None:
8634 locks[locking.LEVEL_NODE] = locking.ALL_SET
8636 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8637 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8639 def Exec(self, feedback_fn):
8640 if self.op.remote_node is not None:
8641 instances = []
8642 for node in self.op.nodes:
8643 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8644 result = []
8645 for i in instances:
8646 if i.primary_node == self.op.remote_node:
8647 raise errors.OpPrereqError("Node %s is the primary node of"
8648 " instance %s, cannot use it as"
8649 " secondary" %
8650 (self.op.remote_node, i.name),
8651 errors.ECODE_INVAL)
8652 result.append([i.name, self.op.remote_node])
8653 else:
8654 ial = IAllocator(self.cfg, self.rpc,
8655 mode=constants.IALLOCATOR_MODE_MEVAC,
8656 evac_nodes=self.op.nodes)
8657 ial.Run(self.op.iallocator, validate=True)
8658 if not ial.success:
8659 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8660 errors.ECODE_NORES)
8661 result = ial.result
8662 return result
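# Illustrative note (commented out, hostnames invented): either branch
# produces a list of [instance, new_secondary] pairs, e.g.
#
#   [["inst1.example.com", "node4"], ["inst2.example.com", "node5"]]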
8665 class LUInstanceGrowDisk(LogicalUnit):
8666 """Grow a disk of an instance.
8670 HTYPE = constants.HTYPE_INSTANCE
8673 def ExpandNames(self):
8674 self._ExpandAndLockInstance()
8675 self.needed_locks[locking.LEVEL_NODE] = []
8676 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8678 def DeclareLocks(self, level):
8679 if level == locking.LEVEL_NODE:
8680 self._LockInstancesNodes()
8682 def BuildHooksEnv(self):
8683 """Build hooks env.
8685 This runs on the master, the primary and all the secondaries.
8687 """
8688 env = {
8689 "DISK": self.op.disk,
8690 "AMOUNT": self.op.amount,
8692 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8693 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8694 return env, nl, nl
8696 def CheckPrereq(self):
8697 """Check prerequisites.
8699 This checks that the instance is in the cluster.
8701 """
8702 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8703 assert instance is not None, \
8704 "Cannot retrieve locked instance %s" % self.op.instance_name
8705 nodenames = list(instance.all_nodes)
8706 for node in nodenames:
8707 _CheckNodeOnline(self, node)
8709 self.instance = instance
8711 if instance.disk_template not in constants.DTS_GROWABLE:
8712 raise errors.OpPrereqError("Instance's disk layout does not support"
8713 " growing.", errors.ECODE_INVAL)
8715 self.disk = instance.FindDisk(self.op.disk)
8717 if instance.disk_template != constants.DT_FILE:
8718 # TODO: check the free disk space for file, when that feature
8719 # will be supported
8720 _CheckNodesFreeDiskPerVG(self, nodenames,
8721 self.disk.ComputeGrowth(self.op.amount))
8723 def Exec(self, feedback_fn):
8724 """Execute disk grow.
8726 """
8727 instance = self.instance
8728 disk = self.disk
8730 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8731 if not disks_ok:
8732 raise errors.OpExecError("Cannot activate block device to grow")
8734 for node in instance.all_nodes:
8735 self.cfg.SetDiskID(disk, node)
8736 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8737 result.Raise("Grow request failed to node %s" % node)
8739 # TODO: Rewrite code to work properly
8740 # DRBD goes into sync mode for a short amount of time after executing the
8741 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8742 # calling "resize" in sync mode fails. Sleeping for a short amount of
8743 # time is a work-around.
8745 time.sleep(5)
8746 disk.RecordGrow(self.op.amount)
8747 self.cfg.Update(instance, feedback_fn)
8748 if self.op.wait_for_sync:
8749 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8750 if disk_abort:
8751 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8752 " status.\nPlease check the instance.")
8753 if not instance.admin_up:
8754 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8755 elif not instance.admin_up:
8756 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8757 " not supposed to be running because no wait for"
8758 " sync mode was requested.")
8761 class LUInstanceQueryData(NoHooksLU):
8762 """Query runtime instance data.
8767 def ExpandNames(self):
8768 self.needed_locks = {}
8769 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8771 if self.op.instances:
8772 self.wanted_names = []
8773 for name in self.op.instances:
8774 full_name = _ExpandInstanceName(self.cfg, name)
8775 self.wanted_names.append(full_name)
8776 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8777 else:
8778 self.wanted_names = None
8779 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8781 self.needed_locks[locking.LEVEL_NODE] = []
8782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8784 def DeclareLocks(self, level):
8785 if level == locking.LEVEL_NODE:
8786 self._LockInstancesNodes()
8788 def CheckPrereq(self):
8789 """Check prerequisites.
8791 This only checks the optional instance list against the existing names.
8793 """
8794 if self.wanted_names is None:
8795 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8797 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8798 in self.wanted_names]
8800 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8801 """Returns the status of a block device
8804 if self.op.static or not node:
8807 self.cfg.SetDiskID(dev, node)
8809 result = self.rpc.call_blockdev_find(node, dev)
8810 if result.offline:
8811 return None
8813 result.Raise("Can't compute disk status for %s" % instance_name)
8815 status = result.payload
8816 if status is None:
8817 return None
8819 return (status.dev_path, status.major, status.minor,
8820 status.sync_percent, status.estimated_time,
8821 status.is_degraded, status.ldisk_status)
8823 def _ComputeDiskStatus(self, instance, snode, dev):
8824 """Compute block device status.
8827 if dev.dev_type in constants.LDS_DRBD:
8828 # we change the snode then (otherwise we use the one passed in)
8829 if dev.logical_id[0] == instance.primary_node:
8830 snode = dev.logical_id[1]
8831 else:
8832 snode = dev.logical_id[0]
8834 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8835 instance.name, dev)
8836 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8838 if dev.children:
8839 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8840 for child in dev.children]
8845 "iv_name": dev.iv_name,
8846 "dev_type": dev.dev_type,
8847 "logical_id": dev.logical_id,
8848 "physical_id": dev.physical_id,
8849 "pstatus": dev_pstatus,
8850 "sstatus": dev_sstatus,
8851 "children": dev_children,
8858 def Exec(self, feedback_fn):
8859 """Gather and return data"""
8861 result = {}
8862 cluster = self.cfg.GetClusterInfo()
8864 for instance in self.wanted_instances:
8865 if not self.op.static:
8866 remote_info = self.rpc.call_instance_info(instance.primary_node,
8868 instance.hypervisor)
8869 remote_info.Raise("Error checking node %s" % instance.primary_node)
8870 remote_info = remote_info.payload
8871 if remote_info and "state" in remote_info:
8874 remote_state = "down"
8875 else:
8876 remote_state = None
8877 if instance.admin_up:
8880 config_state = "down"
8882 disks = [self._ComputeDiskStatus(instance, None, device)
8883 for device in instance.disks]
8886 "name": instance.name,
8887 "config_state": config_state,
8888 "run_state": remote_state,
8889 "pnode": instance.primary_node,
8890 "snodes": instance.secondary_nodes,
8891 "os": instance.os,
8892 # this happens to be the same format used for hooks
8893 "nics": _NICListToTuple(self, instance.nics),
8894 "disk_template": instance.disk_template,
8896 "hypervisor": instance.hypervisor,
8897 "network_port": instance.network_port,
8898 "hv_instance": instance.hvparams,
8899 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8900 "be_instance": instance.beparams,
8901 "be_actual": cluster.FillBE(instance),
8902 "os_instance": instance.osparams,
8903 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8904 "serial_no": instance.serial_no,
8905 "mtime": instance.mtime,
8906 "ctime": instance.ctime,
8907 "uuid": instance.uuid,
8908 }
8910 result[instance.name] = idict
8912 return result
8915 class LUInstanceSetParams(LogicalUnit):
8916 """Modifies an instances's parameters.
8919 HPATH = "instance-modify"
8920 HTYPE = constants.HTYPE_INSTANCE
8923 def CheckArguments(self):
8924 if not (self.op.nics or self.op.disks or self.op.disk_template or
8925 self.op.hvparams or self.op.beparams or self.op.os_name):
8926 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8928 if self.op.hvparams:
8929 _CheckGlobalHvParams(self.op.hvparams)
8932 disk_addremove = 0
8933 for disk_op, disk_dict in self.op.disks:
8934 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8935 if disk_op == constants.DDM_REMOVE:
8936 disk_addremove += 1
8937 continue
8938 elif disk_op == constants.DDM_ADD:
8939 disk_addremove += 1
8940 else:
8941 if not isinstance(disk_op, int):
8942 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8943 if not isinstance(disk_dict, dict):
8944 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8945 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8947 if disk_op == constants.DDM_ADD:
8948 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8949 if mode not in constants.DISK_ACCESS_SET:
8950 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8952 size = disk_dict.get('size', None)
8953 if size is None:
8954 raise errors.OpPrereqError("Required disk parameter size missing",
8955 errors.ECODE_INVAL)
8956 try:
8957 size = int(size)
8958 except (TypeError, ValueError), err:
8959 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8960 str(err), errors.ECODE_INVAL)
8961 disk_dict['size'] = size
8962 else:
8963 # modification of disk
8964 if 'size' in disk_dict:
8965 raise errors.OpPrereqError("Disk size change not possible, use"
8966 " grow-disk", errors.ECODE_INVAL)
8968 if disk_addremove > 1:
8969 raise errors.OpPrereqError("Only one disk add or remove operation"
8970 " supported at a time", errors.ECODE_INVAL)
8972 if self.op.disks and self.op.disk_template is not None:
8973 raise errors.OpPrereqError("Disk template conversion and other disk"
8974 " changes not supported at the same time",
8977 if (self.op.disk_template and
8978 self.op.disk_template in constants.DTS_NET_MIRROR and
8979 self.op.remote_node is None):
8980 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8981 " one requires specifying a secondary node",
8984 # NIC validation
8985 nic_addremove = 0
8986 for nic_op, nic_dict in self.op.nics:
8987 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8988 if nic_op == constants.DDM_REMOVE:
8989 nic_addremove += 1
8990 continue
8991 elif nic_op == constants.DDM_ADD:
8992 nic_addremove += 1
8993 else:
8994 if not isinstance(nic_op, int):
8995 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8996 if not isinstance(nic_dict, dict):
8997 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9000 # nic_dict should be a dict
9001 nic_ip = nic_dict.get('ip', None)
9002 if nic_ip is not None:
9003 if nic_ip.lower() == constants.VALUE_NONE:
9004 nic_dict['ip'] = None
9005 else:
9006 if not netutils.IPAddress.IsValid(nic_ip):
9007 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9008 errors.ECODE_INVAL)
9010 nic_bridge = nic_dict.get('bridge', None)
9011 nic_link = nic_dict.get('link', None)
9012 if nic_bridge and nic_link:
9013 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9014 " at the same time", errors.ECODE_INVAL)
9015 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9016 nic_dict['bridge'] = None
9017 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9018 nic_dict['link'] = None
9020 if nic_op == constants.DDM_ADD:
9021 nic_mac = nic_dict.get('mac', None)
9022 if nic_mac is None:
9023 nic_dict['mac'] = constants.VALUE_AUTO
9025 if 'mac' in nic_dict:
9026 nic_mac = nic_dict['mac']
9027 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9028 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9030 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9031 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9032 " modifying an existing nic",
9035 if nic_addremove > 1:
9036 raise errors.OpPrereqError("Only one NIC add or remove operation"
9037 " supported at a time", errors.ECODE_INVAL)
9039 def ExpandNames(self):
9040 self._ExpandAndLockInstance()
9041 self.needed_locks[locking.LEVEL_NODE] = []
9042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9044 def DeclareLocks(self, level):
9045 if level == locking.LEVEL_NODE:
9046 self._LockInstancesNodes()
9047 if self.op.disk_template and self.op.remote_node:
9048 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9049 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9051 def BuildHooksEnv(self):
9052 """Build hooks env.
9054 This runs on the master, primary and secondaries.
9056 """
9057 args = dict()
9058 if constants.BE_MEMORY in self.be_new:
9059 args['memory'] = self.be_new[constants.BE_MEMORY]
9060 if constants.BE_VCPUS in self.be_new:
9061 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9062 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9063 # information at all.
9064 if self.op.nics:
9065 args['nics'] = []
9066 nic_override = dict(self.op.nics)
9067 for idx, nic in enumerate(self.instance.nics):
9068 if idx in nic_override:
9069 this_nic_override = nic_override[idx]
9071 this_nic_override = {}
9072 if 'ip' in this_nic_override:
9073 ip = this_nic_override['ip']
9074 else:
9075 ip = nic.ip
9076 if 'mac' in this_nic_override:
9077 mac = this_nic_override['mac']
9078 else:
9079 mac = nic.mac
9080 if idx in self.nic_pnew:
9081 nicparams = self.nic_pnew[idx]
9082 else:
9083 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9084 mode = nicparams[constants.NIC_MODE]
9085 link = nicparams[constants.NIC_LINK]
9086 args['nics'].append((ip, mac, mode, link))
9087 if constants.DDM_ADD in nic_override:
9088 ip = nic_override[constants.DDM_ADD].get('ip', None)
9089 mac = nic_override[constants.DDM_ADD]['mac']
9090 nicparams = self.nic_pnew[constants.DDM_ADD]
9091 mode = nicparams[constants.NIC_MODE]
9092 link = nicparams[constants.NIC_LINK]
9093 args['nics'].append((ip, mac, mode, link))
9094 elif constants.DDM_REMOVE in nic_override:
9095 del args['nics'][-1]
9097 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9098 if self.op.disk_template:
9099 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9100 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9101 return env, nl, nl
9103 def CheckPrereq(self):
9104 """Check prerequisites.
9106 This only checks the instance list against the existing names.
9108 """
9109 # checking the new params on the primary/secondary nodes
9111 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9112 cluster = self.cluster = self.cfg.GetClusterInfo()
9113 assert self.instance is not None, \
9114 "Cannot retrieve locked instance %s" % self.op.instance_name
9115 pnode = instance.primary_node
9116 nodelist = list(instance.all_nodes)
9119 if self.op.os_name and not self.op.force:
9120 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9121 self.op.force_variant)
9122 instance_os = self.op.os_name
9123 else:
9124 instance_os = instance.os
9126 if self.op.disk_template:
9127 if instance.disk_template == self.op.disk_template:
9128 raise errors.OpPrereqError("Instance already has disk template %s" %
9129 instance.disk_template, errors.ECODE_INVAL)
9131 if (instance.disk_template,
9132 self.op.disk_template) not in self._DISK_CONVERSIONS:
9133 raise errors.OpPrereqError("Unsupported disk template conversion from"
9134 " %s to %s" % (instance.disk_template,
9135 self.op.disk_template),
9136 errors.ECODE_INVAL)
9137 _CheckInstanceDown(self, instance, "cannot change disk template")
9138 if self.op.disk_template in constants.DTS_NET_MIRROR:
9139 if self.op.remote_node == pnode:
9140 raise errors.OpPrereqError("Given new secondary node %s is the same"
9141 " as the primary node of the instance" %
9142 self.op.remote_node, errors.ECODE_STATE)
9143 _CheckNodeOnline(self, self.op.remote_node)
9144 _CheckNodeNotDrained(self, self.op.remote_node)
9145 # FIXME: here we assume that the old instance type is DT_PLAIN
9146 assert instance.disk_template == constants.DT_PLAIN
9147 disks = [{"size": d.size, "vg": d.logical_id[0]}
9148 for d in instance.disks]
9149 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9150 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9152 # hvparams processing
9153 if self.op.hvparams:
9154 hv_type = instance.hypervisor
9155 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9156 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9157 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9160 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9161 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9162 self.hv_new = hv_new # the new actual values
9163 self.hv_inst = i_hvdict # the new dict (without defaults)
9164 else:
9165 self.hv_new = self.hv_inst = {}
9167 # beparams processing
9168 if self.op.beparams:
9169 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9170 use_none=True)
9171 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9172 be_new = cluster.SimpleFillBE(i_bedict)
9173 self.be_new = be_new # the new actual values
9174 self.be_inst = i_bedict # the new dict (without defaults)
9175 else:
9176 self.be_new = self.be_inst = {}
9178 # osparams processing
9179 if self.op.osparams:
9180 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9181 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9182 self.os_inst = i_osdict # the new dict (without defaults)
9184 else:
9185 self.os_inst = {}
9187 self.warn = []
9188 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9189 mem_check_list = [pnode]
9190 if be_new[constants.BE_AUTO_BALANCE]:
9191 # either we changed auto_balance to yes or it was from before
9192 mem_check_list.extend(instance.secondary_nodes)
9193 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9194 instance.hypervisor)
9195 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9196 instance.hypervisor)
9197 pninfo = nodeinfo[pnode]
9198 msg = pninfo.fail_msg
9199 if msg:
9200 # Assume the primary node is unreachable and go ahead
9201 self.warn.append("Can't get info from primary node %s: %s" %
9202 (pnode, msg))
9203 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9204 self.warn.append("Node data from primary node %s doesn't contain"
9205 " free memory information" % pnode)
9206 elif instance_info.fail_msg:
9207 self.warn.append("Can't get instance runtime information: %s" %
9208 instance_info.fail_msg)
9209 else:
9210 if instance_info.payload:
9211 current_mem = int(instance_info.payload['memory'])
9212 else:
9213 # Assume instance not running
9214 # (there is a slight race condition here, but it's not very probable,
9215 # and we have no other way to check)
9216 current_mem = 0
9217 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9218 pninfo.payload['memory_free'])
9219 if miss_mem > 0:
9220 raise errors.OpPrereqError("This change will prevent the instance"
9221 " from starting, due to %d MB of memory"
9222 " missing on its primary node" % miss_mem,
9225 if be_new[constants.BE_AUTO_BALANCE]:
9226 for node, nres in nodeinfo.items():
9227 if node not in instance.secondary_nodes:
9228 continue
9229 msg = nres.fail_msg
9230 if msg:
9231 self.warn.append("Can't get info from secondary node %s: %s" %
9232 (node, msg))
9233 elif not isinstance(nres.payload.get('memory_free', None), int):
9234 self.warn.append("Secondary node %s didn't return free"
9235 " memory information" % node)
9236 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9237 self.warn.append("Not enough memory to failover instance to"
9238 " secondary node %s" % node)
9240 # NIC processing
9241 self.nic_pnew = {}
9242 self.nic_pinst = {}
9243 for nic_op, nic_dict in self.op.nics:
9244 if nic_op == constants.DDM_REMOVE:
9245 if not instance.nics:
9246 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9247 errors.ECODE_INVAL)
9248 continue
9249 if nic_op != constants.DDM_ADD:
9250 # an existing nic
9251 if not instance.nics:
9252 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9253 " no NICs" % nic_op,
9255 if nic_op < 0 or nic_op >= len(instance.nics):
9256 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9257 " are 0 to %d" %
9258 (nic_op, len(instance.nics) - 1),
9259 errors.ECODE_INVAL)
9260 old_nic_params = instance.nics[nic_op].nicparams
9261 old_nic_ip = instance.nics[nic_op].ip
9262 else:
9263 old_nic_params = {}
9264 old_nic_ip = None
9266 update_params_dict = dict([(key, nic_dict[key])
9267 for key in constants.NICS_PARAMETERS
9268 if key in nic_dict])
9270 if 'bridge' in nic_dict:
9271 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9273 new_nic_params = _GetUpdatedParams(old_nic_params,
9274 update_params_dict)
9275 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9276 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9277 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9278 self.nic_pinst[nic_op] = new_nic_params
9279 self.nic_pnew[nic_op] = new_filled_nic_params
9280 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9282 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9283 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9284 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9286 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9288 self.warn.append(msg)
9290 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9291 if new_nic_mode == constants.NIC_MODE_ROUTED:
9292 if 'ip' in nic_dict:
9293 nic_ip = nic_dict['ip']
9297 raise errors.OpPrereqError('Cannot set the nic ip to None'
9298 ' on a routed nic', errors.ECODE_INVAL)
9299 if 'mac' in nic_dict:
9300 nic_mac = nic_dict['mac']
9302 raise errors.OpPrereqError('Cannot set the nic mac to None',
9304 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9305 # otherwise generate the mac
9306 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9308 # or validate/reserve the current one
9310 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9311 except errors.ReservationError:
9312 raise errors.OpPrereqError("MAC address %s already in use"
9313 " in cluster" % nic_mac,
9314 errors.ECODE_NOTUNIQUE)
9317 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9318 raise errors.OpPrereqError("Disk operations not supported for"
9319 " diskless instances",
9321 for disk_op, _ in self.op.disks:
9322 if disk_op == constants.DDM_REMOVE:
9323 if len(instance.disks) == 1:
9324 raise errors.OpPrereqError("Cannot remove the last disk of"
9325 " an instance", errors.ECODE_INVAL)
9326 _CheckInstanceDown(self, instance, "cannot remove disks")
9328 if (disk_op == constants.DDM_ADD and
9329 len(instance.disks) >= constants.MAX_DISKS):
9330 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9331 " add more" % constants.MAX_DISKS,
9333 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9335 if disk_op < 0 or disk_op >= len(instance.disks):
9336 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9338 (disk_op, len(instance.disks)),
9343 def _ConvertPlainToDrbd(self, feedback_fn):
9344 """Converts an instance from plain to drbd.
9347 feedback_fn("Converting template to drbd")
9348 instance = self.instance
9349 pnode = instance.primary_node
9350 snode = self.op.remote_node
9352 # create a fake disk info for _GenerateDiskTemplate
9353 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9354 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9355 instance.name, pnode, [snode],
9356 disk_info, None, None, 0, feedback_fn)
9357 info = _GetInstanceInfoText(instance)
9358 feedback_fn("Creating aditional volumes...")
9359 # first, create the missing data and meta devices
9360 for disk in new_disks:
9361 # unfortunately this is... not too nice
9362 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9364 for child in disk.children:
9365 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9366 # at this stage, all new LVs have been created, so we can rename the old ones
9368 feedback_fn("Renaming original volumes...")
9369 rename_list = [(o, n.children[0].logical_id)
9370 for (o, n) in zip(instance.disks, new_disks)]
9371 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9372 result.Raise("Failed to rename original LVs")
9374 feedback_fn("Initializing DRBD devices...")
9375 # all child devices are in place, we can now create the DRBD devices
9376 for disk in new_disks:
9377 for node in [pnode, snode]:
9378 f_create = node == pnode
9379 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9381 # at this point, the instance has been modified
9382 instance.disk_template = constants.DT_DRBD8
9383 instance.disks = new_disks
9384 self.cfg.Update(instance, feedback_fn)
9386 # disks are created, waiting for sync
9387 disk_abort = not _WaitForSync(self, instance)
9389 raise errors.OpExecError("There are some degraded disks for"
9390 " this instance, please cleanup manually")
9392 def _ConvertDrbdToPlain(self, feedback_fn):
9393 """Converts an instance from drbd to plain.
9396 instance = self.instance
9397 assert len(instance.secondary_nodes) == 1
9398 pnode = instance.primary_node
9399 snode = instance.secondary_nodes[0]
9400 feedback_fn("Converting template to plain")
9402 old_disks = instance.disks
9403 new_disks = [d.children[0] for d in old_disks]
9405 # copy over size and mode
9406 for parent, child in zip(old_disks, new_disks):
9407 child.size = parent.size
9408 child.mode = parent.mode
9410 # update instance structure
9411 instance.disks = new_disks
9412 instance.disk_template = constants.DT_PLAIN
9413 self.cfg.Update(instance, feedback_fn)
9415 feedback_fn("Removing volumes on the secondary node...")
9416 for disk in old_disks:
9417 self.cfg.SetDiskID(disk, snode)
9418 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9420 self.LogWarning("Could not remove block device %s on node %s,"
9421 " continuing anyway: %s", disk.iv_name, snode, msg)
9423 feedback_fn("Removing unneeded volumes on the primary node...")
9424 for idx, disk in enumerate(old_disks):
9425 meta = disk.children[1]
9426 self.cfg.SetDiskID(meta, pnode)
9427 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9429 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9430 " continuing anyway: %s", idx, pnode, msg)
9432 def Exec(self, feedback_fn):
9433 """Modifies an instance.
9435 All parameters take effect only at the next restart of the instance.
9438 # Process the warnings from CheckPrereq here, as we don't have a
9439 # feedback_fn there.
9440 for warn in self.warn:
9441 feedback_fn("WARNING: %s" % warn)
9444 instance = self.instance
9446 for disk_op, disk_dict in self.op.disks:
9447 if disk_op == constants.DDM_REMOVE:
9448 # remove the last disk
9449 device = instance.disks.pop()
9450 device_idx = len(instance.disks)
9451 for node, disk in device.ComputeNodeTree(instance.primary_node):
9452 self.cfg.SetDiskID(disk, node)
9453 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9455 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9456 " continuing anyway", device_idx, node, msg)
9457 result.append(("disk/%d" % device_idx, "remove"))
9458 elif disk_op == constants.DDM_ADD:
9460 if instance.disk_template == constants.DT_FILE:
9461 file_driver, file_path = instance.disks[0].logical_id
9462 file_path = os.path.dirname(file_path)
9464 file_driver = file_path = None
9465 disk_idx_base = len(instance.disks)
9466 new_disk = _GenerateDiskTemplate(self,
9467 instance.disk_template,
9468 instance.name, instance.primary_node,
9469 instance.secondary_nodes,
9473 disk_idx_base, feedback_fn)[0]
9474 instance.disks.append(new_disk)
9475 info = _GetInstanceInfoText(instance)
9477 logging.info("Creating volume %s for instance %s",
9478 new_disk.iv_name, instance.name)
9479 # Note: this needs to be kept in sync with _CreateDisks
9481 for node in instance.all_nodes:
9482 f_create = node == instance.primary_node
9484 _CreateBlockDev(self, node, instance, new_disk,
9485 f_create, info, f_create)
9486 except errors.OpExecError, err:
9487 self.LogWarning("Failed to create volume %s (%s) on"
9489 new_disk.iv_name, new_disk, node, err)
9490 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9491 (new_disk.size, new_disk.mode)))
9493 # change a given disk
9494 instance.disks[disk_op].mode = disk_dict['mode']
9495 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9497 if self.op.disk_template:
9498 r_shut = _ShutdownInstanceDisks(self, instance)
9500 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9501 " proceed with disk template conversion")
9502 mode = (instance.disk_template, self.op.disk_template)
9504 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9506 self.cfg.ReleaseDRBDMinors(instance.name)
9508 result.append(("disk_template", self.op.disk_template))
9511 for nic_op, nic_dict in self.op.nics:
9512 if nic_op == constants.DDM_REMOVE:
9513 # remove the last nic
9514 del instance.nics[-1]
9515 result.append(("nic.%d" % len(instance.nics), "remove"))
9516 elif nic_op == constants.DDM_ADD:
9517 # mac and bridge should be set by now
9518 mac = nic_dict['mac']
9519 ip = nic_dict.get('ip', None)
9520 nicparams = self.nic_pinst[constants.DDM_ADD]
9521 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9522 instance.nics.append(new_nic)
9523 result.append(("nic.%d" % (len(instance.nics) - 1),
9524 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9525 (new_nic.mac, new_nic.ip,
9526 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9527 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9530 for key in 'mac', 'ip':
9532 setattr(instance.nics[nic_op], key, nic_dict[key])
9533 if nic_op in self.nic_pinst:
9534 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9535 for key, val in nic_dict.iteritems():
9536 result.append(("nic.%s/%d" % (key, nic_op), val))
9539 if self.op.hvparams:
9540 instance.hvparams = self.hv_inst
9541 for key, val in self.op.hvparams.iteritems():
9542 result.append(("hv/%s" % key, val))
9545 if self.op.beparams:
9546 instance.beparams = self.be_inst
9547 for key, val in self.op.beparams.iteritems():
9548 result.append(("be/%s" % key, val))
9552 instance.os = self.op.os_name
9555 if self.op.osparams:
9556 instance.osparams = self.os_inst
9557 for key, val in self.op.osparams.iteritems():
9558 result.append(("os/%s" % key, val))
9560 self.cfg.Update(instance, feedback_fn)
9564 _DISK_CONVERSIONS = {
9565 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9566 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
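# A minimal, self-contained sketch (illustrative names, not part of the
# LU above) of the dispatch-by-tuple pattern behind _DISK_CONVERSIONS:
# conversions are keyed by (current_template, requested_template)
# pairs, so an unsupported combination is simply a missing key.

def _ToyConvertPlainToDrbd():
  return "plain -> drbd8"

def _ToyConvertDrbdToPlain():
  return "drbd8 -> plain"

_TOY_CONVERSIONS = {
  ("plain", "drbd8"): _ToyConvertPlainToDrbd,
  ("drbd8", "plain"): _ToyConvertDrbdToPlain,
}

def _ToyConvert(old_template, new_template):
  try:
    fn = _TOY_CONVERSIONS[(old_template, new_template)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" %
                     (old_template, new_template))
  return fn()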
9570 class LUBackupQuery(NoHooksLU):
9571 """Query the exports list
9576 def ExpandNames(self):
9577 self.needed_locks = {}
9578 self.share_locks[locking.LEVEL_NODE] = 1
9579 if not self.op.nodes:
9580 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9582 self.needed_locks[locking.LEVEL_NODE] = \
9583 _GetWantedNodes(self, self.op.nodes)
9585 def Exec(self, feedback_fn):
9586 """Compute the list of all the exported system images.
9589 @return: a dictionary with the structure node->(export-list)
9590 where export-list is a list of the instances exported on that node
9594 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9595 rpcresult = self.rpc.call_export_list(self.nodes)
9597 for node in rpcresult:
9598 if rpcresult[node].fail_msg:
9599 result[node] = False
9601 result[node] = rpcresult[node].payload
9606 class LUBackupPrepare(NoHooksLU):
9607 """Prepares an instance for an export and returns useful information.
9612 def ExpandNames(self):
9613 self._ExpandAndLockInstance()
9615 def CheckPrereq(self):
9616 """Check prerequisites.
9619 instance_name = self.op.instance_name
9621 self.instance = self.cfg.GetInstanceInfo(instance_name)
9622 assert self.instance is not None, \
9623 "Cannot retrieve locked instance %s" % self.op.instance_name
9624 _CheckNodeOnline(self, self.instance.primary_node)
9626 self._cds = _GetClusterDomainSecret()
9628 def Exec(self, feedback_fn):
9629 """Prepares an instance for an export.
9632 instance = self.instance
9634 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9635 salt = utils.GenerateSecret(8)
9637 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9638 result = self.rpc.call_x509_cert_create(instance.primary_node,
9639 constants.RIE_CERT_VALIDITY)
9640 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9642 (name, cert_pem) = result.payload
9644 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9648 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9649 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9651 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9657 class LUBackupExport(LogicalUnit):
9658 """Export an instance to an image in the cluster.
9661 HPATH = "instance-export"
9662 HTYPE = constants.HTYPE_INSTANCE
9665 def CheckArguments(self):
9666 """Check the arguments.
9669 self.x509_key_name = self.op.x509_key_name
9670 self.dest_x509_ca_pem = self.op.destination_x509_ca
9672 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9673 if not self.x509_key_name:
9674 raise errors.OpPrereqError("Missing X509 key name for encryption",
9677 if not self.dest_x509_ca_pem:
9678 raise errors.OpPrereqError("Missing destination X509 CA",
9681 def ExpandNames(self):
9682 self._ExpandAndLockInstance()
9684 # Lock all nodes for local exports
9685 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9686 # FIXME: lock only instance primary and destination node
9688 # Sad but true, for now we have to lock all nodes, as we don't know where
9689 # the previous export might be, and in this LU we search for it and
9690 # remove it from its current node. In the future we could fix this by:
9691 # - making a tasklet to search (share-lock all), then create the
9692 # new one, then another one to remove it afterwards
9693 # - removing the removal operation altogether
9694 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9696 def DeclareLocks(self, level):
9697 """Last minute lock declaration."""
9698 # All nodes are locked anyway, so nothing to do here.
9700 def BuildHooksEnv(self):
9703 This will run on the master, primary node and target node.
9707 "EXPORT_MODE": self.op.mode,
9708 "EXPORT_NODE": self.op.target_node,
9709 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9710 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9711 # TODO: Generic function for boolean env variables
9712 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9715 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9717 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9719 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9720 nl.append(self.op.target_node)
9724 def CheckPrereq(self):
9725 """Check prerequisites.
9727 This checks that the instance and node names are valid.
9730 instance_name = self.op.instance_name
9732 self.instance = self.cfg.GetInstanceInfo(instance_name)
9733 assert self.instance is not None, \
9734 "Cannot retrieve locked instance %s" % self.op.instance_name
9735 _CheckNodeOnline(self, self.instance.primary_node)
9737 if (self.op.remove_instance and self.instance.admin_up and
9738 not self.op.shutdown):
9739 raise errors.OpPrereqError("Can not remove instance without shutting it"
9742 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9743 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9744 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9745 assert self.dst_node is not None
9747 _CheckNodeOnline(self, self.dst_node.name)
9748 _CheckNodeNotDrained(self, self.dst_node.name)
9751 self.dest_disk_info = None
9752 self.dest_x509_ca = None
9754 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9755 self.dst_node = None
9757 if len(self.op.target_node) != len(self.instance.disks):
9758 raise errors.OpPrereqError(("Received destination information for %s"
9759 " disks, but instance %s has %s disks") %
9760 (len(self.op.target_node), instance_name,
9761 len(self.instance.disks)),
9764 cds = _GetClusterDomainSecret()
9766 # Check X509 key name
9768 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9769 except (TypeError, ValueError), err:
9770 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9772 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9773 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9776 # Load and verify CA
9778 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9779 except OpenSSL.crypto.Error, err:
9780 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9781 (err, ), errors.ECODE_INVAL)
9783 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9784 if errcode is not None:
9785 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9786 (msg, ), errors.ECODE_INVAL)
9788 self.dest_x509_ca = cert
9790 # Verify target information
9792 for idx, disk_data in enumerate(self.op.target_node):
9794 (host, port, magic) = \
9795 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9796 except errors.GenericError, err:
9797 raise errors.OpPrereqError("Target info for disk %s: %s" %
9798 (idx, err), errors.ECODE_INVAL)
9800 disk_info.append((host, port, magic))
9802 assert len(disk_info) == len(self.op.target_node)
9803 self.dest_disk_info = disk_info
9806 raise errors.ProgrammerError("Unhandled export mode %r" %
9809 # instance disk type verification
9810 # TODO: Implement export support for file-based disks
9811 for disk in self.instance.disks:
9812 if disk.dev_type == constants.LD_FILE:
9813 raise errors.OpPrereqError("Export not supported for instances with"
9814 " file-based disks", errors.ECODE_INVAL)
9816 def _CleanupExports(self, feedback_fn):
9817 """Removes exports of current instance from all other nodes.
9819 If an instance in a cluster with nodes A..D was exported to node C, its
9820 exports will be removed from the nodes A, B and D.
9823 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9825 nodelist = self.cfg.GetNodeList()
9826 nodelist.remove(self.dst_node.name)
9828 # on one-node clusters nodelist will be empty after the removal;
9829 # if we proceed, the backup would be removed because OpBackupQuery
9830 # substitutes an empty list with the full cluster node list.
9831 iname = self.instance.name
9833 feedback_fn("Removing old exports for instance %s" % iname)
9834 exportlist = self.rpc.call_export_list(nodelist)
9835 for node in exportlist:
9836 if exportlist[node].fail_msg:
9838 if iname in exportlist[node].payload:
9839 msg = self.rpc.call_export_remove(node, iname).fail_msg
9841 self.LogWarning("Could not remove older export for instance %s"
9842 " on node %s: %s", iname, node, msg)
9844 def Exec(self, feedback_fn):
9845 """Export an instance to an image in the cluster.
9848 assert self.op.mode in constants.EXPORT_MODES
9850 instance = self.instance
9851 src_node = instance.primary_node
9853 if self.op.shutdown:
9854 # shutdown the instance, but not the disks
9855 feedback_fn("Shutting down instance %s" % instance.name)
9856 result = self.rpc.call_instance_shutdown(src_node, instance,
9857 self.op.shutdown_timeout)
9858 # TODO: Maybe ignore failures if ignore_remove_failures is set
9859 result.Raise("Could not shutdown instance %s on"
9860 " node %s" % (instance.name, src_node))
9862 # set the disks ID correctly since call_instance_start needs the
9863 # correct drbd minor to create the symlinks
9864 for disk in instance.disks:
9865 self.cfg.SetDiskID(disk, src_node)
9867 activate_disks = (not instance.admin_up)
9870 # Activate the instance disks if we're exporting a stopped instance
9871 feedback_fn("Activating disks for %s" % instance.name)
9872 _StartInstanceDisks(self, instance, None)
9875 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9878 helper.CreateSnapshots()
9880 if (self.op.shutdown and instance.admin_up and
9881 not self.op.remove_instance):
9882 assert not activate_disks
9883 feedback_fn("Starting instance %s" % instance.name)
9884 result = self.rpc.call_instance_start(src_node, instance, None, None)
9885 msg = result.fail_msg
9887 feedback_fn("Failed to start instance: %s" % msg)
9888 _ShutdownInstanceDisks(self, instance)
9889 raise errors.OpExecError("Could not start instance: %s" % msg)
9891 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9892 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9893 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9894 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9895 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9897 (key_name, _, _) = self.x509_key_name
9900 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9903 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9904 key_name, dest_ca_pem,
9909 # Check for backwards compatibility
9910 assert len(dresults) == len(instance.disks)
9911 assert compat.all(isinstance(i, bool) for i in dresults), \
9912 "Not all results are boolean: %r" % dresults
9916 feedback_fn("Deactivating disks for %s" % instance.name)
9917 _ShutdownInstanceDisks(self, instance)
9919 if not (compat.all(dresults) and fin_resu):
9922 failures.append("export finalization")
9923 if not compat.all(dresults):
9924 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9926 failures.append("disk export: disk(s) %s" % fdsk)
9928 raise errors.OpExecError("Export failed, errors in %s" %
9929 utils.CommaJoin(failures))
9931 # At this point, the export was successful, we can cleanup/finish
9933 # Remove instance if requested
9934 if self.op.remove_instance:
9935 feedback_fn("Removing instance %s" % instance.name)
9936 _RemoveInstance(self, feedback_fn, instance,
9937 self.op.ignore_remove_failures)
9939 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9940 self._CleanupExports(feedback_fn)
9942 return fin_resu, dresults
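# Condensed form (a toy stand-in, using builtin all() instead of
# compat.all) of the failure aggregation at the end of Exec above: the
# finalization result and the per-disk boolean results are folded into
# a single error message listing everything that went wrong.

def _ToySummarizeExport(fin_resu, dresults):
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not all(dresults):
    fdsk = ", ".join(str(idx) for (idx, dsk) in enumerate(dresults)
                     if not dsk)
    failures.append("disk export: disk(s) %s" % fdsk)
  if failures:
    raise RuntimeError("Export failed, errors in %s" % ", ".join(failures))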
9945 class LUBackupRemove(NoHooksLU):
9946 """Remove exports related to the named instance.
9951 def ExpandNames(self):
9952 self.needed_locks = {}
9953 # We need all nodes to be locked in order for RemoveExport to work, but we
9954 # don't need to lock the instance itself, as nothing will happen to it (and
9955 # we can remove exports also for a removed instance)
9956 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9958 def Exec(self, feedback_fn):
9959 """Remove any export.
9962 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9963 # If the instance was not found we'll try with the name that was passed in.
9964 # This will only work if it was an FQDN, though.
9966 if not instance_name:
9968 instance_name = self.op.instance_name
9970 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9971 exportlist = self.rpc.call_export_list(locked_nodes)
9973 for node in exportlist:
9974 msg = exportlist[node].fail_msg
9976 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9978 if instance_name in exportlist[node].payload:
9980 result = self.rpc.call_export_remove(node, instance_name)
9981 msg = result.fail_msg
9983 logging.error("Could not remove export for instance %s"
9984 " on node %s: %s", instance_name, node, msg)
9986 if fqdn_warn and not found:
9987 feedback_fn("Export not found. If trying to remove an export belonging"
9988 " to a deleted instance please use its Fully Qualified"
9992 class LUGroupAdd(LogicalUnit):
9993 """Logical unit for creating node groups.
9997 HTYPE = constants.HTYPE_GROUP
10000 def ExpandNames(self):
10001 # We need the new group's UUID here so that we can create and acquire the
10002 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10003 # that it should not check whether the UUID exists in the configuration.
10004 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10005 self.needed_locks = {}
10006 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10008 def CheckPrereq(self):
10009 """Check prerequisites.
10011 This checks that the given group name is not already in use as a node group.
10016 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10017 except errors.OpPrereqError:
10020 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10021 " node group (UUID: %s)" %
10022 (self.op.group_name, existing_uuid),
10023 errors.ECODE_EXISTS)
10025 if self.op.ndparams:
10026 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10028 def BuildHooksEnv(self):
10029 """Build hooks env.
10033 "GROUP_NAME": self.op.group_name,
10035 mn = self.cfg.GetMasterNode()
10036 return env, [mn], [mn]
10038 def Exec(self, feedback_fn):
10039 """Add the node group to the cluster.
10042 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10043 uuid=self.group_uuid,
10044 alloc_policy=self.op.alloc_policy,
10045 ndparams=self.op.ndparams)
10047 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10048 del self.remove_locks[locking.LEVEL_NODEGROUP]
10051 class LUGroupAssignNodes(NoHooksLU):
10052 """Logical unit for assigning nodes to groups.
10057 def ExpandNames(self):
10058 # These raise errors.OpPrereqError on their own:
10059 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10060 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10062 # We want to lock all the affected nodes and groups. We have readily
10063 # available the list of nodes, and the *destination* group. To gather the
10064 # list of "source" groups, we need to fetch node information.
10065 self.node_data = self.cfg.GetAllNodesInfo()
10066 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10067 affected_groups.add(self.group_uuid)
10069 self.needed_locks = {
10070 locking.LEVEL_NODEGROUP: list(affected_groups),
10071 locking.LEVEL_NODE: self.op.nodes,
10074 def CheckPrereq(self):
10075 """Check prerequisites.
10078 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10079 instance_data = self.cfg.GetAllInstancesInfo()
10081 if self.group is None:
10082 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10083 (self.op.group_name, self.group_uuid))
10085 (new_splits, previous_splits) = \
10086 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10087 for node in self.op.nodes],
10088 self.node_data, instance_data)
10091 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10093 if not self.op.force:
10094 raise errors.OpExecError("The following instances get split by this"
10095 " change and --force was not given: %s" %
10098 self.LogWarning("This operation will split the following instances: %s",
10101 if previous_splits:
10102 self.LogWarning("In addition, these already-split instances continue"
10103 " to be spit across groups: %s",
10104 utils.CommaJoin(utils.NiceSort(previous_splits)))
10106 def Exec(self, feedback_fn):
10107 """Assign nodes to a new group.
10110 for node in self.op.nodes:
10111 self.node_data[node].group = self.group_uuid
10113 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10116 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10117 """Check for split instances after a node assignment.
10119 This method considers a series of node assignments as an atomic operation,
10120 and returns information about split instances after applying the set of
10123 In particular, it returns information about newly split instances, and about
10124 instances that were already split and remain so after the change.
10126 Only instances whose disk template is listed in constants.DTS_NET_MIRROR are considered.
10129 @type changes: list of (node_name, new_group_uuid) pairs.
10130 @param changes: list of node assignments to consider.
10131 @param node_data: a dict with data for all nodes
10132 @param instance_data: a dict with all instances to consider
10133 @rtype: a two-tuple
10134 @return: a list of instances that were previously unsplit and become split as a
10135 consequence of this change, and a list of instances that were already
10136 split and that this change does not fix.
10139 changed_nodes = dict((node, group) for node, group in changes
10140 if node_data[node].group != group)
10142 all_split_instances = set()
10143 previously_split_instances = set()
10145 def InstanceNodes(instance):
10146 return [instance.primary_node] + list(instance.secondary_nodes)
10148 for inst in instance_data.values():
10149 if inst.disk_template not in constants.DTS_NET_MIRROR:
10152 instance_nodes = InstanceNodes(inst)
10154 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10155 previously_split_instances.add(inst.name)
10157 if len(set(changed_nodes.get(node, node_data[node].group)
10158 for node in instance_nodes)) > 1:
10159 all_split_instances.add(inst.name)
10161 return (list(all_split_instances - previously_split_instances),
10162 list(previously_split_instances & all_split_instances))
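# A worked toy example (assumed data, not real cluster state) of the
# split logic above: "i1" ends up with nodes in two different groups
# after the change, so it is newly split; "i2" was split before and
# stays split, so the change does not fix it.

def _ToySplitExample():
  node_group = {"n1": "g1", "n2": "g1", "n3": "g2"}
  changes = {"n2": "g2"}  # n2 moves from g1 to g2
  instances = {"i1": ["n1", "n2"],  # both in g1 before, split after
               "i2": ["n1", "n3"]}  # split both before and after
  new_splits = []
  previous_splits = []
  for name, nodes in instances.items():
    before = set(node_group[n] for n in nodes)
    after = set(changes.get(n, node_group[n]) for n in nodes)
    if len(after) > 1:
      if len(before) > 1:
        previous_splits.append(name)
      else:
        new_splits.append(name)
  return (new_splits, previous_splits)  # (["i1"], ["i2"])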
10165 class _GroupQuery(_QueryBase):
10167 FIELDS = query.GROUP_FIELDS
10169 def ExpandNames(self, lu):
10170 lu.needed_locks = {}
10172 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10173 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10176 self.wanted = [name_to_uuid[name]
10177 for name in utils.NiceSort(name_to_uuid.keys())]
10179 # Accept names to be either names or UUIDs.
10182 all_uuid = frozenset(self._all_groups.keys())
10184 for name in self.names:
10185 if name in all_uuid:
10186 self.wanted.append(name)
10187 elif name in name_to_uuid:
10188 self.wanted.append(name_to_uuid[name])
10190 missing.append(name)
10193 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10194 errors.ECODE_NOENT)
10196 def DeclareLocks(self, lu, level):
10199 def _GetQueryData(self, lu):
10200 """Computes the list of node groups and their attributes.
10203 do_nodes = query.GQ_NODE in self.requested_data
10204 do_instances = query.GQ_INST in self.requested_data
10206 group_to_nodes = None
10207 group_to_instances = None
10209 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10210 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10211 # latter GetAllInstancesInfo() is not enough, for we have to go through
10212 # instance->node. Hence, we will need to process nodes even if we only need
10213 # instance information.
10214 if do_nodes or do_instances:
10215 all_nodes = lu.cfg.GetAllNodesInfo()
10216 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10219 for node in all_nodes.values():
10220 if node.group in group_to_nodes:
10221 group_to_nodes[node.group].append(node.name)
10222 node_to_group[node.name] = node.group
10225 all_instances = lu.cfg.GetAllInstancesInfo()
10226 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10228 for instance in all_instances.values():
10229 node = instance.primary_node
10230 if node in node_to_group:
10231 group_to_instances[node_to_group[node]].append(instance.name)
10234 # Do not pass on node information if it was not requested.
10235 group_to_nodes = None
10237 return query.GroupQueryData([self._all_groups[uuid]
10238 for uuid in self.wanted],
10239 group_to_nodes, group_to_instances)
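# Small illustration (toy data) of why instances are bucketed via their
# primary node above: instances do not record a group directly, so we
# first need node->group from the node objects and can then assign each
# instance to its primary node's group.

def _ToyGroupInstances(node_group, instance_pnode, wanted_groups):
  group_to_instances = dict((g, []) for g in wanted_groups)
  for inst, pnode in instance_pnode.items():
    group = node_group.get(pnode)
    if group in group_to_instances:
      group_to_instances[group].append(inst)
  return group_to_instances

# e.g. _ToyGroupInstances({"n1": "g1"}, {"i1": "n1"}, ["g1"])
# -> {"g1": ["i1"]}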
10242 class LUGroupQuery(NoHooksLU):
10243 """Logical unit for querying node groups.
10248 def CheckArguments(self):
10249 self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10251 def ExpandNames(self):
10252 self.gq.ExpandNames(self)
10254 def Exec(self, feedback_fn):
10255 return self.gq.OldStyleQuery(self)
10258 class LUGroupSetParams(LogicalUnit):
10259 """Modifies the parameters of a node group.
10262 HPATH = "group-modify"
10263 HTYPE = constants.HTYPE_GROUP
10266 def CheckArguments(self):
10269 self.op.alloc_policy,
10272 if all_changes.count(None) == len(all_changes):
10273 raise errors.OpPrereqError("Please pass at least one modification",
10274 errors.ECODE_INVAL)
10276 def ExpandNames(self):
10277 # This raises errors.OpPrereqError on its own:
10278 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10280 self.needed_locks = {
10281 locking.LEVEL_NODEGROUP: [self.group_uuid],
10284 def CheckPrereq(self):
10285 """Check prerequisites.
10288 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10290 if self.group is None:
10291 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10292 (self.op.group_name, self.group_uuid))
10294 if self.op.ndparams:
10295 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10296 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10297 self.new_ndparams = new_ndparams
10299 def BuildHooksEnv(self):
10300 """Build hooks env.
10304 "GROUP_NAME": self.op.group_name,
10305 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10307 mn = self.cfg.GetMasterNode()
10308 return env, [mn], [mn]
10310 def Exec(self, feedback_fn):
10311 """Modifies the node group.
10316 if self.op.ndparams:
10317 self.group.ndparams = self.new_ndparams
10318 result.append(("ndparams", str(self.group.ndparams)))
10320 if self.op.alloc_policy:
10321 self.group.alloc_policy = self.op.alloc_policy
10323 self.cfg.Update(self.group, feedback_fn)
10328 class LUGroupRemove(LogicalUnit):
10329 HPATH = "group-remove"
10330 HTYPE = constants.HTYPE_GROUP
10333 def ExpandNames(self):
10334 # This raises errors.OpPrereqError on its own:
10335 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10336 self.needed_locks = {
10337 locking.LEVEL_NODEGROUP: [self.group_uuid],
10340 def CheckPrereq(self):
10341 """Check prerequisites.
10343 This checks that the given group name exists as a node group, that it is
10344 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
10348 # Verify that the group is empty.
10349 group_nodes = [node.name
10350 for node in self.cfg.GetAllNodesInfo().values()
10351 if node.group == self.group_uuid]
10354 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10356 (self.op.group_name,
10357 utils.CommaJoin(utils.NiceSort(group_nodes))),
10358 errors.ECODE_STATE)
10360 # Verify the cluster would not be left group-less.
10361 if len(self.cfg.GetNodeGroupList()) == 1:
10362 raise errors.OpPrereqError("Group '%s' is the only group,"
10363 " cannot be removed" %
10364 self.op.group_name,
10365 errors.ECODE_STATE)
10367 def BuildHooksEnv(self):
10368 """Build hooks env.
10372 "GROUP_NAME": self.op.group_name,
10374 mn = self.cfg.GetMasterNode()
10375 return env, [mn], [mn]
10377 def Exec(self, feedback_fn):
10378 """Remove the node group.
10382 self.cfg.RemoveNodeGroup(self.group_uuid)
10383 except errors.ConfigurationError:
10384 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10385 (self.op.group_name, self.group_uuid))
10387 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10390 class LUGroupRename(LogicalUnit):
10391 HPATH = "group-rename"
10392 HTYPE = constants.HTYPE_GROUP
10395 def ExpandNames(self):
10396 # This raises errors.OpPrereqError on its own:
10397 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10399 self.needed_locks = {
10400 locking.LEVEL_NODEGROUP: [self.group_uuid],
10403 def CheckPrereq(self):
10404 """Check prerequisites.
10406 This checks that the given old_name exists as a node group, and that new_name is not already in use.
10411 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10412 except errors.OpPrereqError:
10415 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10416 " node group (UUID: %s)" %
10417 (self.op.new_name, new_name_uuid),
10418 errors.ECODE_EXISTS)
10420 def BuildHooksEnv(self):
10421 """Build hooks env.
10425 "OLD_NAME": self.op.old_name,
10426 "NEW_NAME": self.op.new_name,
10429 mn = self.cfg.GetMasterNode()
10430 all_nodes = self.cfg.GetAllNodesInfo()
10432 all_nodes.pop(mn, None)
10434 for node in all_nodes.values():
10435 if node.group == self.group_uuid:
10436 run_nodes.append(node.name)
10438 return env, run_nodes, run_nodes
10440 def Exec(self, feedback_fn):
10441 """Rename the node group.
10444 group = self.cfg.GetNodeGroup(self.group_uuid)
10447 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10448 (self.op.old_name, self.group_uuid))
10450 group.name = self.op.new_name
10451 self.cfg.Update(group, feedback_fn)
10453 return self.op.new_name
10456 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10457 """Generic tags LU.
10459 This is an abstract class which is the parent of all the other tags LUs.
10463 def ExpandNames(self):
10464 self.needed_locks = {}
10465 if self.op.kind == constants.TAG_NODE:
10466 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10467 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10468 elif self.op.kind == constants.TAG_INSTANCE:
10469 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10470 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10472 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10473 # not possible to acquire the BGL based on opcode parameters)
10475 def CheckPrereq(self):
10476 """Check prerequisites.
10479 if self.op.kind == constants.TAG_CLUSTER:
10480 self.target = self.cfg.GetClusterInfo()
10481 elif self.op.kind == constants.TAG_NODE:
10482 self.target = self.cfg.GetNodeInfo(self.op.name)
10483 elif self.op.kind == constants.TAG_INSTANCE:
10484 self.target = self.cfg.GetInstanceInfo(self.op.name)
10486 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10487 str(self.op.kind), errors.ECODE_INVAL)
10490 class LUTagsGet(TagsLU):
10491 """Returns the tags of a given object.
10496 def ExpandNames(self):
10497 TagsLU.ExpandNames(self)
10499 # Share locks as this is only a read operation
10500 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10502 def Exec(self, feedback_fn):
10503 """Returns the tag list.
10506 return list(self.target.GetTags())
10509 class LUTagsSearch(NoHooksLU):
10510 """Searches the tags for a given pattern.
10515 def ExpandNames(self):
10516 self.needed_locks = {}
10518 def CheckPrereq(self):
10519 """Check prerequisites.
10521 This checks the pattern passed for validity by compiling it.
10525 self.re = re.compile(self.op.pattern)
10526 except re.error, err:
10527 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10528 (self.op.pattern, err), errors.ECODE_INVAL)
10530 def Exec(self, feedback_fn):
10531 """Returns the tag list.
10535 tgts = [("/cluster", cfg.GetClusterInfo())]
10536 ilist = cfg.GetAllInstancesInfo().values()
10537 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10538 nlist = cfg.GetAllNodesInfo().values()
10539 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10541 for path, target in tgts:
10542 for tag in target.GetTags():
10543 if self.re.search(tag):
10544 results.append((path, tag))
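# A condensed, standalone version (assumed data shapes) of the tag
# search above: compile the pattern once, then scan every (path, tags)
# target and collect (path, tag) pairs for matching tags.
import re

def _ToySearchTags(pattern, targets):
  """targets: iterable of (path, iterable-of-tags) pairs."""
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in targets
          for tag in tags
          if rx.search(tag)]

# e.g. _ToySearchTags("^web", [("/instances/i1", ["web", "db"])])
# -> [("/instances/i1", "web")]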
10548 class LUTagsSet(TagsLU):
10549 """Sets a tag on a given object.
10554 def CheckPrereq(self):
10555 """Check prerequisites.
10557 This checks the type and length of the tag name and value.
10560 TagsLU.CheckPrereq(self)
10561 for tag in self.op.tags:
10562 objects.TaggableObject.ValidateTag(tag)
10564 def Exec(self, feedback_fn):
10569 for tag in self.op.tags:
10570 self.target.AddTag(tag)
10571 except errors.TagError, err:
10572 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10573 self.cfg.Update(self.target, feedback_fn)
10576 class LUTagsDel(TagsLU):
10577 """Delete a list of tags from a given object.
10582 def CheckPrereq(self):
10583 """Check prerequisites.
10585 This checks that we have the given tag.
10588 TagsLU.CheckPrereq(self)
10589 for tag in self.op.tags:
10590 objects.TaggableObject.ValidateTag(tag)
10591 del_tags = frozenset(self.op.tags)
10592 cur_tags = self.target.GetTags()
10594 diff_tags = del_tags - cur_tags
10596 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10597 raise errors.OpPrereqError("Tag(s) %s not found" %
10598 (utils.CommaJoin(diff_names), ),
10599 errors.ECODE_NOENT)
10601 def Exec(self, feedback_fn):
10602 """Remove the tag from the object.
10605 for tag in self.op.tags:
10606 self.target.RemoveTag(tag)
10607 self.cfg.Update(self.target, feedback_fn)
10610 class LUTestDelay(NoHooksLU):
10611 """Sleep for a specified amount of time.
10613 This LU sleeps on the master and/or nodes for a specified amount of time.
10619 def ExpandNames(self):
10620 """Expand names and set required locks.
10622 This expands the node list, if any.
10625 self.needed_locks = {}
10626 if self.op.on_nodes:
10627 # _GetWantedNodes can be used here, but is not always appropriate to use
10628 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10629 # more information.
10630 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10631 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10633 def _TestDelay(self):
10634 """Do the actual sleep.
10637 if self.op.on_master:
10638 if not utils.TestDelay(self.op.duration):
10639 raise errors.OpExecError("Error during master delay test")
10640 if self.op.on_nodes:
10641 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10642 for node, node_result in result.items():
10643 node_result.Raise("Failure during rpc call to node %s" % node)
10645 def Exec(self, feedback_fn):
10646 """Execute the test delay opcode, with the wanted repetitions.
10649 if self.op.repeat == 0:
10652 top_value = self.op.repeat - 1
10653 for i in range(self.op.repeat):
10654 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10658 class LUTestJqueue(NoHooksLU):
10659 """Utility LU to test some aspects of the job queue.
10664 # Must be lower than default timeout for WaitForJobChange to see whether it
10665 # notices changed jobs
10666 _CLIENT_CONNECT_TIMEOUT = 20.0
10667 _CLIENT_CONFIRM_TIMEOUT = 60.0
10670 def _NotifyUsingSocket(cls, cb, errcls):
10671 """Opens a Unix socket and waits for another program to connect.
10674 @param cb: Callback to send socket name to client
10675 @type errcls: class
10676 @param errcls: Exception class to use for errors
10679 # Using a temporary directory as there's no easy way to create temporary
10680 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
10682 tmpdir = tempfile.mkdtemp()
10684 tmpsock = utils.PathJoin(tmpdir, "sock")
10686 logging.debug("Creating temporary socket at %s", tmpsock)
10687 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10692 # Send details to client
10695 # Wait for client to connect before continuing
10696 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10698 (conn, _) = sock.accept()
10699 except socket.error, err:
10700 raise errcls("Client didn't connect in time (%s)" % err)
10704 # Remove as soon as client is connected
10705 shutil.rmtree(tmpdir)
10707 # Wait for client to close
10710 # pylint: disable-msg=E1101
10711 # Instance of '_socketobject' has no ... member
10712 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10714 except socket.error, err:
10715 raise errcls("Client failed to confirm notification (%s)" % err)
10719 def _SendNotification(self, test, arg, sockname):
10720 """Sends a notification to the client.
10723 @param test: Test name
10724 @param arg: Test argument (depends on test)
10725 @type sockname: string
10726 @param sockname: Socket path
10729 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10731 def _Notify(self, prereq, test, arg):
10732 """Notifies the client of a test.
10735 @param prereq: Whether this is a prereq-phase test
10737 @param test: Test name
10738 @param arg: Test argument (depends on test)
10742 errcls = errors.OpPrereqError
10744 errcls = errors.OpExecError
10746 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10750 def CheckArguments(self):
10751 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10752 self.expandnames_calls = 0
10754 def ExpandNames(self):
10755 checkargs_calls = getattr(self, "checkargs_calls", 0)
10756 if checkargs_calls < 1:
10757 raise errors.ProgrammerError("CheckArguments was not called")
10759 self.expandnames_calls += 1
10761 if self.op.notify_waitlock:
10762 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10764 self.LogInfo("Expanding names")
10766 # Get lock on master node (just to get a lock, not for a particular reason)
10767 self.needed_locks = {
10768 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10771 def Exec(self, feedback_fn):
10772 if self.expandnames_calls < 1:
10773 raise errors.ProgrammerError("ExpandNames was not called")
10775 if self.op.notify_exec:
10776 self._Notify(False, constants.JQT_EXEC, None)
10778 self.LogInfo("Executing")
10780 if self.op.log_messages:
10781 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10782 for idx, msg in enumerate(self.op.log_messages):
10783 self.LogInfo("Sending log message %s", idx + 1)
10784 feedback_fn(constants.JQT_MSGPREFIX + msg)
10785 # Report how many test messages have been sent
10786 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10789 raise errors.OpExecError("Opcode failure was requested")
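# Standalone sketch of the notification handshake implemented by
# _NotifyUsingSocket above: create a socket in a fresh temporary
# directory, tell the client where it is, wait (with a timeout) for the
# connection, and clean up as soon as possible. Error handling is
# reduced to the essentials here.
import os
import shutil
import socket
import tempfile

def _ToyWaitForClient(notify_fn, connect_timeout=20.0):
  tmpdir = tempfile.mkdtemp()
  try:
    sockname = os.path.join(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(sockname)
      sock.listen(1)
      notify_fn(sockname)  # tell the client where to connect
      sock.settimeout(connect_timeout)
      try:
        (conn, _) = sock.accept()
      except socket.error, err:
        raise RuntimeError("Client didn't connect in time (%s)" % err)
      conn.close()
    finally:
      sock.close()
  finally:
    shutil.rmtree(tmpdir, ignore_errors=True)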
10794 class IAllocator(object):
10795 """IAllocator framework.
10797 An IAllocator instance has four sets of attributes:
10798 - cfg that is needed to query the cluster
10799 - input data (all members of the _KEYS class attribute are required)
10800 - four buffer attributes (in|out_data|text), that represent the
10801 input (to the external script) in text and data structure format,
10802 and the output from it, again in two formats
10803 - the result variables from the script (success, info, nodes) for easy usage
10807 # pylint: disable-msg=R0902
10808 # lots of instance attributes
10810 "name", "mem_size", "disks", "disk_template",
10811 "os", "tags", "nics", "vcpus", "hypervisor",
10814 "name", "relocate_from",
10820 def __init__(self, cfg, rpc, mode, **kwargs):
10823 # init buffer variables
10824 self.in_text = self.out_text = self.in_data = self.out_data = None
10825 # init all input fields so that pylint is happy
10827 self.mem_size = self.disks = self.disk_template = None
10828 self.os = self.tags = self.nics = self.vcpus = None
10829 self.hypervisor = None
10830 self.relocate_from = None
10832 self.evac_nodes = None
10834 self.required_nodes = None
10835 # init result fields
10836 self.success = self.info = self.result = None
10837 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10838 keyset = self._ALLO_KEYS
10839 fn = self._AddNewInstance
10840 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10841 keyset = self._RELO_KEYS
10842 fn = self._AddRelocateInstance
10843 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10844 keyset = self._EVAC_KEYS
10845 fn = self._AddEvacuateNodes
10847 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10848 " IAllocator" % self.mode)
10850 if key not in keyset:
10851 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10852 " IAllocator" % key)
10853 setattr(self, key, kwargs[key])
10856 if key not in kwargs:
10857 raise errors.ProgrammerError("Missing input parameter '%s' to"
10858 " IAllocator" % key)
10859 self._BuildInputData(fn)
10861 def _ComputeClusterData(self):
10862 """Compute the generic allocator input data.
10864 This is the data that is independent of the actual operation.
10868 cluster_info = cfg.GetClusterInfo()
10871 "version": constants.IALLOCATOR_VERSION,
10872 "cluster_name": cfg.GetClusterName(),
10873 "cluster_tags": list(cluster_info.GetTags()),
10874 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10875 # we don't have job IDs
10877 ninfo = cfg.GetAllNodesInfo()
10878 iinfo = cfg.GetAllInstancesInfo().values()
10879 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10882 node_list = [n.name for n in ninfo.values() if n.vm_capable]
10884 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10885 hypervisor_name = self.hypervisor
10886 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10887 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10888 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10889 hypervisor_name = cluster_info.enabled_hypervisors[0]
10891 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10894 self.rpc.call_all_instances_info(node_list,
10895 cluster_info.enabled_hypervisors)
10897 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10899 config_ndata = self._ComputeBasicNodeData(ninfo)
10900 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10901 i_list, config_ndata)
10902 assert len(data["nodes"]) == len(ninfo), \
10903 "Incomplete node data computed"
10905 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10907 self.in_data = data
10910 def _ComputeNodeGroupData(cfg):
10911 """Compute node groups data.
10915 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10917 "name": gdata.name,
10918 "alloc_policy": gdata.alloc_policy,
10923 def _ComputeBasicNodeData(node_cfg):
10924 """Compute global node data.
10927 @returns: a dict of name: (node dict, node config)
10931 for ninfo in node_cfg.values():
10932 # fill in static (config-based) values
10934 "tags": list(ninfo.GetTags()),
10935 "primary_ip": ninfo.primary_ip,
10936 "secondary_ip": ninfo.secondary_ip,
10937 "offline": ninfo.offline,
10938 "drained": ninfo.drained,
10939 "master_candidate": ninfo.master_candidate,
10940 "group": ninfo.group,
10941 "master_capable": ninfo.master_capable,
10942 "vm_capable": ninfo.vm_capable,
10945 node_results[ninfo.name] = pnr
10947 return node_results
10950 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10952 """Compute global node data.
10954 @param node_results: the basic node structures as filled from the config
10957 # make a copy of the current dict
10958 node_results = dict(node_results)
10959 for nname, nresult in node_data.items():
10960 assert nname in node_results, "Missing basic data for node %s" % nname
10961 ninfo = node_cfg[nname]
10963 if not (ninfo.offline or ninfo.drained):
10964 nresult.Raise("Can't get data for node %s" % nname)
10965 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10967 remote_info = nresult.payload
10969 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10970 'vg_size', 'vg_free', 'cpu_total']:
10971 if attr not in remote_info:
10972 raise errors.OpExecError("Node '%s' didn't return attribute"
10973 " '%s'" % (nname, attr))
10974 if not isinstance(remote_info[attr], int):
10975 raise errors.OpExecError("Node '%s' returned invalid value"
10977 (nname, attr, remote_info[attr]))
10978 # compute memory used by primary instances
10979 i_p_mem = i_p_up_mem = 0
10980 for iinfo, beinfo in i_list:
10981 if iinfo.primary_node == nname:
10982 i_p_mem += beinfo[constants.BE_MEMORY]
10983 if iinfo.name not in node_iinfo[nname].payload:
10986 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10987 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10988 remote_info['memory_free'] -= max(0, i_mem_diff)
10991 i_p_up_mem += beinfo[constants.BE_MEMORY]
10993 # compute memory used by instances
10995 "total_memory": remote_info['memory_total'],
10996 "reserved_memory": remote_info['memory_dom0'],
10997 "free_memory": remote_info['memory_free'],
10998 "total_disk": remote_info['vg_size'],
10999 "free_disk": remote_info['vg_free'],
11000 "total_cpus": remote_info['cpu_total'],
11001 "i_pri_memory": i_p_mem,
11002 "i_pri_up_memory": i_p_up_mem,
11004 pnr_dyn.update(node_results[nname])
11006 node_results[nname] = pnr_dyn
11008 return node_results
11011 def _ComputeInstanceData(cluster_info, i_list):
11012 """Compute global instance data.
11016 for iinfo, beinfo in i_list:
11018 for nic in iinfo.nics:
11019 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11020 nic_dict = {"mac": nic.mac,
11022 "mode": filled_params[constants.NIC_MODE],
11023 "link": filled_params[constants.NIC_LINK],
11025 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11026 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11027 nic_data.append(nic_dict)
11029 "tags": list(iinfo.GetTags()),
11030 "admin_up": iinfo.admin_up,
11031 "vcpus": beinfo[constants.BE_VCPUS],
11032 "memory": beinfo[constants.BE_MEMORY],
11034 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11036 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11037 "disk_template": iinfo.disk_template,
11038 "hypervisor": iinfo.hypervisor,
11040 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11042 instance_data[iinfo.name] = pir
11044 return instance_data
11046 def _AddNewInstance(self):
11047 """Add new instance data to allocator structure.
11049 This, in combination with _ComputeClusterData, will create the
11050 correct structure needed as input for the allocator.
11052 The checks for the completeness of the opcode must have already been done.
11056 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11058 if self.disk_template in constants.DTS_NET_MIRROR:
11059 self.required_nodes = 2
11061 self.required_nodes = 1
11064 "disk_template": self.disk_template,
11067 "vcpus": self.vcpus,
11068 "memory": self.mem_size,
11069 "disks": self.disks,
11070 "disk_space_total": disk_space,
11072 "required_nodes": self.required_nodes,
11076 def _AddRelocateInstance(self):
11077 """Add relocate instance data to allocator structure.
11079 This, in combination with _ComputeClusterData, will create the
11080 correct structure needed as input for the allocator.
11082 The checks for the completeness of the opcode must have already been done.
11086 instance = self.cfg.GetInstanceInfo(self.name)
11087 if instance is None:
11088 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11089 " IAllocator" % self.name)
11091 if instance.disk_template not in constants.DTS_NET_MIRROR:
11092 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11093 errors.ECODE_INVAL)
11095 if len(instance.secondary_nodes) != 1:
11096 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11097 errors.ECODE_STATE)
11099 self.required_nodes = 1
11100 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11101 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11105 "disk_space_total": disk_space,
11106 "required_nodes": self.required_nodes,
11107 "relocate_from": self.relocate_from,
11111 def _AddEvacuateNodes(self):
11112 """Add evacuate nodes data to allocator structure.
11116 "evac_nodes": self.evac_nodes
11120 def _BuildInputData(self, fn):
11121 """Build input data structures.
11124 self._ComputeClusterData()
11127 request["type"] = self.mode
11128 self.in_data["request"] = request
11130 self.in_text = serializer.Dump(self.in_data)
11132 def Run(self, name, validate=True, call_fn=None):
11133 """Run an instance allocator and return the results.
11136 if call_fn is None:
11137 call_fn = self.rpc.call_iallocator_runner
11139 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11140 result.Raise("Failure while running the iallocator script")
11142 self.out_text = result.payload
11144 self._ValidateResult()
11146 def _ValidateResult(self):
11147 """Process the allocator results.
11149 This will process the result and, if successful, save it in
11150 self.out_data and the other result attributes.
11154 rdict = serializer.Load(self.out_text)
11155 except Exception, err:
11156 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11158 if not isinstance(rdict, dict):
11159 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11161 # TODO: remove backwards compatibility in later versions
11162 if "nodes" in rdict and "result" not in rdict:
11163 rdict["result"] = rdict["nodes"]
11166 for key in "success", "info", "result":
11167 if key not in rdict:
11168 raise errors.OpExecError("Can't parse iallocator results:"
11169 " missing key '%s'" % key)
11170 setattr(self, key, rdict[key])
11172 if not isinstance(rdict["result"], list):
11173 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11175 self.out_data = rdict
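# Reduced sketch of the result validation above: the iallocator reply
# must deserialize to a dict carrying "success", "info" and "result"
# (with "nodes" accepted as a legacy alias for "result"), and "result"
# must be a list. json is used here as a stand-in for ganeti.serializer.
import json

def _ToyValidateIAllocatorReply(text):
  try:
    rdict = json.loads(text)
  except ValueError, err:
    raise ValueError("Can't parse iallocator results: %s" % err)
  if not isinstance(rdict, dict):
    raise ValueError("Can't parse iallocator results: not a dict")
  if "nodes" in rdict and "result" not in rdict:
    rdict["result"] = rdict["nodes"]  # backwards compatibility
  for key in ("success", "info", "result"):
    if key not in rdict:
      raise ValueError("Can't parse iallocator results:"
                       " missing key '%s'" % key)
  if not isinstance(rdict["result"], list):
    raise ValueError("Can't parse iallocator results:"
                     " 'result' key is not a list")
  return rdict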
11178 class LUTestAllocator(NoHooksLU):
11179 """Run allocator tests.
11181 This LU runs the allocator tests.
11184 def CheckPrereq(self):
11185 """Check prerequisites.
11187 This checks the opcode parameters depending on the direction and mode of the test.
11190 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11191 for attr in ["mem_size", "disks", "disk_template",
11192 "os", "tags", "nics", "vcpus"]:
11193 if not hasattr(self.op, attr):
11194 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11195 attr, errors.ECODE_INVAL)
11196 iname = self.cfg.ExpandInstanceName(self.op.name)
11197 if iname is not None:
11198 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11199 iname, errors.ECODE_EXISTS)
11200 if not isinstance(self.op.nics, list):
11201 raise errors.OpPrereqError("Invalid parameter 'nics'",
11202 errors.ECODE_INVAL)
11203 if not isinstance(self.op.disks, list):
11204 raise errors.OpPrereqError("Invalid parameter 'disks'",
11205 errors.ECODE_INVAL)
11206 for row in self.op.disks:
11207 if (not isinstance(row, dict) or
11208 "size" not in row or
11209 not isinstance(row["size"], int) or
11210 "mode" not in row or
11211 row["mode"] not in ['r', 'w']):
11212 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11213 " parameter", errors.ECODE_INVAL)
11214 if self.op.hypervisor is None:
11215 self.op.hypervisor = self.cfg.GetHypervisorType()
11216 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11217 fname = _ExpandInstanceName(self.cfg, self.op.name)
11218 self.op.name = fname
11219 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11220 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11221 if not hasattr(self.op, "evac_nodes"):
11222 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11223 " opcode input", errors.ECODE_INVAL)
11225 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11226 self.op.mode, errors.ECODE_INVAL)
11228 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11229 if self.op.allocator is None:
11230 raise errors.OpPrereqError("Missing allocator name",
11231 errors.ECODE_INVAL)
11232 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11233 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11234 self.op.direction, errors.ECODE_INVAL)
11236 def Exec(self, feedback_fn):
11237 """Run the allocator test.
11240 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11241 ial = IAllocator(self.cfg, self.rpc,
11244 mem_size=self.op.mem_size,
11245 disks=self.op.disks,
11246 disk_template=self.op.disk_template,
11250 vcpus=self.op.vcpus,
11251 hypervisor=self.op.hypervisor,
11253 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11254 ial = IAllocator(self.cfg, self.rpc,
11257 relocate_from=list(self.relocate_from),
11259 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11260 ial = IAllocator(self.cfg, self.rpc,
11262 evac_nodes=self.op.evac_nodes)
11264 raise errors.ProgrammerError("Uncatched mode %s in"
11265 " LUTestAllocator.Exec", self.op.mode)
11267 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11268 result = ial.in_text
11270 ial.Run(self.op.allocator, validate=False)
11271 result = ial.out_text
11275 #: Query type implementations
11277 constants.QR_INSTANCE: _InstanceQuery,
11278 constants.QR_NODE: _NodeQuery,
11279 constants.QR_GROUP: _GroupQuery,
11283 def _GetQueryImplementation(name):
11284 """Returns the implemtnation for a query type.
11286 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11290 return _QUERY_IMPL[name]
11292 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11293 errors.ECODE_INVAL)
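# Hypothetical usage of the lookup above (with QR_* names from
# constants.QR_OP_QUERY):
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   impl = _GetQueryImplementation("no-such-resource")  # raises OpPrereqError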