4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 import ganeti.masterd.instance # pylint: disable-msg=W0611
65 def _SupportsOob(cfg, node):
66 """Tells if node supports OOB.
68 @type cfg: L{config.ConfigWriter}
69 @param cfg: The cluster configuration
70 @type node: L{objects.Node}
72 @return: The OOB script if supported or an empty string otherwise
75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
79 class LogicalUnit(object):
80 """Logical Unit base class.
82 Subclasses must follow these rules:
83 - implement ExpandNames
84 - implement CheckPrereq (except when tasklets are used)
85 - implement Exec (except when tasklets are used)
86 - implement BuildHooksEnv
87 - redefine HPATH and HTYPE
88 - optionally redefine their run requirements:
89 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
91 Note that all commands require root permissions.
93 @ivar dry_run_result: the value (if any) that will be returned to the caller
94 in dry-run mode (signalled by opcode dry_run parameter)
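# Editorial example (not part of upstream Ganeti): a minimal LogicalUnit
# subclass following the rules above; the class name, HPATH value and
# behaviour are purely illustrative.
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#
#     def ExpandNames(self):
#       self.needed_locks = {}
#
#     def CheckPrereq(self):
#       pass
#
#     def BuildHooksEnv(self):
#       return ({"OP_TARGET": self.cfg.GetClusterName()}, [], [])
#
#     def Exec(self, feedback_fn):
#       return True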
101 def __init__(self, processor, op, context, rpc):
102 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
validity.
108 self.proc = processor
110 self.cfg = context.cfg
111 self.context = context
113 # Dicts used to declare locking needs to mcpu
114 self.needed_locks = None
115 self.acquired_locks = {}
116 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
118 self.remove_locks = {}
119 # Used to force good behavior when calling helper functions
120 self.recalculate_locks = {}
123 self.Log = processor.Log # pylint: disable-msg=C0103
124 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
125 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
126 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
127 # support for dry-run
128 self.dry_run_result = None
129 # support for generic debug attribute
130 if (not hasattr(self.op, "debug_level") or
131 not isinstance(self.op.debug_level, int)):
132 self.op.debug_level = 0
137 # Validate opcode parameters and set defaults
138 self.op.Validate(True)
140 self.CheckArguments()
143 """Returns the SshRunner object
147 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
150 ssh = property(fget=__GetSSH)
152 def CheckArguments(self):
153 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:

  - ExpandNames is left as purely a lock-related function
  - CheckPrereq is run after we have acquired locks (and possibly
    waited for them)

The function is allowed to change the self.op attribute so that
later methods need no longer worry about missing parameters.
170 def ExpandNames(self):
171 """Expand names for this LU.
173 This method is called before starting to execute the opcode, and it should
174 update all the parameters of the opcode to their canonical form (e.g. a
175 short node name must be fully expanded after this method has successfully
176 completed). This way locking, hooks, logging, etc. can work correctly.
178 LUs which implement this method must also populate the self.needed_locks
179 member, as a dict with lock levels as keys, and a list of needed lock names
182 - use an empty dict if you don't need any lock
183 - if you don't need any lock at a particular level omit that level
184 - don't put anything for the BGL level
185 - if you want all locks at a level use locking.ALL_SET as a value
187 If you need to share locks (rather than acquire them exclusively) at one
188 level you can modify self.share_locks, setting a true value (usually 1) for
189 that level. By default locks are not shared.
191 This function can also define a list of tasklets, which then will be
192 executed in order instead of the usual LU-level CheckPrereq and Exec
193 functions, if those are not defined by the LU.
197 # Acquire all nodes and one instance
198 self.needed_locks = {
199 locking.LEVEL_NODE: locking.ALL_SET,
200 locking.LEVEL_INSTANCE: ['instance1.example.com'],
202 # Acquire just two nodes
203 self.needed_locks = {
204 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
207 self.needed_locks = {} # No, you can't leave it to the default value None
210 # The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same
# time.
214 self.needed_locks = {} # Exclusive LUs don't need locks.
216 raise NotImplementedError
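# Editorial example: an ExpandNames that takes all node locks in shared
# mode, combining needed_locks and share_locks as described above.
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1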
218 def DeclareLocks(self, level):
219 """Declare LU locking needs for a level
221 While most LUs can just declare their locking needs at ExpandNames time,
222 sometimes there's the need to calculate some locks after having acquired
223 the ones before. This function is called just before acquiring locks at a
224 particular level, but after acquiring the ones at lower levels, and permits
225 such calculations. It can be used to modify self.needed_locks, and by
226 default it does nothing.
228 This function is only called if you have something already set in
229 self.needed_locks for the level.
231 @param level: Locking level which is going to be locked
232 @type level: member of ganeti.locking.LEVELS
236 def CheckPrereq(self):
237 """Check prerequisites for this LU.
239 This method should check that the prerequisites for the execution
240 of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are
allowed.
244 The method should raise errors.OpPrereqError in case something is
245 not fulfilled. Its return value is ignored.
247 This method should also update all the parameters of the opcode to
248 their canonical form if it hasn't been done by ExpandNames before.
251 if self.tasklets is not None:
252 for (idx, tl) in enumerate(self.tasklets):
253 logging.debug("Checking prerequisites for tasklet %s/%s",
254 idx + 1, len(self.tasklets))
259 def Exec(self, feedback_fn):
262 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in
code, or expected.
267 if self.tasklets is not None:
268 for (idx, tl) in enumerate(self.tasklets):
269 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
272 raise NotImplementedError
274 def BuildHooksEnv(self):
275 """Build hooks environment for this LU.
This method should return a three-element tuple consisting of: a dict
278 containing the environment that will be used for running the
279 specific hook for this LU, a list of node names on which the hook
280 should run before the execution, and a list of node names on which
281 the hook should run after the execution.
283 The keys of the dict must not have 'GANETI_' prefixed as this will
284 be handled in the hooks runner. Also note additional keys will be
285 added by the hooks runner. If the LU doesn't define any
286 environment, an empty dict (and not None) should be returned.
288 No nodes should be returned as an empty list (and not None).
Note that if the HPATH for a LU class is None, this function will
not be called.
294 raise NotImplementedError
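# Editorial example of the three-element return value described above
# (env dict, pre-hook nodes, post-hook nodes); the master-only node
# lists are illustrative.
#
#   def BuildHooksEnv(self):
#     env = {"OP_TARGET": self.cfg.GetClusterName()}
#     mn = self.cfg.GetMasterNode()
#     return env, [mn], [mn]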
296 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
297 """Notify the LU about the results of its hooks.
299 This method is called every time a hooks phase is executed, and notifies
300 the Logical Unit about the hooks' result. The LU can then use it to alter
301 its result based on the hooks. By default the method does nothing and the
302 previous result is passed back unchanged but any LU can define it if it
303 wants to use the local cluster hook-scripts somehow.
305 @param phase: one of L{constants.HOOKS_PHASE_POST} or
306 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
307 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
309 @param lu_result: the previous Exec result this LU had, or None
311 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the "unused argument" and
# "could be a function" warnings
317 # pylint: disable-msg=W0613,R0201
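# Editorial example (hypothetical LU): adjusting the Exec result based on
# the post-phase hook results.
#
#   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
#     if phase == constants.HOOKS_PHASE_POST:
#       feedback_fn("Hooks ran on %d node(s)" % len(hook_results))
#     return lu_result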
320 def _ExpandAndLockInstance(self):
321 """Helper function to expand and lock an instance.
323 Many LUs that work on an instance take its name in self.op.instance_name
324 and need to expand it and then declare the expanded name for locking. This
325 function does it, and then updates self.op.instance_name to the expanded
name. It also initializes needed_locks as a dict, if this hasn't been done
before.
330 if self.needed_locks is None:
331 self.needed_locks = {}
333 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
334 "_ExpandAndLockInstance called with instance-level locks set"
335 self.op.instance_name = _ExpandInstanceName(self.cfg,
336 self.op.instance_name)
337 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
339 def _LockInstancesNodes(self, primary_only=False):
340 """Helper function to declare instances' nodes for locking.
342 This function should be called after locking one or more instances to lock
343 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
344 with all primary or secondary nodes for instances already locked and
345 present in self.needed_locks[locking.LEVEL_INSTANCE].
347 It should be called from DeclareLocks, and for safety only works if
348 self.recalculate_locks[locking.LEVEL_NODE] is set.
350 In the future it may grow parameters to just lock some instance's nodes, or
351 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
355 if level == locking.LEVEL_NODE:
356 self._LockInstancesNodes()
358 @type primary_only: boolean
359 @param primary_only: only lock primary nodes of locked instances
362 assert locking.LEVEL_NODE in self.recalculate_locks, \
363 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
367 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
368 # future we might want to have different behaviors depending on the value
369 # of self.recalculate_locks[locking.LEVEL_NODE]
371 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
372 instance = self.context.cfg.GetInstanceInfo(instance_name)
373 wanted_nodes.append(instance.primary_node)
375 wanted_nodes.extend(instance.secondary_nodes)
377 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
378 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
379 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
380 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
382 del self.recalculate_locks[locking.LEVEL_NODE]
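# Editorial example: the usual pattern for an LU that locks an instance
# and later needs its nodes - declare an empty node lock list plus
# recalculate_locks in ExpandNames, then let _LockInstancesNodes fill it
# in from DeclareLocks.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()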
385 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
386 """Simple LU which runs no hooks.
388 This LU is intended as a parent for other LogicalUnits which will
389 run no hooks, in order to reduce duplicate code.
395 def BuildHooksEnv(self):
396 """Empty BuildHooksEnv for NoHooksLu.
398 This just raises an error.
401 assert False, "BuildHooksEnv called for NoHooksLUs"
405 """Tasklet base class.
407 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
408 they can mix legacy code with tasklets. Locking needs to be done in the LU,
409 tasklets know nothing about locks.
411 Subclasses must follow these rules:
412 - Implement CheckPrereq
416 def __init__(self, lu):
423 def CheckPrereq(self):
"""Check prerequisites for this tasklet.
426 This method should check whether the prerequisites for the execution of
427 this tasklet are fulfilled. It can do internode communication, but it
428 should be idempotent - no cluster or system changes are allowed.
430 The method should raise errors.OpPrereqError in case something is not
431 fulfilled. Its return value is ignored.
433 This method should also update all parameters to their canonical form if it
434 hasn't been done before.
439 def Exec(self, feedback_fn):
440 """Execute the tasklet.
442 This method should implement the actual work. It should raise
443 errors.OpExecError for failures that are somewhat dealt with in code, or
447 raise NotImplementedError
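# Editorial example (hypothetical names): a trivial tasklet, and how an
# LU could run tasklets instead of its own CheckPrereq/Exec.
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("tasklet ran")
#
#   # ...inside the owning LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self)]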
451 """Base for query utility classes.
454 #: Attribute holding field definitions
457 def __init__(self, names, fields, use_locking):
458 """Initializes this class.
462 self.use_locking = use_locking
464 self.query = query.Query(self.FIELDS, fields)
465 self.requested_data = self.query.RequestedData()
467 self.do_locking = None
470 def _GetNames(self, lu, all_names, lock_level):
471 """Helper function to determine names asked for in the query.
475 names = lu.acquired_locks[lock_level]
479 if self.wanted == locking.ALL_SET:
480 assert not self.names
481 # caller didn't specify names, so ordering is not important
482 return utils.NiceSort(names)
484 # caller specified names and we must keep the same order
486 assert not self.do_locking or lu.acquired_locks[lock_level]
488 missing = set(self.wanted).difference(names)
490 raise errors.OpExecError("Some items were removed before retrieving"
491 " their data: %s" % missing)
493 # Return expanded names
497 def FieldsQuery(cls, fields):
498 """Returns list of available fields.
500 @return: List of L{objects.QueryFieldDefinition}
503 return query.QueryFields(cls.FIELDS, fields)
505 def ExpandNames(self, lu):
506 """Expand names for this query.
508 See L{LogicalUnit.ExpandNames}.
511 raise NotImplementedError()
513 def DeclareLocks(self, lu, level):
514 """Declare locks for this query.
516 See L{LogicalUnit.DeclareLocks}.
519 raise NotImplementedError()
521 def _GetQueryData(self, lu):
522 """Collects all data for this query.
524 @return: Query data object
527 raise NotImplementedError()
529 def NewStyleQuery(self, lu):
530 """Collect data and execute query.
533 return query.GetQueryResponse(self.query, self._GetQueryData(lu))
535 def OldStyleQuery(self, lu):
536 """Collect data and execute query.
539 return self.query.OldStyleQuery(self._GetQueryData(lu))
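# Editorial example (hypothetical _ExampleQuery): the pieces a concrete
# query class provides on top of _QueryBase; the FIELDS value and the
# returned data object are assumptions for illustration only.
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       if self.names:
#         self.wanted = _GetWantedNodes(lu, self.names)
#       else:
#         self.wanted = locking.ALL_SET
#       self.do_locking = self.use_locking
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       return {}  # would normally build a query data object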
542 def _GetWantedNodes(lu, nodes):
543 """Returns list of checked and expanded node names.
545 @type lu: L{LogicalUnit}
546 @param lu: the logical unit on whose behalf we execute
548 @param nodes: list of node names or None for all nodes
550 @return: the list of nodes, sorted
551 @raise errors.ProgrammerError: if the nodes parameter is wrong type
555 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
557 return utils.NiceSort(lu.cfg.GetNodeList())
560 def _GetWantedInstances(lu, instances):
561 """Returns list of checked and expanded instance names.
563 @type lu: L{LogicalUnit}
564 @param lu: the logical unit on whose behalf we execute
565 @type instances: list
566 @param instances: list of instance names or None for all instances
568 @return: the list of instances, sorted
569 @raise errors.OpPrereqError: if the instances parameter is wrong type
570 @raise errors.OpPrereqError: if any of the passed instances is not found
574 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
576 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
580 def _GetUpdatedParams(old_params, update_dict,
581 use_default=True, use_none=False):
582 """Return the new version of a parameter dictionary.
584 @type old_params: dict
585 @param old_params: old parameters
586 @type update_dict: dict
587 @param update_dict: dict containing new parameter values, or
588 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
    values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
    deleted' values
597 @return: the new parameter dictionary
600 params_copy = copy.deepcopy(old_params)
601 for key, val in update_dict.iteritems():
602 if ((use_default and val == constants.VALUE_DEFAULT) or
603 (use_none and val is None)):
609 params_copy[key] = val
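# Editorial example of the merge semantics implemented above:
#
#   old = {"mem": 128, "vcpus": 1}
#   upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 2, "os": "x"}
#   _GetUpdatedParams(old, upd)
#     -> {"vcpus": 2, "os": "x"}   # "mem" falls back to its default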
613 def _CheckOutputFields(static, dynamic, selected):
614 """Checks whether all selected fields are valid.
616 @type static: L{utils.FieldSet}
617 @param static: static fields set
618 @type dynamic: L{utils.FieldSet}
619 @param dynamic: dynamic fields set
626 delta = f.NonMatching(selected)
628 raise errors.OpPrereqError("Unknown output fields selected: %s"
629 % ",".join(delta), errors.ECODE_INVAL)
632 def _CheckGlobalHvParams(params):
633 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of
global parameters.
639 used_globals = constants.HVC_GLOBALS.intersection(params)
641 msg = ("The following hypervisor parameters are global and cannot"
642 " be customized at instance level, please modify them at"
643 " cluster level: %s" % utils.CommaJoin(used_globals))
644 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
647 def _CheckNodeOnline(lu, node, msg=None):
648 """Ensure that a given node is online.
650 @param lu: the LU on behalf of which we make the check
651 @param node: the node to check
652 @param msg: if passed, should be a message to replace the default one
653 @raise errors.OpPrereqError: if the node is offline
657 msg = "Can't use offline node"
658 if lu.cfg.GetNodeInfo(node).offline:
659 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
662 def _CheckNodeNotDrained(lu, node):
663 """Ensure that a given node is not drained.
665 @param lu: the LU on behalf of which we make the check
666 @param node: the node to check
667 @raise errors.OpPrereqError: if the node is drained
670 if lu.cfg.GetNodeInfo(node).drained:
671 raise errors.OpPrereqError("Can't use drained node %s" % node,
675 def _CheckNodeVmCapable(lu, node):
676 """Ensure that a given node is vm capable.
678 @param lu: the LU on behalf of which we make the check
679 @param node: the node to check
680 @raise errors.OpPrereqError: if the node is not vm capable
683 if not lu.cfg.GetNodeInfo(node).vm_capable:
684 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
688 def _CheckNodeHasOS(lu, node, os_name, force_variant):
689 """Ensure that a node supports a given OS.
691 @param lu: the LU on behalf of which we make the check
692 @param node: the node to check
693 @param os_name: the OS to query about
694 @param force_variant: whether to ignore variant errors
695 @raise errors.OpPrereqError: if the node is not supporting the OS
698 result = lu.rpc.call_os_get(node, os_name)
699 result.Raise("OS '%s' not in supported OS list for node %s" %
701 prereq=True, ecode=errors.ECODE_INVAL)
702 if not force_variant:
703 _CheckOSVariant(result.payload, os_name)
706 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
707 """Ensure that a node has the given secondary ip.
709 @type lu: L{LogicalUnit}
710 @param lu: the LU on behalf of which we make the check
712 @param node: the node to check
713 @type secondary_ip: string
714 @param secondary_ip: the ip to check
715 @type prereq: boolean
716 @param prereq: whether to throw a prerequisite or an execute error
717 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
718 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
721 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
722 result.Raise("Failure checking secondary ip on node %s" % node,
723 prereq=prereq, ecode=errors.ECODE_ENVIRON)
724 if not result.payload:
725 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
726 " please fix and re-run this command" % secondary_ip)
728 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
730 raise errors.OpExecError(msg)
733 def _GetClusterDomainSecret():
734 """Reads the cluster domain secret.
737 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
741 def _CheckInstanceDown(lu, instance, reason):
742 """Ensure that an instance is not running."""
743 if instance.admin_up:
744 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
745 (instance.name, reason), errors.ECODE_STATE)
747 pnode = instance.primary_node
748 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
749 ins_l.Raise("Can't contact node %s for instance information" % pnode,
750 prereq=True, ecode=errors.ECODE_ENVIRON)
752 if instance.name in ins_l.payload:
753 raise errors.OpPrereqError("Instance %s is running, %s" %
754 (instance.name, reason), errors.ECODE_STATE)
757 def _ExpandItemName(fn, name, kind):
758 """Expand an item name.
760 @param fn: the function to use for expansion
761 @param name: requested item name
762 @param kind: text description ('Node' or 'Instance')
763 @return: the resolved (full) name
764 @raise errors.OpPrereqError: if the item is not found
768 if full_name is None:
769 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
774 def _ExpandNodeName(cfg, name):
775 """Wrapper over L{_ExpandItemName} for nodes."""
776 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
779 def _ExpandInstanceName(cfg, name):
780 """Wrapper over L{_ExpandItemName} for instance."""
781 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
784 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
785 memory, vcpus, nics, disk_template, disks,
786 bep, hvp, hypervisor_name):
787 """Builds instance related env variables for hooks
789 This builds the hook environment from individual variables.
792 @param name: the name of the instance
793 @type primary_node: string
794 @param primary_node: the name of the instance's primary node
795 @type secondary_nodes: list
796 @param secondary_nodes: list of secondary nodes as strings
797 @type os_type: string
798 @param os_type: the name of the instance's OS
799 @type status: boolean
800 @param status: the should_run status of the instance
802 @param memory: the memory size of the instance
804 @param vcpus: the count of VCPUs the instance has
806 @param nics: list of tuples (ip, mac, mode, link) representing
807 the NICs the instance has
808 @type disk_template: string
809 @param disk_template: the disk template of the instance
811 @param disks: the list of (size, mode) pairs
813 @param bep: the backend parameters for the instance
815 @param hvp: the hypervisor parameters for the instance
816 @type hypervisor_name: string
817 @param hypervisor_name: the hypervisor for the instance
819 @return: the hook environment for this instance
828 "INSTANCE_NAME": name,
829 "INSTANCE_PRIMARY": primary_node,
830 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
831 "INSTANCE_OS_TYPE": os_type,
832 "INSTANCE_STATUS": str_status,
833 "INSTANCE_MEMORY": memory,
834 "INSTANCE_VCPUS": vcpus,
835 "INSTANCE_DISK_TEMPLATE": disk_template,
836 "INSTANCE_HYPERVISOR": hypervisor_name,
840 nic_count = len(nics)
841 for idx, (ip, mac, mode, link) in enumerate(nics):
844 env["INSTANCE_NIC%d_IP" % idx] = ip
845 env["INSTANCE_NIC%d_MAC" % idx] = mac
846 env["INSTANCE_NIC%d_MODE" % idx] = mode
847 env["INSTANCE_NIC%d_LINK" % idx] = link
848 if mode == constants.NIC_MODE_BRIDGED:
849 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
853 env["INSTANCE_NIC_COUNT"] = nic_count
856 disk_count = len(disks)
857 for idx, (size, mode) in enumerate(disks):
858 env["INSTANCE_DISK%d_SIZE" % idx] = size
859 env["INSTANCE_DISK%d_MODE" % idx] = mode
863 env["INSTANCE_DISK_COUNT"] = disk_count
865 for source, kind in [(bep, "BE"), (hvp, "HV")]:
866 for key, value in source.items():
867 env["INSTANCE_%s_%s" % (kind, key)] = value
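# Editorial example: for a hypothetical single-NIC, single-disk instance
# the function above returns a dict along the lines of
#
#   {"INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_SECONDARIES": "",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MODE": "bridged",
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240,
#    ...}
#
# (the GANETI_ prefix is added later by the hooks runner, as noted in
# LogicalUnit.BuildHooksEnv)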
872 def _NICListToTuple(lu, nics):
873 """Build a list of nic information tuples.
875 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
876 value in LUInstanceQueryData.
878 @type lu: L{LogicalUnit}
879 @param lu: the logical unit on whose behalf we execute
880 @type nics: list of L{objects.NIC}
881 @param nics: list of nics to convert to hooks tuples
885 cluster = lu.cfg.GetClusterInfo()
889 filled_params = cluster.SimpleFillNIC(nic.nicparams)
890 mode = filled_params[constants.NIC_MODE]
891 link = filled_params[constants.NIC_LINK]
892 hooks_nics.append((ip, mac, mode, link))
896 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
897 """Builds instance related env variables for hooks from an object.
899 @type lu: L{LogicalUnit}
900 @param lu: the logical unit on whose behalf we execute
901 @type instance: L{objects.Instance}
902 @param instance: the instance for which we should build the
905 @param override: dictionary with key/values that will override
908 @return: the hook environment dictionary
911 cluster = lu.cfg.GetClusterInfo()
912 bep = cluster.FillBE(instance)
913 hvp = cluster.FillHV(instance)
915 'name': instance.name,
916 'primary_node': instance.primary_node,
917 'secondary_nodes': instance.secondary_nodes,
918 'os_type': instance.os,
919 'status': instance.admin_up,
920 'memory': bep[constants.BE_MEMORY],
921 'vcpus': bep[constants.BE_VCPUS],
922 'nics': _NICListToTuple(lu, instance.nics),
923 'disk_template': instance.disk_template,
924 'disks': [(disk.size, disk.mode) for disk in instance.disks],
927 'hypervisor_name': instance.hypervisor,
930 args.update(override)
931 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
934 def _AdjustCandidatePool(lu, exceptions):
935 """Adjust the candidate pool after node operations.
938 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
940 lu.LogInfo("Promoted nodes to master candidate role: %s",
941 utils.CommaJoin(node.name for node in mod_list))
942 for name in mod_list:
943 lu.context.ReaddNode(name)
944 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
946 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
950 def _DecideSelfPromotion(lu, exceptions=None):
951 """Decide whether I should promote myself as a master candidate.
954 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
955 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
956 # the new node will increase mc_max with one, so:
957 mc_should = min(mc_should + 1, cp_size)
958 return mc_now < mc_should
961 def _CheckNicsBridgesExist(lu, target_nics, target_node):
"""Check that the bridges needed by a list of nics exist.
965 cluster = lu.cfg.GetClusterInfo()
966 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
967 brlist = [params[constants.NIC_LINK] for params in paramslist
968 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
970 result = lu.rpc.call_bridges_exist(target_node, brlist)
971 result.Raise("Error checking bridges on destination node '%s'" %
972 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
975 def _CheckInstanceBridgesExist(lu, instance, node=None):
"""Check that the bridges needed by an instance exist.
980 node = instance.primary_node
981 _CheckNicsBridgesExist(lu, instance.nics, node)
984 def _CheckOSVariant(os_obj, name):
985 """Check whether an OS name conforms to the os variants specification.
987 @type os_obj: L{objects.OS}
988 @param os_obj: OS object to check
990 @param name: OS name passed by the user, to check for validity
993 if not os_obj.supported_variants:
995 variant = objects.OS.GetVariant(name)
997 raise errors.OpPrereqError("OS name must include a variant",
1000 if variant not in os_obj.supported_variants:
1001 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
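# Editorial example: with an OS declaring variants ["default", "testing"],
# the user-supplied name must carry the variant in the "os+variant" form
# (the OS name below is illustrative):
#
#   _CheckOSVariant(os_obj, "debootstrap+default")  # accepted
#   _CheckOSVariant(os_obj, "debootstrap")          # error: no variant given
#   _CheckOSVariant(os_obj, "debootstrap+foo")      # error: unsupported variant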
1004 def _GetNodeInstancesInner(cfg, fn):
1005 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1008 def _GetNodeInstances(cfg, node_name):
1009 """Returns a list of all primary and secondary instances on a node.
1013 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1016 def _GetNodePrimaryInstances(cfg, node_name):
1017 """Returns primary instances on a node.
1020 return _GetNodeInstancesInner(cfg,
1021 lambda inst: node_name == inst.primary_node)
1024 def _GetNodeSecondaryInstances(cfg, node_name):
1025 """Returns secondary instances on a node.
1028 return _GetNodeInstancesInner(cfg,
1029 lambda inst: node_name in inst.secondary_nodes)
1032 def _GetStorageTypeArgs(cfg, storage_type):
1033 """Returns the arguments for a storage type.
1036 # Special case for file storage
1037 if storage_type == constants.ST_FILE:
1038 # storage.FileStorage wants a list of storage directories
1039 return [[cfg.GetFileStorageDir()]]
1044 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1047 for dev in instance.disks:
1048 cfg.SetDiskID(dev, node_name)
1050 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1051 result.Raise("Failed to get disk status from node %s" % node_name,
1052 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1054 for idx, bdev_status in enumerate(result.payload):
1055 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1061 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1062 """Check the sanity of iallocator and node arguments and use the
1063 cluster-wide iallocator if appropriate.
1065 Check that at most one of (iallocator, node) is specified. If none is
1066 specified, then the LU's opcode's iallocator slot is filled with the
1067 cluster-wide default iallocator.
1069 @type iallocator_slot: string
1070 @param iallocator_slot: the name of the opcode iallocator slot
1071 @type node_slot: string
1072 @param node_slot: the name of the opcode target node slot
1075 node = getattr(lu.op, node_slot, None)
1076 iallocator = getattr(lu.op, iallocator_slot, None)
1078 if node is not None and iallocator is not None:
raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1081 elif node is None and iallocator is None:
1082 default_iallocator = lu.cfg.GetDefaultIAllocator()
1083 if default_iallocator:
1084 setattr(lu.op, iallocator_slot, default_iallocator)
1086 raise errors.OpPrereqError("No iallocator or node given and no"
1087 " cluster-wide default iallocator found."
1088 " Please specify either an iallocator or a"
1089 " node, or set a cluster-wide default"
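# Editorial example: a typical call site in an LU's CheckArguments; the
# slot names used here ("iallocator", "pnode") are illustrative and
# depend on the opcode in question.
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "pnode")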
1093 class LUClusterPostInit(LogicalUnit):
1094 """Logical unit for running hooks after cluster initialization.
1097 HPATH = "cluster-init"
1098 HTYPE = constants.HTYPE_CLUSTER
1100 def BuildHooksEnv(self):
1104 env = {"OP_TARGET": self.cfg.GetClusterName()}
1105 mn = self.cfg.GetMasterNode()
1106 return env, [], [mn]
1108 def Exec(self, feedback_fn):
1115 class LUClusterDestroy(LogicalUnit):
1116 """Logical unit for destroying the cluster.
1119 HPATH = "cluster-destroy"
1120 HTYPE = constants.HTYPE_CLUSTER
1122 def BuildHooksEnv(self):
1126 env = {"OP_TARGET": self.cfg.GetClusterName()}
1129 def CheckPrereq(self):
1130 """Check prerequisites.
1132 This checks whether the cluster is empty.
1134 Any errors are signaled by raising errors.OpPrereqError.
1137 master = self.cfg.GetMasterNode()
1139 nodelist = self.cfg.GetNodeList()
1140 if len(nodelist) != 1 or nodelist[0] != master:
1141 raise errors.OpPrereqError("There are still %d node(s) in"
1142 " this cluster." % (len(nodelist) - 1),
1144 instancelist = self.cfg.GetInstanceList()
1146 raise errors.OpPrereqError("There are still %d instance(s) in"
1147 " this cluster." % len(instancelist),
1150 def Exec(self, feedback_fn):
1151 """Destroys the cluster.
1154 master = self.cfg.GetMasterNode()
1156 # Run post hooks on master node before it's removed
1157 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1159 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1161 # pylint: disable-msg=W0702
1162 self.LogWarning("Errors occurred running hooks on %s" % master)
1164 result = self.rpc.call_node_stop_master(master, False)
1165 result.Raise("Could not disable the master role")
1170 def _VerifyCertificate(filename):
1171 """Verifies a certificate for LUClusterVerify.
1173 @type filename: string
1174 @param filename: Path to PEM file
1178 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1179 utils.ReadFile(filename))
1180 except Exception, err: # pylint: disable-msg=W0703
1181 return (LUClusterVerify.ETYPE_ERROR,
1182 "Failed to load X509 certificate %s: %s" % (filename, err))
1185 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1186 constants.SSL_CERT_EXPIRATION_ERROR)
1189 fnamemsg = "While verifying %s: %s" % (filename, msg)
1194 return (None, fnamemsg)
1195 elif errcode == utils.CERT_WARNING:
1196 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1197 elif errcode == utils.CERT_ERROR:
1198 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1200 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1203 class LUClusterVerify(LogicalUnit):
1204 """Verifies the cluster status.
1207 HPATH = "cluster-verify"
1208 HTYPE = constants.HTYPE_CLUSTER
1211 TCLUSTER = "cluster"
1213 TINSTANCE = "instance"
1215 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1216 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1217 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1218 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1219 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1220 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1221 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1222 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1223 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1224 ENODEDRBD = (TNODE, "ENODEDRBD")
1225 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1226 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1227 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1228 ENODEHV = (TNODE, "ENODEHV")
1229 ENODELVM = (TNODE, "ENODELVM")
1230 ENODEN1 = (TNODE, "ENODEN1")
1231 ENODENET = (TNODE, "ENODENET")
1232 ENODEOS = (TNODE, "ENODEOS")
1233 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1234 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1235 ENODERPC = (TNODE, "ENODERPC")
1236 ENODESSH = (TNODE, "ENODESSH")
1237 ENODEVERSION = (TNODE, "ENODEVERSION")
1238 ENODESETUP = (TNODE, "ENODESETUP")
1239 ENODETIME = (TNODE, "ENODETIME")
1240 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1242 ETYPE_FIELD = "code"
1243 ETYPE_ERROR = "ERROR"
1244 ETYPE_WARNING = "WARNING"
1246 _HOOKS_INDENT_RE = re.compile("^", re.M)
1248 class NodeImage(object):
1249 """A class representing the logical and physical status of a node.
1252 @ivar name: the node name to which this object refers
1253 @ivar volumes: a structure as returned from
1254 L{ganeti.backend.GetVolumeList} (runtime)
1255 @ivar instances: a list of running instances (runtime)
1256 @ivar pinst: list of configured primary instances (config)
1257 @ivar sinst: list of configured secondary instances (config)
@ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1259 of this node (config)
1260 @ivar mfree: free memory, as reported by hypervisor (runtime)
1261 @ivar dfree: free disk, as reported by the node (runtime)
1262 @ivar offline: the offline status (config)
1263 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1265 not whether the individual keys were correct) (runtime)
1266 @type lvm_fail: boolean
1267 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1268 @type hyp_fail: boolean
1269 @ivar hyp_fail: whether the RPC call didn't return the instance list
1270 @type ghost: boolean
1271 @ivar ghost: whether this is a known node or not (config)
1272 @type os_fail: boolean
1273 @ivar os_fail: whether the RPC call didn't return valid OS data
1275 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1276 @type vm_capable: boolean
1277 @ivar vm_capable: whether the node can host instances
1280 def __init__(self, offline=False, name=None, vm_capable=True):
1289 self.offline = offline
1290 self.vm_capable = vm_capable
1291 self.rpc_fail = False
1292 self.lvm_fail = False
1293 self.hyp_fail = False
1295 self.os_fail = False
1298 def ExpandNames(self):
1299 self.needed_locks = {
1300 locking.LEVEL_NODE: locking.ALL_SET,
1301 locking.LEVEL_INSTANCE: locking.ALL_SET,
1303 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1305 def _Error(self, ecode, item, msg, *args, **kwargs):
1306 """Format an error message.
1308 Based on the opcode's error_codes parameter, either format a
1309 parseable error code, or a simpler error string.
1311 This must be called only from Exec and functions called from Exec.
1314 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1316 # first complete the msg
1319 # then format the whole message
1320 if self.op.error_codes:
1321 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1327 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1328 # and finally report it via the feedback_fn
1329 self._feedback_fn(" - %s" % msg)
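# Editorial example of the two message formats produced above for the
# same problem (node name and error text are illustrative); both are
# emitted through feedback_fn with a leading " - ":
#
#   with op.error_codes:  ERROR:ENODELVM:node:node1.example.com:<message>
#   without:              ERROR: node node1.example.com: <message>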
1331 def _ErrorIf(self, cond, *args, **kwargs):
1332 """Log an error message if the passed condition is True.
1335 cond = bool(cond) or self.op.debug_simulate_errors
1337 self._Error(*args, **kwargs)
1338 # do not mark the operation as failed for WARN cases only
1339 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1340 self.bad = self.bad or cond
1342 def _VerifyNode(self, ninfo, nresult):
1343 """Perform some basic validation on data returned from a node.
- check the result data structure is well formed and has all the
  mandatory fields
1347 - check ganeti version
1349 @type ninfo: L{objects.Node}
1350 @param ninfo: the node to check
1351 @param nresult: the results from the node
1353 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
1358 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1360 # main result, nresult should be a non-empty dict
1361 test = not nresult or not isinstance(nresult, dict)
1362 _ErrorIf(test, self.ENODERPC, node,
1363 "unable to verify node: no data returned")
1367 # compares ganeti version
1368 local_version = constants.PROTOCOL_VERSION
1369 remote_version = nresult.get("version", None)
1370 test = not (remote_version and
1371 isinstance(remote_version, (list, tuple)) and
1372 len(remote_version) == 2)
1373 _ErrorIf(test, self.ENODERPC, node,
1374 "connection to node returned invalid data")
1378 test = local_version != remote_version[0]
1379 _ErrorIf(test, self.ENODEVERSION, node,
1380 "incompatible protocol versions: master %s,"
1381 " node %s", local_version, remote_version[0])
1385 # node seems compatible, we can actually try to look into its results
1387 # full package version
1388 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1389 self.ENODEVERSION, node,
1390 "software version mismatch: master %s, node %s",
1391 constants.RELEASE_VERSION, remote_version[1],
1392 code=self.ETYPE_WARNING)
1394 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1395 if ninfo.vm_capable and isinstance(hyp_result, dict):
1396 for hv_name, hv_result in hyp_result.iteritems():
1397 test = hv_result is not None
1398 _ErrorIf(test, self.ENODEHV, node,
1399 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1401 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1402 if ninfo.vm_capable and isinstance(hvp_result, list):
1403 for item, hv_name, hv_result in hvp_result:
1404 _ErrorIf(True, self.ENODEHV, node,
1405 "hypervisor %s parameter verify failure (source %s): %s",
1406 hv_name, item, hv_result)
1408 test = nresult.get(constants.NV_NODESETUP,
1409 ["Missing NODESETUP results"])
1410 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1415 def _VerifyNodeTime(self, ninfo, nresult,
1416 nvinfo_starttime, nvinfo_endtime):
1417 """Check the node time.
1419 @type ninfo: L{objects.Node}
1420 @param ninfo: the node to check
1421 @param nresult: the remote results for the node
1422 @param nvinfo_starttime: the start time of the RPC call
1423 @param nvinfo_endtime: the end time of the RPC call
1427 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1429 ntime = nresult.get(constants.NV_TIME, None)
1431 ntime_merged = utils.MergeTime(ntime)
1432 except (ValueError, TypeError):
1433 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1436 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1437 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1438 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1439 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1443 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1444 "Node time diverges by at least %s from master node time",
1447 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1448 """Check the node LVM results.
1450 @type ninfo: L{objects.Node}
1451 @param ninfo: the node to check
1452 @param nresult: the remote results for the node
1453 @param vg_name: the configured VG name
1460 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1462 # checks vg existence and size > 20G
1463 vglist = nresult.get(constants.NV_VGLIST, None)
1465 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1467 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1468 constants.MIN_VG_SIZE)
1469 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1472 pvlist = nresult.get(constants.NV_PVLIST, None)
1473 test = pvlist is None
1474 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1476 # check that ':' is not present in PV names, since it's a
# special character for lvcreate (denotes the range of PEs to
# be used on the PV)
1479 for _, pvname, owner_vg in pvlist:
1480 test = ":" in pvname
1481 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1482 " '%s' of VG '%s'", pvname, owner_vg)
1484 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1485 """Check the node bridges.
1487 @type ninfo: L{objects.Node}
1488 @param ninfo: the node to check
1489 @param nresult: the remote results for the node
1490 @param bridges: the expected list of bridges
1497 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1499 missing = nresult.get(constants.NV_BRIDGES, None)
1500 test = not isinstance(missing, list)
1501 _ErrorIf(test, self.ENODENET, node,
1502 "did not return valid bridge information")
1504 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1505 utils.CommaJoin(sorted(missing)))
1507 def _VerifyNodeNetwork(self, ninfo, nresult):
1508 """Check the node network connectivity results.
1510 @type ninfo: L{objects.Node}
1511 @param ninfo: the node to check
1512 @param nresult: the remote results for the node
1516 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1518 test = constants.NV_NODELIST not in nresult
1519 _ErrorIf(test, self.ENODESSH, node,
1520 "node hasn't returned node ssh connectivity data")
1522 if nresult[constants.NV_NODELIST]:
1523 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1524 _ErrorIf(True, self.ENODESSH, node,
1525 "ssh communication with node '%s': %s", a_node, a_msg)
1527 test = constants.NV_NODENETTEST not in nresult
1528 _ErrorIf(test, self.ENODENET, node,
1529 "node hasn't returned node tcp connectivity data")
1531 if nresult[constants.NV_NODENETTEST]:
1532 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1534 _ErrorIf(True, self.ENODENET, node,
1535 "tcp communication with node '%s': %s",
1536 anode, nresult[constants.NV_NODENETTEST][anode])
1538 test = constants.NV_MASTERIP not in nresult
1539 _ErrorIf(test, self.ENODENET, node,
1540 "node hasn't returned node master IP reachability data")
1542 if not nresult[constants.NV_MASTERIP]:
1543 if node == self.master_node:
1544 msg = "the master node cannot reach the master IP (not configured?)"
1546 msg = "cannot reach the master IP"
1547 _ErrorIf(True, self.ENODENET, node, msg)
1549 def _VerifyInstance(self, instance, instanceconfig, node_image,
1551 """Verify an instance.
1553 This function checks to see if the required block devices are
1554 available on the instance's node.
1557 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1558 node_current = instanceconfig.primary_node
1560 node_vol_should = {}
1561 instanceconfig.MapLVsByNode(node_vol_should)
1563 for node in node_vol_should:
1564 n_img = node_image[node]
1565 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1566 # ignore missing volumes on offline or broken nodes
1568 for volume in node_vol_should[node]:
1569 test = volume not in n_img.volumes
1570 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1571 "volume %s missing on node %s", volume, node)
1573 if instanceconfig.admin_up:
1574 pri_img = node_image[node_current]
1575 test = instance not in pri_img.instances and not pri_img.offline
1576 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1577 "instance not running on its primary node %s",
1580 for node, n_img in node_image.items():
1581 if node != node_current:
1582 test = instance in n_img.instances
1583 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1584 "instance should not run on node %s", node)
1586 diskdata = [(nname, success, status, idx)
1587 for (nname, disks) in diskstatus.items()
1588 for idx, (success, status) in enumerate(disks)]
1590 for nname, success, bdev_status, idx in diskdata:
1591 # the 'ghost node' construction in Exec() ensures that we have a
1593 snode = node_image[nname]
1594 bad_snode = snode.ghost or snode.offline
1595 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1596 self.EINSTANCEFAULTYDISK, instance,
1597 "couldn't retrieve status for disk/%s on %s: %s",
1598 idx, nname, bdev_status)
1599 _ErrorIf((instanceconfig.admin_up and success and
1600 bdev_status.ldisk_status == constants.LDS_FAULTY),
1601 self.EINSTANCEFAULTYDISK, instance,
1602 "disk/%s on %s is faulty", idx, nname)
1604 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1605 """Verify if there are any unknown volumes in the cluster.
1607 The .os, .swap and backup volumes are ignored. All other volumes are
1608 reported as unknown.
1610 @type reserved: L{ganeti.utils.FieldSet}
1611 @param reserved: a FieldSet of reserved volume names
1614 for node, n_img in node_image.items():
1615 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1616 # skip non-healthy nodes
1618 for volume in n_img.volumes:
1619 test = ((node not in node_vol_should or
1620 volume not in node_vol_should[node]) and
1621 not reserved.Matches(volume))
1622 self._ErrorIf(test, self.ENODEORPHANLV, node,
1623 "volume %s is unknown", volume)
1625 def _VerifyOrphanInstances(self, instancelist, node_image):
1626 """Verify the list of running instances.
1628 This checks what instances are running but unknown to the cluster.
1631 for node, n_img in node_image.items():
1632 for o_inst in n_img.instances:
1633 test = o_inst not in instancelist
1634 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1635 "instance %s on node %s should not exist", o_inst, node)
1637 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1638 """Verify N+1 Memory Resilience.
1640 Check that if one single node dies we can still start all the
1641 instances it was primary for.
1644 for node, n_img in node_image.items():
1645 # This code checks that every node which is now listed as
1646 # secondary has enough memory to host all instances it is
# supposed to, should a single other node in the cluster fail.
1648 # FIXME: not ready for failover to an arbitrary node
1649 # FIXME: does not support file-backed instances
1650 # WARNING: we currently take into account down instances as well
1651 # as up ones, considering that even if they're down someone
1652 # might want to start them even in the event of a node failure.
1654 # we're skipping offline nodes from the N+1 warning, since
# most likely we don't have good memory information from them;
1656 # we already list instances living on such nodes, and that's
1659 for prinode, instances in n_img.sbp.items():
1661 for instance in instances:
1662 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1663 if bep[constants.BE_AUTO_BALANCE]:
1664 needed_mem += bep[constants.BE_MEMORY]
1665 test = n_img.mfree < needed_mem
1666 self._ErrorIf(test, self.ENODEN1, node,
"not enough memory to accommodate instance failovers"
1668 " should node %s fail (%dMiB needed, %dMiB available)",
1669 prinode, needed_mem, n_img.mfree)
1671 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1673 """Verifies and computes the node required file checksums.
1675 @type ninfo: L{objects.Node}
1676 @param ninfo: the node to check
1677 @param nresult: the remote results for the node
1678 @param file_list: required list of files
1679 @param local_cksum: dictionary of local files and their checksums
1680 @param master_files: list of files that only masters should have
1684 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1686 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1687 test = not isinstance(remote_cksum, dict)
1688 _ErrorIf(test, self.ENODEFILECHECK, node,
1689 "node hasn't returned file checksum data")
1693 for file_name in file_list:
1694 node_is_mc = ninfo.master_candidate
1695 must_have = (file_name not in master_files) or node_is_mc
1697 test1 = file_name not in remote_cksum
1699 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1701 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1702 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1703 "file '%s' missing", file_name)
1704 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1705 "file '%s' has wrong checksum", file_name)
1706 # not candidate and this is not a must-have file
1707 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1708 "file '%s' should not exist on non master"
1709 " candidates (and the file is outdated)", file_name)
1710 # all good, except non-master/non-must have combination
1711 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1712 "file '%s' should not exist"
1713 " on non master candidates", file_name)
1715 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
"""Verifies the node DRBD status.
1719 @type ninfo: L{objects.Node}
1720 @param ninfo: the node to check
1721 @param nresult: the remote results for the node
1722 @param instanceinfo: the dict of instances
1723 @param drbd_helper: the configured DRBD usermode helper
1724 @param drbd_map: the DRBD map as returned by
1725 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1729 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1732 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
test = (helper_result is None)
1734 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1735 "no drbd usermode helper returned")
1737 status, payload = helper_result
1739 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1740 "drbd usermode helper check unsuccessful: %s", payload)
1741 test = status and (payload != drbd_helper)
1742 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1743 "wrong drbd usermode helper: %s", payload)
1745 # compute the DRBD minors
1747 for minor, instance in drbd_map[node].items():
1748 test = instance not in instanceinfo
1749 _ErrorIf(test, self.ECLUSTERCFG, None,
1750 "ghost instance '%s' in temporary DRBD map", instance)
1751 # ghost instance should not be running, but otherwise we
1752 # don't give double warnings (both ghost instance and
1753 # unallocated minor in use)
1755 node_drbd[minor] = (instance, False)
1757 instance = instanceinfo[instance]
1758 node_drbd[minor] = (instance.name, instance.admin_up)
1760 # and now check them
1761 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1762 test = not isinstance(used_minors, (tuple, list))
1763 _ErrorIf(test, self.ENODEDRBD, node,
1764 "cannot parse drbd status file: %s", str(used_minors))
1766 # we cannot check drbd status
1769 for minor, (iname, must_exist) in node_drbd.items():
1770 test = minor not in used_minors and must_exist
1771 _ErrorIf(test, self.ENODEDRBD, node,
1772 "drbd minor %d of instance %s is not active", minor, iname)
1773 for minor in used_minors:
1774 test = minor not in node_drbd
1775 _ErrorIf(test, self.ENODEDRBD, node,
1776 "unallocated drbd minor %d is in use", minor)
1778 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1779 """Builds the node OS structures.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param nimg: the node image object
1788 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1790 remote_os = nresult.get(constants.NV_OSLIST, None)
1791 test = (not isinstance(remote_os, list) or
1792 not compat.all(isinstance(v, list) and len(v) == 7
1793 for v in remote_os))
1795 _ErrorIf(test, self.ENODEOS, node,
1796 "node hasn't returned valid OS data")
1805 for (name, os_path, status, diagnose,
1806 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1808 if name not in os_dict:
1811 # parameters is a list of lists instead of list of tuples due to
1812 # JSON lacking a real tuple type, fix it:
1813 parameters = [tuple(v) for v in parameters]
1814 os_dict[name].append((os_path, status, diagnose,
1815 set(variants), set(parameters), set(api_ver)))
1817 nimg.oslist = os_dict
1819 def _VerifyNodeOS(self, ninfo, nimg, base):
1820 """Verifies the node OS list.
1822 @type ninfo: L{objects.Node}
1823 @param ninfo: the node to check
1824 @param nimg: the node image object
1825 @param base: the 'template' node we match against (e.g. from the master)
1829 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1831 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1833 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1834 for os_name, os_data in nimg.oslist.items():
1835 assert os_data, "Empty OS status for OS %s?!" % os_name
1836 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1837 _ErrorIf(not f_status, self.ENODEOS, node,
1838 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1839 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1840 "OS '%s' has multiple entries (first one shadows the rest): %s",
1841 os_name, utils.CommaJoin([v[0] for v in os_data]))
# this will be caught in the backend too
1843 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1844 and not f_var, self.ENODEOS, node,
1845 "OS %s with API at least %d does not declare any variant",
1846 os_name, constants.OS_API_V15)
1847 # comparisons with the 'base' image
1848 test = os_name not in base.oslist
1849 _ErrorIf(test, self.ENODEOS, node,
1850 "Extra OS %s not present on reference node (%s)",
1854 assert base.oslist[os_name], "Base node has empty OS status?"
1855 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1857 # base OS is invalid, skipping
1859 for kind, a, b in [("API version", f_api, b_api),
1860 ("variants list", f_var, b_var),
1861 ("parameters", beautify_params(f_param),
1862 beautify_params(b_param))]:
1863 _ErrorIf(a != b, self.ENODEOS, node,
1864 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1865 kind, os_name, base.name,
1866 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1868 # check any missing OSes
1869 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1870 _ErrorIf(missing, self.ENODEOS, node,
1871 "OSes present on reference node %s but missing on this node: %s",
1872 base.name, utils.CommaJoin(missing))
1874 def _VerifyOob(self, ninfo, nresult):
1875 """Verifies out of band functionality of a node.
1877 @type ninfo: L{objects.Node}
1878 @param ninfo: the node to check
1879 @param nresult: the remote results for the node
1883 # We just have to verify the paths on master and/or master candidates
1884 # as the oob helper is invoked on the master
1885 if ((ninfo.master_candidate or ninfo.master_capable) and
1886 constants.NV_OOB_PATHS in nresult):
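# Each entry returned under NV_OOB_PATHS is an error description for a
# problematic OOB helper path, so any non-empty entry is reported as-is.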
1887 for path_result in nresult[constants.NV_OOB_PATHS]:
1888 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1890 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1891 """Verifies and updates the node volume data.
1893 This function will update a L{NodeImage}'s internal structures
1894 with data from the remote call.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param nimg: the node image object
1900 @param vg_name: the configured VG name
1904 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1906 nimg.lvm_fail = True
1907 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1910 elif isinstance(lvdata, basestring):
1911 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1912 utils.SafeEncode(lvdata))
1913 elif not isinstance(lvdata, dict):
1914 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1916 nimg.volumes = lvdata
1917 nimg.lvm_fail = False
1919 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1920 """Verifies and updates the node instance list.
1922 If the listing was successful, then updates this node's instance
1923 list. Otherwise, it marks the RPC call as failed for the instance list.
1926 @type ninfo: L{objects.Node}
1927 @param ninfo: the node to check
1928 @param nresult: the remote results for the node
1929 @param nimg: the node image object
1932 idata = nresult.get(constants.NV_INSTANCELIST, None)
1933 test = not isinstance(idata, list)
1934 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1935 " (instancelist): %s", utils.SafeEncode(str(idata)))
1937 nimg.hyp_fail = True
1939 nimg.instances = idata
1941 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1942 """Verifies and computes a node information map
1944 @type ninfo: L{objects.Node}
1945 @param ninfo: the node to check
1946 @param nresult: the remote results for the node
1947 @param nimg: the node image object
1948 @param vg_name: the configured VG name
1952 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1954 # try to read free memory (from the hypervisor)
1955 hv_info = nresult.get(constants.NV_HVINFO, None)
1956 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1957 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1960 nimg.mfree = int(hv_info["memory_free"])
1961 except (ValueError, TypeError):
1962 _ErrorIf(True, self.ENODERPC, node,
1963 "node returned invalid nodeinfo, check hypervisor")
1965 # FIXME: devise a free space model for file based instances as well
1966 if vg_name is not None:
1967 test = (constants.NV_VGLIST not in nresult or
1968 vg_name not in nresult[constants.NV_VGLIST])
1969 _ErrorIf(test, self.ENODELVM, node,
1970 "node didn't return data for the volume group '%s'"
1971 " - it is either missing or broken", vg_name)
1974 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1975 except (ValueError, TypeError):
1976 _ErrorIf(True, self.ENODERPC, node,
1977 "node returned invalid LVM info, check LVM status")
1979 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1980 """Gets per-disk status information for all instances.
1982 @type nodelist: list of strings
1983 @param nodelist: Node names
1984 @type node_image: dict of (name, L{objects.Node})
1985 @param node_image: Node objects
1986 @type instanceinfo: dict of (name, L{objects.Instance})
1987 @param instanceinfo: Instance objects
1988 @rtype: {instance: {node: [(success, payload)]}}
1989 @return: a dictionary of per-instance dictionaries with nodes as
1990 keys and disk information as values; the disk information is a
1991 list of tuples (success, payload)
1994 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1997 node_disks_devonly = {}
1998 diskless_instances = set()
1999 diskless = constants.DT_DISKLESS
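# Build, for every node, the list of (instance, disk) pairs whose status we
# need; purely diskless instances are remembered separately so they still
# get (empty) entries in the final result.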
2001 for nname in nodelist:
2002 node_instances = list(itertools.chain(node_image[nname].pinst,
2003 node_image[nname].sinst))
2004 diskless_instances.update(inst for inst in node_instances
2005 if instanceinfo[inst].disk_template == diskless)
2006 disks = [(inst, disk)
2007 for inst in node_instances
2008 for disk in instanceinfo[inst].disks]
2011 # No need to collect data
2014 node_disks[nname] = disks
2016 # Creating copies as SetDiskID below will modify the objects and that can
2017 # lead to incorrect data returned from nodes
2018 devonly = [dev.Copy() for (_, dev) in disks]
2021 self.cfg.SetDiskID(dev, nname)
2023 node_disks_devonly[nname] = devonly
2025 assert len(node_disks) == len(node_disks_devonly)
2027 # Collect data from all nodes with disks
2028 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2031 assert len(result) == len(node_disks)
2035 for (nname, nres) in result.items():
2036 disks = node_disks[nname]
2039 # No data from this node
2040 data = len(disks) * [(False, "node offline")]
2043 _ErrorIf(msg, self.ENODERPC, nname,
2044 "while getting disk information: %s", msg)
2046 # No data from this node
2047 data = len(disks) * [(False, msg)]
2050 for idx, i in enumerate(nres.payload):
2051 if isinstance(i, (tuple, list)) and len(i) == 2:
2054 logging.warning("Invalid result from node %s, entry %d: %s",
2056 data.append((False, "Invalid result from the remote node"))
2058 for ((inst, _), status) in zip(disks, data):
2059 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2061 # Add empty entries for diskless instances.
2062 for inst in diskless_instances:
2063 assert inst not in instdisk
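# Sanity checks: every instance must have one status tuple per disk on each
# of its nodes, and the result must cover exactly the known instances.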
2066 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2067 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2068 compat.all(isinstance(s, (tuple, list)) and
2069 len(s) == 2 for s in statuses)
2070 for inst, nnames in instdisk.items()
2071 for nname, statuses in nnames.items())
2072 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2076 def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters.
2080 for item, hv_name, hv_params in hvp_data:
2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2084 hv_class = hypervisor.GetHypervisor(hv_name)
2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086 hv_class.CheckParameterSyntax(hv_params)
2087 except errors.GenericError, err:
2088 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2091 def BuildHooksEnv(self):
2094 Cluster-Verify hooks only run in the post phase; if they fail, their
2095 output is logged in the verify output and the verification fails.
2098 all_nodes = self.cfg.GetNodeList()
2100 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2102 for node in self.cfg.GetAllNodesInfo().values():
2103 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2105 return env, [], all_nodes
2107 def Exec(self, feedback_fn):
2108 """Verify integrity of cluster, performing various test on nodes.
2111 # This method has too many local variables. pylint: disable-msg=R0914
2113 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2114 verbose = self.op.verbose
2115 self._feedback_fn = feedback_fn
2116 feedback_fn("* Verifying global settings")
2117 for msg in self.cfg.VerifyConfig():
2118 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2120 # Check the cluster certificates
2121 for cert_filename in constants.ALL_CERT_FILES:
2122 (errcode, msg) = _VerifyCertificate(cert_filename)
2123 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2125 vg_name = self.cfg.GetVGName()
2126 drbd_helper = self.cfg.GetDRBDHelper()
2127 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2128 cluster = self.cfg.GetClusterInfo()
2129 nodeinfo_byname = self.cfg.GetAllNodesInfo()
2130 nodelist = utils.NiceSort(nodeinfo_byname.keys())
2131 nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
2132 instanceinfo = self.cfg.GetAllInstancesInfo()
2133 instancelist = utils.NiceSort(instanceinfo.keys())
2134 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2135 i_non_redundant = [] # Non redundant instances
2136 i_non_a_balanced = [] # Non auto-balanced instances
2137 n_offline = 0 # Count of offline nodes
2138 n_drained = 0 # Count of nodes being drained
2139 node_vol_should = {}
2141 # FIXME: verify OS list
2142 # do local checksums
2143 master_files = [constants.CLUSTER_CONF_FILE]
2144 master_node = self.master_node = self.cfg.GetMasterNode()
2145 master_ip = self.cfg.GetMasterIP()
2147 file_names = ssconf.SimpleStore().GetFileList()
2148 file_names.extend(constants.ALL_CERT_FILES)
2149 file_names.extend(master_files)
2150 if cluster.modify_etc_hosts:
2151 file_names.append(constants.ETC_HOSTS)
2153 local_checksums = utils.FingerprintFiles(file_names)
2155 # Compute the set of hypervisor parameters
2157 for hv_name in hypervisors:
2158 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2159 for os_name, os_hvp in cluster.os_hvp.items():
2160 for hv_name, hv_params in os_hvp.items():
2163 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2164 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2165 # TODO: collapse identical parameter values in a single one
2166 for instance in instanceinfo.values():
2167 if not instance.hvparams:
2169 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2170 cluster.FillHV(instance)))
2171 # and verify them locally
2172 self._VerifyHVP(hvp_data)
2174 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2175 node_verify_param = {
2176 constants.NV_FILELIST: file_names,
2177 constants.NV_NODELIST: [node.name for node in nodeinfo
2178 if not node.offline],
2179 constants.NV_HYPERVISOR: hypervisors,
2180 constants.NV_HVPARAMS: hvp_data,
2181 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2182 node.secondary_ip) for node in nodeinfo
2183 if not node.offline],
2184 constants.NV_INSTANCELIST: hypervisors,
2185 constants.NV_VERSION: None,
2186 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2187 constants.NV_NODESETUP: None,
2188 constants.NV_TIME: None,
2189 constants.NV_MASTERIP: (master_node, master_ip),
2190 constants.NV_OSLIST: None,
2191 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2194 if vg_name is not None:
2195 node_verify_param[constants.NV_VGLIST] = None
2196 node_verify_param[constants.NV_LVLIST] = vg_name
2197 node_verify_param[constants.NV_PVLIST] = [vg_name]
2198 node_verify_param[constants.NV_DRBDLIST] = None
2201 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2204 # FIXME: this needs to be changed per node-group, not cluster-wide
2206 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2207 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2208 bridges.add(default_nicpp[constants.NIC_LINK])
2209 for instance in instanceinfo.values():
2210 for nic in instance.nics:
2211 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2212 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2213 bridges.add(full_nic[constants.NIC_LINK])
2216 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2218 # Build our expected cluster state
2219 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2221 vm_capable=node.vm_capable))
2222 for node in nodeinfo)
2226 for node in nodeinfo:
2227 path = _SupportsOob(self.cfg, node)
2228 if path and path not in oob_paths:
2229 oob_paths.append(path)
2232 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2234 for instance in instancelist:
2235 inst_config = instanceinfo[instance]
2237 for nname in inst_config.all_nodes:
2238 if nname not in node_image:
2240 gnode = self.NodeImage(name=nname)
2242 node_image[nname] = gnode
2244 inst_config.MapLVsByNode(node_vol_should)
2246 pnode = inst_config.primary_node
2247 node_image[pnode].pinst.append(instance)
2249 for snode in inst_config.secondary_nodes:
2250 nimg = node_image[snode]
2251 nimg.sinst.append(instance)
2252 if pnode not in nimg.sbp:
2253 nimg.sbp[pnode] = []
2254 nimg.sbp[pnode].append(instance)
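# nimg.sbp ("secondary by primary") maps, on each secondary node's image,
# the primary node name to the list of instances mirrored here from that
# primary.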
2256 # At this point, we have the in-memory data structures complete,
2257 # except for the runtime information, which we'll gather next
2259 # Due to the way our RPC system works, exact response times cannot be
2260 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2261 # time before and after executing the request, we can at least have a time window.
2263 nvinfo_starttime = time.time()
2264 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2265 self.cfg.GetClusterName())
2266 nvinfo_endtime = time.time()
2268 all_drbd_map = self.cfg.ComputeDRBDMap()
2270 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2271 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2273 feedback_fn("* Verifying node status")
2277 for node_i in nodeinfo:
2279 nimg = node_image[node]
2283 feedback_fn("* Skipping offline node %s" % (node,))
2287 if node == master_node:
2289 elif node_i.master_candidate:
2290 ntype = "master candidate"
2291 elif node_i.drained:
2297 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2299 msg = all_nvinfo[node].fail_msg
2300 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2302 nimg.rpc_fail = True
2305 nresult = all_nvinfo[node].payload
2307 nimg.call_ok = self._VerifyNode(node_i, nresult)
2308 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2309 self._VerifyNodeNetwork(node_i, nresult)
2310 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2313 self._VerifyOob(node_i, nresult)
2316 self._VerifyNodeLVM(node_i, nresult, vg_name)
2317 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2320 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2321 self._UpdateNodeInstances(node_i, nresult, nimg)
2322 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2323 self._UpdateNodeOS(node_i, nresult, nimg)
2324 if not nimg.os_fail:
2325 if refos_img is None:
2327 self._VerifyNodeOS(node_i, nimg, refos_img)
2328 self._VerifyNodeBridges(node_i, nresult, bridges)
2330 feedback_fn("* Verifying instance status")
2331 for instance in instancelist:
2333 feedback_fn("* Verifying instance %s" % instance)
2334 inst_config = instanceinfo[instance]
2335 self._VerifyInstance(instance, inst_config, node_image,
2337 inst_nodes_offline = []
2339 pnode = inst_config.primary_node
2340 pnode_img = node_image[pnode]
2341 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2342 self.ENODERPC, pnode, "instance %s, connection to"
2343 " primary node failed", instance)
2345 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2346 "instance lives on offline node %s", inst_config.primary_node)
2348 # If the instance is non-redundant we cannot survive losing its primary
2349 # node, so we are not N+1 compliant. On the other hand we have no disk
2350 # templates with more than one secondary, so that situation is not well supported either.
2352 # FIXME: does not support file-backed instances
2353 if not inst_config.secondary_nodes:
2354 i_non_redundant.append(instance)
2356 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2357 instance, "instance has multiple secondary nodes: %s",
2358 utils.CommaJoin(inst_config.secondary_nodes),
2359 code=self.ETYPE_WARNING)
2361 if inst_config.disk_template in constants.DTS_NET_MIRROR:
2362 pnode = inst_config.primary_node
2363 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2364 instance_groups = {}
2366 for node in instance_nodes:
2367 instance_groups.setdefault(nodeinfo_byname[node].group,
2371 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2372 # Sort so that we always list the primary node first.
2373 for group, nodes in sorted(instance_groups.items(),
2374 key=lambda (_, nodes): pnode in nodes,
2377 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2378 instance, "instance has primary and secondary nodes in"
2379 " different groups: %s", utils.CommaJoin(pretty_list),
2380 code=self.ETYPE_WARNING)
2382 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2383 i_non_a_balanced.append(instance)
2385 for snode in inst_config.secondary_nodes:
2386 s_img = node_image[snode]
2387 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2388 "instance %s, connection to secondary node failed", instance)
2391 inst_nodes_offline.append(snode)
2393 # warn that the instance lives on offline nodes
2394 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2395 "instance has offline secondary node(s) %s",
2396 utils.CommaJoin(inst_nodes_offline))
2397 # ... or ghost/non-vm_capable nodes
2398 for node in inst_config.all_nodes:
2399 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2400 "instance lives on ghost node %s", node)
2401 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2402 instance, "instance lives on non-vm_capable node %s", node)
2404 feedback_fn("* Verifying orphan volumes")
2405 reserved = utils.FieldSet(*cluster.reserved_lvs)
2406 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2408 feedback_fn("* Verifying orphan instances")
2409 self._VerifyOrphanInstances(instancelist, node_image)
2411 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2412 feedback_fn("* Verifying N+1 Memory redundancy")
2413 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2415 feedback_fn("* Other Notes")
2417 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2418 % len(i_non_redundant))
2420 if i_non_a_balanced:
2421 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2422 % len(i_non_a_balanced))
2425 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2428 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2432 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2433 """Analyze the post-hooks' result
2435 This method analyses the hook result, handles it, and sends some
2436 nicely-formatted feedback back to the user.
2438 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2439 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2440 @param hooks_results: the results of the multi-node hooks rpc call
2441 @param feedback_fn: function used to send feedback back to the caller
2442 @param lu_result: previous Exec result
2443 @return: the new Exec result, based on the previous result
2447 # We only really run POST phase hooks, and are only interested in their results
2449 if phase == constants.HOOKS_PHASE_POST:
2450 # Used to change hooks' output to proper indentation
2451 feedback_fn("* Hooks Results")
2452 assert hooks_results, "invalid result from hooks"
2454 for node_name in hooks_results:
2455 res = hooks_results[node_name]
2457 test = msg and not res.offline
2458 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2459 "Communication failure in hooks execution: %s", msg)
2460 if res.offline or msg:
2461 # No need to investigate payload if node is offline or gave an error.
2462 # override manually lu_result here as _ErrorIf only
2463 # overrides self.bad
2466 for script, hkr, output in res.payload:
2467 test = hkr == constants.HKR_FAIL
2468 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2469 "Script %s failed, output:", script)
2471 output = self._HOOKS_INDENT_RE.sub(' ', output)
2472 feedback_fn("%s" % output)
2478 class LUClusterVerifyDisks(NoHooksLU):
2479 """Verifies the cluster disks status.
2484 def ExpandNames(self):
2485 self.needed_locks = {
2486 locking.LEVEL_NODE: locking.ALL_SET,
2487 locking.LEVEL_INSTANCE: locking.ALL_SET,
2489 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2491 def Exec(self, feedback_fn):
2492 """Verify integrity of cluster disks.
2494 @rtype: tuple of three items
2495 @return: a tuple of (dict of node-to-node_error, list of instances
2496 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2500 result = res_nodes, res_instances, res_missing = {}, [], {}
2502 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2503 instances = self.cfg.GetAllInstancesInfo().values()
2506 for inst in instances:
2508 if not inst.admin_up:
2510 inst.MapLVsByNode(inst_lvs)
2511 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2512 for node, vol_list in inst_lvs.iteritems():
2513 for vol in vol_list:
2514 nv_dict[(node, vol)] = inst
2519 node_lvs = self.rpc.call_lv_list(nodes, [])
2520 for node, node_res in node_lvs.items():
2521 if node_res.offline:
2523 msg = node_res.fail_msg
2525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2526 res_nodes[node] = msg
2529 lvs = node_res.payload
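# The payload maps each LV name to a tuple of attributes; only the last
# element (whether the LV is online) matters here.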
2530 for lv_name, (_, _, lv_online) in lvs.items():
2531 inst = nv_dict.pop((node, lv_name), None)
2532 if (not lv_online and inst is not None
2533 and inst.name not in res_instances):
2534 res_instances.append(inst.name)
2536 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2538 for key, inst in nv_dict.iteritems():
2539 if inst.name not in res_missing:
2540 res_missing[inst.name] = []
2541 res_missing[inst.name].append(key)
2546 class LUClusterRepairDiskSizes(NoHooksLU):
2547 """Verifies the cluster disks sizes.
2552 def ExpandNames(self):
2553 if self.op.instances:
2554 self.wanted_names = []
2555 for name in self.op.instances:
2556 full_name = _ExpandInstanceName(self.cfg, name)
2557 self.wanted_names.append(full_name)
2558 self.needed_locks = {
2559 locking.LEVEL_NODE: [],
2560 locking.LEVEL_INSTANCE: self.wanted_names,
2562 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2564 self.wanted_names = None
2565 self.needed_locks = {
2566 locking.LEVEL_NODE: locking.ALL_SET,
2567 locking.LEVEL_INSTANCE: locking.ALL_SET,
2569 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2571 def DeclareLocks(self, level):
2572 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2573 self._LockInstancesNodes(primary_only=True)
2575 def CheckPrereq(self):
2576 """Check prerequisites.
2578 This only checks the optional instance list against the existing names.
2581 if self.wanted_names is None:
2582 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2584 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2585 in self.wanted_names]
2587 def _EnsureChildSizes(self, disk):
2588 """Ensure children of the disk have the needed disk size.
2590 This is valid mainly for DRBD8 and fixes an issue where the
2591 children have smaller disk size.
2593 @param disk: an L{ganeti.objects.Disk} object
2596 if disk.dev_type == constants.LD_DRBD8:
2597 assert disk.children, "Empty children for DRBD8?"
2598 fchild = disk.children[0]
2599 mismatch = fchild.size < disk.size
2601 self.LogInfo("Child disk has size %d, parent %d, fixing",
2602 fchild.size, disk.size)
2603 fchild.size = disk.size
2605 # and we recurse on this child only, not on the metadev
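# The result is True when this disk or any of its children needed fixing,
# so the caller knows the configuration must be written back.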
2606 return self._EnsureChildSizes(fchild) or mismatch
2610 def Exec(self, feedback_fn):
2611 """Verify the size of cluster disks.
2614 # TODO: check child disks too
2615 # TODO: check differences in size between primary/secondary nodes
2617 for instance in self.wanted_instances:
2618 pnode = instance.primary_node
2619 if pnode not in per_node_disks:
2620 per_node_disks[pnode] = []
2621 for idx, disk in enumerate(instance.disks):
2622 per_node_disks[pnode].append((instance, idx, disk))
2625 for node, dskl in per_node_disks.items():
2626 newl = [v[2].Copy() for v in dskl]
2628 self.cfg.SetDiskID(dsk, node)
2629 result = self.rpc.call_blockdev_getsize(node, newl)
2631 self.LogWarning("Failure in blockdev_getsize call to node"
2632 " %s, ignoring", node)
2634 if len(result.payload) != len(dskl):
2635 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2636 " result.payload=%s", node, len(dskl), result.payload)
2637 self.LogWarning("Invalid result from node %s, ignoring node results",
2640 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2642 self.LogWarning("Disk %d of instance %s did not return size"
2643 " information, ignoring", idx, instance.name)
2645 if not isinstance(size, (int, long)):
2646 self.LogWarning("Disk %d of instance %s did not return valid"
2647 " size information, ignoring", idx, instance.name)
2650 if size != disk.size:
2651 self.LogInfo("Disk %d of instance %s has mismatched size,"
2652 " correcting: recorded %d, actual %d", idx,
2653 instance.name, disk.size, size)
2655 self.cfg.Update(instance, feedback_fn)
2656 changed.append((instance.name, idx, size))
2657 if self._EnsureChildSizes(disk):
2658 self.cfg.Update(instance, feedback_fn)
2659 changed.append((instance.name, idx, disk.size))
2663 class LUClusterRename(LogicalUnit):
2664 """Rename the cluster.
2667 HPATH = "cluster-rename"
2668 HTYPE = constants.HTYPE_CLUSTER
2670 def BuildHooksEnv(self):
2675 "OP_TARGET": self.cfg.GetClusterName(),
2676 "NEW_NAME": self.op.name,
2678 mn = self.cfg.GetMasterNode()
2679 all_nodes = self.cfg.GetNodeList()
2680 return env, [mn], all_nodes
2682 def CheckPrereq(self):
2683 """Verify that the passed name is a valid one.
2686 hostname = netutils.GetHostname(name=self.op.name,
2687 family=self.cfg.GetPrimaryIPFamily())
2689 new_name = hostname.name
2690 self.ip = new_ip = hostname.ip
2691 old_name = self.cfg.GetClusterName()
2692 old_ip = self.cfg.GetMasterIP()
2693 if new_name == old_name and new_ip == old_ip:
2694 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2695 " cluster has changed",
2697 if new_ip != old_ip:
2698 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2699 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2700 " reachable on the network" %
2701 new_ip, errors.ECODE_NOTUNIQUE)
2703 self.op.name = new_name
2705 def Exec(self, feedback_fn):
2706 """Rename the cluster.
2709 clustername = self.op.name
2712 # shutdown the master IP
2713 master = self.cfg.GetMasterNode()
2714 result = self.rpc.call_node_stop_master(master, False)
2715 result.Raise("Could not disable the master role")
2718 cluster = self.cfg.GetClusterInfo()
2719 cluster.cluster_name = clustername
2720 cluster.master_ip = ip
2721 self.cfg.Update(cluster, feedback_fn)
2723 # update the known hosts file
2724 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2725 node_list = self.cfg.GetOnlineNodeList()
2727 node_list.remove(master)
2730 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2732 result = self.rpc.call_node_start_master(master, False, False)
2733 msg = result.fail_msg
2735 self.LogWarning("Could not re-enable the master role on"
2736 " the master, please restart manually: %s", msg)
2741 class LUClusterSetParams(LogicalUnit):
2742 """Change the parameters of the cluster.
2745 HPATH = "cluster-modify"
2746 HTYPE = constants.HTYPE_CLUSTER
2749 def CheckArguments(self):
2753 if self.op.uid_pool:
2754 uidpool.CheckUidPool(self.op.uid_pool)
2756 if self.op.add_uids:
2757 uidpool.CheckUidPool(self.op.add_uids)
2759 if self.op.remove_uids:
2760 uidpool.CheckUidPool(self.op.remove_uids)
2762 def ExpandNames(self):
2763 # FIXME: in the future maybe other cluster params won't require checking on
2764 # all nodes to be modified.
2765 self.needed_locks = {
2766 locking.LEVEL_NODE: locking.ALL_SET,
2768 self.share_locks[locking.LEVEL_NODE] = 1
2770 def BuildHooksEnv(self):
2775 "OP_TARGET": self.cfg.GetClusterName(),
2776 "NEW_VG_NAME": self.op.vg_name,
2778 mn = self.cfg.GetMasterNode()
2779 return env, [mn], [mn]
2781 def CheckPrereq(self):
2782 """Check prerequisites.
2784 This checks whether the given params don't conflict and
2785 if the given volume group is valid.
2788 if self.op.vg_name is not None and not self.op.vg_name:
2789 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2790 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2791 " instances exist", errors.ECODE_INVAL)
2793 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2794 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2795 raise errors.OpPrereqError("Cannot disable drbd helper while"
2796 " drbd-based instances exist",
2799 node_list = self.acquired_locks[locking.LEVEL_NODE]
2801 # if vg_name not None, checks given volume group on all nodes
2803 vglist = self.rpc.call_vg_list(node_list)
2804 for node in node_list:
2805 msg = vglist[node].fail_msg
2807 # ignoring down node
2808 self.LogWarning("Error while gathering data on node %s"
2809 " (ignoring node): %s", node, msg)
2811 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2813 constants.MIN_VG_SIZE)
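# CheckVolumeGroupSize is expected to return an error message when the
# volume group is missing or too small, and None when it is fine.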
2815 raise errors.OpPrereqError("Error on node '%s': %s" %
2816 (node, vgstatus), errors.ECODE_ENVIRON)
2818 if self.op.drbd_helper:
2819 # checks given drbd helper on all nodes
2820 helpers = self.rpc.call_drbd_helper(node_list)
2821 for node in node_list:
2822 ninfo = self.cfg.GetNodeInfo(node)
2824 self.LogInfo("Not checking drbd helper on offline node %s", node)
2826 msg = helpers[node].fail_msg
2828 raise errors.OpPrereqError("Error checking drbd helper on node"
2829 " '%s': %s" % (node, msg),
2830 errors.ECODE_ENVIRON)
2831 node_helper = helpers[node].payload
2832 if node_helper != self.op.drbd_helper:
2833 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2834 (node, node_helper), errors.ECODE_ENVIRON)
2836 self.cluster = cluster = self.cfg.GetClusterInfo()
2837 # validate params changes
2838 if self.op.beparams:
2839 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2840 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2842 if self.op.ndparams:
2843 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2844 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2846 # TODO: we need a more general way to handle resetting
2847 # cluster-level parameters to default values
2848 if self.new_ndparams["oob_program"] == "":
2849 self.new_ndparams["oob_program"] = \
2850 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2852 if self.op.nicparams:
2853 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2854 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2855 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2858 # check all instances for consistency
2859 for instance in self.cfg.GetAllInstancesInfo().values():
2860 for nic_idx, nic in enumerate(instance.nics):
2861 params_copy = copy.deepcopy(nic.nicparams)
2862 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2864 # check parameter syntax
2866 objects.NIC.CheckParameterSyntax(params_filled)
2867 except errors.ConfigurationError, err:
2868 nic_errors.append("Instance %s, nic/%d: %s" %
2869 (instance.name, nic_idx, err))
2871 # if we're moving instances to routed, check that they have an ip
2872 target_mode = params_filled[constants.NIC_MODE]
2873 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2874 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
2875 " address" % (instance.name, nic_idx))
2877 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2878 "\n".join(nic_errors))
2880 # hypervisor list/parameters
2881 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2882 if self.op.hvparams:
2883 for hv_name, hv_dict in self.op.hvparams.items():
2884 if hv_name not in self.new_hvparams:
2885 self.new_hvparams[hv_name] = hv_dict
2887 self.new_hvparams[hv_name].update(hv_dict)
2889 # os hypervisor parameters
2890 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2892 for os_name, hvs in self.op.os_hvp.items():
2893 if os_name not in self.new_os_hvp:
2894 self.new_os_hvp[os_name] = hvs
2896 for hv_name, hv_dict in hvs.items():
2897 if hv_name not in self.new_os_hvp[os_name]:
2898 self.new_os_hvp[os_name][hv_name] = hv_dict
2900 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2903 self.new_osp = objects.FillDict(cluster.osparams, {})
2904 if self.op.osparams:
2905 for os_name, osp in self.op.osparams.items():
2906 if os_name not in self.new_osp:
2907 self.new_osp[os_name] = {}
2909 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2912 if not self.new_osp[os_name]:
2913 # we removed all parameters
2914 del self.new_osp[os_name]
2916 # check the parameter validity (remote check)
2917 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2918 os_name, self.new_osp[os_name])
2920 # changes to the hypervisor list
2921 if self.op.enabled_hypervisors is not None:
2922 self.hv_list = self.op.enabled_hypervisors
2923 for hv in self.hv_list:
2924 # if the hypervisor doesn't already exist in the cluster
2925 # hvparams, we initialize it to empty, and then (in both
2926 # cases) we make sure to fill the defaults, as we might not
2927 # have a complete defaults list if the hypervisor wasn't enabled before
2929 if hv not in new_hvp:
2931 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2932 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2934 self.hv_list = cluster.enabled_hypervisors
2936 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2937 # either the enabled list has changed, or the parameters have, validate
2938 for hv_name, hv_params in self.new_hvparams.items():
2939 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2940 (self.op.enabled_hypervisors and
2941 hv_name in self.op.enabled_hypervisors)):
2942 # either this is a new hypervisor, or its parameters have changed
2943 hv_class = hypervisor.GetHypervisor(hv_name)
2944 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2945 hv_class.CheckParameterSyntax(hv_params)
2946 _CheckHVParams(self, node_list, hv_name, hv_params)
2949 # no need to check any newly-enabled hypervisors, since the
2950 # defaults have already been checked in the above code-block
2951 for os_name, os_hvp in self.new_os_hvp.items():
2952 for hv_name, hv_params in os_hvp.items():
2953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2954 # we need to fill in the new os_hvp on top of the actual hv_p
2955 cluster_defaults = self.new_hvparams.get(hv_name, {})
2956 new_osp = objects.FillDict(cluster_defaults, hv_params)
2957 hv_class = hypervisor.GetHypervisor(hv_name)
2958 hv_class.CheckParameterSyntax(new_osp)
2959 _CheckHVParams(self, node_list, hv_name, new_osp)
2961 if self.op.default_iallocator:
2962 alloc_script = utils.FindFile(self.op.default_iallocator,
2963 constants.IALLOCATOR_SEARCH_PATH,
2965 if alloc_script is None:
2966 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2967 " specified" % self.op.default_iallocator,
2970 def Exec(self, feedback_fn):
2971 """Change the parameters of the cluster.
2974 if self.op.vg_name is not None:
2975 new_volume = self.op.vg_name
2978 if new_volume != self.cfg.GetVGName():
2979 self.cfg.SetVGName(new_volume)
2981 feedback_fn("Cluster LVM configuration already in desired"
2982 " state, not changing")
2983 if self.op.drbd_helper is not None:
2984 new_helper = self.op.drbd_helper
2987 if new_helper != self.cfg.GetDRBDHelper():
2988 self.cfg.SetDRBDHelper(new_helper)
2990 feedback_fn("Cluster DRBD helper already in desired state,"
2992 if self.op.hvparams:
2993 self.cluster.hvparams = self.new_hvparams
2995 self.cluster.os_hvp = self.new_os_hvp
2996 if self.op.enabled_hypervisors is not None:
2997 self.cluster.hvparams = self.new_hvparams
2998 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2999 if self.op.beparams:
3000 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3001 if self.op.nicparams:
3002 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3003 if self.op.osparams:
3004 self.cluster.osparams = self.new_osp
3005 if self.op.ndparams:
3006 self.cluster.ndparams = self.new_ndparams
3008 if self.op.candidate_pool_size is not None:
3009 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3010 # we need to update the pool size here, otherwise the save will fail
3011 _AdjustCandidatePool(self, [])
3013 if self.op.maintain_node_health is not None:
3014 self.cluster.maintain_node_health = self.op.maintain_node_health
3016 if self.op.prealloc_wipe_disks is not None:
3017 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3019 if self.op.add_uids is not None:
3020 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3022 if self.op.remove_uids is not None:
3023 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3025 if self.op.uid_pool is not None:
3026 self.cluster.uid_pool = self.op.uid_pool
3028 if self.op.default_iallocator is not None:
3029 self.cluster.default_iallocator = self.op.default_iallocator
3031 if self.op.reserved_lvs is not None:
3032 self.cluster.reserved_lvs = self.op.reserved_lvs
3034 def helper_os(aname, mods, desc):
3036 lst = getattr(self.cluster, aname)
3037 for key, val in mods:
3038 if key == constants.DDM_ADD:
3040 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3043 elif key == constants.DDM_REMOVE:
3047 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3049 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3051 if self.op.hidden_os:
3052 helper_os("hidden_os", self.op.hidden_os, "hidden")
3054 if self.op.blacklisted_os:
3055 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3057 if self.op.master_netdev:
3058 master = self.cfg.GetMasterNode()
3059 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3060 self.cluster.master_netdev)
3061 result = self.rpc.call_node_stop_master(master, False)
3062 result.Raise("Could not disable the master ip")
3063 feedback_fn("Changing master_netdev from %s to %s" %
3064 (self.cluster.master_netdev, self.op.master_netdev))
3065 self.cluster.master_netdev = self.op.master_netdev
3067 self.cfg.Update(self.cluster, feedback_fn)
3069 if self.op.master_netdev:
3070 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3071 self.op.master_netdev)
3072 result = self.rpc.call_node_start_master(master, False, False)
3074 self.LogWarning("Could not re-enable the master ip on"
3075 " the master, please restart manually: %s",
3079 def _UploadHelper(lu, nodes, fname):
3080 """Helper for uploading a file and showing warnings.
3083 if os.path.exists(fname):
3084 result = lu.rpc.call_upload_file(nodes, fname)
3085 for to_node, to_result in result.items():
3086 msg = to_result.fail_msg
3088 msg = ("Copy of file %s to node %s failed: %s" %
3089 (fname, to_node, msg))
3090 lu.proc.LogWarning(msg)
3093 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3094 """Distribute additional files which are part of the cluster configuration.
3096 ConfigWriter takes care of distributing the config and ssconf files, but
3097 there are more files which should be distributed to all nodes. This function
3098 makes sure those are copied.
3100 @param lu: calling logical unit
3101 @param additional_nodes: list of nodes not in the config to distribute to
3102 @type additional_vm: boolean
3103 @param additional_vm: whether the additional nodes are vm-capable or not
3106 # 1. Gather target nodes
3107 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3108 dist_nodes = lu.cfg.GetOnlineNodeList()
3109 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3110 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3111 if additional_nodes is not None:
3112 dist_nodes.extend(additional_nodes)
3114 vm_nodes.extend(additional_nodes)
3115 if myself.name in dist_nodes:
3116 dist_nodes.remove(myself.name)
3117 if myself.name in vm_nodes:
3118 vm_nodes.remove(myself.name)
3120 # 2. Gather files to distribute
3121 dist_files = set([constants.ETC_HOSTS,
3122 constants.SSH_KNOWN_HOSTS_FILE,
3123 constants.RAPI_CERT_FILE,
3124 constants.RAPI_USERS_FILE,
3125 constants.CONFD_HMAC_KEY,
3126 constants.CLUSTER_DOMAIN_SECRET_FILE,
3130 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3131 for hv_name in enabled_hypervisors:
3132 hv_class = hypervisor.GetHypervisor(hv_name)
3133 vm_files.update(hv_class.GetAncillaryFiles())
3135 # 3. Perform the files upload
3136 for fname in dist_files:
3137 _UploadHelper(lu, dist_nodes, fname)
3138 for fname in vm_files:
3139 _UploadHelper(lu, vm_nodes, fname)
3142 class LUClusterRedistConf(NoHooksLU):
3143 """Force the redistribution of cluster configuration.
3145 This is a very simple LU.
3150 def ExpandNames(self):
3151 self.needed_locks = {
3152 locking.LEVEL_NODE: locking.ALL_SET,
3154 self.share_locks[locking.LEVEL_NODE] = 1
3156 def Exec(self, feedback_fn):
3157 """Redistribute the configuration.
3160 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3161 _RedistributeAncillaryFiles(self)
3164 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3165 """Sleep and poll for an instance's disk to sync.
3168 if not instance.disks or disks is not None and not disks:
3171 disks = _ExpandCheckDisks(instance, disks)
3174 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3176 node = instance.primary_node
3179 lu.cfg.SetDiskID(dev, node)
3181 # TODO: Convert to utils.Retry
3184 degr_retries = 10 # in seconds, as we sleep 1 second each time
3188 cumul_degraded = False
3189 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3190 msg = rstats.fail_msg
3192 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3195 raise errors.RemoteError("Can't contact node %s for mirror data,"
3196 " aborting." % node)
3199 rstats = rstats.payload
3201 for i, mstat in enumerate(rstats):
3203 lu.LogWarning("Can't compute data for node %s/%s",
3204 node, disks[i].iv_name)
3207 cumul_degraded = (cumul_degraded or
3208 (mstat.is_degraded and mstat.sync_percent is None))
3209 if mstat.sync_percent is not None:
3211 if mstat.estimated_time is not None:
3212 rem_time = ("%s remaining (estimated)" %
3213 utils.FormatSeconds(mstat.estimated_time))
3214 max_time = mstat.estimated_time
3216 rem_time = "no time estimate"
3217 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3218 (disks[i].iv_name, mstat.sync_percent, rem_time))
3220 # if we're done but degraded, let's do a few small retries, to
3221 # make sure we see a stable and not transient situation; therefore
3222 # we force restart of the loop
3223 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3224 logging.info("Degraded disks found, %d retries left", degr_retries)
3232 time.sleep(min(60, max_time))
3235 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
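# True means all disks finished syncing without remaining degraded.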
3236 return not cumul_degraded
3239 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3240 """Check that mirrors are not degraded.
3242 The ldisk parameter, if True, will change the test from the
3243 is_degraded attribute (which represents overall non-ok status for
3244 the device(s)) to the ldisk (representing the local storage status).
3247 lu.cfg.SetDiskID(dev, node)
3251 if on_primary or dev.AssembleOnSecondary():
3252 rstats = lu.rpc.call_blockdev_find(node, dev)
3253 msg = rstats.fail_msg
3255 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3257 elif not rstats.payload:
3258 lu.LogWarning("Can't find disk on node %s", node)
3262 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3264 result = result and not rstats.payload.is_degraded
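# Composite devices (e.g. DRBD over LVM) are only consistent if all their
# children are; AND the recursive results together.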
3267 for child in dev.children:
3268 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3273 class LUOobCommand(NoHooksLU):
3274 """Logical unit for OOB handling.
3279 def CheckPrereq(self):
3280 """Check prerequisites.
3283 - the node exists in the configuration
3286 Any errors are signaled by raising errors.OpPrereqError.
3290 for node_name in self.op.node_names:
3291 node = self.cfg.GetNodeInfo(node_name)
3294 raise errors.OpPrereqError("Node %s not found" % node_name,
3297 self.nodes.append(node)
3299 if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3300 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3301 " not marked offline") % node_name,
3304 def ExpandNames(self):
3305 """Gather locks we need.
3308 if self.op.node_names:
3309 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3310 for name in self.op.node_names]
3312 self.op.node_names = self.cfg.GetNodeList()
3314 self.needed_locks = {
3315 locking.LEVEL_NODE: self.op.node_names,
3318 def Exec(self, feedback_fn):
3319 """Execute OOB and return result if we expect any.
3322 master_node = self.cfg.GetMasterNode()
3325 for node in self.nodes:
3326 node_entry = [(constants.RS_NORMAL, node.name)]
3327 ret.append(node_entry)
3329 oob_program = _SupportsOob(self.cfg, node)
3332 node_entry.append((constants.RS_UNAVAIL, None))
3335 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3336 self.op.command, oob_program, node.name)
3337 result = self.rpc.call_run_oob(master_node, oob_program,
3338 self.op.command, node.name,
3342 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3343 node.name, result.fail_msg)
3344 node_entry.append((constants.RS_NODATA, None))
3347 self._CheckPayload(result)
3348 except errors.OpExecError, err:
3349 self.LogWarning("The payload returned by '%s' is not valid: %s",
3351 node_entry.append((constants.RS_NODATA, None))
3353 if self.op.command == constants.OOB_HEALTH:
3354 # For health we should log important events
3355 for item, status in result.payload:
3356 if status in [constants.OOB_STATUS_WARNING,
3357 constants.OOB_STATUS_CRITICAL]:
3358 self.LogWarning("On node '%s' item '%s' has status '%s'",
3359 node.name, item, status)
3361 if self.op.command == constants.OOB_POWER_ON:
3363 elif self.op.command == constants.OOB_POWER_OFF:
3364 node.powered = False
3365 elif self.op.command == constants.OOB_POWER_STATUS:
3366 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3367 if powered != node.powered:
3368 logging.warning(("Recorded power state (%s) of node '%s' does not"
3369 " match actual power state (%s)"), node.powered,
3372 # For configuration changing commands we should update the node
3373 if self.op.command in (constants.OOB_POWER_ON,
3374 constants.OOB_POWER_OFF):
3375 self.cfg.Update(node, feedback_fn)
3377 node_entry.append((constants.RS_NORMAL, result.payload))
3381 def _CheckPayload(self, result):
3382 """Checks if the payload is valid.
3384 @param result: RPC result
3385 @raises errors.OpExecError: If payload is not valid
3389 if self.op.command == constants.OOB_HEALTH:
3390 if not isinstance(result.payload, list):
3391 errs.append("command 'health' is expected to return a list but got %s" %
3392 type(result.payload))
3394 for item, status in result.payload:
3395 if status not in constants.OOB_STATUSES:
3396 errs.append("health item '%s' has invalid status '%s'" %
3399 if self.op.command == constants.OOB_POWER_STATUS:
3400 if not isinstance(result.payload, dict):
3401 errs.append("power-status is expected to return a dict but got %s" %
3402 type(result.payload))
3404 if self.op.command in [
3405 constants.OOB_POWER_ON,
3406 constants.OOB_POWER_OFF,
3407 constants.OOB_POWER_CYCLE,
3409 if result.payload is not None:
3410 errs.append("%s is expected to not return payload but got '%s'" %
3411 (self.op.command, result.payload))
3414 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3415 utils.CommaJoin(errs))
3419 class LUOsDiagnose(NoHooksLU):
3420 """Logical unit for OS diagnose/query.
3425 _BLK = "blacklisted"
3427 _FIELDS_STATIC = utils.FieldSet()
3428 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3429 "parameters", "api_versions", _HID, _BLK)
3431 def CheckArguments(self):
3433 raise errors.OpPrereqError("Selective OS query not supported",
3436 _CheckOutputFields(static=self._FIELDS_STATIC,
3437 dynamic=self._FIELDS_DYNAMIC,
3438 selected=self.op.output_fields)
3440 def ExpandNames(self):
3441 # Lock all nodes, in shared mode
3442 # Temporary removal of locks, should be reverted later
3443 # TODO: reintroduce locks when they are lighter-weight
3444 self.needed_locks = {}
3445 #self.share_locks[locking.LEVEL_NODE] = 1
3446 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3449 def _DiagnoseByOS(rlist):
3450 """Remaps a per-node return list into an a per-os per-node dictionary
3452 @param rlist: a map with node names as keys and OS objects as values
3455 @return: a dictionary with osnames as keys and as value another
3456 map, with nodes as keys and tuples of (path, status, diagnose,
3457 variants, parameters, api_versions) as values, eg::
3459 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3460 (/srv/..., False, "invalid api")],
3461 "node2": [(/srv/..., True, "", [], [])]}
3466 # we build here the list of nodes that didn't fail the RPC (at RPC
3467 # level), so that nodes with a non-responding node daemon don't
3468 # make all OSes invalid
3469 good_nodes = [node_name for node_name in rlist
3470 if not rlist[node_name].fail_msg]
3471 for node_name, nr in rlist.items():
3472 if nr.fail_msg or not nr.payload:
3474 for (name, path, status, diagnose, variants,
3475 params, api_versions) in nr.payload:
3476 if name not in all_os:
3477 # build a list of nodes for this os containing empty lists
3478 # for each node in node_list
3480 for nname in good_nodes:
3481 all_os[name][nname] = []
3482 # convert params from [name, help] to (name, help)
3483 params = [tuple(v) for v in params]
3484 all_os[name][node_name].append((path, status, diagnose,
3485 variants, params, api_versions))
3488 def Exec(self, feedback_fn):
3489 """Compute the list of OSes.
3492 valid_nodes = [node.name
3493 for node in self.cfg.GetAllNodesInfo().values()
3494 if not node.offline and node.vm_capable]
3495 node_data = self.rpc.call_os_diagnose(valid_nodes)
3496 pol = self._DiagnoseByOS(node_data)
3498 cluster = self.cfg.GetClusterInfo()
3500 for os_name in utils.NiceSort(pol.keys()):
3501 os_data = pol[os_name]
3504 (variants, params, api_versions) = null_state = (set(), set(), set())
3505 for idx, osl in enumerate(os_data.values()):
3506 valid = bool(valid and osl and osl[0][1])
3508 (variants, params, api_versions) = null_state
3510 node_variants, node_params, node_api = osl[0][3:6]
3511 if idx == 0: # first entry
3512 variants = set(node_variants)
3513 params = set(node_params)
3514 api_versions = set(node_api)
3515 else: # keep consistency
3516 variants.intersection_update(node_variants)
3517 params.intersection_update(node_params)
3518 api_versions.intersection_update(node_api)
3520 is_hid = os_name in cluster.hidden_os
3521 is_blk = os_name in cluster.blacklisted_os
3522 if ((self._HID not in self.op.output_fields and is_hid) or
3523 (self._BLK not in self.op.output_fields and is_blk) or
3524 (self._VLD not in self.op.output_fields and not valid)):
3527 for field in self.op.output_fields:
3530 elif field == self._VLD:
3532 elif field == "node_status":
3533 # this is just a copy of the dict
3535 for node_name, nos_list in os_data.items():
3536 val[node_name] = nos_list
3537 elif field == "variants":
3538 val = utils.NiceSort(list(variants))
3539 elif field == "parameters":
3541 elif field == "api_versions":
3542 val = list(api_versions)
3543 elif field == self._HID:
3545 elif field == self._BLK:
3548 raise errors.ParameterError(field)
3555 class LUNodeRemove(LogicalUnit):
3556 """Logical unit for removing a node.
3559 HPATH = "node-remove"
3560 HTYPE = constants.HTYPE_NODE
3562 def BuildHooksEnv(self):
3565 This doesn't run on the target node in the pre phase as a failed
3566 node would then be impossible to remove.
3570 "OP_TARGET": self.op.node_name,
3571 "NODE_NAME": self.op.node_name,
3573 all_nodes = self.cfg.GetNodeList()
3575 all_nodes.remove(self.op.node_name)
3577 logging.warning("Node %s which is about to be removed not found"
3578 " in the all nodes list", self.op.node_name)
3579 return env, all_nodes, all_nodes
3581 def CheckPrereq(self):
3582 """Check prerequisites.
3585 - the node exists in the configuration
3586 - it does not have primary or secondary instances
3587 - it's not the master
3589 Any errors are signaled by raising errors.OpPrereqError.
3592 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3593 node = self.cfg.GetNodeInfo(self.op.node_name)
3594 assert node is not None
3596 instance_list = self.cfg.GetInstanceList()
3598 masternode = self.cfg.GetMasterNode()
3599 if node.name == masternode:
3600 raise errors.OpPrereqError("Node is the master node,"
3601 " you need to failover first.",
3604 for instance_name in instance_list:
3605 instance = self.cfg.GetInstanceInfo(instance_name)
3606 if node.name in instance.all_nodes:
3607 raise errors.OpPrereqError("Instance %s is still running on the node,"
3608 " please remove first." % instance_name,
3610 self.op.node_name = node.name
3613 def Exec(self, feedback_fn):
3614 """Removes the node from the cluster.
3618 logging.info("Stopping the node daemon and removing configs from node %s",
3621 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3623 # Promote nodes to master candidate as needed
3624 _AdjustCandidatePool(self, exceptions=[node.name])
3625 self.context.RemoveNode(node.name)
3627 # Run post hooks on the node before it's removed
3628 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3630 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3632 # pylint: disable-msg=W0702
3633 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3635 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3636 msg = result.fail_msg
3638 self.LogWarning("Errors encountered on the remote node while leaving"
3639 " the cluster: %s", msg)
3641 # Remove node from our /etc/hosts
3642 if self.cfg.GetClusterInfo().modify_etc_hosts:
3643 master_node = self.cfg.GetMasterNode()
3644 result = self.rpc.call_etc_hosts_modify(master_node,
3645 constants.ETC_HOSTS_REMOVE,
3647 result.Raise("Can't update hosts file with new host data")
3648 _RedistributeAncillaryFiles(self)
3651 class _NodeQuery(_QueryBase):
3652 FIELDS = query.NODE_FIELDS
3654 def ExpandNames(self, lu):
3655 lu.needed_locks = {}
3656 lu.share_locks[locking.LEVEL_NODE] = 1
3659 self.wanted = _GetWantedNodes(lu, self.names)
3661 self.wanted = locking.ALL_SET
3663 self.do_locking = (self.use_locking and
3664 query.NQ_LIVE in self.requested_data)
3667 # if we don't request only static fields, we need to lock the nodes
3668 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3670 def DeclareLocks(self, lu, level):
3673 def _GetQueryData(self, lu):
3674 """Computes the list of nodes and their attributes.
3677 all_info = lu.cfg.GetAllNodesInfo()
3679 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3681 # Gather data as requested
3682 if query.NQ_LIVE in self.requested_data:
3683 # filter out non-vm_capable nodes
3684 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3686 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3687 lu.cfg.GetHypervisorType())
3688 live_data = dict((name, nresult.payload)
3689 for (name, nresult) in node_data.items()
3690 if not nresult.fail_msg and nresult.payload)
3694 if query.NQ_INST in self.requested_data:
3695 node_to_primary = dict([(name, set()) for name in nodenames])
3696 node_to_secondary = dict([(name, set()) for name in nodenames])
3698 inst_data = lu.cfg.GetAllInstancesInfo()
3700 for inst in inst_data.values():
3701 if inst.primary_node in node_to_primary:
3702 node_to_primary[inst.primary_node].add(inst.name)
3703 for secnode in inst.secondary_nodes:
3704 if secnode in node_to_secondary:
3705 node_to_secondary[secnode].add(inst.name)
3707 node_to_primary = None
3708 node_to_secondary = None
3710 if query.NQ_OOB in self.requested_data:
3711 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3712 for name, node in all_info.iteritems())
3716 if query.NQ_GROUP in self.requested_data:
3717 groups = lu.cfg.GetAllNodeGroupsInfo()
3721 return query.NodeQueryData([all_info[name] for name in nodenames],
3722 live_data, lu.cfg.GetMasterNode(),
3723 node_to_primary, node_to_secondary, groups,
3724 oob_support, lu.cfg.GetClusterInfo())
3727 class LUNodeQuery(NoHooksLU):
3728 """Logical unit for querying nodes.
3731 # pylint: disable-msg=W0142
3734 def CheckArguments(self):
3735 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3736 self.op.use_locking)
3738 def ExpandNames(self):
3739 self.nq.ExpandNames(self)
3741 def Exec(self, feedback_fn):
3742 return self.nq.OldStyleQuery(self)
3745 class LUNodeQueryvols(NoHooksLU):
3746 """Logical unit for getting volumes on node(s).
3750 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3751 _FIELDS_STATIC = utils.FieldSet("node")
3753 def CheckArguments(self):
3754 _CheckOutputFields(static=self._FIELDS_STATIC,
3755 dynamic=self._FIELDS_DYNAMIC,
3756 selected=self.op.output_fields)
3758 def ExpandNames(self):
3759 self.needed_locks = {}
3760 self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = dict(((node, vol), inst.name)
                    for inst in ilist.values()
                    for (node, vols) in inst.MapLVsByNode().items()
                    for vol in vols)

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
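
# The rows returned by LUNodeQueryvols.Exec are lists of strings, one entry
# per requested output field; a volume that cannot be mapped to any instance
# shows up with "-" in the "instance" column.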
3818 class LUNodeQueryStorage(NoHooksLU):
3819 """Logical unit for getting information on storage units on node(s).
3822 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3825 def CheckArguments(self):
3826 _CheckOutputFields(static=self._FIELDS_STATIC,
3827 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3828 selected=self.op.output_fields)
3830 def ExpandNames(self):
3831 self.needed_locks = {}
3832 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3840 def Exec(self, feedback_fn):
3841 """Computes the list of nodes and their attributes.
3844 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3846 # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
3852 # Never ask for node or type as it's only known to the LU
3853 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3854 while extra in fields:
3855 fields.remove(extra)
3857 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3858 name_idx = field_idx[constants.SF_NAME]
3860 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3861 data = self.rpc.call_storage_list(self.nodes,
3862 self.op.storage_type, st_args,
3863 self.op.name, fields)
    result = []
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)
        result.append(out)

    return result
3901 class _InstanceQuery(_QueryBase):
3902 FIELDS = query.INSTANCE_FIELDS
3904 def ExpandNames(self, lu):
3905 lu.needed_locks = {}
3906 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3907 lu.share_locks[locking.LEVEL_NODE] = 1
    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3921 def DeclareLocks(self, lu, level):
3922 if level == locking.LEVEL_NODE and self.do_locking:
3923 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3925 def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
3929 cluster = lu.cfg.GetClusterInfo()
3930 all_info = lu.cfg.GetAllInstancesInfo()
3932 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3934 instance_list = [all_info[name] for name in instance_names]
3935 nodes = frozenset(itertools.chain(*(inst.all_nodes
3936 for inst in instance_list)))
3937 hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}
3970 if query.IQ_DISKUSAGE in self.requested_data:
3971 disk_usage = dict((inst.name,
3972 _ComputeDiskSize(inst.disk_template,
3973 [{"size": disk.size}
3974 for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None
    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None
3991 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3992 disk_usage, offline_nodes, bad_nodes,
3993 live_data, wrongnode_inst, consinfo)
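
# live_data above comes from the per-node "all instances info" RPC;
# offline_nodes and bad_nodes record nodes that could not be queried, while
# wrongnode_inst collects instances found running on a node that is not their
# configured primary.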
3996 class LUQuery(NoHooksLU):
3997 """Query for resources/items of a certain kind.
4000 # pylint: disable-msg=W0142
4003 def CheckArguments(self):
4004 qcls = _GetQueryImplementation(self.op.what)
4005 names = qlang.ReadSimpleFilter("name", self.op.filter)
4007 self.impl = qcls(names, self.op.fields, False)
4009 def ExpandNames(self):
4010 self.impl.ExpandNames(self)
4012 def DeclareLocks(self, level):
4013 self.impl.DeclareLocks(self, level)
4015 def Exec(self, feedback_fn):
4016 return self.impl.NewStyleQuery(self)
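
# LUQuery is the generic query entry point: _GetQueryImplementation (defined
# elsewhere in this module) maps self.op.what to one of the _QueryBase
# subclasses (e.g. _NodeQuery or _InstanceQuery above), which then does the
# actual work.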
4019 class LUQueryFields(NoHooksLU):
4020 """Query for resources/items of a certain kind.
4023 # pylint: disable-msg=W0142
4026 def CheckArguments(self):
4027 self.qcls = _GetQueryImplementation(self.op.what)
4029 def ExpandNames(self):
4030 self.needed_locks = {}
4032 def Exec(self, feedback_fn):
4033 return self.qcls.FieldsQuery(self.op.fields)
4036 class LUNodeModifyStorage(NoHooksLU):
4037 """Logical unit for modifying a storage volume on a node.
4042 def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
4061 def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
4066 def Exec(self, feedback_fn):
4067 """Computes the list of nodes and their attributes.
4070 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4071 result = self.rpc.call_storage_modify(self.op.node_name,
4072 self.op.storage_type, st_args,
4073 self.op.name, self.op.changes)
4074 result.Raise("Failed to modify storage unit '%s' on %s" %
4075 (self.op.name, self.op.node_name))
4078 class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]
4086 def CheckArguments(self):
4087 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4088 # validate/normalize the node name
4089 self.hostname = netutils.GetHostname(name=self.op.node_name,
4090 family=self.primary_ip_family)
4091 self.op.node_name = self.hostname.name
4093 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4094 raise errors.OpPrereqError("Cannot readd the master node",
4097 if self.op.readd and self.op.group:
4098 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4099 " being readded", errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - it is resolvable
    - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
4133 primary_ip = self.op.primary_ip = hostname.ip
4134 if self.op.secondary_ip is None:
4135 if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip
4141 secondary_ip = self.op.secondary_ip
4142 if not netutils.IP4Address.IsValid(secondary_ip):
4143 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4144 " address" % secondary_ip, errors.ECODE_INVAL)
4146 node_list = cfg.GetNodeList()
4147 if not self.op.readd and node in node_list:
4148 raise errors.OpPrereqError("Node %s is already in the configuration" %
4149 node, errors.ECODE_EXISTS)
4150 elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
4154 self.changed_primary_ip = False
4156 for existing_node_name in node_list:
4157 existing_node = cfg.GetNodeInfo(existing_node_name)
4159 if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
4170 existing_node.secondary_ip == primary_ip or
4171 existing_node.primary_ip == secondary_ip or
4172 existing_node.secondary_ip == secondary_ip):
4173 raise errors.OpPrereqError("New node ip address(es) conflict with"
4174 " existing node %s" % existing_node.name,
4175 errors.ECODE_NOTUNIQUE)
    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)
4190 if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)
4198 # check that the type of the node (single versus dual homed) is the
4199 # same as for the master
4200 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4201 master_singlehomed = myself.secondary_ip == myself.primary_ip
4202 newbie_singlehomed = secondary_ip == primary_ip
4203 if master_singlehomed != newbie_singlehomed:
4204 if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)
4213 # checks reachability
4214 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4215 raise errors.OpPrereqError("Node not reachable by ping",
4216 errors.ECODE_ENVIRON)
4218 if not newbie_singlehomed:
4219 # check reachability from my secondary ip to newbie's secondary ip
4220 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4221 source=myself.secondary_ip):
4222 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4223 " based ping to node daemon port",
4224 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
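    # ForceDictType only validates/coerces the user-supplied ndparams against
    # the declared parameter types here; the dict itself is attached to the
    # new node object later, in Exec().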
4250 def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
4254 new_node = self.new_node
4255 node = new_node.name
    # We are adding a new node, so we assume it is powered on
4258 new_node.powered = True
4260 # for re-adds, reset the offline/drained/master-candidate flags;
4261 # we need to reset here, otherwise offline would prevent RPC calls
4262 # later in the procedure; this also means that if the re-add
4263 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False  # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip
4272 # copy the master/vm_capable flags
4273 for attr in self._NFLAGS:
4274 setattr(new_node, attr, getattr(self.op, attr))
4276 # notify the user about any possible mc promotion
4277 if new_node.master_candidate:
4278 self.LogInfo("Node will be a master candidate")
4280 if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}
4285 # check connectivity
4286 result = self.rpc.call_version([node])[node]
4287 result.Raise("Can't get version information from node %s" % node)
4288 if constants.PROTOCOL_VERSION == result.payload:
4289 logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
4293 " node version %s" %
4294 (constants.PROTOCOL_VERSION, result.payload))
4296 # Add node to our /etc/hosts, and add key to known_hosts
4297 if self.cfg.GetClusterInfo().modify_etc_hosts:
4298 master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")
4305 if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)
4309 node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }
4315 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4316 self.cfg.GetClusterName())
4317 for verifier in node_verify_list:
4318 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4319 nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
4345 class LUNodeSetParams(LogicalUnit):
4346 """Modifies the parameters of a node.
4348 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4349 to the node role (as _ROLE_*)
4350 @cvar _R2F: a dictionary from node role to tuples of flags
4351 @cvar _FLAGS: a list of attribute names corresponding to the flags
4354 HPATH = "node-modify"
4355 HTYPE = constants.HTYPE_NODE
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
4365 _FLAGS = ["master_candidate", "drained", "offline"]
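  # Example: a node with (master_candidate=True, drained=False, offline=False)
  # maps to _ROLE_CANDIDATE; the three flags are mutually exclusive, so
  # exactly one of the four roles applies at any given time.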
4367 def CheckArguments(self):
4368 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4369 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4370 self.op.master_capable, self.op.vm_capable,
4371 self.op.secondary_ip, self.op.ndparams]
4372 if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)
4380 # Boolean value that tells us whether we might be demoting from MC
4381 self.might_demote = (self.op.master_candidate == False or
4382 self.op.offline == True or
4383 self.op.drained == True or
4384 self.op.master_capable == False)
4386 if self.op.secondary_ip:
4387 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)
4392 self.lock_all = self.op.auto_promote and self.might_demote
4393 self.lock_instances = self.op.secondary_ip is not None
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4401 if self.lock_instances:
4402 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4404 def DeclareLocks(self, level):
4405 # If we have locked all instances, before waiting to lock nodes, release
4406 # all the ones living on nodes unrelated to the current operation.
4407 if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
4411 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4412 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4413 instance = self.context.cfg.GetInstanceInfo(instance_name)
4414 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4415 if i_mirrored and self.op.node_name in instance.all_nodes:
4416 instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
4420 if instances_release:
4421 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4422 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl
4442 def CheckPrereq(self):
4443 """Check prerequisites.
    This only checks the instance list against the existing names.

    """
4448 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4450 if (self.op.master_candidate is not None or
4451 self.op.drained is not None or
4452 self.op.offline is not None):
4453 # we can't change the master's node flags
4454 if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)
4471 if node.master_candidate and self.might_demote and not self.lock_all:
4472 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4473 # check if after removing the current node, we're missing master
4475 (mc_remaining, mc_should, _) = \
4476 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4477 if mc_remaining < mc_should:
4478 raise errors.OpPrereqError("Not enough master candidates, please"
4479 " pass auto promote option to allow"
4480 " promotion", errors.ECODE_STATE)
4482 self.old_flags = old_flags = (node.master_candidate,
4483 node.drained, node.offline)
4484 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4485 self.old_role = old_role = self._F2R[old_flags]
4487 # Check for ineffective changes
4488 for attr in self._FLAGS:
4489 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4490 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4491 setattr(self.op, attr, None)
4493 # Past this point, any flag change to False means a transition
4494 # away from the respective state, as only real changes are kept
4496 # TODO: We might query the real power state if it supports OOB
4497 if _SupportsOob(self.cfg, node):
4498 if self.op.offline is False and not (node.powered or
4499 self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
4504 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4505 " which does not support out-of-band"
4506 " handling") % self.op.node_name)
4508 # If we're being deofflined/drained, we'll MC ourself if needed
4509 if (self.op.drained == False or self.op.offline == False or
4510 (self.op.master_capable and not node.master_capable)):
4511 if _DecideSelfPromotion(self):
4512 self.op.master_candidate = True
4513 self.LogInfo("Auto-promoting node to master candidate")
4515 # If we're no longer master capable, we'll demote ourselves from MC
4516 if self.op.master_capable == False and node.master_candidate:
4517 self.LogInfo("Demoting from master candidate")
4518 self.op.master_candidate = False
4521 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4522 if self.op.master_candidate:
4523 new_role = self._ROLE_CANDIDATE
4524 elif self.op.drained:
4525 new_role = self._ROLE_DRAINED
4526 elif self.op.offline:
4527 new_role = self._ROLE_OFFLINE
4528 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # offline/drained/master-candidate) flags
      new_role = self._ROLE_REGULAR
    else:  # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
4537 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4538 # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_ENVIRON)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
4550 if self.op.secondary_ip:
4551 # Ok even without locking, because this can't be changed by any LU
4552 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4553 master_singlehomed = master.secondary_ip == master.primary_ip
4554 if master_singlehomed and self.op.secondary_ip:
4555 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4556 " homed cluster", errors.ECODE_INVAL)
      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4570 if master.name != node.name:
4571 # check reachability from master secondary ip to new secondary ip
4572 if not netutils.TcpPing(self.op.secondary_ip,
4573 constants.DEFAULT_NODED_PORT,
4574 source=master.secondary_ip):
4575 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4576 " based ping to node daemon port",
4577 errors.ECODE_ENVIRON)
4579 if self.op.ndparams:
4580 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4581 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4582 self.new_ndparams = new_ndparams
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
4623 if self.op.secondary_ip:
4624 node.secondary_ip = self.op.secondary_ip
4625 result.append(("secondary_ip", self.op.secondary_ip))
4627 # this will trigger configuration file update, if needed
4628 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
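
# LUNodeSetParams.Exec returns a list of (parameter, new value) pairs; the
# caller (typically the command-line client) presents these to the user as a
# summary of the changes that were applied.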
4638 class LUNodePowercycle(NoHooksLU):
4639 """Powercycles a node.
4644 def CheckArguments(self):
4645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4646 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)
4651 def ExpandNames(self):
4652 """Locking for PowercycleNode.
4654 This is a last-resort option and shouldn't block on other
4655 jobs. Therefore, we grab no locks.
4658 self.needed_locks = {}
4660 def Exec(self, feedback_fn):
4664 result = self.rpc.call_node_powercycle(self.op.node_name,
4665 self.cfg.GetHypervisorType())
4666 result.Raise("Failed to schedule the reboot")
4667 return result.payload
4670 class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
4676 def ExpandNames(self):
4677 self.needed_locks = {}
4679 def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
4687 for os_name, hv_dict in cluster.os_hvp.items():
4688 os_hvp[os_name] = {}
4689 for hv_name, hv_params in hv_dict.items():
4690 if hv_name in cluster.enabled_hypervisors:
4691 os_hvp[os_name][hv_name] = hv_params
4693 # Convert ip_family to ip_version
4694 primary_ip_version = constants.IP4_VERSION
4695 if cluster.primary_ip_family == netutils.IP6Address.family:
4696 primary_ip_version = constants.IP6_VERSION
    result = {
      "software_version": constants.RELEASE_VERSION,
4700 "protocol_version": constants.PROTOCOL_VERSION,
4701 "config_version": constants.CONFIG_VERSION,
4702 "os_api_version": max(constants.OS_API_VERSIONS),
4703 "export_version": constants.EXPORT_VERSION,
4704 "architecture": (platform.architecture()[0], platform.machine()),
4705 "name": cluster.cluster_name,
4706 "master": cluster.master_node,
4707 "default_hypervisor": cluster.enabled_hypervisors[0],
4708 "enabled_hypervisors": cluster.enabled_hypervisors,
4709 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
4713 "osparams": cluster.osparams,
4714 "nicparams": cluster.nicparams,
4715 "ndparams": cluster.ndparams,
4716 "candidate_pool_size": cluster.candidate_pool_size,
4717 "master_netdev": cluster.master_netdev,
4718 "volume_group_name": cluster.volume_group_name,
4719 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4720 "file_storage_dir": cluster.file_storage_dir,
4721 "maintain_node_health": cluster.maintain_node_health,
4722 "ctime": cluster.ctime,
4723 "mtime": cluster.mtime,
4724 "uuid": cluster.uuid,
4725 "tags": list(cluster.GetTags()),
4726 "uid_pool": cluster.uid_pool,
4727 "default_iallocator": cluster.default_iallocator,
4728 "reserved_lvs": cluster.reserved_lvs,
4729 "primary_ip_version": primary_ip_version,
4730 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4731 "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
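
# The dictionary built above is the raw cluster description consumed by the
# clients (e.g. it backs "gnt-cluster info"), so its keys are effectively
# part of the external interface.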
4738 class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
4743 _FIELDS_DYNAMIC = utils.FieldSet()
4744 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4745 "watcher_pause", "volume_group_name")
4747 def CheckArguments(self):
4748 _CheckOutputFields(static=self._FIELDS_STATIC,
4749 dynamic=self._FIELDS_DYNAMIC,
4750 selected=self.op.output_fields)
4752 def ExpandNames(self):
4753 self.needed_locks = {}
4755 def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
4761 if field == "cluster_name":
4762 entry = self.cfg.GetClusterName()
4763 elif field == "master_node":
4764 entry = self.cfg.GetMasterNode()
4765 elif field == "drain_flag":
4766 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4767 elif field == "watcher_pause":
4768 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4769 elif field == "volume_group_name":
4770 entry = self.cfg.GetVGName()
4772 raise errors.ParameterError(field)
      values.append(entry)

    return values
4777 class LUInstanceActivateDisks(NoHooksLU):
4778 """Bring up an instance's disks.
4783 def ExpandNames(self):
4784 self._ExpandAndLockInstance()
4785 self.needed_locks[locking.LEVEL_NODE] = []
4786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4788 def DeclareLocks(self, level):
4789 if level == locking.LEVEL_NODE:
4790 self._LockInstancesNodes()
4792 def CheckPrereq(self):
4793 """Check prerequisites.
4795 This checks that the instance is in the cluster.
4798 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4799 assert self.instance is not None, \
4800 "Cannot retrieve locked instance %s" % self.op.instance_name
4801 _CheckNodeOnline(self, self.instance.primary_node)
4803 def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.
4820 This sets up the block devices on all nodes.
4822 @type lu: L{LogicalUnit}
4823 @param lu: the logical unit on whose behalf we execute
4824 @type instance: L{objects.Instance}
4825 @param instance: the instance for whose disks we assemble
4826 @type disks: list of L{objects.Disk} or None
4827 @param disks: which disks to assemble (or all, if None)
4828 @type ignore_secondaries: boolean
4829 @param ignore_secondaries: if true, errors on secondary nodes
4830 won't result in an error return from the function
4831 @type ignore_size: boolean
4832 @param ignore_size: if true, the current known size of the disk
4833 will not be used during the disk activation, useful for cases
4834 when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
4843 disks = _ExpandCheckDisks(instance, disks)
4845 # With the two passes mechanism we try to reduce the window of
4846 # opportunity for the race condition of switching DRBD to primary
4847 # before handshaking occured, but we do not eliminate it
4849 # The proper fix would be to wait (with some limits) until the
4850 # connection has been made and drbd transitions from WFConnection
4851 # into any other network-connected state (Connected, SyncTarget,
4854 # 1st pass, assemble on all nodes in secondary mode
4855 for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False
4870 # FIXME: race condition on drbd migration to primary
4872 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4895 # leave the disks configured for the primary node
4896 # this is a workaround that would be fixed better by
4897 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)
4901 return disks_ok, device_info
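
# Illustrative (hypothetical) return value of _AssembleInstanceDisks for a
# single-disk DRBD instance whose primary node is "node1.example.com":
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. the disks_ok flag plus one (node, iv_name, device path) tuple per disk.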
4904 def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
4919 class LUInstanceDeactivateDisks(NoHooksLU):
4920 """Shutdown an instance's disks.
4925 def ExpandNames(self):
4926 self._ExpandAndLockInstance()
4927 self.needed_locks[locking.LEVEL_NODE] = []
4928 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4930 def DeclareLocks(self, level):
4931 if level == locking.LEVEL_NODE:
4932 self._LockInstancesNodes()
4934 def CheckPrereq(self):
4935 """Check prerequisites.
4937 This checks that the instance is in the cluster.
4940 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4941 assert self.instance is not None, \
4942 "Cannot retrieve locked instance %s" % self.op.instance_name
4944 def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
4955 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4956 """Shutdown block devices of an instance.
4958 This function checks if an instance is running, before calling
4959 _ShutdownInstanceDisks.
4962 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4963 _ShutdownInstanceDisks(lu, instance, disks=disks)
4966 def _ExpandCheckDisks(instance, disks):
4967 """Return the instance disks selected by the disks list
4969 @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
4984 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4985 """Shutdown block devices of an instance.
4987 This does the shutdown on all nodes of the instance.
  If the ignore_primary is false, errors on the primary node are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
5010 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5011 """Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5018 @type lu: C{LogicalUnit}
5019 @param lu: a logical unit from which we get configuration data
5021 @param node: the node to check
5022 @type reason: C{str}
5023 @param reason: string to use in the error message
5024 @type requested: C{int}
5025 @param requested: the amount of memory in MiB to check for
5026 @type hypervisor_name: C{str}
5027 @param hypervisor_name: the hypervisor to ask for memory stats
5028 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5029 we cannot check the node
5032 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5033 nodeinfo[node].Raise("Can't get data from node %s" % node,
5034 prereq=True, ecode=errors.ECODE_ENVIRON)
5035 free_mem = nodeinfo[node].payload.get('memory_free', None)
5036 if not isinstance(free_mem, int):
5037 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5038 " was '%s'" % (node, free_mem),
5039 errors.ECODE_ENVIRON)
5040 if requested > free_mem:
5041 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5042 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
5047 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5048 """Checks if nodes have enough free disk space in the all VGs.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5055 @type lu: C{LogicalUnit}
5056 @param lu: a logical unit from which we get configuration data
5057 @type nodenames: C{list}
5058 @param nodenames: the list of node names to check
5059 @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
5066 for vg, req_size in req_sizes.items():
5067 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
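
# Example (hypothetical) req_sizes argument: {"xenvg": 10240} asks for a
# check of 10 GiB of free space in volume group "xenvg" on every node in
# nodenames.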
5070 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5071 """Checks if nodes have enough free disk space in the specified VG.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5078 @type lu: C{LogicalUnit}
5079 @param lu: a logical unit from which we get configuration data
5080 @type nodenames: C{list}
5081 @param nodenames: the list of node names to check
5083 @param vg: the volume group to check
5084 @type requested: C{int}
5085 @param requested: the amount of disk in MiB to check for
5086 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5087 or we cannot check the node
5090 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5091 for node in nodenames:
5092 info = nodeinfo[node]
5093 info.Raise("Cannot get current information from node %s" % node,
5094 prereq=True, ecode=errors.ECODE_ENVIRON)
5095 vg_free = info.payload.get("vg_free", None)
5096 if not isinstance(vg_free, int):
5097 raise errors.OpPrereqError("Can't compute free disk space on node"
5098 " %s for vg %s, result was '%s'" %
5099 (node, vg, vg_free), errors.ECODE_ENVIRON)
5100 if requested > vg_free:
5101 raise errors.OpPrereqError("Not enough disk space on target node %s"
5102 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
5107 class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
5111 HPATH = "instance-start"
5112 HTYPE = constants.HTYPE_INSTANCE
5115 def CheckArguments(self):
5117 if self.op.beparams:
5118 # fill the beparams dict
5119 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5121 def ExpandNames(self):
5122 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
5137 def CheckPrereq(self):
5138 """Check prerequisites.
5140 This checks that the instance is in the cluster.
5143 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5144 assert self.instance is not None, \
5145 "Cannot retrieve locked instance %s" % self.op.instance_name
5148 if self.op.hvparams:
5149 # check hypervisor parameter syntax (locally)
5150 cluster = self.cfg.GetClusterInfo()
5151 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5152 filled_hvp = cluster.FillHV(instance)
5153 filled_hvp.update(self.op.hvparams)
5154 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5155 hv_type.CheckParameterSyntax(filled_hvp)
5156 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5158 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")

    else:
      _CheckNodeOnline(self, instance.primary_node)
5168 bep = self.cfg.GetClusterInfo().FillBE(instance)
5170 # check bridges existence
5171 _CheckInstanceBridgesExist(self, instance)
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
5176 remote_info.Raise("Error checking node %s" % instance.primary_node,
5177 prereq=True, ecode=errors.ECODE_ENVIRON)
5178 if not remote_info.payload: # not running already
5179 _CheckNodeFreeMemory(self, instance.primary_node,
5180 "starting instance %s" % instance.name,
5181 bep[constants.BE_MEMORY], instance.hypervisor)
5183 def Exec(self, feedback_fn):
5184 """Start the instance.
5187 instance = self.instance
5188 force = self.op.force
5190 if not self.op.no_remember:
5191 self.cfg.MarkInstanceUp(instance.name)
    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
5209 class LUInstanceReboot(LogicalUnit):
5210 """Reboot an instance.
5213 HPATH = "instance-reboot"
5214 HTYPE = constants.HTYPE_INSTANCE
5217 def ExpandNames(self):
5218 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
5235 def CheckPrereq(self):
5236 """Check prerequisites.
5238 This checks that the instance is in the cluster.
5241 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5242 assert self.instance is not None, \
5243 "Cannot retrieve locked instance %s" % self.op.instance_name
5245 _CheckNodeOnline(self, instance.primary_node)
5247 # check bridges existence
5248 _CheckInstanceBridgesExist(self, instance)
5250 def Exec(self, feedback_fn):
5251 """Reboot the instance.
5254 instance = self.instance
5255 ignore_secondaries = self.op.ignore_secondaries
5256 reboot_type = self.op.reboot_type
5258 node_current = instance.primary_node
5260 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5261 constants.INSTANCE_REBOOT_HARD]:
5262 for disk in instance.disks:
5263 self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
5270 self.op.shutdown_timeout)
5271 result.Raise("Could not shutdown instance for full reboot")
5272 _ShutdownInstanceDisks(self, instance)
5273 _StartInstanceDisks(self, instance, ignore_secondaries)
5274 result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
5278 raise errors.OpExecError("Could not start instance for"
5279 " full reboot: %s" % msg)
5281 self.cfg.MarkInstanceUp(instance.name)
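
    # Summary: soft and hard reboots are delegated to the hypervisor on the
    # primary node via the instance_reboot RPC, while a full reboot is
    # emulated from the master by shutting the instance down and starting it
    # again (including a disk deactivate/activate cycle).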
5284 class LUInstanceShutdown(LogicalUnit):
5285 """Shutdown an instance.
5288 HPATH = "instance-stop"
5289 HTYPE = constants.HTYPE_INSTANCE
5292 def ExpandNames(self):
5293 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
5306 def CheckPrereq(self):
5307 """Check prerequisites.
5309 This checks that the instance is in the cluster.
5312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5313 assert self.instance is not None, \
5314 "Cannot retrieve locked instance %s" % self.op.instance_name
5316 self.primary_offline = \
5317 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5319 if self.primary_offline and self.op.ignore_offline_nodes:
5320 self.proc.LogWarning("Ignoring offline primary node")
5322 _CheckNodeOnline(self, self.instance.primary_node)
5324 def Exec(self, feedback_fn):
5325 """Shutdown the instance.
5328 instance = self.instance
5329 node_current = instance.primary_node
5330 timeout = self.op.timeout
5332 if not self.op.no_remember:
5333 self.cfg.MarkInstanceDown(instance.name)
    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
5347 class LUInstanceReinstall(LogicalUnit):
5348 """Reinstall an instance.
5351 HPATH = "instance-reinstall"
5352 HTYPE = constants.HTYPE_INSTANCE
5355 def ExpandNames(self):
5356 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
5368 def CheckPrereq(self):
5369 """Check prerequisites.
5371 This checks that the instance is in the cluster and is not running.
5374 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5375 assert instance is not None, \
5376 "Cannot retrieve locked instance %s" % self.op.instance_name
5377 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5378 " offline, cannot reinstall")
5379 for node in instance.secondary_nodes:
5380 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5381 " cannot reinstall")
5383 if instance.disk_template == constants.DT_DISKLESS:
5384 raise errors.OpPrereqError("Instance '%s' has no disks" %
5385 self.op.instance_name,
5387 _CheckInstanceDown(self, instance, "cannot reinstall")
5389 if self.op.os_type is not None:
5391 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5392 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os
5397 nodelist = list(instance.all_nodes)
5399 if self.op.osparams:
5400 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5401 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict  # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance
5408 def Exec(self, feedback_fn):
5409 """Reinstall the instance.
5412 inst = self.instance
5414 if self.op.os_type is not None:
5415 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5416 inst.os = self.op.os_type
5417 # Write to configuration
5418 self.cfg.Update(inst, feedback_fn)
5420 _StartInstanceDisks(self, inst, None)
5422 feedback_fn("Running the instance OS create scripts...")
5423 # FIXME: pass debug option from opcode to backend
5424 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5425 self.op.debug_level,
5426 osparams=self.os_inst)
5427 result.Raise("Could not install OS for instance %s on node %s" %
5428 (inst.name, inst.primary_node))
5430 _ShutdownInstanceDisks(self, inst)
5433 class LUInstanceRecreateDisks(LogicalUnit):
5434 """Recreate an instance's missing disks.
5437 HPATH = "instance-recreate-disks"
5438 HTYPE = constants.HTYPE_INSTANCE
5441 def CheckArguments(self):
5442 # normalise the disk list
5443 self.op.disks = sorted(frozenset(self.op.disks))
5445 def ExpandNames(self):
5446 self._ExpandAndLockInstance()
5447 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
5454 def DeclareLocks(self, level):
5455 if level == locking.LEVEL_NODE:
5456 # if we replace the nodes, we only need to lock the old primary,
5457 # otherwise we need to lock all nodes for disk re-creation
5458 primary_only = bool(self.op.nodes)
5459 self._LockInstancesNodes(primary_only=primary_only)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
5471 def CheckPrereq(self):
5472 """Check prerequisites.
5474 This checks that the instance is in the cluster and is not running.
5477 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5478 assert instance is not None, \
5479 "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
5494 _CheckNodeOnline(self, primary_node)
5496 if instance.disk_template == constants.DT_DISKLESS:
5497 raise errors.OpPrereqError("Instance '%s' has no disks" %
5498 self.op.instance_name, errors.ECODE_INVAL)
5499 # if we replace nodes *and* the old primary is offline, we don't
5501 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
5502 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
5503 if not (self.op.nodes and old_pnode.offline):
5504 _CheckInstanceDown(self, instance, "cannot recreate disks")
    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance
5519 def Exec(self, feedback_fn):
5520 """Recreate the disks.
    instance = self.instance

    to_skip = []
    mods = []  # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks:  # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6  # otherwise disk internals
                                            # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))
5546 # now that we have passed all asserts above, we can apply the mods
5547 # in a single run (to avoid partial changes)
5548 for idx, new_id in mods:
5549 instance.disks[idx].logical_id = new_id
    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
5563 class LUInstanceRename(LogicalUnit):
5564 """Rename an instance.
5567 HPATH = "instance-rename"
5568 HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
5590 def CheckPrereq(self):
5591 """Check prerequisites.
5593 This checks that the instance is in the cluster and is not running.
5596 self.op.instance_name = _ExpandInstanceName(self.cfg,
5597 self.op.instance_name)
5598 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5599 assert instance is not None
5600 _CheckNodeOnline(self, instance.primary_node)
5601 _CheckInstanceDown(self, instance, "cannot rename")
5602 self.instance = instance
5604 new_name = self.op.new_name
5605 if self.op.name_check:
5606 hostname = netutils.GetHostname(name=new_name)
5607 if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      new_name = self.op.new_name = hostname.name
5611 if (self.op.ip_check and
5612 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5613 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5614 (hostname.ip, new_name),
5615 errors.ECODE_NOTUNIQUE)
5617 instance_list = self.cfg.GetInstanceList()
5618 if new_name in instance_list and new_name != instance.name:
5619 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5620 new_name, errors.ECODE_EXISTS)
5622 def Exec(self, feedback_fn):
5623 """Rename the instance.
5626 inst = self.instance
5627 old_name = inst.name
5629 rename_file_storage = False
5630 if (inst.disk_template == constants.DT_FILE and
5631 self.op.new_name != inst.name):
5632 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5633 rename_file_storage = True
5635 self.cfg.RenameInstance(inst.name, self.op.new_name)
5636 # Change the instance lock. This is definitely safe while we hold the BGL
5637 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5638 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5640 # re-read the instance from the configuration after rename
5641 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5643 if rename_file_storage:
5644 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5645 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5646 old_file_storage_dir,
5647 new_file_storage_dir)
5648 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5649 " (but the instance has been renamed in Ganeti)" %
5650 (inst.primary_node, old_file_storage_dir,
5651 new_file_storage_dir))
5653 _StartInstanceDisks(self, inst, None)
5654 try:
5655 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5656 old_name, self.op.debug_level)
5657 msg = result.fail_msg
5658 if msg:
5659 msg = ("Could not run OS rename script for instance %s on node %s"
5660 " (but the instance has been renamed in Ganeti): %s" %
5661 (inst.name, inst.primary_node, msg))
5662 self.proc.LogWarning(msg)
5663 finally:
5664 _ShutdownInstanceDisks(self, inst)
5666 return inst.name
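# Illustrative sketch (comments only, directory layout assumed): for a
# file-based instance the storage directory is derived from the first disk's
# logical_id, so renaming "old.example.com" to "new.example.com" moves e.g.
#
#   /srv/ganeti/file-storage/old.example.com   (dirname of .../old.example.com/disk0)
#   -> /srv/ganeti/file-storage/new.example.com
#
# through the file_storage_dir_rename RPC used above; the actual base path
# depends on the cluster's configured file storage directory.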
5669 class LUInstanceRemove(LogicalUnit):
5670 """Remove an instance.
5673 HPATH = "instance-remove"
5674 HTYPE = constants.HTYPE_INSTANCE
5677 def ExpandNames(self):
5678 self._ExpandAndLockInstance()
5679 self.needed_locks[locking.LEVEL_NODE] = []
5680 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5682 def DeclareLocks(self, level):
5683 if level == locking.LEVEL_NODE:
5684 self._LockInstancesNodes()
5686 def BuildHooksEnv(self):
5689 This runs on master, primary and secondary nodes of the instance.
5692 env = _BuildInstanceHookEnvByObject(self, self.instance)
5693 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5694 nl = [self.cfg.GetMasterNode()]
5695 nl_post = list(self.instance.all_nodes) + nl
5696 return env, nl, nl_post
5698 def CheckPrereq(self):
5699 """Check prerequisites.
5701 This checks that the instance is in the cluster.
5704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5705 assert self.instance is not None, \
5706 "Cannot retrieve locked instance %s" % self.op.instance_name
5708 def Exec(self, feedback_fn):
5709 """Remove the instance.
5712 instance = self.instance
5713 logging.info("Shutting down instance %s on node %s",
5714 instance.name, instance.primary_node)
5716 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5717 self.op.shutdown_timeout)
5718 msg = result.fail_msg
5719 if msg:
5720 if self.op.ignore_failures:
5721 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5722 else:
5723 raise errors.OpExecError("Could not shutdown instance %s on"
5724 " node %s: %s" %
5725 (instance.name, instance.primary_node, msg))
5727 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5730 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5731 """Utility function to remove an instance.
5734 logging.info("Removing block devices for instance %s", instance.name)
5736 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
5737 if not ignore_failures:
5738 raise errors.OpExecError("Can't remove instance's disks")
5739 feedback_fn("Warning: can't remove instance's disks")
5741 logging.info("Removing instance %s out of cluster config", instance.name)
5743 lu.cfg.RemoveInstance(instance.name)
5745 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5746 "Instance lock removal conflict"
5748 # Remove lock for the instance
5749 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5752 class LUInstanceQuery(NoHooksLU):
5753 """Logical unit for querying instances.
5756 # pylint: disable-msg=W0142
5759 def CheckArguments(self):
5760 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5761 self.op.use_locking)
5763 def ExpandNames(self):
5764 self.iq.ExpandNames(self)
5766 def DeclareLocks(self, level):
5767 self.iq.DeclareLocks(self, level)
5769 def Exec(self, feedback_fn):
5770 return self.iq.OldStyleQuery(self)
5773 class LUInstanceFailover(LogicalUnit):
5774 """Failover an instance.
5777 HPATH = "instance-failover"
5778 HTYPE = constants.HTYPE_INSTANCE
5781 def ExpandNames(self):
5782 self._ExpandAndLockInstance()
5783 self.needed_locks[locking.LEVEL_NODE] = []
5784 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5786 def DeclareLocks(self, level):
5787 if level == locking.LEVEL_NODE:
5788 self._LockInstancesNodes()
5790 def BuildHooksEnv(self):
5793 This runs on master, primary and secondary nodes of the instance.
5796 instance = self.instance
5797 source_node = instance.primary_node
5798 target_node = instance.secondary_nodes[0]
5799 env = {
5800 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5801 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5802 "OLD_PRIMARY": source_node,
5803 "OLD_SECONDARY": target_node,
5804 "NEW_PRIMARY": target_node,
5805 "NEW_SECONDARY": source_node,
5806 }
5807 env.update(_BuildInstanceHookEnvByObject(self, instance))
5808 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5809 nl_post = list(nl)
5810 nl_post.append(source_node)
5811 return env, nl, nl_post
5813 def CheckPrereq(self):
5814 """Check prerequisites.
5816 This checks that the instance is in the cluster.
5819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5820 assert self.instance is not None, \
5821 "Cannot retrieve locked instance %s" % self.op.instance_name
5823 bep = self.cfg.GetClusterInfo().FillBE(instance)
5824 if instance.disk_template not in constants.DTS_NET_MIRROR:
5825 raise errors.OpPrereqError("Instance's disk layout is not"
5826 " network mirrored, cannot failover.",
5829 secondary_nodes = instance.secondary_nodes
5830 if not secondary_nodes:
5831 raise errors.ProgrammerError("no secondary node but using "
5832 "a mirrored disk template")
5834 target_node = secondary_nodes[0]
5835 _CheckNodeOnline(self, target_node)
5836 _CheckNodeNotDrained(self, target_node)
5837 if instance.admin_up:
5838 # check memory requirements on the secondary node
5839 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5840 instance.name, bep[constants.BE_MEMORY],
5841 instance.hypervisor)
5843 self.LogInfo("Not checking memory on the secondary node as"
5844 " instance will not be started")
5846 # check bridge existence
5847 _CheckInstanceBridgesExist(self, instance, node=target_node)
5849 def Exec(self, feedback_fn):
5850 """Failover an instance.
5852 The failover is done by shutting it down on its present node and
5853 starting it on the secondary.
5856 instance = self.instance
5857 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5859 source_node = instance.primary_node
5860 target_node = instance.secondary_nodes[0]
5862 if instance.admin_up:
5863 feedback_fn("* checking disk consistency between source and target")
5864 for dev in instance.disks:
5865 # for drbd, these are drbd over lvm
5866 if not _CheckDiskConsistency(self, dev, target_node, False):
5867 if not self.op.ignore_consistency:
5868 raise errors.OpExecError("Disk %s is degraded on target node,"
5869 " aborting failover." % dev.iv_name)
5871 feedback_fn("* not checking disk consistency as instance is not running")
5873 feedback_fn("* shutting down instance on source node")
5874 logging.info("Shutting down instance %s on node %s",
5875 instance.name, source_node)
5877 result = self.rpc.call_instance_shutdown(source_node, instance,
5878 self.op.shutdown_timeout)
5879 msg = result.fail_msg
5880 if msg:
5881 if self.op.ignore_consistency or primary_node.offline:
5882 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5883 " Proceeding anyway. Please make sure node"
5884 " %s is down. Error details: %s",
5885 instance.name, source_node, source_node, msg)
5886 else:
5887 raise errors.OpExecError("Could not shutdown instance %s on"
5888 " node %s: %s" %
5889 (instance.name, source_node, msg))
5891 feedback_fn("* deactivating the instance's disks on source node")
5892 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5893 raise errors.OpExecError("Can't shut down the instance's disks.")
5895 instance.primary_node = target_node
5896 # distribute new instance config to the other nodes
5897 self.cfg.Update(instance, feedback_fn)
5899 # Only start the instance if it's marked as up
5900 if instance.admin_up:
5901 feedback_fn("* activating the instance's disks on target node")
5902 logging.info("Starting instance %s on node %s",
5903 instance.name, target_node)
5905 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5906 ignore_secondaries=True)
5907 if not disks_ok:
5908 _ShutdownInstanceDisks(self, instance)
5909 raise errors.OpExecError("Can't activate the instance's disks")
5911 feedback_fn("* starting the instance on the target node")
5912 result = self.rpc.call_instance_start(target_node, instance, None, None)
5913 msg = result.fail_msg
5914 if msg:
5915 _ShutdownInstanceDisks(self, instance)
5916 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5917 (instance.name, target_node, msg))
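# Illustrative sketch (comments only, client-side usage assumed and not part
# of this LU): a failover is normally requested by submitting the matching
# opcode, which the master daemon dispatches to LUInstanceFailover above:
#
#   op = opcodes.OpInstanceFailover(instance_name="inst1.example.com",
#                                   ignore_consistency=False,
#                                   shutdown_timeout=120)
#
# The values are examples only; the LU relies on instance_name,
# ignore_consistency and shutdown_timeout as seen in the code above.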
5920 class LUInstanceMigrate(LogicalUnit):
5921 """Migrate an instance.
5923 This is migration without shutting down, compared to the failover,
5924 which is done with shutdown.
5927 HPATH = "instance-migrate"
5928 HTYPE = constants.HTYPE_INSTANCE
5931 def ExpandNames(self):
5932 self._ExpandAndLockInstance()
5934 self.needed_locks[locking.LEVEL_NODE] = []
5935 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5937 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5938 self.op.cleanup)
5939 self.tasklets = [self._migrater]
5941 def DeclareLocks(self, level):
5942 if level == locking.LEVEL_NODE:
5943 self._LockInstancesNodes()
5945 def BuildHooksEnv(self):
5948 This runs on master, primary and secondary nodes of the instance.
5951 instance = self._migrater.instance
5952 source_node = instance.primary_node
5953 target_node = instance.secondary_nodes[0]
5954 env = _BuildInstanceHookEnvByObject(self, instance)
5955 env["MIGRATE_LIVE"] = self._migrater.live
5956 env["MIGRATE_CLEANUP"] = self.op.cleanup
5957 env.update({
5958 "OLD_PRIMARY": source_node,
5959 "OLD_SECONDARY": target_node,
5960 "NEW_PRIMARY": target_node,
5961 "NEW_SECONDARY": source_node,
5962 })
5963 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5964 nl_post = list(nl)
5965 nl_post.append(source_node)
5966 return env, nl, nl_post
5969 class LUInstanceMove(LogicalUnit):
5970 """Move an instance by data-copying.
5973 HPATH = "instance-move"
5974 HTYPE = constants.HTYPE_INSTANCE
5977 def ExpandNames(self):
5978 self._ExpandAndLockInstance()
5979 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5980 self.op.target_node = target_node
5981 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5982 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5984 def DeclareLocks(self, level):
5985 if level == locking.LEVEL_NODE:
5986 self._LockInstancesNodes(primary_only=True)
5988 def BuildHooksEnv(self):
5991 This runs on master, primary and secondary nodes of the instance.
5994 env = {
5995 "TARGET_NODE": self.op.target_node,
5996 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5997 }
5998 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5999 nl = ([self.cfg.GetMasterNode()] + [self.instance.primary_node,
6000 self.op.target_node])
6001 return env, nl, nl
6003 def CheckPrereq(self):
6004 """Check prerequisites.
6006 This checks that the instance is in the cluster.
6009 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6010 assert self.instance is not None, \
6011 "Cannot retrieve locked instance %s" % self.op.instance_name
6013 node = self.cfg.GetNodeInfo(self.op.target_node)
6014 assert node is not None, \
6015 "Cannot retrieve locked node %s" % self.op.target_node
6017 self.target_node = target_node = node.name
6019 if target_node == instance.primary_node:
6020 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6021 (instance.name, target_node),
6024 bep = self.cfg.GetClusterInfo().FillBE(instance)
6026 for idx, dsk in enumerate(instance.disks):
6027 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6028 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6029 " cannot copy" % idx, errors.ECODE_STATE)
6031 _CheckNodeOnline(self, target_node)
6032 _CheckNodeNotDrained(self, target_node)
6033 _CheckNodeVmCapable(self, target_node)
6035 if instance.admin_up:
6036 # check memory requirements on the secondary node
6037 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6038 instance.name, bep[constants.BE_MEMORY],
6039 instance.hypervisor)
6041 self.LogInfo("Not checking memory on the secondary node as"
6042 " instance will not be started")
6044 # check bridge existence
6045 _CheckInstanceBridgesExist(self, instance, node=target_node)
6047 def Exec(self, feedback_fn):
6048 """Move an instance.
6050 The move is done by shutting it down on its present node, copying
6051 the data over (slow) and starting it on the new node.
6054 instance = self.instance
6056 source_node = instance.primary_node
6057 target_node = self.target_node
6059 self.LogInfo("Shutting down instance %s on source node %s",
6060 instance.name, source_node)
6062 result = self.rpc.call_instance_shutdown(source_node, instance,
6063 self.op.shutdown_timeout)
6064 msg = result.fail_msg
6065 if msg:
6066 if self.op.ignore_consistency:
6067 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6068 " Proceeding anyway. Please make sure node"
6069 " %s is down. Error details: %s",
6070 instance.name, source_node, source_node, msg)
6071 else:
6072 raise errors.OpExecError("Could not shutdown instance %s on"
6073 " node %s: %s" %
6074 (instance.name, source_node, msg))
6076 # create the target disks
6077 try:
6078 _CreateDisks(self, instance, target_node=target_node)
6079 except errors.OpExecError:
6080 self.LogWarning("Device creation failed, reverting...")
6081 try:
6082 _RemoveDisks(self, instance, target_node=target_node)
6083 finally:
6084 self.cfg.ReleaseDRBDMinors(instance.name)
6085 raise
6087 cluster_name = self.cfg.GetClusterInfo().cluster_name
6089 errs = []
6090 # activate, get path, copy the data over
6091 for idx, disk in enumerate(instance.disks):
6092 self.LogInfo("Copying data for disk %d", idx)
6093 result = self.rpc.call_blockdev_assemble(target_node, disk,
6094 instance.name, True, idx)
6095 if result.fail_msg:
6096 self.LogWarning("Can't assemble newly created disk %d: %s",
6097 idx, result.fail_msg)
6098 errs.append(result.fail_msg)
6099 break
6100 dev_path = result.payload
6101 result = self.rpc.call_blockdev_export(source_node, disk,
6102 target_node, dev_path,
6103 cluster_name)
6104 if result.fail_msg:
6105 self.LogWarning("Can't copy data over for disk %d: %s",
6106 idx, result.fail_msg)
6107 errs.append(result.fail_msg)
6108 break
6110 if errs:
6111 self.LogWarning("Some disks failed to copy, aborting")
6112 try:
6113 _RemoveDisks(self, instance, target_node=target_node)
6114 finally:
6115 self.cfg.ReleaseDRBDMinors(instance.name)
6116 raise errors.OpExecError("Errors during disk copy: %s" %
6117 (",".join(errs),))
6119 instance.primary_node = target_node
6120 self.cfg.Update(instance, feedback_fn)
6122 self.LogInfo("Removing the disks on the original node")
6123 _RemoveDisks(self, instance, target_node=source_node)
6125 # Only start the instance if it's marked as up
6126 if instance.admin_up:
6127 self.LogInfo("Starting instance %s on node %s",
6128 instance.name, target_node)
6130 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6131 ignore_secondaries=True)
6133 _ShutdownInstanceDisks(self, instance)
6134 raise errors.OpExecError("Can't activate the instance's disks")
6136 result = self.rpc.call_instance_start(target_node, instance, None, None)
6137 msg = result.fail_msg
6139 _ShutdownInstanceDisks(self, instance)
6140 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6141 (instance.name, target_node, msg))
6144 class LUNodeMigrate(LogicalUnit):
6145 """Migrate all instances from a node.
6148 HPATH = "node-migrate"
6149 HTYPE = constants.HTYPE_NODE
6152 def ExpandNames(self):
6153 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6155 self.needed_locks = {
6156 locking.LEVEL_NODE: [self.op.node_name],
6157 }
6159 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6161 # Create tasklets for migrating instances for all instances on this node
6162 names = []
6163 tasklets = []
6165 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6166 logging.debug("Migrating instance %s", inst.name)
6167 names.append(inst.name)
6169 tasklets.append(TLMigrateInstance(self, inst.name, False))
6171 self.tasklets = tasklets
6173 # Declare instance locks
6174 self.needed_locks[locking.LEVEL_INSTANCE] = names
6176 def DeclareLocks(self, level):
6177 if level == locking.LEVEL_NODE:
6178 self._LockInstancesNodes()
6180 def BuildHooksEnv(self):
6183 This runs on the master, the primary and all the secondaries.
6186 env = {
6187 "NODE_NAME": self.op.node_name,
6188 }
6190 nl = [self.cfg.GetMasterNode()]
6192 return (env, nl, nl)
6195 class TLMigrateInstance(Tasklet):
6196 """Tasklet class for instance migration.
6199 @ivar live: whether the migration will be done live or non-live;
6200 this variable is initialized only after CheckPrereq has run
6203 def __init__(self, lu, instance_name, cleanup):
6204 """Initializes this class.
6207 Tasklet.__init__(self, lu)
6210 self.instance_name = instance_name
6211 self.cleanup = cleanup
6212 self.live = False # will be overridden later
6214 def CheckPrereq(self):
6215 """Check prerequisites.
6217 This checks that the instance is in the cluster.
6220 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6221 instance = self.cfg.GetInstanceInfo(instance_name)
6222 assert instance is not None
6224 if instance.disk_template != constants.DT_DRBD8:
6225 raise errors.OpPrereqError("Instance's disk layout is not"
6226 " drbd8, cannot migrate.", errors.ECODE_STATE)
6228 secondary_nodes = instance.secondary_nodes
6229 if not secondary_nodes:
6230 raise errors.ConfigurationError("No secondary node but using"
6231 " drbd8 disk template")
6233 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6235 target_node = secondary_nodes[0]
6236 # check memory requirements on the secondary node
6237 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6238 instance.name, i_be[constants.BE_MEMORY],
6239 instance.hypervisor)
6241 # check bridge existence
6242 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6244 if not self.cleanup:
6245 _CheckNodeNotDrained(self.lu, target_node)
6246 result = self.rpc.call_instance_migratable(instance.primary_node,
6247 instance)
6248 result.Raise("Can't migrate, please use failover",
6249 prereq=True, ecode=errors.ECODE_STATE)
6251 self.instance = instance
6253 if self.lu.op.live is not None and self.lu.op.mode is not None:
6254 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6255 " parameters are accepted",
6256 errors.ECODE_INVAL)
6257 if self.lu.op.live is not None:
6258 if self.lu.op.live:
6259 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6260 else:
6261 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6262 # reset the 'live' parameter to None so that repeated
6263 # invocations of CheckPrereq do not raise an exception
6264 self.lu.op.live = None
6265 elif self.lu.op.mode is None:
6266 # read the default value from the hypervisor
6267 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6268 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6270 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
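# Illustrative summary of the live/mode resolution above (comments only):
#
#   op.live = True,  op.mode = None -> mode becomes HT_MIGRATION_LIVE
#   op.live = False, op.mode = None -> mode becomes HT_MIGRATION_NONLIVE
#   op.live = None,  op.mode = None -> mode taken from the hypervisor's
#                                      HV_MIGRATION_MODE default
#   op.live set and op.mode set     -> OpPrereqError (mutually exclusive)
#
# self.live then simply reflects whether the resolved mode is
# HT_MIGRATION_LIVE.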
6272 def _WaitUntilSync(self):
6273 """Poll with custom rpc for disk sync.
6275 This uses our own step-based rpc call.
6278 self.feedback_fn("* wait until resync is done")
6279 all_done = False
6280 while not all_done:
6281 all_done = True
6282 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6283 self.nodes_ip,
6284 self.instance.disks)
6285 min_percent = 100
6286 for node, nres in result.items():
6287 nres.Raise("Cannot resync disks on node %s" % node)
6288 node_done, node_percent = nres.payload
6289 all_done = all_done and node_done
6290 if node_percent is not None:
6291 min_percent = min(min_percent, node_percent)
6292 if not all_done:
6293 if min_percent < 100:
6294 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6295 time.sleep(2)
6297 def _EnsureSecondary(self, node):
6298 """Demote a node to secondary.
6301 self.feedback_fn("* switching node %s to secondary mode" % node)
6303 for dev in self.instance.disks:
6304 self.cfg.SetDiskID(dev, node)
6306 result = self.rpc.call_blockdev_close(node, self.instance.name,
6307 self.instance.disks)
6308 result.Raise("Cannot change disk to secondary on node %s" % node)
6310 def _GoStandalone(self):
6311 """Disconnect from the network.
6314 self.feedback_fn("* changing into standalone mode")
6315 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6316 self.instance.disks)
6317 for node, nres in result.items():
6318 nres.Raise("Cannot disconnect disks node %s" % node)
6320 def _GoReconnect(self, multimaster):
6321 """Reconnect to the network.

6323 """
6324 if multimaster:
6325 msg = "dual-master"
6326 else:
6327 msg = "single-master"
6328 self.feedback_fn("* changing disks into %s mode" % msg)
6329 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6330 self.instance.disks,
6331 self.instance.name, multimaster)
6332 for node, nres in result.items():
6333 nres.Raise("Cannot change disks config on node %s" % node)
6335 def _ExecCleanup(self):
6336 """Try to cleanup after a failed migration.
6338 The cleanup is done by:
6339 - check that the instance is running only on one node
6340 (and update the config if needed)
6341 - change disks on its secondary node to secondary
6342 - wait until disks are fully synchronized
6343 - disconnect from the network
6344 - change disks into single-master mode
6345 - wait again until disks are fully synchronized
6348 instance = self.instance
6349 target_node = self.target_node
6350 source_node = self.source_node
6352 # check running on only one node
6353 self.feedback_fn("* checking where the instance actually runs"
6354 " (if this hangs, the hypervisor might be in"
6356 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6357 for node, result in ins_l.items():
6358 result.Raise("Can't contact node %s" % node)
6360 runningon_source = instance.name in ins_l[source_node].payload
6361 runningon_target = instance.name in ins_l[target_node].payload
6363 if runningon_source and runningon_target:
6364 raise errors.OpExecError("Instance seems to be running on two nodes,"
6365 " or the hypervisor is confused. You will have"
6366 " to ensure manually that it runs only on one"
6367 " and restart this operation.")
6369 if not (runningon_source or runningon_target):
6370 raise errors.OpExecError("Instance does not seem to be running at all."
6371 " In this case, it's safer to repair by"
6372 " running 'gnt-instance stop' to ensure disk"
6373 " shutdown, and then restarting it.")
6375 if runningon_target:
6376 # the migration has actually succeeded, we need to update the config
6377 self.feedback_fn("* instance running on secondary node (%s),"
6378 " updating config" % target_node)
6379 instance.primary_node = target_node
6380 self.cfg.Update(instance, self.feedback_fn)
6381 demoted_node = source_node
6383 self.feedback_fn("* instance confirmed to be running on its"
6384 " primary node (%s)" % source_node)
6385 demoted_node = target_node
6387 self._EnsureSecondary(demoted_node)
6388 try:
6389 self._WaitUntilSync()
6390 except errors.OpExecError:
6391 # we ignore errors here, since if the device is standalone, it
6392 # won't be able to sync
6393 pass
6394 self._GoStandalone()
6395 self._GoReconnect(False)
6396 self._WaitUntilSync()
6398 self.feedback_fn("* done")
6400 def _RevertDiskStatus(self):
6401 """Try to revert the disk status after a failed migration.
6404 target_node = self.target_node
6406 self._EnsureSecondary(target_node)
6407 self._GoStandalone()
6408 self._GoReconnect(False)
6409 self._WaitUntilSync()
6410 except errors.OpExecError, err:
6411 self.lu.LogWarning("Migration failed and I can't reconnect the"
6412 " drives: error '%s'\n"
6413 "Please look and recover the instance status" %
6416 def _AbortMigration(self):
6417 """Call the hypervisor code to abort a started migration.
6420 instance = self.instance
6421 target_node = self.target_node
6422 migration_info = self.migration_info
6424 abort_result = self.rpc.call_finalize_migration(target_node,
6425 instance,
6426 migration_info,
6427 False)
6428 abort_msg = abort_result.fail_msg
6429 if abort_msg:
6430 logging.error("Aborting migration failed on target node %s: %s",
6431 target_node, abort_msg)
6432 # Don't raise an exception here, as we still have to try to revert the
6433 # disk status, even if this step failed.
6435 def _ExecMigration(self):
6436 """Migrate an instance.
6438 The migrate is done by:
6439 - change the disks into dual-master mode
6440 - wait until disks are fully synchronized again
6441 - migrate the instance
6442 - change disks on the new secondary node (the old primary) to secondary
6443 - wait until disks are fully synchronized
6444 - change disks into single-master mode
6447 instance = self.instance
6448 target_node = self.target_node
6449 source_node = self.source_node
6451 self.feedback_fn("* checking disk consistency between source and target")
6452 for dev in instance.disks:
6453 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6454 raise errors.OpExecError("Disk %s is degraded or not fully"
6455 " synchronized on target node,"
6456 " aborting migrate." % dev.iv_name)
6458 # First get the migration information from the remote node
6459 result = self.rpc.call_migration_info(source_node, instance)
6460 msg = result.fail_msg
6461 if msg:
6462 log_err = ("Failed fetching source migration information from %s: %s" %
6463 (source_node, msg))
6464 logging.error(log_err)
6465 raise errors.OpExecError(log_err)
6467 self.migration_info = migration_info = result.payload
6469 # Then switch the disks to master/master mode
6470 self._EnsureSecondary(target_node)
6471 self._GoStandalone()
6472 self._GoReconnect(True)
6473 self._WaitUntilSync()
6475 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6476 result = self.rpc.call_accept_instance(target_node,
6477 instance,
6478 migration_info,
6479 self.nodes_ip[target_node])
6481 msg = result.fail_msg
6482 if msg:
6483 logging.error("Instance pre-migration failed, trying to revert"
6484 " disk status: %s", msg)
6485 self.feedback_fn("Pre-migration failed, aborting")
6486 self._AbortMigration()
6487 self._RevertDiskStatus()
6488 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6489 (instance.name, msg))
6491 self.feedback_fn("* migrating instance to %s" % target_node)
6493 result = self.rpc.call_instance_migrate(source_node, instance,
6494 self.nodes_ip[target_node],
6495 self.live)
6496 msg = result.fail_msg
6497 if msg:
6498 logging.error("Instance migration failed, trying to revert"
6499 " disk status: %s", msg)
6500 self.feedback_fn("Migration failed, aborting")
6501 self._AbortMigration()
6502 self._RevertDiskStatus()
6503 raise errors.OpExecError("Could not migrate instance %s: %s" %
6504 (instance.name, msg))
6507 instance.primary_node = target_node
6508 # distribute new instance config to the other nodes
6509 self.cfg.Update(instance, self.feedback_fn)
6511 result = self.rpc.call_finalize_migration(target_node,
6512 instance,
6513 migration_info,
6514 True)
6515 msg = result.fail_msg
6516 if msg:
6517 logging.error("Instance migration succeeded, but finalization failed:"
6518 " %s", msg)
6519 raise errors.OpExecError("Could not finalize instance migration: %s" %
6520 msg)
6522 self._EnsureSecondary(source_node)
6523 self._WaitUntilSync()
6524 self._GoStandalone()
6525 self._GoReconnect(False)
6526 self._WaitUntilSync()
6528 self.feedback_fn("* done")
6530 def Exec(self, feedback_fn):
6531 """Perform the migration.
6534 feedback_fn("Migrating instance %s" % self.instance.name)
6536 self.feedback_fn = feedback_fn
6538 self.source_node = self.instance.primary_node
6539 self.target_node = self.instance.secondary_nodes[0]
6540 self.all_nodes = [self.source_node, self.target_node]
6541 self.nodes_ip = {
6542 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6543 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6544 }
6546 if self.cleanup:
6547 return self._ExecCleanup()
6548 else:
6549 return self._ExecMigration()
6552 def _CreateBlockDev(lu, node, instance, device, force_create,
6553 info, force_open):
6554 """Create a tree of block devices on a given node.
6556 If this device type has to be created on secondaries, create it and
6559 If not, just recurse to children keeping the same 'force' value.
6561 @param lu: the lu on whose behalf we execute
6562 @param node: the node on which to create the device
6563 @type instance: L{objects.Instance}
6564 @param instance: the instance which owns the device
6565 @type device: L{objects.Disk}
6566 @param device: the device to create
6567 @type force_create: boolean
6568 @param force_create: whether to force creation of this device; this
6569 will be changed to True whenever we find a device which has
6570 CreateOnSecondary() attribute
6571 @param info: the extra 'metadata' we should attach to the device
6572 (this will be represented as a LVM tag)
6573 @type force_open: boolean
6574 @param force_open: this parameter will be passed to the
6575 L{backend.BlockdevCreate} function where it specifies
6576 whether we run on primary or not, and it affects both
6577 the child assembly and the device's own Open() execution
6580 if device.CreateOnSecondary():
6581 force_create = True
6583 if device.children:
6584 for child in device.children:
6585 _CreateBlockDev(lu, node, instance, child, force_create,
6586 info, force_open)
6588 if not force_create:
6589 return
6591 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
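# Illustrative note (comments only): once a device reports
# CreateOnSecondary(), force_create is switched to True for it and, through
# the recursion above, for all of its children; devices reached while
# force_create is still False are skipped and only their children may be
# created.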
6594 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6595 """Create a single block device on a given node.
6597 This will not recurse over children of the device, so they must be
6600 @param lu: the lu on whose behalf we execute
6601 @param node: the node on which to create the device
6602 @type instance: L{objects.Instance}
6603 @param instance: the instance which owns the device
6604 @type device: L{objects.Disk}
6605 @param device: the device to create
6606 @param info: the extra 'metadata' we should attach to the device
6607 (this will be represented as a LVM tag)
6608 @type force_open: boolean
6609 @param force_open: this parameter will be passed to the
6610 L{backend.BlockdevCreate} function where it specifies
6611 whether we run on primary or not, and it affects both
6612 the child assembly and the device's own Open() execution
6615 lu.cfg.SetDiskID(device, node)
6616 result = lu.rpc.call_blockdev_create(node, device, device.size,
6617 instance.name, force_open, info)
6618 result.Raise("Can't create block device %s on"
6619 " node %s for instance %s" % (device, node, instance.name))
6620 if device.physical_id is None:
6621 device.physical_id = result.payload
6624 def _GenerateUniqueNames(lu, exts):
6625 """Generate a suitable LV name.
6627 This will generate a logical volume name for the given instance.
6629 """
6630 results = []
6631 for val in exts:
6632 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6633 results.append("%s%s" % (new_id, val))
6634 return results
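# Illustrative sketch (comments only, the unique IDs are assumed values):
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   -> ["4ac32cd7-....disk0", "9b01f33e-....disk1"]
#
# A fresh unique ID is generated for every extension in the list.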
6637 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
6638 iv_name, p_minor, s_minor):
6639 """Generate a drbd8 device complete with its children.
6642 assert len(vgnames) == len(names) == 2
6643 port = lu.cfg.AllocatePort()
6644 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6645 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6646 logical_id=(vgnames[0], names[0]))
6647 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6648 logical_id=(vgnames[1], names[1]))
6649 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6650 logical_id=(primary, secondary, port,
6651 p_minor, s_minor,
6652 shared_secret),
6653 children=[dev_data, dev_meta],
6654 iv_name=iv_name)
6655 return drbd_dev
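# Illustrative sketch (comments only, values assumed): a DRBD8 disk built by
# _GenerateDRBD8Branch consists of two LV children (the data volume and a
# 128 MB metadata volume) and a parent disk whose logical_id is the 6-tuple
#
#   (primary, secondary, port, p_minor, s_minor, shared_secret)
#
# e.g. ("node1.example.com", "node2.example.com", 11001, 0, 0, "s3cr3t"),
# matching the layout rebuilt by LUInstanceRecreateDisks earlier in this
# module.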
6658 def _GenerateDiskTemplate(lu, template_name,
6659 instance_name, primary_node,
6660 secondary_nodes, disk_info,
6661 file_storage_dir, file_driver,
6662 base_index, feedback_fn):
6663 """Generate the entire disk layout for a given template type.
6666 #TODO: compute space requirements
6668 vgname = lu.cfg.GetVGName()
6669 disk_count = len(disk_info)
6670 disks = []
6671 if template_name == constants.DT_DISKLESS:
6672 pass
6673 elif template_name == constants.DT_PLAIN:
6674 if len(secondary_nodes) != 0:
6675 raise errors.ProgrammerError("Wrong template configuration")
6677 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6678 for i in range(disk_count)])
6679 for idx, disk in enumerate(disk_info):
6680 disk_index = idx + base_index
6681 vg = disk.get("vg", vgname)
6682 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6683 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6684 logical_id=(vg, names[idx]),
6685 iv_name="disk/%d" % disk_index,
6686 mode=disk["mode"])
6687 disks.append(disk_dev)
6688 elif template_name == constants.DT_DRBD8:
6689 if len(secondary_nodes) != 1:
6690 raise errors.ProgrammerError("Wrong template configuration")
6691 remote_node = secondary_nodes[0]
6692 minors = lu.cfg.AllocateDRBDMinor(
6693 [primary_node, remote_node] * len(disk_info), instance_name)
6695 names = []
6696 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6697 for i in range(disk_count)]):
6698 names.append(lv_prefix + "_data")
6699 names.append(lv_prefix + "_meta")
6700 for idx, disk in enumerate(disk_info):
6701 disk_index = idx + base_index
6702 data_vg = disk.get("vg", vgname)
6703 meta_vg = disk.get("metavg", data_vg)
6704 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6705 disk["size"], [data_vg, meta_vg],
6706 names[idx*2:idx*2+2],
6707 "disk/%d" % disk_index,
6708 minors[idx*2], minors[idx*2+1])
6709 disk_dev.mode = disk["mode"]
6710 disks.append(disk_dev)
6711 elif template_name == constants.DT_FILE:
6712 if len(secondary_nodes) != 0:
6713 raise errors.ProgrammerError("Wrong template configuration")
6715 opcodes.RequireFileStorage()
6717 for idx, disk in enumerate(disk_info):
6718 disk_index = idx + base_index
6719 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6720 iv_name="disk/%d" % disk_index,
6721 logical_id=(file_driver,
6722 "%s/disk%d" % (file_storage_dir,
6723 disk_index)),
6724 mode=disk["mode"])
6725 disks.append(disk_dev)
6726 else:
6727 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6729 return disks
6731 def _GetInstanceInfoText(instance):
6732 """Compute the text that should be added to the disk's metadata.
6735 return "originstname+%s" % instance.name
6738 def _CalcEta(time_taken, written, total_size):
6739 """Calculates the ETA based on size written and total size.
6741 @param time_taken: The time taken so far
6742 @param written: amount written so far
6743 @param total_size: The total size of data to be written
6744 @return: The remaining time in seconds
6747 avg_time = time_taken / float(written)
6748 return (total_size - written) * avg_time
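# Illustrative arithmetic for _CalcEta (comments only): if 30 seconds were
# needed to write 1024 MiB out of 4096 MiB, the average is 30/1024 seconds
# per MiB, so _CalcEta(30, 1024, 4096) returns (4096 - 1024) * 30 / 1024
# = 90.0 seconds remaining.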
6751 def _WipeDisks(lu, instance):
6752 """Wipes instance disks.
6754 @type lu: L{LogicalUnit}
6755 @param lu: the logical unit on whose behalf we execute
6756 @type instance: L{objects.Instance}
6757 @param instance: the instance whose disks we should create
6758 @return: the success of the wipe
6761 node = instance.primary_node
6763 for device in instance.disks:
6764 lu.cfg.SetDiskID(device, node)
6766 logging.info("Pause sync of instance %s disks", instance.name)
6767 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6769 for idx, success in enumerate(result.payload):
6770 if not success:
6771 logging.warn("pause-sync of instance %s for disks %d failed",
6772 instance.name, idx)
6774 try:
6775 for idx, device in enumerate(instance.disks):
6776 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6777 # MAX_WIPE_CHUNK at max
6778 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6779 constants.MIN_WIPE_CHUNK_PERCENT)
6780 # we _must_ make this an int, otherwise rounding errors will
6782 wipe_chunk_size = int(wipe_chunk_size)
6784 lu.LogInfo("* Wiping disk %d", idx)
6785 logging.info("Wiping disk %d for instance %s, node %s using"
6786 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
6788 offset = 0
6789 size = device.size
6790 last_output = 0
6791 start_time = time.time()
6793 while offset < size:
6794 wipe_size = min(wipe_chunk_size, size - offset)
6795 logging.debug("Wiping disk %d, offset %s, chunk %s",
6796 idx, offset, wipe_size)
6797 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6798 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6799 (idx, offset, wipe_size))
6800 now = time.time()
6801 offset += wipe_size
6802 if now - last_output >= 60:
6803 eta = _CalcEta(now - start_time, offset, size)
6804 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6805 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6806 last_output = now
6807 finally:
6808 logging.info("Resume sync of instance %s disks", instance.name)
6810 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6812 for idx, success in enumerate(result.payload):
6813 if not success:
6814 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6815 " look at the status and troubleshoot the issue.", idx)
6816 logging.warn("resume-sync of instance %s for disks %d failed",
6817 instance.name, idx)
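# Illustrative arithmetic for the wipe chunk size above (comments only,
# constant values assumed for the example): with MIN_WIPE_CHUNK_PERCENT = 10
# and MAX_WIPE_CHUNK = 1024 MB, a 5000 MB disk is wiped in
# min(1024, 5000 / 100.0 * 10) = 500 MB chunks, while very large disks are
# capped at the 1024 MB maximum.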
6820 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6821 """Create all disks for an instance.
6823 This abstracts away some work from AddInstance.
6825 @type lu: L{LogicalUnit}
6826 @param lu: the logical unit on whose behalf we execute
6827 @type instance: L{objects.Instance}
6828 @param instance: the instance whose disks we should create
6830 @param to_skip: list of indices to skip
6831 @type target_node: string
6832 @param target_node: if passed, overrides the target node for creation
6834 @return: the success of the creation
6837 info = _GetInstanceInfoText(instance)
6838 if target_node is None:
6839 pnode = instance.primary_node
6840 all_nodes = instance.all_nodes
6841 else:
6842 pnode = target_node
6843 all_nodes = [pnode]
6845 if instance.disk_template == constants.DT_FILE:
6846 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6847 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6849 result.Raise("Failed to create directory '%s' on"
6850 " node %s" % (file_storage_dir, pnode))
6852 # Note: this needs to be kept in sync with adding of disks in
6853 # LUInstanceSetParams
6854 for idx, device in enumerate(instance.disks):
6855 if to_skip and idx in to_skip:
6856 continue
6857 logging.info("Creating volume %s for instance %s",
6858 device.iv_name, instance.name)
6860 for node in all_nodes:
6861 f_create = node == pnode
6862 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6865 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
6866 """Remove all disks for an instance.
6868 This abstracts away some work from `AddInstance()` and
6869 `RemoveInstance()`. Note that in case some of the devices couldn't
6870 be removed, the removal will continue with the other ones (compare
6871 with `_CreateDisks()`).
6873 @type lu: L{LogicalUnit}
6874 @param lu: the logical unit on whose behalf we execute
6875 @type instance: L{objects.Instance}
6876 @param instance: the instance whose disks we should remove
6877 @type target_node: string
6878 @param target_node: used to override the node on which to remove the disks
6880 @return: the success of the removal
6883 logging.info("Removing block devices for instance %s", instance.name)
6885 all_result = True
6886 ports_to_release = set()
6887 for device in instance.disks:
6888 if target_node:
6889 edata = [(target_node, device)]
6890 else:
6891 edata = device.ComputeNodeTree(instance.primary_node)
6892 for node, disk in edata:
6893 lu.cfg.SetDiskID(disk, node)
6894 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6895 if msg:
6896 lu.LogWarning("Could not remove block device %s on node %s,"
6897 " continuing anyway: %s", device.iv_name, node, msg)
6898 all_result = False
6900 # if this is a DRBD disk, return its port to the pool
6901 if device.dev_type in constants.LDS_DRBD:
6902 ports_to_release.add(device.logical_id[2])
6904 if all_result or ignore_failures:
6905 for port in ports_to_release:
6906 lu.cfg.AddTcpUdpPort(port)
6908 if instance.disk_template == constants.DT_FILE:
6909 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6910 if target_node:
6911 tgt = target_node
6912 else:
6913 tgt = instance.primary_node
6914 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6915 if result.fail_msg:
6916 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6917 file_storage_dir, instance.primary_node, result.fail_msg)
6918 all_result = False
6920 return all_result
6923 def _ComputeDiskSizePerVG(disk_template, disks):
6924 """Compute disk size requirements in the volume group
6927 def _compute(disks, payload):
6928 """Universal algorithm.

6930 """
6931 vgs = {}
6932 for disk in disks:
6933 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6935 return vgs
6937 # Required free disk space as a function of disk and swap space
6938 req_size_dict = {
6939 constants.DT_DISKLESS: {},
6940 constants.DT_PLAIN: _compute(disks, 0),
6941 # 128 MB are added for drbd metadata for each disk
6942 constants.DT_DRBD8: _compute(disks, 128),
6943 constants.DT_FILE: {},
6944 }
6946 if disk_template not in req_size_dict:
6947 raise errors.ProgrammerError("Disk template '%s' size requirement"
6948 " is unknown" % disk_template)
6950 return req_size_dict[disk_template]
6953 def _ComputeDiskSize(disk_template, disks):
6954 """Compute disk size requirements in the volume group
6957 # Required free disk space as a function of disk and swap space
6958 req_size_dict = {
6959 constants.DT_DISKLESS: None,
6960 constants.DT_PLAIN: sum(d["size"] for d in disks),
6961 # 128 MB are added for drbd metadata for each disk
6962 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6963 constants.DT_FILE: None,
6964 }
6966 if disk_template not in req_size_dict:
6967 raise errors.ProgrammerError("Disk template '%s' size requirement"
6968 " is unknown" % disk_template)
6970 return req_size_dict[disk_template]
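# Illustrative arithmetic (comments only): for disks = [{"size": 1024},
# {"size": 2048}] the requirement is 1024 + 2048 = 3072 MB with DT_PLAIN and
# 1024 + 128 + 2048 + 128 = 3328 MB with DT_DRBD8 (128 MB of DRBD metadata
# per disk); DT_DISKLESS and DT_FILE report None since no volume group space
# is needed.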
6973 def _FilterVmNodes(lu, nodenames):
6974 """Filters out non-vm_capable nodes from a list.
6976 @type lu: L{LogicalUnit}
6977 @param lu: the logical unit for which we check
6978 @type nodenames: list
6979 @param nodenames: the list of nodes on which we should check
6981 @return: the list of vm-capable nodes
6984 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6985 return [name for name in nodenames if name not in vm_nodes]
6988 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6989 """Hypervisor parameter validation.
6991 This function abstract the hypervisor parameter validation to be
6992 used in both instance create and instance modify.
6994 @type lu: L{LogicalUnit}
6995 @param lu: the logical unit for which we check
6996 @type nodenames: list
6997 @param nodenames: the list of nodes on which we should check
6998 @type hvname: string
6999 @param hvname: the name of the hypervisor we should use
7000 @type hvparams: dict
7001 @param hvparams: the parameters which we need to check
7002 @raise errors.OpPrereqError: if the parameters are not valid
7005 nodenames = _FilterVmNodes(lu, nodenames)
7006 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7007 hvname,
7008 hvparams)
7009 for node in nodenames:
7010 info = hvinfo[node]
7011 if info.offline:
7012 continue
7013 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7016 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7017 """OS parameters validation.
7019 @type lu: L{LogicalUnit}
7020 @param lu: the logical unit for which we check
7021 @type required: boolean
7022 @param required: whether the validation should fail if the OS is not
7024 @type nodenames: list
7025 @param nodenames: the list of nodes on which we should check
7026 @type osname: string
7027 @param osname: the name of the hypervisor we should use
7028 @type osparams: dict
7029 @param osparams: the parameters which we need to check
7030 @raise errors.OpPrereqError: if the parameters are not valid
7033 nodenames = _FilterVmNodes(lu, nodenames)
7034 result = lu.rpc.call_os_validate(required, nodenames, osname,
7035 [constants.OS_VALIDATE_PARAMETERS],
7036 osparams)
7037 for node, nres in result.items():
7038 # we don't check for offline cases since this should be run only
7039 # against the master node and/or an instance's nodes
7040 nres.Raise("OS Parameters validation failed on node %s" % node)
7041 if not nres.payload:
7042 lu.LogInfo("OS %s not found on node %s, validation skipped",
7043 osname, node)
7046 class LUInstanceCreate(LogicalUnit):
7047 """Create an instance.
7050 HPATH = "instance-add"
7051 HTYPE = constants.HTYPE_INSTANCE
7054 def CheckArguments(self):
7058 # do not require name_check to ease forward/backward compatibility
7060 if self.op.no_install and self.op.start:
7061 self.LogInfo("No-installation mode selected, disabling startup")
7062 self.op.start = False
7063 # validate/normalize the instance name
7064 self.op.instance_name = \
7065 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7067 if self.op.ip_check and not self.op.name_check:
7068 # TODO: make the ip check more flexible and not depend on the name check
7069 raise errors.OpPrereqError("Cannot do ip check without a name check",
7072 # check nics' parameter names
7073 for nic in self.op.nics:
7074 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7076 # check disks. parameter names and consistent adopt/no-adopt strategy
7077 has_adopt = has_no_adopt = False
7078 for disk in self.op.disks:
7079 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7080 if "adopt" in disk:
7081 has_adopt = True
7082 else:
7083 has_no_adopt = True
7084 if has_adopt and has_no_adopt:
7085 raise errors.OpPrereqError("Either all disks are adopted or none is",
7086 errors.ECODE_INVAL)
7087 if has_adopt:
7088 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7089 raise errors.OpPrereqError("Disk adoption is not supported for the"
7090 " '%s' disk template" %
7091 self.op.disk_template,
7093 if self.op.iallocator is not None:
7094 raise errors.OpPrereqError("Disk adoption not allowed with an"
7095 " iallocator script", errors.ECODE_INVAL)
7096 if self.op.mode == constants.INSTANCE_IMPORT:
7097 raise errors.OpPrereqError("Disk adoption not allowed for"
7098 " instance import", errors.ECODE_INVAL)
7100 self.adopt_disks = has_adopt
7102 # instance name verification
7103 if self.op.name_check:
7104 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7105 self.op.instance_name = self.hostname1.name
7106 # used in CheckPrereq for ip ping check
7107 self.check_ip = self.hostname1.ip
7109 self.check_ip = None
7111 # file storage checks
7112 if (self.op.file_driver and
7113 not self.op.file_driver in constants.FILE_DRIVER):
7114 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7115 self.op.file_driver, errors.ECODE_INVAL)
7117 if self.op.disk_template == constants.DT_FILE:
7118 opcodes.RequireFileStorage()
7120 ### Node/iallocator related checks
7121 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7123 if self.op.pnode is not None:
7124 if self.op.disk_template in constants.DTS_NET_MIRROR:
7125 if self.op.snode is None:
7126 raise errors.OpPrereqError("The networked disk templates need"
7127 " a mirror node", errors.ECODE_INVAL)
7128 elif self.op.snode:
7129 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7130 " template")
7131 self.op.snode = None
7133 self._cds = _GetClusterDomainSecret()
7135 if self.op.mode == constants.INSTANCE_IMPORT:
7136 # On import force_variant must be True, because if we forced it at
7137 # initial install, our only chance when importing it back is that it
7139 self.op.force_variant = True
7141 if self.op.no_install:
7142 self.LogInfo("No-installation mode has no effect during import")
7144 elif self.op.mode == constants.INSTANCE_CREATE:
7145 if self.op.os_type is None:
7146 raise errors.OpPrereqError("No guest OS specified",
7148 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7149 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7150 " installation" % self.op.os_type,
7152 if self.op.disk_template is None:
7153 raise errors.OpPrereqError("No disk template specified",
7156 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7157 # Check handshake to ensure both clusters have the same domain secret
7158 src_handshake = self.op.source_handshake
7159 if not src_handshake:
7160 raise errors.OpPrereqError("Missing source handshake",
7163 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7164 src_handshake)
7165 if errmsg:
7166 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7169 # Load and check source CA
7170 self.source_x509_ca_pem = self.op.source_x509_ca
7171 if not self.source_x509_ca_pem:
7172 raise errors.OpPrereqError("Missing source X509 CA",
7175 try:
7176 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7177 self._cds)
7178 except OpenSSL.crypto.Error, err:
7179 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7180 (err, ), errors.ECODE_INVAL)
7182 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7183 if errcode is not None:
7184 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7187 self.source_x509_ca = cert
7189 src_instance_name = self.op.source_instance_name
7190 if not src_instance_name:
7191 raise errors.OpPrereqError("Missing source instance name",
7194 self.source_instance_name = \
7195 netutils.GetHostname(name=src_instance_name).name
7197 else:
7198 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7199 self.op.mode, errors.ECODE_INVAL)
7201 def ExpandNames(self):
7202 """ExpandNames for CreateInstance.
7204 Figure out the right locks for instance creation.
7207 self.needed_locks = {}
7209 instance_name = self.op.instance_name
7210 # this is just a preventive check, but someone might still add this
7211 # instance in the meantime, and creation will fail at lock-add time
7212 if instance_name in self.cfg.GetInstanceList():
7213 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7214 instance_name, errors.ECODE_EXISTS)
7216 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7218 if self.op.iallocator:
7219 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7220 else:
7221 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7222 nodelist = [self.op.pnode]
7223 if self.op.snode is not None:
7224 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7225 nodelist.append(self.op.snode)
7226 self.needed_locks[locking.LEVEL_NODE] = nodelist
7228 # in case of import lock the source node too
7229 if self.op.mode == constants.INSTANCE_IMPORT:
7230 src_node = self.op.src_node
7231 src_path = self.op.src_path
7233 if src_path is None:
7234 self.op.src_path = src_path = self.op.instance_name
7236 if src_node is None:
7237 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7238 self.op.src_node = None
7239 if os.path.isabs(src_path):
7240 raise errors.OpPrereqError("Importing an instance from a path"
7241 " requires a source node option",
7242 errors.ECODE_INVAL)
7243 else:
7244 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7245 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7246 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7247 if not os.path.isabs(src_path):
7248 self.op.src_path = src_path = \
7249 utils.PathJoin(constants.EXPORT_DIR, src_path)
7251 def _RunAllocator(self):
7252 """Run the allocator based on input opcode.
7255 nics = [n.ToDict() for n in self.nics]
7256 ial = IAllocator(self.cfg, self.rpc,
7257 mode=constants.IALLOCATOR_MODE_ALLOC,
7258 name=self.op.instance_name,
7259 disk_template=self.op.disk_template,
7262 vcpus=self.be_full[constants.BE_VCPUS],
7263 mem_size=self.be_full[constants.BE_MEMORY],
7266 hypervisor=self.op.hypervisor,
7269 ial.Run(self.op.iallocator)
7271 if not ial.success:
7272 raise errors.OpPrereqError("Can't compute nodes using"
7273 " iallocator '%s': %s" %
7274 (self.op.iallocator, ial.info),
7276 if len(ial.result) != ial.required_nodes:
7277 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7278 " of nodes (%s), required %s" %
7279 (self.op.iallocator, len(ial.result),
7280 ial.required_nodes), errors.ECODE_FAULT)
7281 self.op.pnode = ial.result[0]
7282 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7283 self.op.instance_name, self.op.iallocator,
7284 utils.CommaJoin(ial.result))
7285 if ial.required_nodes == 2:
7286 self.op.snode = ial.result[1]
7288 def BuildHooksEnv(self):
7291 This runs on master, primary and secondary nodes of the instance.
7294 env = {
7295 "ADD_MODE": self.op.mode,
7296 }
7297 if self.op.mode == constants.INSTANCE_IMPORT:
7298 env["SRC_NODE"] = self.op.src_node
7299 env["SRC_PATH"] = self.op.src_path
7300 env["SRC_IMAGES"] = self.src_images
7302 env.update(_BuildInstanceHookEnv(
7303 name=self.op.instance_name,
7304 primary_node=self.op.pnode,
7305 secondary_nodes=self.secondaries,
7306 status=self.op.start,
7307 os_type=self.op.os_type,
7308 memory=self.be_full[constants.BE_MEMORY],
7309 vcpus=self.be_full[constants.BE_VCPUS],
7310 nics=_NICListToTuple(self, self.nics),
7311 disk_template=self.op.disk_template,
7312 disks=[(d["size"], d["mode"]) for d in self.disks],
7315 hypervisor_name=self.op.hypervisor,
7316 ))
7318 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7319 self.secondaries)
7320 return env, nl, nl
7322 def _ReadExportInfo(self):
7323 """Reads the export information from disk.
7325 It will override the opcode source node and path with the actual
7326 information, if these two were not specified before.
7328 @return: the export information
7331 assert self.op.mode == constants.INSTANCE_IMPORT
7333 src_node = self.op.src_node
7334 src_path = self.op.src_path
7336 if src_node is None:
7337 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7338 exp_list = self.rpc.call_export_list(locked_nodes)
7339 found = False
7340 for node in exp_list:
7341 if exp_list[node].fail_msg:
7342 continue
7343 if src_path in exp_list[node].payload:
7344 found = True
7345 self.op.src_node = src_node = node
7346 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7347 src_path)
7348 break
7349 if not found:
7350 raise errors.OpPrereqError("No export found for relative path %s" %
7351 src_path, errors.ECODE_INVAL)
7353 _CheckNodeOnline(self, src_node)
7354 result = self.rpc.call_export_info(src_node, src_path)
7355 result.Raise("No export or invalid export found in dir %s" % src_path)
7357 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7358 if not export_info.has_section(constants.INISECT_EXP):
7359 raise errors.ProgrammerError("Corrupted export config",
7360 errors.ECODE_ENVIRON)
7362 ei_version = export_info.get(constants.INISECT_EXP, "version")
7363 if (int(ei_version) != constants.EXPORT_VERSION):
7364 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7365 (ei_version, constants.EXPORT_VERSION),
7366 errors.ECODE_ENVIRON)
7368 return export_info
7369 def _ReadExportParams(self, einfo):
7370 """Use export parameters as defaults.
7372 In case the opcode doesn't specify (as in override) some instance
7373 parameters, then try to use them from the export information, if
7377 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7379 if self.op.disk_template is None:
7380 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7381 self.op.disk_template = einfo.get(constants.INISECT_INS,
7382 "disk_template")
7383 else:
7384 raise errors.OpPrereqError("No disk template specified and the export"
7385 " is missing the disk_template information",
7388 if not self.op.disks:
7389 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7390 disks = []
7391 # TODO: import the disk iv_name too
7392 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7393 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7394 disks.append({"size": disk_sz})
7395 self.op.disks = disks
7396 else:
7397 raise errors.OpPrereqError("No disk info specified and the export"
7398 " is missing the disk information",
7401 if (not self.op.nics and
7402 einfo.has_option(constants.INISECT_INS, "nic_count")):
7404 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7406 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7407 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7412 if (self.op.hypervisor is None and
7413 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7414 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7415 if einfo.has_section(constants.INISECT_HYP):
7416 # use the export parameters but do not override the ones
7417 # specified by the user
7418 for name, value in einfo.items(constants.INISECT_HYP):
7419 if name not in self.op.hvparams:
7420 self.op.hvparams[name] = value
7422 if einfo.has_section(constants.INISECT_BEP):
7423 # use the parameters, without overriding
7424 for name, value in einfo.items(constants.INISECT_BEP):
7425 if name not in self.op.beparams:
7426 self.op.beparams[name] = value
7428 # try to read the parameters old style, from the main section
7429 for name in constants.BES_PARAMETERS:
7430 if (name not in self.op.beparams and
7431 einfo.has_option(constants.INISECT_INS, name)):
7432 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7434 if einfo.has_section(constants.INISECT_OSP):
7435 # use the parameters, without overriding
7436 for name, value in einfo.items(constants.INISECT_OSP):
7437 if name not in self.op.osparams:
7438 self.op.osparams[name] = value
7440 def _RevertToDefaults(self, cluster):
7441 """Revert the instance parameters to the default values.
7445 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7446 for name in self.op.hvparams.keys():
7447 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7448 del self.op.hvparams[name]
7450 be_defs = cluster.SimpleFillBE({})
7451 for name in self.op.beparams.keys():
7452 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7453 del self.op.beparams[name]
7455 nic_defs = cluster.SimpleFillNIC({})
7456 for nic in self.op.nics:
7457 for name in constants.NICS_PARAMETERS:
7458 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7459 del nic[name]
7461 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7462 for name in self.op.osparams.keys():
7463 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7464 del self.op.osparams[name]
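# The practical effect of _RevertToDefaults: a parameter whose supplied value
# equals the current cluster default is dropped from the opcode again, so the
# new instance does not keep a private copy of it and will follow any future
# change of the cluster-wide default.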
7466 def _CalculateFileStorageDir(self):
7467 """Calculate final instance file storage dir.
7470 # file storage dir calculation/check
7471 self.instance_file_storage_dir = None
7472 if self.op.disk_template == constants.DT_FILE:
7473 # build the full file storage dir path
7476 cfg_storagedir = self.cfg.GetFileStorageDir()
7477 if not cfg_storagedir:
7478 raise errors.OpPrereqError("Cluster file storage dir not defined")
7479 joinargs.append(cfg_storagedir)
7481 if self.op.file_storage_dir is not None:
7482 joinargs.append(self.op.file_storage_dir)
7484 joinargs.append(self.op.instance_name)
7486 # pylint: disable-msg=W0142
7487 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
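# Illustrative result: for file-based instances the final path is
# <cluster file storage dir>[/<op.file_storage_dir>]/<instance name>, e.g.
# something like /srv/ganeti/file-storage/inst1.example.com when no extra
# per-instance directory was requested (the base dir is whatever
# cfg.GetFileStorageDir() returns; the names here are examples).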
7489 def CheckPrereq(self):
7490 """Check prerequisites.
7493 self._CalculateFileStorageDir()
7495 if self.op.mode == constants.INSTANCE_IMPORT:
7496 export_info = self._ReadExportInfo()
7497 self._ReadExportParams(export_info)
7499 if (not self.cfg.GetVGName() and
7500 self.op.disk_template not in constants.DTS_NOT_LVM):
7501 raise errors.OpPrereqError("Cluster does not support lvm-based"
7502 " instances", errors.ECODE_STATE)
7504 if self.op.hypervisor is None:
7505 self.op.hypervisor = self.cfg.GetHypervisorType()
7507 cluster = self.cfg.GetClusterInfo()
7508 enabled_hvs = cluster.enabled_hypervisors
7509 if self.op.hypervisor not in enabled_hvs:
7510 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7511 " cluster (%s)" % (self.op.hypervisor,
7512 ",".join(enabled_hvs)),
7515 # check hypervisor parameter syntax (locally)
7516 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7517 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7519 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7520 hv_type.CheckParameterSyntax(filled_hvp)
7521 self.hv_full = filled_hvp
7522 # check that we don't specify global parameters on an instance
7523 _CheckGlobalHvParams(self.op.hvparams)
7525 # fill and remember the beparams dict
7526 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7527 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7529 # build os parameters
7530 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7532 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
7534 if self.op.identify_defaults:
7535 self._RevertToDefaults(cluster)
7539 for idx, nic in enumerate(self.op.nics):
7540 nic_mode_req = nic.get("mode", None)
7541 nic_mode = nic_mode_req
7542 if nic_mode is None:
7543 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7545 # in routed mode, for the first nic, the default ip is 'auto'
7546 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7547 default_ip_mode = constants.VALUE_AUTO
7549 default_ip_mode = constants.VALUE_NONE
7551 # ip validity checks
7552 ip = nic.get("ip", default_ip_mode)
7553 if ip is None or ip.lower() == constants.VALUE_NONE:
7555 elif ip.lower() == constants.VALUE_AUTO:
7556 if not self.op.name_check:
7557 raise errors.OpPrereqError("IP address set to auto but name checks"
7558 " have been skipped",
7560 nic_ip = self.hostname1.ip
7562 if not netutils.IPAddress.IsValid(ip):
7563 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7567 # TODO: check the ip address for uniqueness
7568 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7569 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7572 # MAC address verification
7573 mac = nic.get("mac", constants.VALUE_AUTO)
7574 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7575 mac = utils.NormalizeAndValidateMac(mac)
7578 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7579 except errors.ReservationError:
7580 raise errors.OpPrereqError("MAC address %s already in use"
7581 " in cluster" % mac,
7582 errors.ECODE_NOTUNIQUE)
7584 # bridge verification
7585 bridge = nic.get("bridge", None)
7586 link = nic.get("link", None)
7588 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7589 " at the same time", errors.ECODE_INVAL)
7590 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7591 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7598 nicparams[constants.NIC_MODE] = nic_mode_req
7600 nicparams[constants.NIC_LINK] = link
7602 check_params = cluster.SimpleFillNIC(nicparams)
7603 objects.NIC.CheckParameterSyntax(check_params)
7604 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
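# Illustrative NIC specifications (names and values are examples only): a
# bridged NIC could be given as {"mode": "bridged", "link": "xen-br0",
# "mac": "auto"}, while a routed NIC needs an IP, e.g.
# {"mode": "routed", "ip": "auto"}, which resolves the IP from the instance
# name when name checks are enabled.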
7606 # disk checks/pre-build
7608 for disk in self.op.disks:
7609 mode = disk.get("mode", constants.DISK_RDWR)
7610 if mode not in constants.DISK_ACCESS_SET:
7611 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7612 mode, errors.ECODE_INVAL)
7613 size = disk.get("size", None)
7615 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7618 except (TypeError, ValueError):
7619 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7621 data_vg = disk.get("vg", self.cfg.GetVGName())
7622 meta_vg = disk.get("metavg", data_vg)
7623 new_disk = {"size": size, "mode": mode, "vg": data_vg, "metavg": meta_vg}
7625 new_disk["adopt"] = disk["adopt"]
7626 self.disks.append(new_disk)
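# Illustrative disk specification: after the loop above each element of
# self.disks is a dict such as {"size": 10240, "mode": "rw", "vg": "xenvg",
# "metavg": "xenvg"}, plus an "adopt" key naming an existing LV when disk
# adoption is requested (volume group names here are examples; the defaults
# come from cfg.GetVGName()).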
7628 if self.op.mode == constants.INSTANCE_IMPORT:
7630 # Check that the new instance doesn't have fewer disks than the export
7631 instance_disks = len(self.disks)
7632 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7633 if instance_disks < export_disks:
7634 raise errors.OpPrereqError("Not enough disks to import."
7635 " (instance: %d, export: %d)" %
7636 (instance_disks, export_disks),
7640 for idx in range(export_disks):
7641 option = 'disk%d_dump' % idx
7642 if export_info.has_option(constants.INISECT_INS, option):
7643 # FIXME: are the old os-es, disk sizes, etc. useful?
7644 export_name = export_info.get(constants.INISECT_INS, option)
7645 image = utils.PathJoin(self.op.src_path, export_name)
7646 disk_images.append(image)
7648 disk_images.append(False)
7650 self.src_images = disk_images
7652 old_name = export_info.get(constants.INISECT_INS, 'name')
7654 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7655 except (TypeError, ValueError), err:
7656 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7657 " an integer: %s" % str(err),
7659 if self.op.instance_name == old_name:
7660 for idx, nic in enumerate(self.nics):
7661 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7662 nic_mac_ini = 'nic%d_mac' % idx
7663 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7665 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7667 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7668 if self.op.ip_check:
7669 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7670 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7671 (self.check_ip, self.op.instance_name),
7672 errors.ECODE_NOTUNIQUE)
7674 #### mac address generation
7675 # By generating the MAC address here, both the allocator and the hooks get
7676 # the real final mac address rather than the 'auto' or 'generate' value.
7677 # There is a race condition between the generation and the instance object
7678 # creation, which means that we know the mac is valid now, but we're not
7679 # sure it will be when we actually add the instance. If things go bad
7680 # adding the instance will abort because of a duplicate mac, and the
7681 # creation job will fail.
7682 for nic in self.nics:
7683 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7684 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7688 if self.op.iallocator is not None:
7689 self._RunAllocator()
7691 #### node related checks
7693 # check primary node
7694 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7695 assert self.pnode is not None, \
7696 "Cannot retrieve locked node %s" % self.op.pnode
7698 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7699 pnode.name, errors.ECODE_STATE)
7701 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7702 pnode.name, errors.ECODE_STATE)
7703 if not pnode.vm_capable:
7704 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7705 " '%s'" % pnode.name, errors.ECODE_STATE)
7707 self.secondaries = []
7709 # mirror node verification
7710 if self.op.disk_template in constants.DTS_NET_MIRROR:
7711 if self.op.snode == pnode.name:
7712 raise errors.OpPrereqError("The secondary node cannot be the"
7713 " primary node.", errors.ECODE_INVAL)
7714 _CheckNodeOnline(self, self.op.snode)
7715 _CheckNodeNotDrained(self, self.op.snode)
7716 _CheckNodeVmCapable(self, self.op.snode)
7717 self.secondaries.append(self.op.snode)
7719 nodenames = [pnode.name] + self.secondaries
7721 if not self.adopt_disks:
7722 # Check lv size requirements, if not adopting
7723 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7724 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7726 else: # instead, we must check the adoption data
7727 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7728 if len(all_lvs) != len(self.disks):
7729 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7731 for lv_name in all_lvs:
7733 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
7734 # to ReserveLV use the same syntax
7735 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7736 except errors.ReservationError:
7737 raise errors.OpPrereqError("LV named %s used by another instance" %
7738 lv_name, errors.ECODE_NOTUNIQUE)
7740 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7741 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7743 node_lvs = self.rpc.call_lv_list([pnode.name],
7744 vg_names.payload.keys())[pnode.name]
7745 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7746 node_lvs = node_lvs.payload
7748 delta = all_lvs.difference(node_lvs.keys())
7750 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7751 utils.CommaJoin(delta),
7753 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7755 raise errors.OpPrereqError("Online logical volumes found, cannot"
7756 " adopt: %s" % utils.CommaJoin(online_lvs),
7758 # update the size of disk based on what is found
7759 for dsk in self.disks:
7760 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7762 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7764 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7765 # check OS parameters (remotely)
7766 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7768 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7770 # memory check on primary node
7772 _CheckNodeFreeMemory(self, self.pnode.name,
7773 "creating instance %s" % self.op.instance_name,
7774 self.be_full[constants.BE_MEMORY],
7777 self.dry_run_result = list(nodenames)
7779 def Exec(self, feedback_fn):
7780 """Create and add the instance to the cluster.
7783 instance = self.op.instance_name
7784 pnode_name = self.pnode.name
7786 ht_kind = self.op.hypervisor
7787 if ht_kind in constants.HTS_REQ_PORT:
7788 network_port = self.cfg.AllocatePort()
7792 disks = _GenerateDiskTemplate(self,
7793 self.op.disk_template,
7794 instance, pnode_name,
7797 self.instance_file_storage_dir,
7798 self.op.file_driver,
7802 iobj = objects.Instance(name=instance, os=self.op.os_type,
7803 primary_node=pnode_name,
7804 nics=self.nics, disks=disks,
7805 disk_template=self.op.disk_template,
7807 network_port=network_port,
7808 beparams=self.op.beparams,
7809 hvparams=self.op.hvparams,
7810 hypervisor=self.op.hypervisor,
7811 osparams=self.op.osparams,
7814 if self.adopt_disks:
7815 # rename LVs to the newly-generated names; we need to construct
7816 # 'fake' LV disks with the old data, plus the new unique_id
7817 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7819 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7820 rename_to.append(t_dsk.logical_id)
7821 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7822 self.cfg.SetDiskID(t_dsk, pnode_name)
7823 result = self.rpc.call_blockdev_rename(pnode_name,
7824 zip(tmp_disks, rename_to))
7825 result.Raise("Failed to rename adopted LVs")
7827 feedback_fn("* creating instance disks...")
7829 _CreateDisks(self, iobj)
7830 except errors.OpExecError:
7831 self.LogWarning("Device creation failed, reverting...")
7833 _RemoveDisks(self, iobj)
7835 self.cfg.ReleaseDRBDMinors(instance)
7838 feedback_fn("adding instance %s to cluster config" % instance)
7840 self.cfg.AddInstance(iobj, self.proc.GetECId())
7842 # Declare that we don't want to remove the instance lock anymore, as we've
7843 # added the instance to the config
7844 del self.remove_locks[locking.LEVEL_INSTANCE]
7845 # Unlock all the nodes
7846 if self.op.mode == constants.INSTANCE_IMPORT:
7847 nodes_keep = [self.op.src_node]
7848 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7849 if node != self.op.src_node]
7850 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7851 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7853 self.context.glm.release(locking.LEVEL_NODE)
7854 del self.acquired_locks[locking.LEVEL_NODE]
7857 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7858 feedback_fn("* wiping instance disks...")
7860 _WipeDisks(self, iobj)
7861 except errors.OpExecError, err:
7862 logging.exception("Wiping disks failed")
7863 self.LogWarning("Wiping instance disks failed (%s)", err)
7867 # Something is already wrong with the disks, don't do anything else
7869 elif self.op.wait_for_sync:
7870 disk_abort = not _WaitForSync(self, iobj)
7871 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7872 # make sure the disks are not degraded (still sync-ing is ok)
7874 feedback_fn("* checking mirrors status")
7875 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7880 _RemoveDisks(self, iobj)
7881 self.cfg.RemoveInstance(iobj.name)
7882 # Make sure the instance lock gets removed
7883 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7884 raise errors.OpExecError("There are some degraded disks for"
7887 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7888 if self.op.mode == constants.INSTANCE_CREATE:
7889 if not self.op.no_install:
7890 feedback_fn("* running the instance OS create scripts...")
7891 # FIXME: pass debug option from opcode to backend
7892 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7893 self.op.debug_level)
7894 result.Raise("Could not add os for instance %s"
7895 " on node %s" % (instance, pnode_name))
7897 elif self.op.mode == constants.INSTANCE_IMPORT:
7898 feedback_fn("* running the instance OS import scripts...")
7902 for idx, image in enumerate(self.src_images):
7906 # FIXME: pass debug option from opcode to backend
7907 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7908 constants.IEIO_FILE, (image, ),
7909 constants.IEIO_SCRIPT,
7910 (iobj.disks[idx], idx),
7912 transfers.append(dt)
7915 masterd.instance.TransferInstanceData(self, feedback_fn,
7916 self.op.src_node, pnode_name,
7917 self.pnode.secondary_ip,
7919 if not compat.all(import_result):
7920 self.LogWarning("Some disks for instance %s on node %s were not"
7921 " imported successfully" % (instance, pnode_name))
7923 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7924 feedback_fn("* preparing remote import...")
7925 # The source cluster will stop the instance before attempting to make a
7926 # connection. In some cases stopping an instance can take a long time,
7927 # hence the shutdown timeout is added to the connection timeout.
7928 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7929 self.op.source_shutdown_timeout)
7930 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7932 assert iobj.primary_node == self.pnode.name
7934 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7935 self.source_x509_ca,
7936 self._cds, timeouts)
7937 if not compat.all(disk_results):
7938 # TODO: Should the instance still be started, even if some disks
7939 # failed to import (valid for local imports, too)?
7940 self.LogWarning("Some disks for instance %s on node %s were not"
7941 " imported successfully" % (instance, pnode_name))
7943 # Run rename script on newly imported instance
7944 assert iobj.name == instance
7945 feedback_fn("Running rename script for %s" % instance)
7946 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7947 self.source_instance_name,
7948 self.op.debug_level)
7950 self.LogWarning("Failed to run rename script for %s on node"
7951 " %s: %s" % (instance, pnode_name, result.fail_msg))
7954 # also checked in the prereq part
7955 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7959 iobj.admin_up = True
7960 self.cfg.Update(iobj, feedback_fn)
7961 logging.info("Starting instance %s on node %s", instance, pnode_name)
7962 feedback_fn("* starting instance...")
7963 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7964 result.Raise("Could not start instance")
7966 return list(iobj.all_nodes)
7969 class LUInstanceConsole(NoHooksLU):
7970 """Connect to an instance's console.
7972 This is somewhat special in that it returns the command line that
7973 you need to run on the master node in order to connect to the console.
7979 def ExpandNames(self):
7980 self._ExpandAndLockInstance()
7982 def CheckPrereq(self):
7983 """Check prerequisites.
7985 This checks that the instance is in the cluster.
7988 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7989 assert self.instance is not None, \
7990 "Cannot retrieve locked instance %s" % self.op.instance_name
7991 _CheckNodeOnline(self, self.instance.primary_node)
7993 def Exec(self, feedback_fn):
7994 """Connect to the console of an instance
7997 instance = self.instance
7998 node = instance.primary_node
8000 node_insts = self.rpc.call_instance_list([node],
8001 [instance.hypervisor])[node]
8002 node_insts.Raise("Can't get node information from %s" % node)
8004 if instance.name not in node_insts.payload:
8005 if instance.admin_up:
8006 state = "ERROR_down"
8008 state = "ADMIN_down"
8009 raise errors.OpExecError("Instance %s is not running (state %s)" %
8010 (instance.name, state))
8012 logging.debug("Connecting to console of %s on %s", instance.name, node)
8014 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8017 def _GetInstanceConsole(cluster, instance):
8018 """Returns console information for an instance.
8020 @type cluster: L{objects.Cluster}
8021 @type instance: L{objects.Instance}
8025 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8026 # beparams and hvparams are passed separately, to avoid editing the
8027 # instance and then saving the defaults in the instance itself.
8028 hvparams = cluster.FillHV(instance)
8029 beparams = cluster.FillBE(instance)
8030 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8032 assert console.instance == instance.name
8033 assert console.Validate()
8035 return console.ToDict()
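# The dictionary returned above is the serialized console object built by the
# hypervisor; depending on the hypervisor it typically carries the console
# kind plus whatever the client needs to connect (for instance a host/port
# pair or a command to run on the node). The exact field set is defined by
# the console object in objects.py, not by this LU.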
8038 class LUInstanceReplaceDisks(LogicalUnit):
8039 """Replace the disks of an instance.
8042 HPATH = "mirrors-replace"
8043 HTYPE = constants.HTYPE_INSTANCE
8046 def CheckArguments(self):
8047 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8050 def ExpandNames(self):
8051 self._ExpandAndLockInstance()
8053 if self.op.iallocator is not None:
8054 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8056 elif self.op.remote_node is not None:
8057 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8058 self.op.remote_node = remote_node
8060 # Warning: do not remove the locking of the new secondary here
8061 # unless DRBD8.AddChildren is changed to work in parallel;
8062 # currently it doesn't since parallel invocations of
8063 # FindUnusedMinor will conflict
8064 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8065 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8068 self.needed_locks[locking.LEVEL_NODE] = []
8069 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8071 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8072 self.op.iallocator, self.op.remote_node,
8073 self.op.disks, False, self.op.early_release)
8075 self.tasklets = [self.replacer]
8077 def DeclareLocks(self, level):
8078 # If we're not already locking all nodes in the set we have to declare the
8079 # instance's primary/secondary nodes.
8080 if (level == locking.LEVEL_NODE and
8081 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8082 self._LockInstancesNodes()
8084 def BuildHooksEnv(self):
8087 This runs on the master, the primary and all the secondaries.
8090 instance = self.replacer.instance
8092 "MODE": self.op.mode,
8093 "NEW_SECONDARY": self.op.remote_node,
8094 "OLD_SECONDARY": instance.secondary_nodes[0],
8096 env.update(_BuildInstanceHookEnvByObject(self, instance))
8098 self.cfg.GetMasterNode(),
8099 instance.primary_node,
8101 if self.op.remote_node is not None:
8102 nl.append(self.op.remote_node)
8106 class TLReplaceDisks(Tasklet):
8107 """Replaces disks for an instance.
8109 Note: Locking is not within the scope of this class.
8112 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8113 disks, delay_iallocator, early_release):
8114 """Initializes this class.
8117 Tasklet.__init__(self, lu)
8120 self.instance_name = instance_name
8122 self.iallocator_name = iallocator_name
8123 self.remote_node = remote_node
8125 self.delay_iallocator = delay_iallocator
8126 self.early_release = early_release
8129 self.instance = None
8130 self.new_node = None
8131 self.target_node = None
8132 self.other_node = None
8133 self.remote_node_info = None
8134 self.node_secondary_ip = None
8137 def CheckArguments(mode, remote_node, iallocator):
8138 """Helper function for users of this class.
8141 # check for valid parameter combination
8142 if mode == constants.REPLACE_DISK_CHG:
8143 if remote_node is None and iallocator is None:
8144 raise errors.OpPrereqError("When changing the secondary either an"
8145 " iallocator script must be used or the"
8146 " new node given", errors.ECODE_INVAL)
8148 if remote_node is not None and iallocator is not None:
8149 raise errors.OpPrereqError("Give either the iallocator or the new"
8150 " secondary, not both", errors.ECODE_INVAL)
8152 elif remote_node is not None or iallocator is not None:
8153 # Not replacing the secondary
8154 raise errors.OpPrereqError("The iallocator and new node options can"
8155 " only be used when changing the"
8156 " secondary node", errors.ECODE_INVAL)
8159 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8160 """Compute a new secondary node using an IAllocator.
8163 ial = IAllocator(lu.cfg, lu.rpc,
8164 mode=constants.IALLOCATOR_MODE_RELOC,
8166 relocate_from=relocate_from)
8168 ial.Run(iallocator_name)
8171 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8172 " %s" % (iallocator_name, ial.info),
8175 if len(ial.result) != ial.required_nodes:
8176 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8177 " of nodes (%s), required %s" %
8179 len(ial.result), ial.required_nodes),
8182 remote_node_name = ial.result[0]
8184 lu.LogInfo("Selected new secondary for instance '%s': %s",
8185 instance_name, remote_node_name)
8187 return remote_node_name
8189 def _FindFaultyDisks(self, node_name):
8190 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8193 def CheckPrereq(self):
8194 """Check prerequisites.
8196 This checks that the instance is in the cluster.
8199 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8200 assert instance is not None, \
8201 "Cannot retrieve locked instance %s" % self.instance_name
8203 if instance.disk_template != constants.DT_DRBD8:
8204 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8205 " instances", errors.ECODE_INVAL)
8207 if len(instance.secondary_nodes) != 1:
8208 raise errors.OpPrereqError("The instance has a strange layout,"
8209 " expected one secondary but found %d" %
8210 len(instance.secondary_nodes),
8213 if not self.delay_iallocator:
8214 self._CheckPrereq2()
8216 def _CheckPrereq2(self):
8217 """Check prerequisites, second part.
8219 This function should always be part of CheckPrereq. It was separated and is
8220 now called from Exec because during node evacuation iallocator was only
8221 called with an unmodified cluster model, not taking planned changes into account.
8225 instance = self.instance
8226 secondary_node = instance.secondary_nodes[0]
8228 if self.iallocator_name is None:
8229 remote_node = self.remote_node
8231 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8232 instance.name, instance.secondary_nodes)
8234 if remote_node is not None:
8235 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8236 assert self.remote_node_info is not None, \
8237 "Cannot retrieve locked node %s" % remote_node
8239 self.remote_node_info = None
8241 if remote_node == self.instance.primary_node:
8242 raise errors.OpPrereqError("The specified node is the primary node of"
8243 " the instance.", errors.ECODE_INVAL)
8245 if remote_node == secondary_node:
8246 raise errors.OpPrereqError("The specified node is already the"
8247 " secondary node of the instance.",
8250 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8251 constants.REPLACE_DISK_CHG):
8252 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8255 if self.mode == constants.REPLACE_DISK_AUTO:
8256 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8257 faulty_secondary = self._FindFaultyDisks(secondary_node)
8259 if faulty_primary and faulty_secondary:
8260 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8261 " one node and can not be repaired"
8262 " automatically" % self.instance_name,
8266 self.disks = faulty_primary
8267 self.target_node = instance.primary_node
8268 self.other_node = secondary_node
8269 check_nodes = [self.target_node, self.other_node]
8270 elif faulty_secondary:
8271 self.disks = faulty_secondary
8272 self.target_node = secondary_node
8273 self.other_node = instance.primary_node
8274 check_nodes = [self.target_node, self.other_node]
8280 # Non-automatic modes
8281 if self.mode == constants.REPLACE_DISK_PRI:
8282 self.target_node = instance.primary_node
8283 self.other_node = secondary_node
8284 check_nodes = [self.target_node, self.other_node]
8286 elif self.mode == constants.REPLACE_DISK_SEC:
8287 self.target_node = secondary_node
8288 self.other_node = instance.primary_node
8289 check_nodes = [self.target_node, self.other_node]
8291 elif self.mode == constants.REPLACE_DISK_CHG:
8292 self.new_node = remote_node
8293 self.other_node = instance.primary_node
8294 self.target_node = secondary_node
8295 check_nodes = [self.new_node, self.other_node]
8297 _CheckNodeNotDrained(self.lu, remote_node)
8298 _CheckNodeVmCapable(self.lu, remote_node)
8300 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8301 assert old_node_info is not None
8302 if old_node_info.offline and not self.early_release:
8303 # doesn't make sense to delay the release
8304 self.early_release = True
8305 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8306 " early-release mode", secondary_node)
8309 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8312 # If not specified all disks should be replaced
8314 self.disks = range(len(self.instance.disks))
8316 for node in check_nodes:
8317 _CheckNodeOnline(self.lu, node)
8319 touched_nodes = frozenset([self.new_node, self.other_node,
8322 if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET:
8323 # Release unneeded node locks
8324 for name in self.lu.acquired_locks[locking.LEVEL_NODE]:
8325 if name not in touched_nodes:
8326 self._ReleaseNodeLock(name)
8328 # Check whether disks are valid
8329 for disk_idx in self.disks:
8330 instance.FindDisk(disk_idx)
8332 # Get secondary node IP addresses
8333 self.node_secondary_ip = \
8334 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8335 for node_name in touched_nodes
8336 if node_name is not None)
8338 def Exec(self, feedback_fn):
8339 """Execute disk replacement.
8341 This dispatches the disk replacement to the appropriate handler.
8344 if self.delay_iallocator:
8345 self._CheckPrereq2()
8347 if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and
8349 # Verify owned locks before starting operation
8350 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8351 assert set(owned_locks) == set(self.node_secondary_ip), \
8352 "Not owning the correct locks: %s" % (owned_locks, )
8355 feedback_fn("No disks need replacement")
8358 feedback_fn("Replacing disk(s) %s for %s" %
8359 (utils.CommaJoin(self.disks), self.instance.name))
8361 activate_disks = (not self.instance.admin_up)
8363 # Activate the instance disks if we're replacing them on a down instance
8365 _StartInstanceDisks(self.lu, self.instance, True)
8368 # Should we replace the secondary node?
8369 if self.new_node is not None:
8370 fn = self._ExecDrbd8Secondary
8372 fn = self._ExecDrbd8DiskOnly
8374 result = fn(feedback_fn)
8376 # Deactivate the instance disks if we're replacing them on a down instance
8379 _SafeShutdownInstanceDisks(self.lu, self.instance)
8382 # Verify owned locks
8383 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8384 assert ((self.early_release and not owned_locks) or
8385 (not self.early_release and
8386 set(owned_locks) == set(self.node_secondary_ip))), \
8387 ("Not owning the correct locks, early_release=%s, owned=%r" %
8388 (self.early_release, owned_locks))
8392 def _CheckVolumeGroup(self, nodes):
8393 self.lu.LogInfo("Checking volume groups")
8395 vgname = self.cfg.GetVGName()
8397 # Make sure volume group exists on all involved nodes
8398 results = self.rpc.call_vg_list(nodes)
8400 raise errors.OpExecError("Can't list volume groups on the nodes")
8404 res.Raise("Error checking node %s" % node)
8405 if vgname not in res.payload:
8406 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8409 def _CheckDisksExistence(self, nodes):
8410 # Check disk existence
8411 for idx, dev in enumerate(self.instance.disks):
8412 if idx not in self.disks:
8416 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8417 self.cfg.SetDiskID(dev, node)
8419 result = self.rpc.call_blockdev_find(node, dev)
8421 msg = result.fail_msg
8422 if msg or not result.payload:
8424 msg = "disk not found"
8425 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8428 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8429 for idx, dev in enumerate(self.instance.disks):
8430 if idx not in self.disks:
8433 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8436 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8438 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8439 " replace disks for instance %s" %
8440 (node_name, self.instance.name))
8442 def _CreateNewStorage(self, node_name):
8443 """Create new storage on the primary or secondary node.
8445 This is only used for same-node replaces, not for changing the
8446 secondary node, hence we don't want to modify the existing disk.
8451 for idx, dev in enumerate(self.instance.disks):
8452 if idx not in self.disks:
8455 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8457 self.cfg.SetDiskID(dev, node_name)
8459 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8460 names = _GenerateUniqueNames(self.lu, lv_names)
8462 vg_data = dev.children[0].logical_id[0]
8463 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8464 logical_id=(vg_data, names[0]))
8465 vg_meta = dev.children[1].logical_id[0]
8466 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8467 logical_id=(vg_meta, names[1]))
8469 new_lvs = [lv_data, lv_meta]
8470 old_lvs = [child.Copy() for child in dev.children]
8471 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8473 # we pass force_create=True to force the LVM creation
8474 for new_lv in new_lvs:
8475 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8476 _GetInstanceInfoText(self.instance), False)
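# iv_names built above maps each DRBD device's iv_name (e.g. "disk/0") to a
# (dev, old_lvs, new_lvs) tuple; this mapping is what the caller in
# _ExecDrbd8DiskOnly later uses to attach the new LVs and remove the old
# ones. The new data/meta LVs are created in the same volume groups as the
# existing children.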
8480 def _CheckDevices(self, node_name, iv_names):
8481 for name, (dev, _, _) in iv_names.iteritems():
8482 self.cfg.SetDiskID(dev, node_name)
8484 result = self.rpc.call_blockdev_find(node_name, dev)
8486 msg = result.fail_msg
8487 if msg or not result.payload:
8489 msg = "disk not found"
8490 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8493 if result.payload.is_degraded:
8494 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8496 def _RemoveOldStorage(self, node_name, iv_names):
8497 for name, (_, old_lvs, _) in iv_names.iteritems():
8498 self.lu.LogInfo("Remove logical volumes for %s" % name)
8501 self.cfg.SetDiskID(lv, node_name)
8503 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8505 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8506 hint="remove unused LVs manually")
8508 def _ReleaseNodeLock(self, node_name):
8509 """Releases the lock for a given node."""
8510 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8512 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
8513 """Replace a disk on the primary or secondary for DRBD 8.
8515 The algorithm for replace is quite complicated:
8517 1. for each disk to be replaced:
8519 1. create new LVs on the target node with unique names
8520 1. detach old LVs from the drbd device
8521 1. rename old LVs to name_replaced.<time_t>
8522 1. rename new LVs to old LVs
8523 1. attach the new LVs (with the old names now) to the drbd device
8525 1. wait for sync across all devices
8527 1. for each modified disk:
8529 1. remove old LVs (which have the name name_replaced.<time_t>)
8531 Failures are not very well handled.
8536 # Step: check device existence
8537 self.lu.LogStep(1, steps_total, "Check device existence")
8538 self._CheckDisksExistence([self.other_node, self.target_node])
8539 self._CheckVolumeGroup([self.target_node, self.other_node])
8541 # Step: check other node consistency
8542 self.lu.LogStep(2, steps_total, "Check peer consistency")
8543 self._CheckDisksConsistency(self.other_node,
8544 self.other_node == self.instance.primary_node,
8547 # Step: create new storage
8548 self.lu.LogStep(3, steps_total, "Allocate new storage")
8549 iv_names = self._CreateNewStorage(self.target_node)
8551 # Step: for each lv, detach+rename*2+attach
8552 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8553 for dev, old_lvs, new_lvs in iv_names.itervalues():
8554 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8556 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8558 result.Raise("Can't detach drbd from local storage on node"
8559 " %s for device %s" % (self.target_node, dev.iv_name))
8561 #cfg.Update(instance)
8563 # ok, we created the new LVs, so now we know we have the needed
8564 # storage; as such, we proceed on the target node to rename
8565 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8566 # using the assumption that logical_id == physical_id (which in
8567 # turn is the unique_id on that node)
8569 # FIXME(iustin): use a better name for the replaced LVs
8570 temp_suffix = int(time.time())
8571 ren_fn = lambda d, suff: (d.physical_id[0],
8572 d.physical_id[1] + "_replaced-%s" % suff)
8574 # Build the rename list based on what LVs exist on the node
8575 rename_old_to_new = []
8576 for to_ren in old_lvs:
8577 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8578 if not result.fail_msg and result.payload:
8580 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8582 self.lu.LogInfo("Renaming the old LVs on the target node")
8583 result = self.rpc.call_blockdev_rename(self.target_node,
8585 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8587 # Now we rename the new LVs to the old LVs
8588 self.lu.LogInfo("Renaming the new LVs on the target node")
8589 rename_new_to_old = [(new, old.physical_id)
8590 for old, new in zip(old_lvs, new_lvs)]
8591 result = self.rpc.call_blockdev_rename(self.target_node,
8593 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8595 # Intermediate steps of in-memory modifications
8596 for old, new in zip(old_lvs, new_lvs):
8597 new.logical_id = old.logical_id
8598 self.cfg.SetDiskID(new, self.target_node)
8600 # We need to modify old_lvs so that removal later removes the
8601 # right LVs, not the newly added ones; note that old_lvs is a copy here
8603 for disk in old_lvs:
8604 disk.logical_id = ren_fn(disk, temp_suffix)
8605 self.cfg.SetDiskID(disk, self.target_node)
8607 # Now that the new lvs have the old name, we can add them to the device
8608 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8609 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8611 msg = result.fail_msg
8613 for new_lv in new_lvs:
8614 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8617 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8618 hint=("cleanup manually the unused logical"
8620 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8623 if self.early_release:
8624 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8626 self._RemoveOldStorage(self.target_node, iv_names)
8627 # WARNING: we release both node locks here, do not do other RPCs
8628 # than WaitForSync to the primary node
8629 self._ReleaseNodeLock([self.target_node, self.other_node])
8632 # This can fail as the old devices are degraded and _WaitForSync
8633 # does a combined result over all disks, so we don't check its return value
8634 self.lu.LogStep(cstep, steps_total, "Sync devices")
8636 _WaitForSync(self.lu, self.instance)
8638 # Check all devices manually
8639 self._CheckDevices(self.instance.primary_node, iv_names)
8641 # Step: remove old storage
8642 if not self.early_release:
8643 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8645 self._RemoveOldStorage(self.target_node, iv_names)
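# Illustrative walk-through of the rename dance above, using a hypothetical
# LV name: for a data LV "xenvg/inst1.disk0_data" the sequence is roughly
#   1. old LV -> "xenvg/inst1.disk0_data_replaced-<timestamp>"
#   2. new LV -> "xenvg/inst1.disk0_data"
# so the DRBD device keeps the same logical_id while its backing storage is
# swapped underneath it.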
8647 def _ExecDrbd8Secondary(self, feedback_fn):
8648 """Replace the secondary node for DRBD 8.
8650 The algorithm for replace is quite complicated:
8651 - for all disks of the instance:
8652 - create new LVs on the new node with same names
8653 - shutdown the drbd device on the old secondary
8654 - disconnect the drbd network on the primary
8655 - create the drbd device on the new secondary
8656 - network attach the drbd on the primary, using an artifice:
8657 the drbd code for Attach() will connect to the network if it
8658 finds a device which is connected to the good local disks but not network enabled
8660 - wait for sync across all devices
8661 - remove all disks from the old secondary
8663 Failures are not very well handled.
8668 # Step: check device existence
8669 self.lu.LogStep(1, steps_total, "Check device existence")
8670 self._CheckDisksExistence([self.instance.primary_node])
8671 self._CheckVolumeGroup([self.instance.primary_node])
8673 # Step: check other node consistency
8674 self.lu.LogStep(2, steps_total, "Check peer consistency")
8675 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8677 # Step: create new storage
8678 self.lu.LogStep(3, steps_total, "Allocate new storage")
8679 for idx, dev in enumerate(self.instance.disks):
8680 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8681 (self.new_node, idx))
8682 # we pass force_create=True to force LVM creation
8683 for new_lv in dev.children:
8684 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8685 _GetInstanceInfoText(self.instance), False)
8687 # Step 4: drbd minors and drbd setup changes
8688 # after this, we must manually remove the drbd minors on both the
8689 # error and the success paths
8690 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8691 minors = self.cfg.AllocateDRBDMinor([self.new_node
8692 for dev in self.instance.disks],
8694 logging.debug("Allocated minors %r", minors)
8697 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8698 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8699 (self.new_node, idx))
8700 # create new devices on new_node; note that we create two IDs:
8701 # one without port, so the drbd will be activated without
8702 # networking information on the new node at this stage, and one
8703 # with network, for the latter activation in step 4
8704 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8705 if self.instance.primary_node == o_node1:
8708 assert self.instance.primary_node == o_node2, "Three-node instance?"
8711 new_alone_id = (self.instance.primary_node, self.new_node, None,
8712 p_minor, new_minor, o_secret)
8713 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8714 p_minor, new_minor, o_secret)
8716 iv_names[idx] = (dev, dev.children, new_net_id)
8717 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8719 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8720 logical_id=new_alone_id,
8721 children=dev.children,
8724 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8725 _GetInstanceInfoText(self.instance), False)
8726 except errors.GenericError:
8727 self.cfg.ReleaseDRBDMinors(self.instance.name)
8730 # We have new devices, shutdown the drbd on the old secondary
8731 for idx, dev in enumerate(self.instance.disks):
8732 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8733 self.cfg.SetDiskID(dev, self.target_node)
8734 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8736 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8737 "node: %s" % (idx, msg),
8738 hint=("Please cleanup this device manually as"
8739 " soon as possible"))
8741 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8742 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8743 self.node_secondary_ip,
8744 self.instance.disks)\
8745 [self.instance.primary_node]
8747 msg = result.fail_msg
8749 # detaches didn't succeed (unlikely)
8750 self.cfg.ReleaseDRBDMinors(self.instance.name)
8751 raise errors.OpExecError("Can't detach the disks from the network on"
8752 " old node: %s" % (msg,))
8754 # if we managed to detach at least one, we update all the disks of
8755 # the instance to point to the new secondary
8756 self.lu.LogInfo("Updating instance configuration")
8757 for dev, _, new_logical_id in iv_names.itervalues():
8758 dev.logical_id = new_logical_id
8759 self.cfg.SetDiskID(dev, self.instance.primary_node)
8761 self.cfg.Update(self.instance, feedback_fn)
8763 # and now perform the drbd attach
8764 self.lu.LogInfo("Attaching primary drbds to new secondary"
8765 " (standalone => connected)")
8766 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8768 self.node_secondary_ip,
8769 self.instance.disks,
8772 for to_node, to_result in result.items():
8773 msg = to_result.fail_msg
8775 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8777 hint=("please do a gnt-instance info to see the"
8778 " status of disks"))
8780 if self.early_release:
8781 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8783 self._RemoveOldStorage(self.target_node, iv_names)
8784 # WARNING: we release all node locks here, do not do other RPCs
8785 # than WaitForSync to the primary node
8786 self._ReleaseNodeLock([self.instance.primary_node,
8791 # This can fail as the old devices are degraded and _WaitForSync
8792 # does a combined result over all disks, so we don't check its return value
8793 self.lu.LogStep(cstep, steps_total, "Sync devices")
8795 _WaitForSync(self.lu, self.instance)
8797 # Check all devices manually
8798 self._CheckDevices(self.instance.primary_node, iv_names)
8800 # Step: remove old storage
8801 if not self.early_release:
8802 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8803 self._RemoveOldStorage(self.target_node, iv_names)
8806 class LURepairNodeStorage(NoHooksLU):
8807 """Repairs the volume group on a node.
8812 def CheckArguments(self):
8813 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8815 storage_type = self.op.storage_type
8817 if (constants.SO_FIX_CONSISTENCY not in
8818 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8819 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8820 " repaired" % storage_type,
8823 def ExpandNames(self):
8824 self.needed_locks = {
8825 locking.LEVEL_NODE: [self.op.node_name],
8828 def _CheckFaultyDisks(self, instance, node_name):
8829 """Ensure faulty disks abort the opcode or at least warn."""
8831 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8833 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8834 " node '%s'" % (instance.name, node_name),
8836 except errors.OpPrereqError, err:
8837 if self.op.ignore_consistency:
8838 self.proc.LogWarning(str(err.args[0]))
8842 def CheckPrereq(self):
8843 """Check prerequisites.
8846 # Check whether any instance on this node has faulty disks
8847 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8848 if not inst.admin_up:
8850 check_nodes = set(inst.all_nodes)
8851 check_nodes.discard(self.op.node_name)
8852 for inst_node_name in check_nodes:
8853 self._CheckFaultyDisks(inst, inst_node_name)
8855 def Exec(self, feedback_fn):
8856 feedback_fn("Repairing storage unit '%s' on %s ..." %
8857 (self.op.name, self.op.node_name))
8859 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8860 result = self.rpc.call_storage_execute(self.op.node_name,
8861 self.op.storage_type, st_args,
8863 constants.SO_FIX_CONSISTENCY)
8864 result.Raise("Failed to repair storage unit '%s' on %s" %
8865 (self.op.name, self.op.node_name))
8868 class LUNodeEvacStrategy(NoHooksLU):
8869 """Computes the node evacuation strategy.
8874 def CheckArguments(self):
8875 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8877 def ExpandNames(self):
8878 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8879 self.needed_locks = locks = {}
8880 if self.op.remote_node is None:
8881 locks[locking.LEVEL_NODE] = locking.ALL_SET
8883 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8884 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8886 def Exec(self, feedback_fn):
8888 for node in self.op.nodes:
8889 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8893 if self.op.remote_node is not None:
8896 if i.primary_node == self.op.remote_node:
8897 raise errors.OpPrereqError("Node %s is the primary node of"
8898 " instance %s, cannot use it as"
8900 (self.op.remote_node, i.name),
8902 result.append([i.name, self.op.remote_node])
8904 ial = IAllocator(self.cfg, self.rpc,
8905 mode=constants.IALLOCATOR_MODE_MEVAC,
8906 evac_nodes=self.op.nodes)
8907 ial.Run(self.op.iallocator, validate=True)
8909 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8915 class LUInstanceGrowDisk(LogicalUnit):
8916 """Grow a disk of an instance.
8920 HTYPE = constants.HTYPE_INSTANCE
8923 def ExpandNames(self):
8924 self._ExpandAndLockInstance()
8925 self.needed_locks[locking.LEVEL_NODE] = []
8926 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8928 def DeclareLocks(self, level):
8929 if level == locking.LEVEL_NODE:
8930 self._LockInstancesNodes()
8932 def BuildHooksEnv(self):
8935 This runs on the master, the primary and all the secondaries.
8939 "DISK": self.op.disk,
8940 "AMOUNT": self.op.amount,
8942 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8943 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8946 def CheckPrereq(self):
8947 """Check prerequisites.
8949 This checks that the instance is in the cluster.
8952 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8953 assert instance is not None, \
8954 "Cannot retrieve locked instance %s" % self.op.instance_name
8955 nodenames = list(instance.all_nodes)
8956 for node in nodenames:
8957 _CheckNodeOnline(self, node)
8959 self.instance = instance
8961 if instance.disk_template not in constants.DTS_GROWABLE:
8962 raise errors.OpPrereqError("Instance's disk layout does not support"
8963 " growing.", errors.ECODE_INVAL)
8965 self.disk = instance.FindDisk(self.op.disk)
8967 if instance.disk_template != constants.DT_FILE:
8968 # TODO: check the free disk space for file, when that feature will be supported
8970 _CheckNodesFreeDiskPerVG(self, nodenames,
8971 self.disk.ComputeGrowth(self.op.amount))
8973 def Exec(self, feedback_fn):
8974 """Execute disk grow.
8977 instance = self.instance
8980 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8982 raise errors.OpExecError("Cannot activate block device to grow")
8984 for node in instance.all_nodes:
8985 self.cfg.SetDiskID(disk, node)
8986 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8987 result.Raise("Grow request failed to node %s" % node)
8989 # TODO: Rewrite code to work properly
8990 # DRBD goes into sync mode for a short amount of time after executing the
8991 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8992 # calling "resize" in sync mode fails. Sleeping for a short amount of
8993 # time is a work-around.
8996 disk.RecordGrow(self.op.amount)
8997 self.cfg.Update(instance, feedback_fn)
8998 if self.op.wait_for_sync:
8999 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9001 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9002 " status.\nPlease check the instance.")
9003 if not instance.admin_up:
9004 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9005 elif not instance.admin_up:
9006 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9007 " not supposed to be running because no wait for"
9008 " sync mode was requested.")
9011 class LUInstanceQueryData(NoHooksLU):
9012 """Query runtime instance data.
9017 def ExpandNames(self):
9018 self.needed_locks = {}
9020 # Use locking if requested or when non-static information is wanted
9021 if not (self.op.static or self.op.use_locking):
9022 self.LogWarning("Non-static data requested, locks need to be acquired")
9023 self.op.use_locking = True
9025 if self.op.instances or not self.op.use_locking:
9026 # Expand instance names right here
9027 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9029 # Will use acquired locks
9030 self.wanted_names = None
9032 if self.op.use_locking:
9033 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9035 if self.wanted_names is None:
9036 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9038 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9040 self.needed_locks[locking.LEVEL_NODE] = []
9041 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9044 def DeclareLocks(self, level):
9045 if self.op.use_locking and level == locking.LEVEL_NODE:
9046 self._LockInstancesNodes()
9048 def CheckPrereq(self):
9049 """Check prerequisites.
9051 This only checks the optional instance list against the existing names.
9054 if self.wanted_names is None:
9055 assert self.op.use_locking, "Locking was not used"
9056 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9058 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9059 for name in self.wanted_names]
9061 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9062 """Returns the status of a block device
9065 if self.op.static or not node:
9068 self.cfg.SetDiskID(dev, node)
9070 result = self.rpc.call_blockdev_find(node, dev)
9074 result.Raise("Can't compute disk status for %s" % instance_name)
9076 status = result.payload
9080 return (status.dev_path, status.major, status.minor,
9081 status.sync_percent, status.estimated_time,
9082 status.is_degraded, status.ldisk_status)
9084 def _ComputeDiskStatus(self, instance, snode, dev):
9085 """Compute block device status.
9088 if dev.dev_type in constants.LDS_DRBD:
9089 # we change the snode then (otherwise we use the one passed in)
9090 if dev.logical_id[0] == instance.primary_node:
9091 snode = dev.logical_id[1]
9093 snode = dev.logical_id[0]
9095 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9097 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9100 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9101 for child in dev.children]
9106 "iv_name": dev.iv_name,
9107 "dev_type": dev.dev_type,
9108 "logical_id": dev.logical_id,
9109 "physical_id": dev.physical_id,
9110 "pstatus": dev_pstatus,
9111 "sstatus": dev_sstatus,
9112 "children": dev_children,
9117 def Exec(self, feedback_fn):
9118 """Gather and return data"""
9121 cluster = self.cfg.GetClusterInfo()
9123 for instance in self.wanted_instances:
9124 if not self.op.static:
9125 remote_info = self.rpc.call_instance_info(instance.primary_node,
9127 instance.hypervisor)
9128 remote_info.Raise("Error checking node %s" % instance.primary_node)
9129 remote_info = remote_info.payload
9130 if remote_info and "state" in remote_info:
9133 remote_state = "down"
9136 if instance.admin_up:
9139 config_state = "down"
9141 disks = [self._ComputeDiskStatus(instance, None, device)
9142 for device in instance.disks]
9144 result[instance.name] = {
9145 "name": instance.name,
9146 "config_state": config_state,
9147 "run_state": remote_state,
9148 "pnode": instance.primary_node,
9149 "snodes": instance.secondary_nodes,
9151 # this happens to be the same format used for hooks
9152 "nics": _NICListToTuple(self, instance.nics),
9153 "disk_template": instance.disk_template,
9155 "hypervisor": instance.hypervisor,
9156 "network_port": instance.network_port,
9157 "hv_instance": instance.hvparams,
9158 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9159 "be_instance": instance.beparams,
9160 "be_actual": cluster.FillBE(instance),
9161 "os_instance": instance.osparams,
9162 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9163 "serial_no": instance.serial_no,
9164 "mtime": instance.mtime,
9165 "ctime": instance.ctime,
9166 "uuid": instance.uuid,
9172 class LUInstanceSetParams(LogicalUnit):
9173 """Modifies an instances's parameters.
9176 HPATH = "instance-modify"
9177 HTYPE = constants.HTYPE_INSTANCE
9180 def CheckArguments(self):
9181 if not (self.op.nics or self.op.disks or self.op.disk_template or
9182 self.op.hvparams or self.op.beparams or self.op.os_name):
9183 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9185 if self.op.hvparams:
9186 _CheckGlobalHvParams(self.op.hvparams)
9190 for disk_op, disk_dict in self.op.disks:
9191 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9192 if disk_op == constants.DDM_REMOVE:
9195 elif disk_op == constants.DDM_ADD:
9198 if not isinstance(disk_op, int):
9199 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9200 if not isinstance(disk_dict, dict):
9201 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9202 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9204 if disk_op == constants.DDM_ADD:
9205 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9206 if mode not in constants.DISK_ACCESS_SET:
9207 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9209 size = disk_dict.get('size', None)
9211 raise errors.OpPrereqError("Required disk parameter size missing",
9215 except (TypeError, ValueError), err:
9216 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9217 str(err), errors.ECODE_INVAL)
9218 disk_dict['size'] = size
9220 # modification of disk
9221 if 'size' in disk_dict:
9222 raise errors.OpPrereqError("Disk size change not possible, use"
9223 " grow-disk", errors.ECODE_INVAL)
9225 if disk_addremove > 1:
9226 raise errors.OpPrereqError("Only one disk add or remove operation"
9227 " supported at a time", errors.ECODE_INVAL)
9229 if self.op.disks and self.op.disk_template is not None:
9230 raise errors.OpPrereqError("Disk template conversion and other disk"
9231 " changes not supported at the same time",
9234 if (self.op.disk_template and
9235 self.op.disk_template in constants.DTS_NET_MIRROR and
9236 self.op.remote_node is None):
9237 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9238 " one requires specifying a secondary node",
9243 for nic_op, nic_dict in self.op.nics:
9244 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9245 if nic_op == constants.DDM_REMOVE:
9248 elif nic_op == constants.DDM_ADD:
9251 if not isinstance(nic_op, int):
9252 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9253 if not isinstance(nic_dict, dict):
9254 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9255 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9257 # nic_dict should be a dict
9258 nic_ip = nic_dict.get('ip', None)
9259 if nic_ip is not None:
9260 if nic_ip.lower() == constants.VALUE_NONE:
9261 nic_dict['ip'] = None
9263 if not netutils.IPAddress.IsValid(nic_ip):
9264 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9267 nic_bridge = nic_dict.get('bridge', None)
9268 nic_link = nic_dict.get('link', None)
9269 if nic_bridge and nic_link:
9270 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9271 " at the same time", errors.ECODE_INVAL)
9272 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9273 nic_dict['bridge'] = None
9274 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9275 nic_dict['link'] = None
9277 if nic_op == constants.DDM_ADD:
9278 nic_mac = nic_dict.get('mac', None)
9280 nic_dict['mac'] = constants.VALUE_AUTO
9282 if 'mac' in nic_dict:
9283 nic_mac = nic_dict['mac']
9284 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9285 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9287 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9288 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9289 " modifying an existing nic",
9292 if nic_addremove > 1:
9293 raise errors.OpPrereqError("Only one NIC add or remove operation"
9294 " supported at a time", errors.ECODE_INVAL)
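# Illustrative sketch (hypothetical values): NIC modifications use the same
# (operation, parameters) convention, e.g.
#   [(constants.DDM_ADD, {'mac': constants.VALUE_AUTO, 'ip': '192.0.2.10'}),
#    (1, {'link': 'br0'})]
# i.e. add a NIC with a generated MAC and re-link NIC 1 to bridge br0.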
9296 def ExpandNames(self):
9297 self._ExpandAndLockInstance()
9298 self.needed_locks[locking.LEVEL_NODE] = []
9299 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9301 def DeclareLocks(self, level):
9302 if level == locking.LEVEL_NODE:
9303 self._LockInstancesNodes()
9304 if self.op.disk_template and self.op.remote_node:
9305 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9306 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9308 def BuildHooksEnv(self):
9311 This runs on the master, primary and secondaries.
9315 if constants.BE_MEMORY in self.be_new:
9316 args['memory'] = self.be_new[constants.BE_MEMORY]
9317 if constants.BE_VCPUS in self.be_new:
9318 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9319 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9320 # information at all.
9323 nic_override = dict(self.op.nics)
9324 for idx, nic in enumerate(self.instance.nics):
9325 if idx in nic_override:
9326 this_nic_override = nic_override[idx]
9328 this_nic_override = {}
9329 if 'ip' in this_nic_override:
9330 ip = this_nic_override['ip']
9333 if 'mac' in this_nic_override:
9334 mac = this_nic_override['mac']
9337 if idx in self.nic_pnew:
9338 nicparams = self.nic_pnew[idx]
9340 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9341 mode = nicparams[constants.NIC_MODE]
9342 link = nicparams[constants.NIC_LINK]
9343 args['nics'].append((ip, mac, mode, link))
9344 if constants.DDM_ADD in nic_override:
9345 ip = nic_override[constants.DDM_ADD].get('ip', None)
9346 mac = nic_override[constants.DDM_ADD]['mac']
9347 nicparams = self.nic_pnew[constants.DDM_ADD]
9348 mode = nicparams[constants.NIC_MODE]
9349 link = nicparams[constants.NIC_LINK]
9350 args['nics'].append((ip, mac, mode, link))
9351 elif constants.DDM_REMOVE in nic_override:
9352 del args['nics'][-1]
9354 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9355 if self.op.disk_template:
9356 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9357 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9360 def CheckPrereq(self):
9361 """Check prerequisites.
9363 This only checks the instance list against the existing names.
9366 # checking the new params on the primary/secondary nodes
9368 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9369 cluster = self.cluster = self.cfg.GetClusterInfo()
9370 assert self.instance is not None, \
9371 "Cannot retrieve locked instance %s" % self.op.instance_name
9372 pnode = instance.primary_node
9373 nodelist = list(instance.all_nodes)
9376 if self.op.os_name and not self.op.force:
9377 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9378 self.op.force_variant)
9379 instance_os = self.op.os_name
9381 instance_os = instance.os
9383 if self.op.disk_template:
9384 if instance.disk_template == self.op.disk_template:
9385 raise errors.OpPrereqError("Instance already has disk template %s" %
9386 instance.disk_template, errors.ECODE_INVAL)
9388 if (instance.disk_template,
9389 self.op.disk_template) not in self._DISK_CONVERSIONS:
9390 raise errors.OpPrereqError("Unsupported disk template conversion from"
9391 " %s to %s" % (instance.disk_template,
9392 self.op.disk_template),
9394 _CheckInstanceDown(self, instance, "cannot change disk template")
9395 if self.op.disk_template in constants.DTS_NET_MIRROR:
9396 if self.op.remote_node == pnode:
9397 raise errors.OpPrereqError("Given new secondary node %s is the same"
9398 " as the primary node of the instance" %
9399 self.op.remote_node, errors.ECODE_STATE)
9400 _CheckNodeOnline(self, self.op.remote_node)
9401 _CheckNodeNotDrained(self, self.op.remote_node)
9402 # FIXME: here we assume that the old instance type is DT_PLAIN
9403 assert instance.disk_template == constants.DT_PLAIN
9404 disks = [{"size": d.size, "vg": d.logical_id[0]}
9405 for d in instance.disks]
9406 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9407 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9409 # hvparams processing
9410 if self.op.hvparams:
9411 hv_type = instance.hypervisor
9412 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9413 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9414 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9417 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9418 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9419 self.hv_new = hv_new # the new actual values
9420 self.hv_inst = i_hvdict # the new dict (without defaults)
9422 self.hv_new = self.hv_inst = {}
9424 # beparams processing
9425 if self.op.beparams:
9426 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9428 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9429 be_new = cluster.SimpleFillBE(i_bedict)
9430 self.be_new = be_new # the new actual values
9431 self.be_inst = i_bedict # the new dict (without defaults)
9433 self.be_new = self.be_inst = {}
9434 be_old = cluster.FillBE(instance)
9436 # osparams processing
9437 if self.op.osparams:
9438 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9439 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9440 self.os_inst = i_osdict # the new dict (without defaults)
9446 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
9447 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
9448 mem_check_list = [pnode]
9449 if be_new[constants.BE_AUTO_BALANCE]:
9450 # either we changed auto_balance to yes or it was from before
9451 mem_check_list.extend(instance.secondary_nodes)
9452 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9453 instance.hypervisor)
9454 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9455 instance.hypervisor)
9456 pninfo = nodeinfo[pnode]
9457 msg = pninfo.fail_msg
9459 # Assume the primary node is unreachable and go ahead
9460 self.warn.append("Can't get info from primary node %s: %s" %
9462 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9463 self.warn.append("Node data from primary node %s doesn't contain"
9464 " free memory information" % pnode)
9465 elif instance_info.fail_msg:
9466 self.warn.append("Can't get instance runtime information: %s" %
9467 instance_info.fail_msg)
9469 if instance_info.payload:
9470 current_mem = int(instance_info.payload['memory'])
9472 # Assume instance not running
9473 # (there is a slight race condition here, but it's not very probable,
9474 # and we have no other way to check)
9476 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9477 pninfo.payload['memory_free'])
9479 raise errors.OpPrereqError("This change will prevent the instance"
9480 " from starting, due to %d MB of memory"
9481 " missing on its primary node" % miss_mem,
9484 if be_new[constants.BE_AUTO_BALANCE]:
9485 for node, nres in nodeinfo.items():
9486 if node not in instance.secondary_nodes:
9488 nres.Raise("Can't get info from secondary node %s" % node,
9489 prereq=True, ecode=errors.ECODE_STATE)
9490 if not isinstance(nres.payload.get('memory_free', None), int):
9491 raise errors.OpPrereqError("Secondary node %s didn't return free"
9492 " memory information" % node,
9494 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9495 raise errors.OpPrereqError("This change will prevent the instance"
9496 " from failover to its secondary node"
9497 " %s, due to not enough memory" % node,
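# Worked example of the primary-node memory check above (hypothetical
# numbers): raising BE_MEMORY to 2048 MB while the instance currently uses
# 1024 MB and the primary node reports 512 MB free gives
#   miss_mem = 2048 - 1024 - 512 = 512
# which is positive, so the change is refused unless the operation is forced
# (in which case this whole check is skipped).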
9503 for nic_op, nic_dict in self.op.nics:
9504 if nic_op == constants.DDM_REMOVE:
9505 if not instance.nics:
9506 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9509 if nic_op != constants.DDM_ADD:
9511 if not instance.nics:
9512 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9513 " no NICs" % nic_op,
9515 if nic_op < 0 or nic_op >= len(instance.nics):
9516 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9518 (nic_op, len(instance.nics) - 1),
9520 old_nic_params = instance.nics[nic_op].nicparams
9521 old_nic_ip = instance.nics[nic_op].ip
9526 update_params_dict = dict([(key, nic_dict[key])
9527 for key in constants.NICS_PARAMETERS
9528 if key in nic_dict])
9530 if 'bridge' in nic_dict:
9531 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9533 new_nic_params = _GetUpdatedParams(old_nic_params,
9535 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9536 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9537 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9538 self.nic_pinst[nic_op] = new_nic_params
9539 self.nic_pnew[nic_op] = new_filled_nic_params
9540 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9542 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9543 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9544 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9546 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9548 self.warn.append(msg)
9550 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9551 if new_nic_mode == constants.NIC_MODE_ROUTED:
9552 if 'ip' in nic_dict:
9553 nic_ip = nic_dict['ip']
9557 raise errors.OpPrereqError('Cannot set the nic ip to None'
9558 ' on a routed nic', errors.ECODE_INVAL)
9559 if 'mac' in nic_dict:
9560 nic_mac = nic_dict['mac']
9562 raise errors.OpPrereqError('Cannot set the nic mac to None',
9564 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9565 # otherwise generate the mac
9566 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9568 # or validate/reserve the current one
9570 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9571 except errors.ReservationError:
9572 raise errors.OpPrereqError("MAC address %s already in use"
9573 " in cluster" % nic_mac,
9574 errors.ECODE_NOTUNIQUE)
9577 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9578 raise errors.OpPrereqError("Disk operations not supported for"
9579 " diskless instances",
9581 for disk_op, _ in self.op.disks:
9582 if disk_op == constants.DDM_REMOVE:
9583 if len(instance.disks) == 1:
9584 raise errors.OpPrereqError("Cannot remove the last disk of"
9585 " an instance", errors.ECODE_INVAL)
9586 _CheckInstanceDown(self, instance, "cannot remove disks")
9588 if (disk_op == constants.DDM_ADD and
9589 len(instance.disks) >= constants.MAX_DISKS):
9590 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9591 " add more" % constants.MAX_DISKS,
9593 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9595 if disk_op < 0 or disk_op >= len(instance.disks):
9596 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9598 (disk_op, len(instance.disks)),
9603 def _ConvertPlainToDrbd(self, feedback_fn):
9604 """Converts an instance from plain to drbd.
9607 feedback_fn("Converting template to drbd")
9608 instance = self.instance
9609 pnode = instance.primary_node
9610 snode = self.op.remote_node
9612 # create a fake disk info for _GenerateDiskTemplate
9613 disk_info = [{"size": d.size, "mode": d.mode,
9614 "vg": d.logical_id[0]} for d in instance.disks]
9615 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9616 instance.name, pnode, [snode],
9617 disk_info, None, None, 0, feedback_fn)
9618 info = _GetInstanceInfoText(instance)
9619 feedback_fn("Creating additional volumes...")
9620 # first, create the missing data and meta devices
9621 for disk in new_disks:
9622 # unfortunately this is... not too nice
9623 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9625 for child in disk.children:
9626 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9627 # at this stage, all new LVs have been created, we can rename the
9629 feedback_fn("Renaming original volumes...")
9630 rename_list = [(o, n.children[0].logical_id)
9631 for (o, n) in zip(instance.disks, new_disks)]
9632 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9633 result.Raise("Failed to rename original LVs")
9635 feedback_fn("Initializing DRBD devices...")
9636 # all child devices are in place, we can now create the DRBD devices
9637 for disk in new_disks:
9638 for node in [pnode, snode]:
9639 f_create = node == pnode
9640 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9642 # at this point, the instance has been modified
9643 instance.disk_template = constants.DT_DRBD8
9644 instance.disks = new_disks
9645 self.cfg.Update(instance, feedback_fn)
9647 # disks are created, waiting for sync
9648 disk_abort = not _WaitForSync(self, instance,
9649 oneshot=not self.op.wait_for_sync)
9651 raise errors.OpExecError("There are some degraded disks for"
9652 " this instance, please cleanup manually")
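# Summary of the conversion above: new DRBD disk objects are generated from
# the existing LVs, the missing meta volume is created on the primary node
# and full data/meta volumes on the secondary, the original LVs are renamed
# into place as the DRBD data children, the DRBD devices are created on both
# nodes, the configuration is updated, and the disks are left to resync
# (optionally waiting for the sync to finish).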
9654 def _ConvertDrbdToPlain(self, feedback_fn):
9655 """Converts an instance from drbd to plain.
9658 instance = self.instance
9659 assert len(instance.secondary_nodes) == 1
9660 pnode = instance.primary_node
9661 snode = instance.secondary_nodes[0]
9662 feedback_fn("Converting template to plain")
9664 old_disks = instance.disks
9665 new_disks = [d.children[0] for d in old_disks]
9667 # copy over size and mode
9668 for parent, child in zip(old_disks, new_disks):
9669 child.size = parent.size
9670 child.mode = parent.mode
9672 # this is a DRBD disk, return its port to the pool
9673 # NOTE: this must be done right before the call to cfg.Update!
9674 for disk in old_disks:
9675 tcp_port = disk.logical_id[2]
9676 self.cfg.AddTcpUdpPort(tcp_port)
9678 # update instance structure
9679 instance.disks = new_disks
9680 instance.disk_template = constants.DT_PLAIN
9681 self.cfg.Update(instance, feedback_fn)
9683 feedback_fn("Removing volumes on the secondary node...")
9684 for disk in old_disks:
9685 self.cfg.SetDiskID(disk, snode)
9686 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9688 self.LogWarning("Could not remove block device %s on node %s,"
9689 " continuing anyway: %s", disk.iv_name, snode, msg)
9691 feedback_fn("Removing unneeded volumes on the primary node...")
9692 for idx, disk in enumerate(old_disks):
9693 meta = disk.children[1]
9694 self.cfg.SetDiskID(meta, pnode)
9695 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9697 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9698 " continuing anyway: %s", idx, pnode, msg)
9700 def Exec(self, feedback_fn):
9701 """Modifies an instance.
9703 All parameters take effect only at the next restart of the instance.
9706 # Process here the warnings from CheckPrereq, as we don't have a
9707 # feedback_fn there.
9708 for warn in self.warn:
9709 feedback_fn("WARNING: %s" % warn)
9712 instance = self.instance
9714 for disk_op, disk_dict in self.op.disks:
9715 if disk_op == constants.DDM_REMOVE:
9716 # remove the last disk
9717 device = instance.disks.pop()
9718 device_idx = len(instance.disks)
9719 for node, disk in device.ComputeNodeTree(instance.primary_node):
9720 self.cfg.SetDiskID(disk, node)
9721 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9723 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9724 " continuing anyway", device_idx, node, msg)
9725 result.append(("disk/%d" % device_idx, "remove"))
9727 # if this is a DRBD disk, return its port to the pool
9728 if device.dev_type in constants.LDS_DRBD:
9729 tcp_port = device.logical_id[2]
9730 self.cfg.AddTcpUdpPort(tcp_port)
9731 elif disk_op == constants.DDM_ADD:
9733 if instance.disk_template == constants.DT_FILE:
9734 file_driver, file_path = instance.disks[0].logical_id
9735 file_path = os.path.dirname(file_path)
9737 file_driver = file_path = None
9738 disk_idx_base = len(instance.disks)
9739 new_disk = _GenerateDiskTemplate(self,
9740 instance.disk_template,
9741 instance.name, instance.primary_node,
9742 instance.secondary_nodes,
9746 disk_idx_base, feedback_fn)[0]
9747 instance.disks.append(new_disk)
9748 info = _GetInstanceInfoText(instance)
9750 logging.info("Creating volume %s for instance %s",
9751 new_disk.iv_name, instance.name)
9752 # Note: this needs to be kept in sync with _CreateDisks
9754 for node in instance.all_nodes:
9755 f_create = node == instance.primary_node
9757 _CreateBlockDev(self, node, instance, new_disk,
9758 f_create, info, f_create)
9759 except errors.OpExecError, err:
9760 self.LogWarning("Failed to create volume %s (%s) on"
9762 new_disk.iv_name, new_disk, node, err)
9763 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9764 (new_disk.size, new_disk.mode)))
9766 # change a given disk
9767 instance.disks[disk_op].mode = disk_dict['mode']
9768 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9770 if self.op.disk_template:
9771 r_shut = _ShutdownInstanceDisks(self, instance)
9773 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9774 " proceed with disk template conversion")
9775 mode = (instance.disk_template, self.op.disk_template)
9777 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9779 self.cfg.ReleaseDRBDMinors(instance.name)
9781 result.append(("disk_template", self.op.disk_template))
9784 for nic_op, nic_dict in self.op.nics:
9785 if nic_op == constants.DDM_REMOVE:
9786 # remove the last nic
9787 del instance.nics[-1]
9788 result.append(("nic.%d" % len(instance.nics), "remove"))
9789 elif nic_op == constants.DDM_ADD:
9790 # mac and bridge should be set by now
9791 mac = nic_dict['mac']
9792 ip = nic_dict.get('ip', None)
9793 nicparams = self.nic_pinst[constants.DDM_ADD]
9794 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9795 instance.nics.append(new_nic)
9796 result.append(("nic.%d" % (len(instance.nics) - 1),
9797 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9798 (new_nic.mac, new_nic.ip,
9799 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9800 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9803 for key in 'mac', 'ip':
9805 setattr(instance.nics[nic_op], key, nic_dict[key])
9806 if nic_op in self.nic_pinst:
9807 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9808 for key, val in nic_dict.iteritems():
9809 result.append(("nic.%s/%d" % (key, nic_op), val))
9812 if self.op.hvparams:
9813 instance.hvparams = self.hv_inst
9814 for key, val in self.op.hvparams.iteritems():
9815 result.append(("hv/%s" % key, val))
9818 if self.op.beparams:
9819 instance.beparams = self.be_inst
9820 for key, val in self.op.beparams.iteritems():
9821 result.append(("be/%s" % key, val))
9825 instance.os = self.op.os_name
9828 if self.op.osparams:
9829 instance.osparams = self.os_inst
9830 for key, val in self.op.osparams.iteritems():
9831 result.append(("os/%s" % key, val))
9833 self.cfg.Update(instance, feedback_fn)
9837 _DISK_CONVERSIONS = {
9838 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9839 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
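# Dispatch sketch: Exec() above selects the conversion helper by the
# (current template, requested template) pair, e.g.
#   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)](self,
#                                                                    feedback_fn)
# runs _ConvertPlainToDrbd.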
9843 class LUBackupQuery(NoHooksLU):
9844 """Query the exports list
9849 def ExpandNames(self):
9850 self.needed_locks = {}
9851 self.share_locks[locking.LEVEL_NODE] = 1
9852 if not self.op.nodes:
9853 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9855 self.needed_locks[locking.LEVEL_NODE] = \
9856 _GetWantedNodes(self, self.op.nodes)
9858 def Exec(self, feedback_fn):
9859 """Compute the list of all the exported system images.
9862 @return: a dictionary with the structure node->(export-list)
9863 where export-list is a list of the instances exported on that node.
9867 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9868 rpcresult = self.rpc.call_export_list(self.nodes)
9870 for node in rpcresult:
9871 if rpcresult[node].fail_msg:
9872 result[node] = False
9874 result[node] = rpcresult[node].payload
9879 class LUBackupPrepare(NoHooksLU):
9880 """Prepares an instance for an export and returns useful information.
9885 def ExpandNames(self):
9886 self._ExpandAndLockInstance()
9888 def CheckPrereq(self):
9889 """Check prerequisites.
9892 instance_name = self.op.instance_name
9894 self.instance = self.cfg.GetInstanceInfo(instance_name)
9895 assert self.instance is not None, \
9896 "Cannot retrieve locked instance %s" % self.op.instance_name
9897 _CheckNodeOnline(self, self.instance.primary_node)
9899 self._cds = _GetClusterDomainSecret()
9901 def Exec(self, feedback_fn):
9902 """Prepares an instance for an export.
9905 instance = self.instance
9907 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9908 salt = utils.GenerateSecret(8)
9910 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9911 result = self.rpc.call_x509_cert_create(instance.primary_node,
9912 constants.RIE_CERT_VALIDITY)
9913 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9915 (name, cert_pem) = result.payload
9917 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9921 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9922 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9924 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
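# Sketch of the data assembled above for a remote export (the elided parts
# are assumptions): a dict with a "handshake" blob, an "x509_key_name"
# triple of (key name, HMAC of the name, salt) and the signed "x509_ca"
# certificate, presumably used by the destination side to verify the
# transfer.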
9930 class LUBackupExport(LogicalUnit):
9931 """Export an instance to an image in the cluster.
9934 HPATH = "instance-export"
9935 HTYPE = constants.HTYPE_INSTANCE
9938 def CheckArguments(self):
9939 """Check the arguments.
9942 self.x509_key_name = self.op.x509_key_name
9943 self.dest_x509_ca_pem = self.op.destination_x509_ca
9945 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9946 if not self.x509_key_name:
9947 raise errors.OpPrereqError("Missing X509 key name for encryption",
9950 if not self.dest_x509_ca_pem:
9951 raise errors.OpPrereqError("Missing destination X509 CA",
9954 def ExpandNames(self):
9955 self._ExpandAndLockInstance()
9957 # Lock all nodes for local exports
9958 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9959 # FIXME: lock only instance primary and destination node
9961 # Sad but true, for now we have to lock all nodes, as we don't know where
9962 # the previous export might be, and in this LU we search for it and
9963 # remove it from its current node. In the future we could fix this by:
9964 # - making a tasklet to search (share-lock all), then create the
9965 # new one, then one to remove, after
9966 # - removing the removal operation altogether
9967 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9969 def DeclareLocks(self, level):
9970 """Last minute lock declaration."""
9971 # All nodes are locked anyway, so nothing to do here.
9973 def BuildHooksEnv(self):
9976 This will run on the master, primary node and target node.
9980 "EXPORT_MODE": self.op.mode,
9981 "EXPORT_NODE": self.op.target_node,
9982 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9983 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9984 # TODO: Generic function for boolean env variables
9985 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9988 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9990 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9992 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9993 nl.append(self.op.target_node)
9997 def CheckPrereq(self):
9998 """Check prerequisites.
10000 This checks that the instance and node names are valid.
10003 instance_name = self.op.instance_name
10005 self.instance = self.cfg.GetInstanceInfo(instance_name)
10006 assert self.instance is not None, \
10007 "Cannot retrieve locked instance %s" % self.op.instance_name
10008 _CheckNodeOnline(self, self.instance.primary_node)
10010 if (self.op.remove_instance and self.instance.admin_up and
10011 not self.op.shutdown):
10012 raise errors.OpPrereqError("Can not remove instance without shutting it"
10015 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10016 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10017 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10018 assert self.dst_node is not None
10020 _CheckNodeOnline(self, self.dst_node.name)
10021 _CheckNodeNotDrained(self, self.dst_node.name)
10024 self.dest_disk_info = None
10025 self.dest_x509_ca = None
10027 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10028 self.dst_node = None
10030 if len(self.op.target_node) != len(self.instance.disks):
10031 raise errors.OpPrereqError(("Received destination information for %s"
10032 " disks, but instance %s has %s disks") %
10033 (len(self.op.target_node), instance_name,
10034 len(self.instance.disks)),
10035 errors.ECODE_INVAL)
10037 cds = _GetClusterDomainSecret()
10039 # Check X509 key name
10041 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10042 except (TypeError, ValueError), err:
10043 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10045 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10046 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10047 errors.ECODE_INVAL)
10049 # Load and verify CA
10051 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10052 except OpenSSL.crypto.Error, err:
10053 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10054 (err, ), errors.ECODE_INVAL)
10056 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10057 if errcode is not None:
10058 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10059 (msg, ), errors.ECODE_INVAL)
10061 self.dest_x509_ca = cert
10063 # Verify target information
10065 for idx, disk_data in enumerate(self.op.target_node):
10067 (host, port, magic) = \
10068 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10069 except errors.GenericError, err:
10070 raise errors.OpPrereqError("Target info for disk %s: %s" %
10071 (idx, err), errors.ECODE_INVAL)
10073 disk_info.append((host, port, magic))
10075 assert len(disk_info) == len(self.op.target_node)
10076 self.dest_disk_info = disk_info
10079 raise errors.ProgrammerError("Unhandled export mode %r" %
10082 # instance disk type verification
10083 # TODO: Implement export support for file-based disks
10084 for disk in self.instance.disks:
10085 if disk.dev_type == constants.LD_FILE:
10086 raise errors.OpPrereqError("Export not supported for instances with"
10087 " file-based disks", errors.ECODE_INVAL)
10089 def _CleanupExports(self, feedback_fn):
10090 """Removes exports of current instance from all other nodes.
10092 If an instance in a cluster with nodes A..D was exported to node C, its
10093 exports will be removed from the nodes A, B and D.
10096 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10098 nodelist = self.cfg.GetNodeList()
10099 nodelist.remove(self.dst_node.name)
10101 # on one-node clusters nodelist will be empty after the removal
10102 # if we proceed the backup would be removed because OpBackupQuery
10103 # substitutes an empty list with the full cluster node list.
10104 iname = self.instance.name
10106 feedback_fn("Removing old exports for instance %s" % iname)
10107 exportlist = self.rpc.call_export_list(nodelist)
10108 for node in exportlist:
10109 if exportlist[node].fail_msg:
10111 if iname in exportlist[node].payload:
10112 msg = self.rpc.call_export_remove(node, iname).fail_msg
10114 self.LogWarning("Could not remove older export for instance %s"
10115 " on node %s: %s", iname, node, msg)
10117 def Exec(self, feedback_fn):
10118 """Export an instance to an image in the cluster.
10121 assert self.op.mode in constants.EXPORT_MODES
10123 instance = self.instance
10124 src_node = instance.primary_node
10126 if self.op.shutdown:
10127 # shutdown the instance, but not the disks
10128 feedback_fn("Shutting down instance %s" % instance.name)
10129 result = self.rpc.call_instance_shutdown(src_node, instance,
10130 self.op.shutdown_timeout)
10131 # TODO: Maybe ignore failures if ignore_remove_failures is set
10132 result.Raise("Could not shutdown instance %s on"
10133 " node %s" % (instance.name, src_node))
10135 # set the disks ID correctly since call_instance_start needs the
10136 # correct drbd minor to create the symlinks
10137 for disk in instance.disks:
10138 self.cfg.SetDiskID(disk, src_node)
10140 activate_disks = (not instance.admin_up)
10143 # Activate the instance disks if we're exporting a stopped instance
10144 feedback_fn("Activating disks for %s" % instance.name)
10145 _StartInstanceDisks(self, instance, None)
10148 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10151 helper.CreateSnapshots()
10153 if (self.op.shutdown and instance.admin_up and
10154 not self.op.remove_instance):
10155 assert not activate_disks
10156 feedback_fn("Starting instance %s" % instance.name)
10157 result = self.rpc.call_instance_start(src_node, instance, None, None)
10158 msg = result.fail_msg
10160 feedback_fn("Failed to start instance: %s" % msg)
10161 _ShutdownInstanceDisks(self, instance)
10162 raise errors.OpExecError("Could not start instance: %s" % msg)
10164 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10165 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10166 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10167 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10168 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10170 (key_name, _, _) = self.x509_key_name
10173 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10176 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10177 key_name, dest_ca_pem,
10182 # Check for backwards compatibility
10183 assert len(dresults) == len(instance.disks)
10184 assert compat.all(isinstance(i, bool) for i in dresults), \
10185 "Not all results are boolean: %r" % dresults
10189 feedback_fn("Deactivating disks for %s" % instance.name)
10190 _ShutdownInstanceDisks(self, instance)
10192 if not (compat.all(dresults) and fin_resu):
10195 failures.append("export finalization")
10196 if not compat.all(dresults):
10197 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10199 failures.append("disk export: disk(s) %s" % fdsk)
10201 raise errors.OpExecError("Export failed, errors in %s" %
10202 utils.CommaJoin(failures))
10204 # At this point, the export was successful, we can cleanup/finish
10206 # Remove instance if requested
10207 if self.op.remove_instance:
10208 feedback_fn("Removing instance %s" % instance.name)
10209 _RemoveInstance(self, feedback_fn, instance,
10210 self.op.ignore_remove_failures)
10212 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10213 self._CleanupExports(feedback_fn)
10215 return fin_resu, dresults
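# Result-handling sketch (hypothetical caller, not part of this module): the
# pair returned above can be checked as
#   (fin_resu, dresults) = result
#   ok = fin_resu and compat.all(dresults)
# with one boolean in dresults per instance disk, as asserted in Exec().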
10218 class LUBackupRemove(NoHooksLU):
10219 """Remove exports related to the named instance.
10224 def ExpandNames(self):
10225 self.needed_locks = {}
10226 # We need all nodes to be locked in order for RemoveExport to work, but we
10227 # don't need to lock the instance itself, as nothing will happen to it (and
10228 # we can remove exports also for a removed instance)
10229 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10231 def Exec(self, feedback_fn):
10232 """Remove any export.
10235 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10236 # If the instance was not found we'll try with the name that was passed in.
10237 # This will only work if it was an FQDN, though.
10239 if not instance_name:
10241 instance_name = self.op.instance_name
10243 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10244 exportlist = self.rpc.call_export_list(locked_nodes)
10246 for node in exportlist:
10247 msg = exportlist[node].fail_msg
10249 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10251 if instance_name in exportlist[node].payload:
10253 result = self.rpc.call_export_remove(node, instance_name)
10254 msg = result.fail_msg
10256 logging.error("Could not remove export for instance %s"
10257 " on node %s: %s", instance_name, node, msg)
10259 if fqdn_warn and not found:
10260 feedback_fn("Export not found. If trying to remove an export belonging"
10261 " to a deleted instance please use its Fully Qualified Domain Name.")
10265 class LUGroupAdd(LogicalUnit):
10266 """Logical unit for creating node groups.
10269 HPATH = "group-add"
10270 HTYPE = constants.HTYPE_GROUP
10273 def ExpandNames(self):
10274 # We need the new group's UUID here so that we can create and acquire the
10275 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10276 # that it should not check whether the UUID exists in the configuration.
10277 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10278 self.needed_locks = {}
10279 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10281 def CheckPrereq(self):
10282 """Check prerequisites.
10284 This checks that the given group name is not an existing node group
10289 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10290 except errors.OpPrereqError:
10293 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10294 " node group (UUID: %s)" %
10295 (self.op.group_name, existing_uuid),
10296 errors.ECODE_EXISTS)
10298 if self.op.ndparams:
10299 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10301 def BuildHooksEnv(self):
10302 """Build hooks env.
10306 "GROUP_NAME": self.op.group_name,
10308 mn = self.cfg.GetMasterNode()
10309 return env, [mn], [mn]
10311 def Exec(self, feedback_fn):
10312 """Add the node group to the cluster.
10315 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10316 uuid=self.group_uuid,
10317 alloc_policy=self.op.alloc_policy,
10318 ndparams=self.op.ndparams)
10320 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10321 del self.remove_locks[locking.LEVEL_NODEGROUP]
10324 class LUGroupAssignNodes(NoHooksLU):
10325 """Logical unit for assigning nodes to groups.
10330 def ExpandNames(self):
10331 # These raise errors.OpPrereqError on their own:
10332 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10333 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10335 # We want to lock all the affected nodes and groups. We have readily
10336 # available the list of nodes, and the *destination* group. To gather the
10337 # list of "source" groups, we need to fetch node information later on.
10338 self.needed_locks = {
10339 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
10340 locking.LEVEL_NODE: self.op.nodes,
10343 def DeclareLocks(self, level):
10344 if level == locking.LEVEL_NODEGROUP:
10345 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
10347 # Try to get all affected nodes' groups without having the group or node
10348 # lock yet. Needs verification later in the code flow.
10349 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
10351 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
10353 def CheckPrereq(self):
10354 """Check prerequisites.
10357 assert self.needed_locks[locking.LEVEL_NODEGROUP]
10358 assert (frozenset(self.acquired_locks[locking.LEVEL_NODE]) ==
10359 frozenset(self.op.nodes))
10361 expected_locks = (set([self.group_uuid]) |
10362 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
10363 actual_locks = self.acquired_locks[locking.LEVEL_NODEGROUP]
10364 if actual_locks != expected_locks:
10365 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
10366 " current groups are '%s', used to be '%s'" %
10367 (utils.CommaJoin(expected_locks),
10368 utils.CommaJoin(actual_locks)))
10370 self.node_data = self.cfg.GetAllNodesInfo()
10371 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10372 instance_data = self.cfg.GetAllInstancesInfo()
10374 if self.group is None:
10375 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10376 (self.op.group_name, self.group_uuid))
10378 (new_splits, previous_splits) = \
10379 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10380 for node in self.op.nodes],
10381 self.node_data, instance_data)
10384 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10386 if not self.op.force:
10387 raise errors.OpExecError("The following instances get split by this"
10388 " change and --force was not given: %s" %
10391 self.LogWarning("This operation will split the following instances: %s",
10394 if previous_splits:
10395 self.LogWarning("In addition, these already-split instances continue"
10396 " to be split across groups: %s",
10397 utils.CommaJoin(utils.NiceSort(previous_splits)))
10399 def Exec(self, feedback_fn):
10400 """Assign nodes to a new group.
10403 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
10405 self.cfg.AssignGroupNodes(mods)
10408 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10409 """Check for split instances after a node assignment.
10411 This method considers a series of node assignments as an atomic operation,
10412 and returns information about split instances after applying the set of
10415 In particular, it returns information about newly split instances, and
10416 instances that were already split, and remain so after the change.
10418 Only instances whose disk template is listed in constants.DTS_NET_MIRROR are considered.
10421 @type changes: list of (node_name, new_group_uuid) pairs.
10422 @param changes: list of node assignments to consider.
10423 @param node_data: a dict with data for all nodes
10424 @param instance_data: a dict with all instances to consider
10425 @rtype: a two-tuple
10426 @return: a list of instances that were previously okay and become split as a
10427 consequence of this change, and a list of instances that were previously
10428 split and that this change does not fix.
10431 changed_nodes = dict((node, group) for node, group in changes
10432 if node_data[node].group != group)
10434 all_split_instances = set()
10435 previously_split_instances = set()
10437 def InstanceNodes(instance):
10438 return [instance.primary_node] + list(instance.secondary_nodes)
10440 for inst in instance_data.values():
10441 if inst.disk_template not in constants.DTS_NET_MIRROR:
10444 instance_nodes = InstanceNodes(inst)
10446 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10447 previously_split_instances.add(inst.name)
10449 if len(set(changed_nodes.get(node, node_data[node].group)
10450 for node in instance_nodes)) > 1:
10451 all_split_instances.add(inst.name)
10453 return (list(all_split_instances - previously_split_instances),
10454 list(previously_split_instances & all_split_instances))
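# Worked example (hypothetical cluster): nodes A and B are in group g1 and
# node C is in group g2; a DRBD instance has primary A and secondary B.
# Reassigning B to g2 makes the instance newly split, so it lands in the
# first returned list; an instance that already spanned g1 and g2 before the
# change (and still does) lands in the second list instead.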
10457 class _GroupQuery(_QueryBase):
10459 FIELDS = query.GROUP_FIELDS
10461 def ExpandNames(self, lu):
10462 lu.needed_locks = {}
10464 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10465 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10468 self.wanted = [name_to_uuid[name]
10469 for name in utils.NiceSort(name_to_uuid.keys())]
10471 # Accept names to be either names or UUIDs.
10474 all_uuid = frozenset(self._all_groups.keys())
10476 for name in self.names:
10477 if name in all_uuid:
10478 self.wanted.append(name)
10479 elif name in name_to_uuid:
10480 self.wanted.append(name_to_uuid[name])
10482 missing.append(name)
10485 raise errors.OpPrereqError("Some groups do not exist: %s" %
10486 utils.CommaJoin(missing),
10487 errors.ECODE_NOENT)
10489 def DeclareLocks(self, lu, level):
10492 def _GetQueryData(self, lu):
10493 """Computes the list of node groups and their attributes.
10496 do_nodes = query.GQ_NODE in self.requested_data
10497 do_instances = query.GQ_INST in self.requested_data
10499 group_to_nodes = None
10500 group_to_instances = None
10502 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10503 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10504 # latter GetAllInstancesInfo() is not enough, for we have to go through
10505 # instance->node. Hence, we will need to process nodes even if we only need
10506 # instance information.
10507 if do_nodes or do_instances:
10508 all_nodes = lu.cfg.GetAllNodesInfo()
10509 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10512 for node in all_nodes.values():
10513 if node.group in group_to_nodes:
10514 group_to_nodes[node.group].append(node.name)
10515 node_to_group[node.name] = node.group
10518 all_instances = lu.cfg.GetAllInstancesInfo()
10519 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10521 for instance in all_instances.values():
10522 node = instance.primary_node
10523 if node in node_to_group:
10524 group_to_instances[node_to_group[node]].append(instance.name)
10527 # Do not pass on node information if it was not requested.
10528 group_to_nodes = None
10530 return query.GroupQueryData([self._all_groups[uuid]
10531 for uuid in self.wanted],
10532 group_to_nodes, group_to_instances)
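# Mapping sketch (illustrative shapes): with GQ_NODE and GQ_INST requested,
#   group_to_nodes     = {"<group uuid>": ["node1", "node2", ...]}
#   group_to_instances = {"<group uuid>": ["inst1", ...]}
# both keyed by the group UUIDs in self.wanted, as built above.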
10535 class LUGroupQuery(NoHooksLU):
10536 """Logical unit for querying node groups.
10541 def CheckArguments(self):
10542 self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10544 def ExpandNames(self):
10545 self.gq.ExpandNames(self)
10547 def Exec(self, feedback_fn):
10548 return self.gq.OldStyleQuery(self)
10551 class LUGroupSetParams(LogicalUnit):
10552 """Modifies the parameters of a node group.
10555 HPATH = "group-modify"
10556 HTYPE = constants.HTYPE_GROUP
10559 def CheckArguments(self):
10562 self.op.alloc_policy,
10565 if all_changes.count(None) == len(all_changes):
10566 raise errors.OpPrereqError("Please pass at least one modification",
10567 errors.ECODE_INVAL)
10569 def ExpandNames(self):
10570 # This raises errors.OpPrereqError on its own:
10571 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10573 self.needed_locks = {
10574 locking.LEVEL_NODEGROUP: [self.group_uuid],
10577 def CheckPrereq(self):
10578 """Check prerequisites.
10581 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10583 if self.group is None:
10584 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10585 (self.op.group_name, self.group_uuid))
10587 if self.op.ndparams:
10588 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10589 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10590 self.new_ndparams = new_ndparams
10592 def BuildHooksEnv(self):
10593 """Build hooks env.
10597 "GROUP_NAME": self.op.group_name,
10598 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10600 mn = self.cfg.GetMasterNode()
10601 return env, [mn], [mn]
10603 def Exec(self, feedback_fn):
10604 """Modifies the node group.
10609 if self.op.ndparams:
10610 self.group.ndparams = self.new_ndparams
10611 result.append(("ndparams", str(self.group.ndparams)))
10613 if self.op.alloc_policy:
10614 self.group.alloc_policy = self.op.alloc_policy
10616 self.cfg.Update(self.group, feedback_fn)
10621 class LUGroupRemove(LogicalUnit):
10622 HPATH = "group-remove"
10623 HTYPE = constants.HTYPE_GROUP
10626 def ExpandNames(self):
10627 # This raises errors.OpPrereqError on its own:
10628 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10629 self.needed_locks = {
10630 locking.LEVEL_NODEGROUP: [self.group_uuid],
10633 def CheckPrereq(self):
10634 """Check prerequisites.
10636 This checks that the given group name exists as a node group, that it is
10637 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
10641 # Verify that the group is empty.
10642 group_nodes = [node.name
10643 for node in self.cfg.GetAllNodesInfo().values()
10644 if node.group == self.group_uuid]
10647 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10649 (self.op.group_name,
10650 utils.CommaJoin(utils.NiceSort(group_nodes))),
10651 errors.ECODE_STATE)
10653 # Verify the cluster would not be left group-less.
10654 if len(self.cfg.GetNodeGroupList()) == 1:
10655 raise errors.OpPrereqError("Group '%s' is the only group,"
10656 " cannot be removed" %
10657 self.op.group_name,
10658 errors.ECODE_STATE)
10660 def BuildHooksEnv(self):
10661 """Build hooks env.
10665 "GROUP_NAME": self.op.group_name,
10667 mn = self.cfg.GetMasterNode()
10668 return env, [mn], [mn]
10670 def Exec(self, feedback_fn):
10671 """Remove the node group.
10675 self.cfg.RemoveNodeGroup(self.group_uuid)
10676 except errors.ConfigurationError:
10677 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10678 (self.op.group_name, self.group_uuid))
10680 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10683 class LUGroupRename(LogicalUnit):
10684 HPATH = "group-rename"
10685 HTYPE = constants.HTYPE_GROUP
10688 def ExpandNames(self):
10689 # This raises errors.OpPrereqError on its own:
10690 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10692 self.needed_locks = {
10693 locking.LEVEL_NODEGROUP: [self.group_uuid],
10696 def CheckPrereq(self):
10697 """Check prerequisites.
10699 This checks that the given old_name exists as a node group, and that the new name is not already in use.
10704 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10705 except errors.OpPrereqError:
10708 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10709 " node group (UUID: %s)" %
10710 (self.op.new_name, new_name_uuid),
10711 errors.ECODE_EXISTS)
10713 def BuildHooksEnv(self):
10714 """Build hooks env.
10718 "OLD_NAME": self.op.old_name,
10719 "NEW_NAME": self.op.new_name,
10722 mn = self.cfg.GetMasterNode()
10723 all_nodes = self.cfg.GetAllNodesInfo()
10725 all_nodes.pop(mn, None)
10727 for node in all_nodes.values():
10728 if node.group == self.group_uuid:
10729 run_nodes.append(node.name)
10731 return env, run_nodes, run_nodes
10733 def Exec(self, feedback_fn):
10734 """Rename the node group.
10737 group = self.cfg.GetNodeGroup(self.group_uuid)
10740 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10741 (self.op.old_name, self.group_uuid))
10743 group.name = self.op.new_name
10744 self.cfg.Update(group, feedback_fn)
10746 return self.op.new_name
10749 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10750 """Generic tags LU.
10752 This is an abstract class which is the parent of all the other tags LUs.
10756 def ExpandNames(self):
10757 self.needed_locks = {}
10758 if self.op.kind == constants.TAG_NODE:
10759 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10760 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10761 elif self.op.kind == constants.TAG_INSTANCE:
10762 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10763 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10765 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10766 # not possible to acquire the BGL based on opcode parameters)
10768 def CheckPrereq(self):
10769 """Check prerequisites.
10772 if self.op.kind == constants.TAG_CLUSTER:
10773 self.target = self.cfg.GetClusterInfo()
10774 elif self.op.kind == constants.TAG_NODE:
10775 self.target = self.cfg.GetNodeInfo(self.op.name)
10776 elif self.op.kind == constants.TAG_INSTANCE:
10777 self.target = self.cfg.GetInstanceInfo(self.op.name)
10779 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10780 str(self.op.kind), errors.ECODE_INVAL)
10783 class LUTagsGet(TagsLU):
10784 """Returns the tags of a given object.
10789 def ExpandNames(self):
10790 TagsLU.ExpandNames(self)
10792 # Share locks as this is only a read operation
10793 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10795 def Exec(self, feedback_fn):
10796 """Returns the tag list.
10799 return list(self.target.GetTags())
10802 class LUTagsSearch(NoHooksLU):
10803 """Searches the tags for a given pattern.
10808 def ExpandNames(self):
10809 self.needed_locks = {}
10811 def CheckPrereq(self):
10812 """Check prerequisites.
10814 This checks the pattern passed for validity by compiling it.
10818 self.re = re.compile(self.op.pattern)
10819 except re.error, err:
10820 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10821 (self.op.pattern, err), errors.ECODE_INVAL)
10823 def Exec(self, feedback_fn):
10824 """Returns the tag list.
10828 tgts = [("/cluster", cfg.GetClusterInfo())]
10829 ilist = cfg.GetAllInstancesInfo().values()
10830 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10831 nlist = cfg.GetAllNodesInfo().values()
10832 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10834 for path, target in tgts:
10835 for tag in target.GetTags():
10836 if self.re.search(tag):
10837 results.append((path, tag))
10841 class LUTagsSet(TagsLU):
10842 """Sets a tag on a given object.
10847 def CheckPrereq(self):
10848 """Check prerequisites.
10850 This checks the type and length of the tag name and value.
10853 TagsLU.CheckPrereq(self)
10854 for tag in self.op.tags:
10855 objects.TaggableObject.ValidateTag(tag)
10857 def Exec(self, feedback_fn):
10862 for tag in self.op.tags:
10863 self.target.AddTag(tag)
10864 except errors.TagError, err:
10865 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10866 self.cfg.Update(self.target, feedback_fn)
10869 class LUTagsDel(TagsLU):
10870 """Delete a list of tags from a given object.
10875 def CheckPrereq(self):
10876 """Check prerequisites.
10878 This checks that we have the given tag.
10881 TagsLU.CheckPrereq(self)
10882 for tag in self.op.tags:
10883 objects.TaggableObject.ValidateTag(tag)
10884 del_tags = frozenset(self.op.tags)
10885 cur_tags = self.target.GetTags()
10887 diff_tags = del_tags - cur_tags
10889 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10890 raise errors.OpPrereqError("Tag(s) %s not found" %
10891 (utils.CommaJoin(diff_names), ),
10892 errors.ECODE_NOENT)
10894 def Exec(self, feedback_fn):
10895 """Remove the tag from the object.
10898 for tag in self.op.tags:
10899 self.target.RemoveTag(tag)
10900 self.cfg.Update(self.target, feedback_fn)
10903 class LUTestDelay(NoHooksLU):
10904 """Sleep for a specified amount of time.
10906 This LU sleeps on the master and/or nodes for a specified amount of time.
10912 def ExpandNames(self):
10913 """Expand names and set required locks.
10915 This expands the node list, if any.
10918 self.needed_locks = {}
10919 if self.op.on_nodes:
10920 # _GetWantedNodes can be used here, but is not always appropriate to use
10921 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10922 # more information.
10923 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10924 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10926 def _TestDelay(self):
10927 """Do the actual sleep.
10930 if self.op.on_master:
10931 if not utils.TestDelay(self.op.duration):
10932 raise errors.OpExecError("Error during master delay test")
10933 if self.op.on_nodes:
10934 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10935 for node, node_result in result.items():
10936 node_result.Raise("Failure during rpc call to node %s" % node)
10938 def Exec(self, feedback_fn):
10939 """Execute the test delay opcode, with the wanted repetitions.
10942 if self.op.repeat == 0:
10945 top_value = self.op.repeat - 1
10946 for i in range(self.op.repeat):
10947 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10951 class LUTestJqueue(NoHooksLU):
10952 """Utility LU to test some aspects of the job queue.
10957 # Must be lower than default timeout for WaitForJobChange to see whether it
10958 # notices changed jobs
10959 _CLIENT_CONNECT_TIMEOUT = 20.0
10960 _CLIENT_CONFIRM_TIMEOUT = 60.0
10963 def _NotifyUsingSocket(cls, cb, errcls):
10964 """Opens a Unix socket and waits for another program to connect.
10967 @param cb: Callback to send socket name to client
10968 @type errcls: class
10969 @param errcls: Exception class to use for errors
10972 # Using a temporary directory as there's no easy way to create temporary
10973 # sockets without writing a custom loop around tempfile.mktemp and
10975 tmpdir = tempfile.mkdtemp()
10977 tmpsock = utils.PathJoin(tmpdir, "sock")
10979 logging.debug("Creating temporary socket at %s", tmpsock)
10980 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10985 # Send details to client
10988 # Wait for client to connect before continuing
10989 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10991 (conn, _) = sock.accept()
10992 except socket.error, err:
10993 raise errcls("Client didn't connect in time (%s)" % err)
10997 # Remove as soon as client is connected
10998 shutil.rmtree(tmpdir)
11000 # Wait for client to close
11003 # pylint: disable-msg=E1101
11004 # Instance of '_socketobject' has no ... member
11005 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11007 except socket.error, err:
11008 raise errcls("Client failed to confirm notification (%s)" % err)
11012 def _SendNotification(self, test, arg, sockname):
11013 """Sends a notification to the client.
11016 @param test: Test name
11017 @param arg: Test argument (depends on test)
11018 @type sockname: string
11019 @param sockname: Socket path
11022 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11024 def _Notify(self, prereq, test, arg):
11025 """Notifies the client of a test.
11028 @param prereq: Whether this is a prereq-phase test
11030 @param test: Test name
11031 @param arg: Test argument (depends on test)
11035 errcls = errors.OpPrereqError
11037 errcls = errors.OpExecError
11039 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11043 def CheckArguments(self):
11044 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11045 self.expandnames_calls = 0
11047 def ExpandNames(self):
11048 checkargs_calls = getattr(self, "checkargs_calls", 0)
11049 if checkargs_calls < 1:
11050 raise errors.ProgrammerError("CheckArguments was not called")
11052 self.expandnames_calls += 1
11054 if self.op.notify_waitlock:
11055 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11057 self.LogInfo("Expanding names")
11059 # Get lock on master node (just to get a lock, not for a particular reason)
11060 self.needed_locks = {
11061 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11064 def Exec(self, feedback_fn):
11065 if self.expandnames_calls < 1:
11066 raise errors.ProgrammerError("ExpandNames was not called")
11068 if self.op.notify_exec:
11069 self._Notify(False, constants.JQT_EXEC, None)
11071 self.LogInfo("Executing")
11073 if self.op.log_messages:
11074 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11075 for idx, msg in enumerate(self.op.log_messages):
11076 self.LogInfo("Sending log message %s", idx + 1)
11077 feedback_fn(constants.JQT_MSGPREFIX + msg)
11078 # Report how many test messages have been sent
11079 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11082 raise errors.OpExecError("Opcode failure was requested")
11087 class IAllocator(object):
11088 """IAllocator framework.
11090 An IAllocator instance has three sets of attributes:
11091 - cfg that is needed to query the cluster
11092 - input data (all members of the _KEYS class attribute are required)
11093 - four buffer attributes (in|out_data|text), that represent the
11094 input (to the external script) in text and data structure format,
11095 and the output from it, again in two formats
11096 - the result variables from the script (success, info, nodes) for easy usage
11100 # pylint: disable-msg=R0902
11101 # lots of instance attributes
11103 "name", "mem_size", "disks", "disk_template",
11104 "os", "tags", "nics", "vcpus", "hypervisor",
11107 "name", "relocate_from",
11113 def __init__(self, cfg, rpc, mode, **kwargs):
11116 # init buffer variables
11117 self.in_text = self.out_text = self.in_data = self.out_data = None
11118 # init all input fields so that pylint is happy
11120 self.mem_size = self.disks = self.disk_template = None
11121 self.os = self.tags = self.nics = self.vcpus = None
11122 self.hypervisor = None
11123 self.relocate_from = None
11125 self.evac_nodes = None
11127 self.required_nodes = None
11128 # init result fields
11129 self.success = self.info = self.result = None
11130 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11131 keyset = self._ALLO_KEYS
11132 fn = self._AddNewInstance
11133 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11134 keyset = self._RELO_KEYS
11135 fn = self._AddRelocateInstance
11136 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11137 keyset = self._EVAC_KEYS
11138 fn = self._AddEvacuateNodes
11139 else:
11140 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11141 " IAllocator" % self.mode)
11142 for key in kwargs:
11143 if key not in keyset:
11144 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11145 " IAllocator" % key)
11146 setattr(self, key, kwargs[key])
11148 for key in keyset:
11149 if key not in kwargs:
11150 raise errors.ProgrammerError("Missing input parameter '%s' to"
11151 " IAllocator" % key)
11152 self._BuildInputData(fn)
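# Illustrative usage sketch (editorial addition, not part of the module): an
# IAllocator is constructed with the keyword arguments named in the keyset
# for its mode and then run against a named allocator script, mirroring what
# LUTestAllocator.Exec does further below. All values here are made up; the
# allocator name "hail" is only an example.
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_ALLOC,
#                    name="instance1.example.com",
#                    mem_size=1024, vcpus=1,
#                    disks=[{"size": 10240, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8,
#                    os="debian-image", tags=[], nics=[],
#                    hypervisor=constants.HT_XEN_PVM)
#   ial.Run("hail")
#   if ial.success:
#     chosen_nodes = ial.result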
11154 def _ComputeClusterData(self):
11155 """Compute the generic allocator input data.
11157 This is the data that is independent of the actual operation.
11160 cfg = self.cfg
11161 cluster_info = cfg.GetClusterInfo()
11162 # cluster data
11163 data = {
11164 "version": constants.IALLOCATOR_VERSION,
11165 "cluster_name": cfg.GetClusterName(),
11166 "cluster_tags": list(cluster_info.GetTags()),
11167 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11168 # we don't have job IDs
11169 }
11170 ninfo = cfg.GetAllNodesInfo()
11171 iinfo = cfg.GetAllInstancesInfo().values()
11172 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11175 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11177 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11178 hypervisor_name = self.hypervisor
11179 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11180 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11181 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11182 hypervisor_name = cluster_info.enabled_hypervisors[0]
11184 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11185 hypervisor_name)
11186 node_iinfo = \
11187 self.rpc.call_all_instances_info(node_list,
11188 cluster_info.enabled_hypervisors)
11190 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11192 config_ndata = self._ComputeBasicNodeData(ninfo)
11193 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11194 i_list, config_ndata)
11195 assert len(data["nodes"]) == len(ninfo), \
11196 "Incomplete node data computed"
11198 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11200 self.in_data = data
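# For orientation (editorial sketch): once _ComputeClusterData and
# _BuildInputData have run, self.in_data has roughly this shape; all keys
# below are the ones built by the surrounding code, the example values are
# placeholders:
#
#   {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [...],
#     "enabled_hypervisors": [...],
#     "nodegroups": {...},   # group uuid -> {"name", "alloc_policy"}
#     "nodes": {...},        # node name -> static + dynamic node dict
#     "instances": {...},    # instance name -> instance dict
#     "request": {...},      # mode-specific part, added by _BuildInputData
#   }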
11202 @staticmethod
11203 def _ComputeNodeGroupData(cfg):
11204 """Compute node groups data.
11207 ng = {}
11208 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11209 ng[guuid] = {
11210 "name": gdata.name,
11211 "alloc_policy": gdata.alloc_policy,
11212 }
11213 return ng
11215 @staticmethod
11216 def _ComputeBasicNodeData(node_cfg):
11217 """Compute static (configuration-derived) node data.
11220 @returns: a dict mapping node name to a dict of its static configuration
11221 values
11223 node_results = {}
11224 for ninfo in node_cfg.values():
11225 # fill in static (config-based) values
11227 "tags": list(ninfo.GetTags()),
11228 "primary_ip": ninfo.primary_ip,
11229 "secondary_ip": ninfo.secondary_ip,
11230 "offline": ninfo.offline,
11231 "drained": ninfo.drained,
11232 "master_candidate": ninfo.master_candidate,
11233 "group": ninfo.group,
11234 "master_capable": ninfo.master_capable,
11235 "vm_capable": ninfo.vm_capable,
11238 node_results[ninfo.name] = pnr
11240 return node_results
11242 @staticmethod
11243 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11244 node_results):
11245 """Compute dynamic node data, merging runtime node information into
11246 the static node data computed by _ComputeBasicNodeData.
11247 @param node_results: the basic node structures as filled from the config
11250 # make a copy of the current dict
11251 node_results = dict(node_results)
11252 for nname, nresult in node_data.items():
11253 assert nname in node_results, "Missing basic data for node %s" % nname
11254 ninfo = node_cfg[nname]
11256 if not (ninfo.offline or ninfo.drained):
11257 nresult.Raise("Can't get data for node %s" % nname)
11258 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11259 nname)
11260 remote_info = nresult.payload
11262 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11263 'vg_size', 'vg_free', 'cpu_total']:
11264 if attr not in remote_info:
11265 raise errors.OpExecError("Node '%s' didn't return attribute"
11266 " '%s'" % (nname, attr))
11267 if not isinstance(remote_info[attr], int):
11268 raise errors.OpExecError("Node '%s' returned invalid value"
11269 " for '%s': %s" %
11270 (nname, attr, remote_info[attr]))
11271 # compute memory used by primary instances
11272 i_p_mem = i_p_up_mem = 0
11273 for iinfo, beinfo in i_list:
11274 if iinfo.primary_node == nname:
11275 i_p_mem += beinfo[constants.BE_MEMORY]
11276 if iinfo.name not in node_iinfo[nname].payload:
11277 i_used_mem = 0
11278 else:
11279 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11280 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11281 remote_info['memory_free'] -= max(0, i_mem_diff)
11283 if iinfo.admin_up:
11284 i_p_up_mem += beinfo[constants.BE_MEMORY]
11286 # compute memory used by instances
11288 "total_memory": remote_info['memory_total'],
11289 "reserved_memory": remote_info['memory_dom0'],
11290 "free_memory": remote_info['memory_free'],
11291 "total_disk": remote_info['vg_size'],
11292 "free_disk": remote_info['vg_free'],
11293 "total_cpus": remote_info['cpu_total'],
11294 "i_pri_memory": i_p_mem,
11295 "i_pri_up_memory": i_p_up_mem,
11297 pnr_dyn.update(node_results[nname])
11298 node_results[nname] = pnr_dyn
11300 return node_results
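# Editorial sketch of one merged node entry as produced above (numbers are
# made up; the key names are exactly those used in the code):
#
#   "node1.example.com": {
#     # static part from _ComputeBasicNodeData
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group uuid>", "master_capable": True, "vm_capable": True,
#     # dynamic part, only filled in for online, non-drained nodes
#     "total_memory": 32768, "reserved_memory": 1024, "free_memory": 20480,
#     "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#     "i_pri_memory": 8192, "i_pri_up_memory": 6144,
#   }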
11302 @staticmethod
11303 def _ComputeInstanceData(cluster_info, i_list):
11304 """Compute global instance data.
11307 instance_data = {}
11308 for iinfo, beinfo in i_list:
11309 nic_data = []
11310 for nic in iinfo.nics:
11311 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11312 nic_dict = {"mac": nic.mac,
11314 "mode": filled_params[constants.NIC_MODE],
11315 "link": filled_params[constants.NIC_LINK],
11317 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11318 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11319 nic_data.append(nic_dict)
11321 "tags": list(iinfo.GetTags()),
11322 "admin_up": iinfo.admin_up,
11323 "vcpus": beinfo[constants.BE_VCPUS],
11324 "memory": beinfo[constants.BE_MEMORY],
11326 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11328 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11329 "disk_template": iinfo.disk_template,
11330 "hypervisor": iinfo.hypervisor,
11332 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11334 instance_data[iinfo.name] = pir
11336 return instance_data
11338 def _AddNewInstance(self):
11339 """Add new instance data to allocator structure.
11341 This in combination with _ComputeClusterData will create the
11342 correct structure needed as input for the allocator.
11344 The checks for the completeness of the opcode must have already been
11345 done.
11348 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11350 if self.disk_template in constants.DTS_NET_MIRROR:
11351 self.required_nodes = 2
11352 else:
11353 self.required_nodes = 1
11356 "disk_template": self.disk_template,
11359 "vcpus": self.vcpus,
11360 "memory": self.mem_size,
11361 "disks": self.disks,
11362 "disk_space_total": disk_space,
11364 "required_nodes": self.required_nodes,
11368 def _AddRelocateInstance(self):
11369 """Add relocate instance data to allocator structure.
11371 This in combination with _ComputeClusterData will create the
11372 correct structure needed as input for the allocator.
11374 The checks for the completeness of the opcode must have already been
11375 done.
11378 instance = self.cfg.GetInstanceInfo(self.name)
11379 if instance is None:
11380 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11381 " IAllocator" % self.name)
11383 if instance.disk_template not in constants.DTS_NET_MIRROR:
11384 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11385 errors.ECODE_INVAL)
11387 if len(instance.secondary_nodes) != 1:
11388 raise errors.OpPrereqError("Instance does not have exactly one"
11389 " secondary node", errors.ECODE_STATE)
11391 self.required_nodes = 1
11392 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11393 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11397 "disk_space_total": disk_space,
11398 "required_nodes": self.required_nodes,
11399 "relocate_from": self.relocate_from,
11403 def _AddEvacuateNodes(self):
11404 """Add evacuate nodes data to allocator structure.
11408 "evac_nodes": self.evac_nodes
11412 def _BuildInputData(self, fn):
11413 """Build input data structures.
11416 self._ComputeClusterData()
11419 request["type"] = self.mode
11420 self.in_data["request"] = request
11422 self.in_text = serializer.Dump(self.in_data)
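# Editorial sketch of the mode-specific "request" member built above; the
# keys come from the _Add* methods of this class and "type" is added here,
# before the whole structure is serialized into self.in_text:
#
#   IALLOCATOR_MODE_ALLOC: {"type": <mode>, "disk_template": ..., "vcpus": ...,
#                           "memory": ..., "disks": [...],
#                           "disk_space_total": ..., "required_nodes": 1 or 2}
#   IALLOCATOR_MODE_RELOC: {"type": <mode>, "disk_space_total": ...,
#                           "required_nodes": 1, "relocate_from": [...]}
#   IALLOCATOR_MODE_MEVAC: {"type": <mode>, "evac_nodes": [...]}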
11424 def Run(self, name, validate=True, call_fn=None):
11425 """Run an instance allocator and return the results.
11428 if call_fn is None:
11429 call_fn = self.rpc.call_iallocator_runner
11431 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11432 result.Raise("Failure while running the iallocator script")
11434 self.out_text = result.payload
11435 if validate:
11436 self._ValidateResult()
11438 def _ValidateResult(self):
11439 """Process the allocator results.
11441 This will process and if successful save the result in
11442 self.out_data and the other parameters.
11445 try:
11446 rdict = serializer.Load(self.out_text)
11447 except Exception, err:
11448 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11450 if not isinstance(rdict, dict):
11451 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11453 # TODO: remove backwards compatibility in later versions
11454 if "nodes" in rdict and "result" not in rdict:
11455 rdict["result"] = rdict["nodes"]
11458 for key in "success", "info", "result":
11459 if key not in rdict:
11460 raise errors.OpExecError("Can't parse iallocator results:"
11461 " missing key '%s'" % key)
11462 setattr(self, key, rdict[key])
11464 if not isinstance(rdict["result"], list):
11465 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11466 " is not a list")
11467 self.out_data = rdict
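# Editorial sketch of an allocator reply that passes _ValidateResult; only
# the presence of the three keys and the list type of "result" are enforced
# above, the values shown are illustrative:
#
#   {
#     "success": true,
#     "info": "allocation successful",
#     "result": ["node1.example.com", "node2.example.com"]
#   }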
11470 class LUTestAllocator(NoHooksLU):
11471 """Run allocator tests.
11473 This LU runs the allocator tests
11476 def CheckPrereq(self):
11477 """Check prerequisites.
11479 This checks the opcode parameters depending on the direction and mode test.
11482 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11483 for attr in ["mem_size", "disks", "disk_template",
11484 "os", "tags", "nics", "vcpus"]:
11485 if not hasattr(self.op, attr):
11486 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11487 attr, errors.ECODE_INVAL)
11488 iname = self.cfg.ExpandInstanceName(self.op.name)
11489 if iname is not None:
11490 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11491 iname, errors.ECODE_EXISTS)
11492 if not isinstance(self.op.nics, list):
11493 raise errors.OpPrereqError("Invalid parameter 'nics'",
11494 errors.ECODE_INVAL)
11495 if not isinstance(self.op.disks, list):
11496 raise errors.OpPrereqError("Invalid parameter 'disks'",
11497 errors.ECODE_INVAL)
11498 for row in self.op.disks:
11499 if (not isinstance(row, dict) or
11500 "size" not in row or
11501 not isinstance(row["size"], int) or
11502 "mode" not in row or
11503 row["mode"] not in ['r', 'w']):
11504 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11505 " parameter", errors.ECODE_INVAL)
11506 if self.op.hypervisor is None:
11507 self.op.hypervisor = self.cfg.GetHypervisorType()
11508 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11509 fname = _ExpandInstanceName(self.cfg, self.op.name)
11510 self.op.name = fname
11511 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11512 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11513 if not hasattr(self.op, "evac_nodes"):
11514 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11515 " opcode input", errors.ECODE_INVAL)
11516 else:
11517 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11518 self.op.mode, errors.ECODE_INVAL)
11520 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11521 if self.op.allocator is None:
11522 raise errors.OpPrereqError("Missing allocator name",
11523 errors.ECODE_INVAL)
11524 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11525 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11526 self.op.direction, errors.ECODE_INVAL)
11528 def Exec(self, feedback_fn):
11529 """Run the allocator test.
11532 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11533 ial = IAllocator(self.cfg, self.rpc,
11534 mode=self.op.mode,
11535 name=self.op.name,
11536 mem_size=self.op.mem_size,
11537 disks=self.op.disks,
11538 disk_template=self.op.disk_template,
11539 os=self.op.os,
11540 tags=self.op.tags,
11541 nics=self.op.nics,
11542 vcpus=self.op.vcpus,
11543 hypervisor=self.op.hypervisor,
11544 )
11545 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11546 ial = IAllocator(self.cfg, self.rpc,
11547 mode=self.op.mode,
11548 name=self.op.name,
11549 relocate_from=list(self.relocate_from),
11550 )
11551 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11552 ial = IAllocator(self.cfg, self.rpc,
11553 mode=self.op.mode,
11554 evac_nodes=self.op.evac_nodes)
11555 else:
11556 raise errors.ProgrammerError("Unhandled mode %s in"
11557 " LUTestAllocator.Exec", self.op.mode)
11559 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11560 result = ial.in_text
11561 else:
11562 ial.Run(self.op.allocator, validate=False)
11563 result = ial.out_text
11564 return result
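# Editorial sketch: LUTestAllocator is driven by opcodes.OpTestAllocator. In
# the "in" direction it only returns the generated allocator input text, in
# the "out" direction it also runs the named allocator script. The field
# names below are the ones read via self.op above; the values are made up:
#
#   op = opcodes.OpTestAllocator(mode=constants.IALLOCATOR_MODE_ALLOC,
#                                direction=constants.IALLOCATOR_DIR_OUT,
#                                allocator="hail",
#                                name="instance1.example.com",
#                                mem_size=512, vcpus=1, os="debian-image",
#                                tags=[], nics=[],
#                                disks=[{"size": 1024, "mode": "w"}],
#                                disk_template=constants.DT_PLAIN,
#                                hypervisor=None)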
11567 #: Query type implementations
11568 _QUERY_IMPL = {
11569 constants.QR_INSTANCE: _InstanceQuery,
11570 constants.QR_NODE: _NodeQuery,
11571 constants.QR_GROUP: _GroupQuery,
11572 }
11575 def _GetQueryImplementation(name):
11576 """Returns the implemtnation for a query type.
11578 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11581 try:
11582 return _QUERY_IMPL[name]
11583 except KeyError:
11584 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11585 errors.ECODE_INVAL)
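# Editorial example: resolving the query implementation for node queries via
# the _QUERY_IMPL mapping above; unknown resource names raise OpPrereqError.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery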