4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 import ganeti.masterd.instance # pylint: disable-msg=W0611
64 def _SupportsOob(cfg, node):
65 """Tells if node supports OOB.
67 @type cfg: L{config.ConfigWriter}
68 @param cfg: The cluster configuration
69 @type node: L{objects.Node}
71 @return: The OOB script if supported or an empty string otherwise
74 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 """Data container for LU results with jobs.
80 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82 contained in the C{jobs} attribute and include the job IDs in the opcode
86 def __init__(self, jobs, **kwargs):
87 """Initializes this class.
89 Additional return values can be specified as keyword arguments.
91 @type jobs: list of lists of L{opcodes.OpCode}
92 @param jobs: A list of lists of opcode objects
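# Illustrative sketch (not part of this module's code): how an LU's Exec
# method might hand follow-up jobs back to the processor. The opcode used
# below is only a placeholder; any list of lists of opcodes works.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]]
#     # "other" becomes an extra key in the result, next to the job IDs
#     return ResultWithJobs(jobs, other="additional return value")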
99 class LogicalUnit(object):
100 """Logical Unit base class.
102 Subclasses must follow these rules:
103 - implement ExpandNames
104 - implement CheckPrereq (except when tasklets are used)
105 - implement Exec (except when tasklets are used)
106 - implement BuildHooksEnv
107 - implement BuildHooksNodes
108 - redefine HPATH and HTYPE
109 - optionally redefine their run requirements:
110 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112 Note that all commands require root permissions.
114 @ivar dry_run_result: the value (if any) that will be returned to the caller
115 in dry-run mode (signalled by opcode dry_run parameter)
122 def __init__(self, processor, op, context, rpc):
123 """Constructor for LogicalUnit.
125 This needs to be overridden in derived classes in order to check op
129 self.proc = processor
131 self.cfg = context.cfg
132 self.context = context
134 # Dicts used to declare locking needs to mcpu
135 self.needed_locks = None
136 self.acquired_locks = {}
137 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139 self.remove_locks = {}
140 # Used to force good behavior when calling helper functions
141 self.recalculate_locks = {}
144 self.Log = processor.Log # pylint: disable-msg=C0103
145 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148 # support for dry-run
149 self.dry_run_result = None
150 # support for generic debug attribute
151 if (not hasattr(self.op, "debug_level") or
152 not isinstance(self.op.debug_level, int)):
153 self.op.debug_level = 0
158 # Validate opcode parameters and set defaults
159 self.op.Validate(True)
161 self.CheckArguments()
164 """Returns the SshRunner object
168 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
171 ssh = property(fget=__GetSSH)
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that level
205 - don't put anything for the BGL level
206 - if you want all locks at a level use locking.ALL_SET as a value
208 If you need to share locks (rather than acquire them exclusively) at one
209 level you can modify self.share_locks, setting a true value (usually 1) for
210 that level. By default locks are not shared.
212 This function can also define a list of tasklets, which then will be
213 executed in order instead of the usual LU-level CheckPrereq and Exec
214 functions, if those are not defined by the LU.
218 # Acquire all nodes and one instance
219 self.needed_locks = {
220 locking.LEVEL_NODE: locking.ALL_SET,
221 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 # Acquire just two nodes
224 self.needed_locks = {
225 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
228 self.needed_locks = {} # No, you can't leave it to the default value None
231 # The implementation of this method is mandatory only if the new LU is
232 # concurrent, so that old LUs don't need to be changed all at the same
235 self.needed_locks = {} # Exclusive LUs don't need locks.
237 raise NotImplementedError
239 def DeclareLocks(self, level):
240 """Declare LU locking needs for a level
242 While most LUs can just declare their locking needs at ExpandNames time,
243 sometimes there's the need to calculate some locks after having acquired
244 the ones before. This function is called just before acquiring locks at a
245 particular level, but after acquiring the ones at lower levels, and permits
246 such calculations. It can be used to modify self.needed_locks, and by
247 default it does nothing.
249 This function is only called if you have something already set in
250 self.needed_locks for the level.
252 @param level: Locking level which is going to be locked
253 @type level: member of ganeti.locking.LEVELS
257 def CheckPrereq(self):
258 """Check prerequisites for this LU.
260 This method should check that the prerequisites for the execution
261 of this LU are fulfilled. It can do internode communication, but
262 it should be idempotent - no cluster or system changes are
265 The method should raise errors.OpPrereqError in case something is
266 not fulfilled. Its return value is ignored.
268 This method should also update all the parameters of the opcode to
269 their canonical form if it hasn't been done by ExpandNames before.
272 if self.tasklets is not None:
273 for (idx, tl) in enumerate(self.tasklets):
274 logging.debug("Checking prerequisites for tasklet %s/%s",
275 idx + 1, len(self.tasklets))
280 def Exec(self, feedback_fn):
283 This method should implement the actual work. It should raise
284 errors.OpExecError for failures that are somewhat dealt with in
288 if self.tasklets is not None:
289 for (idx, tl) in enumerate(self.tasklets):
290 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
293 raise NotImplementedError
295 def BuildHooksEnv(self):
296 """Build hooks environment for this LU.
299 @return: Dictionary containing the environment that will be used for
300 running the hooks for this LU. The keys of the dict must not be prefixed
301 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
302 will extend the environment with additional variables. If no environment
303 should be defined, an empty dictionary should be returned (not C{None}).
304 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
308 raise NotImplementedError
310 def BuildHooksNodes(self):
311 """Build list of nodes to run LU's hooks.
313 @rtype: tuple; (list, list)
314 @return: Tuple containing a list of node names on which the hook
315 should run before the execution and a list of node names on which the
316 hook should run after the execution. No nodes should be returned as an
317 empty list (and not None).
318 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
322 raise NotImplementedError
324 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
325 """Notify the LU about the results of its hooks.
327 This method is called every time a hooks phase is executed, and notifies
328 the Logical Unit about the hooks' result. The LU can then use it to alter
329 its result based on the hooks. By default the method does nothing and the
330 previous result is passed back unchanged but any LU can define it if it
331 wants to use the local cluster hook-scripts somehow.
333 @param phase: one of L{constants.HOOKS_PHASE_POST} or
334 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
335 @param hook_results: the results of the multi-node hooks rpc call
336 @param feedback_fn: function used to send feedback back to the caller
337 @param lu_result: the previous Exec result this LU had, or None
339 @return: the new Exec result, based on the previous result
343 # API must be kept, thus we ignore the unused-argument and
344 # could-be-a-function warnings
345 # pylint: disable-msg=W0613,R0201
348 def _ExpandAndLockInstance(self):
349 """Helper function to expand and lock an instance.
351 Many LUs that work on an instance take its name in self.op.instance_name
352 and need to expand it and then declare the expanded name for locking. This
353 function does it, and then updates self.op.instance_name to the expanded
354 name. It also initializes needed_locks as a dict, if this hasn't been done
358 if self.needed_locks is None:
359 self.needed_locks = {}
361 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
362 "_ExpandAndLockInstance called with instance-level locks set"
363 self.op.instance_name = _ExpandInstanceName(self.cfg,
364 self.op.instance_name)
365 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367 def _LockInstancesNodes(self, primary_only=False):
368 """Helper function to declare instances' nodes for locking.
370 This function should be called after locking one or more instances to lock
371 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
372 with all primary or secondary nodes for instances already locked and
373 present in self.needed_locks[locking.LEVEL_INSTANCE].
375 It should be called from DeclareLocks, and for safety only works if
376 self.recalculate_locks[locking.LEVEL_NODE] is set.
378 In the future it may grow parameters to just lock some instance's nodes, or
379 to just lock primaries or secondary nodes, if needed.
381 It should be called in DeclareLocks in a way similar to::
383 if level == locking.LEVEL_NODE:
384 self._LockInstancesNodes()
386 @type primary_only: boolean
387 @param primary_only: only lock primary nodes of locked instances
390 assert locking.LEVEL_NODE in self.recalculate_locks, \
391 "_LockInstancesNodes helper function called with no nodes to recalculate"
393 # TODO: check if we've really been called with the instance locks held
395 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
396 # future we might want to have different behaviors depending on the value
397 # of self.recalculate_locks[locking.LEVEL_NODE]
399 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
400 instance = self.context.cfg.GetInstanceInfo(instance_name)
401 wanted_nodes.append(instance.primary_node)
403 wanted_nodes.extend(instance.secondary_nodes)
405 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
406 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
407 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
408 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
410 del self.recalculate_locks[locking.LEVEL_NODE]
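# Illustrative sketch, not taken from a specific LU: the typical pattern for
# using the helper above is to request instance locks in ExpandNames, mark
# node locks for recalculation, and then call _LockInstancesNodes from
# DeclareLocks once the instance locks are held.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()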
413 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
414 """Simple LU which runs no hooks.
416 This LU is intended as a parent for other LogicalUnits which will
417 run no hooks, in order to reduce duplicate code.
423 def BuildHooksEnv(self):
424 """Empty BuildHooksEnv for NoHooksLu.
426 This just raises an error.
429 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
431 def BuildHooksNodes(self):
432 """Empty BuildHooksNodes for NoHooksLU.
435 raise AssertionError("BuildHooksNodes called for NoHooksLU")
439 """Tasklet base class.
441 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
442 they can mix legacy code with tasklets. Locking needs to be done in the LU,
443 tasklets know nothing about locks.
445 Subclasses must follow these rules:
446 - Implement CheckPrereq
450 def __init__(self, lu):
457 def CheckPrereq(self):
458 """Check prerequisites for this tasklet.
460 This method should check whether the prerequisites for the execution of
461 this tasklet are fulfilled. It can do internode communication, but it
462 should be idempotent - no cluster or system changes are allowed.
464 The method should raise errors.OpPrereqError in case something is not
465 fulfilled. Its return value is ignored.
467 This method should also update all parameters to their canonical form if it
468 hasn't been done before.
473 def Exec(self, feedback_fn):
474 """Execute the tasklet.
476 This method should implement the actual work. It should raise
477 errors.OpExecError for failures that are somewhat dealt with in code, or
481 raise NotImplementedError
485 """Base for query utility classes.
488 #: Attribute holding field definitions
491 def __init__(self, filter_, fields, use_locking):
492 """Initializes this class.
495 self.use_locking = use_locking
497 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
499 self.requested_data = self.query.RequestedData()
500 self.names = self.query.RequestedNames()
502 # Sort only if no names were requested
503 self.sort_by_name = not self.names
505 self.do_locking = None
508 def _GetNames(self, lu, all_names, lock_level):
509 """Helper function to determine names asked for in the query.
513 names = lu.acquired_locks[lock_level]
517 if self.wanted == locking.ALL_SET:
518 assert not self.names
519 # caller didn't specify names, so ordering is not important
520 return utils.NiceSort(names)
522 # caller specified names and we must keep the same order
524 assert not self.do_locking or lu.acquired_locks[lock_level]
526 missing = set(self.wanted).difference(names)
528 raise errors.OpExecError("Some items were removed before retrieving"
529 " their data: %s" % missing)
531 # Return expanded names
534 def ExpandNames(self, lu):
535 """Expand names for this query.
537 See L{LogicalUnit.ExpandNames}.
540 raise NotImplementedError()
542 def DeclareLocks(self, lu, level):
543 """Declare locks for this query.
545 See L{LogicalUnit.DeclareLocks}.
548 raise NotImplementedError()
550 def _GetQueryData(self, lu):
551 """Collects all data for this query.
553 @return: Query data object
556 raise NotImplementedError()
558 def NewStyleQuery(self, lu):
559 """Collect data and execute query.
562 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
563 sort_by_name=self.sort_by_name)
565 def OldStyleQuery(self, lu):
566 """Collect data and execute query.
569 return self.query.OldStyleQuery(self._GetQueryData(lu),
570 sort_by_name=self.sort_by_name)
573 def _GetWantedNodes(lu, nodes):
574 """Returns list of checked and expanded node names.
576 @type lu: L{LogicalUnit}
577 @param lu: the logical unit on whose behalf we execute
579 @param nodes: list of node names or None for all nodes
581 @return: the list of nodes, sorted
582 @raise errors.ProgrammerError: if the nodes parameter is wrong type
586 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
588 return utils.NiceSort(lu.cfg.GetNodeList())
591 def _GetWantedInstances(lu, instances):
592 """Returns list of checked and expanded instance names.
594 @type lu: L{LogicalUnit}
595 @param lu: the logical unit on whose behalf we execute
596 @type instances: list
597 @param instances: list of instance names or None for all instances
599 @return: the list of instances, sorted
600 @raise errors.OpPrereqError: if the instances parameter is wrong type
601 @raise errors.OpPrereqError: if any of the passed instances is not found
605 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
607 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
611 def _GetUpdatedParams(old_params, update_dict,
612 use_default=True, use_none=False):
613 """Return the new version of a parameter dictionary.
615 @type old_params: dict
616 @param old_params: old parameters
617 @type update_dict: dict
618 @param update_dict: dict containing new parameter values, or
619 constants.VALUE_DEFAULT to reset the parameter to its default
621 @type use_default: boolean
622 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
623 values as 'to be deleted' values
624 @type use_none: boolean
625 @param use_none: whether to recognise C{None} values as 'to be
628 @return: the new parameter dictionary
631 params_copy = copy.deepcopy(old_params)
632 for key, val in update_dict.iteritems():
633 if ((use_default and val == constants.VALUE_DEFAULT) or
634 (use_none and val is None)):
640 params_copy[key] = val
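# Worked example of the merge semantics above, with made-up parameter names:
#
#   old = {"mem": 128, "vcpus": 1, "kernel": "/boot/vmlinuz"}
#   _GetUpdatedParams(old, {"mem": 256, "kernel": constants.VALUE_DEFAULT})
#     -> {"mem": 256, "vcpus": 1}                  # "kernel" falls back to default
#   _GetUpdatedParams(old, {"mem": None}, use_none=True)
#     -> {"vcpus": 1, "kernel": "/boot/vmlinuz"}   # "mem" removed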
644 def _RunPostHook(lu, node_name):
645 """Runs the post-hook for an opcode on a single node.
648 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
650 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
652 # pylint: disable-msg=W0702
653 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
656 def _CheckOutputFields(static, dynamic, selected):
657 """Checks whether all selected fields are valid.
659 @type static: L{utils.FieldSet}
660 @param static: static fields set
661 @type dynamic: L{utils.FieldSet}
662 @param dynamic: dynamic fields set
669 delta = f.NonMatching(selected)
671 raise errors.OpPrereqError("Unknown output fields selected: %s"
672 % ",".join(delta), errors.ECODE_INVAL)
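# Illustrative sketch with made-up field names: a query-style LU would
# typically validate the user-selected output fields like this, raising
# OpPrereqError for any field matching neither set.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dtotal", "dfree"),
#                      selected=self.op.output_fields)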
675 def _CheckGlobalHvParams(params):
676 """Validates that given hypervisor params are not global ones.
678 This will ensure that instances don't get customised versions of
682 used_globals = constants.HVC_GLOBALS.intersection(params)
684 msg = ("The following hypervisor parameters are global and cannot"
685 " be customized at instance level, please modify them at"
686 " cluster level: %s" % utils.CommaJoin(used_globals))
687 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
690 def _CheckNodeOnline(lu, node, msg=None):
691 """Ensure that a given node is online.
693 @param lu: the LU on behalf of which we make the check
694 @param node: the node to check
695 @param msg: if passed, should be a message to replace the default one
696 @raise errors.OpPrereqError: if the node is offline
700 msg = "Can't use offline node"
701 if lu.cfg.GetNodeInfo(node).offline:
702 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
705 def _CheckNodeNotDrained(lu, node):
706 """Ensure that a given node is not drained.
708 @param lu: the LU on behalf of which we make the check
709 @param node: the node to check
710 @raise errors.OpPrereqError: if the node is drained
713 if lu.cfg.GetNodeInfo(node).drained:
714 raise errors.OpPrereqError("Can't use drained node %s" % node,
718 def _CheckNodeVmCapable(lu, node):
719 """Ensure that a given node is vm capable.
721 @param lu: the LU on behalf of which we make the check
722 @param node: the node to check
723 @raise errors.OpPrereqError: if the node is not vm capable
726 if not lu.cfg.GetNodeInfo(node).vm_capable:
727 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
731 def _CheckNodeHasOS(lu, node, os_name, force_variant):
732 """Ensure that a node supports a given OS.
734 @param lu: the LU on behalf of which we make the check
735 @param node: the node to check
736 @param os_name: the OS to query about
737 @param force_variant: whether to ignore variant errors
738 @raise errors.OpPrereqError: if the node is not supporting the OS
741 result = lu.rpc.call_os_get(node, os_name)
742 result.Raise("OS '%s' not in supported OS list for node %s" %
744 prereq=True, ecode=errors.ECODE_INVAL)
745 if not force_variant:
746 _CheckOSVariant(result.payload, os_name)
749 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
750 """Ensure that a node has the given secondary ip.
752 @type lu: L{LogicalUnit}
753 @param lu: the LU on behalf of which we make the check
755 @param node: the node to check
756 @type secondary_ip: string
757 @param secondary_ip: the ip to check
758 @type prereq: boolean
759 @param prereq: whether to throw a prerequisite or an execute error
760 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
761 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
764 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
765 result.Raise("Failure checking secondary ip on node %s" % node,
766 prereq=prereq, ecode=errors.ECODE_ENVIRON)
767 if not result.payload:
768 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
769 " please fix and re-run this command" % secondary_ip)
771 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
773 raise errors.OpExecError(msg)
776 def _GetClusterDomainSecret():
777 """Reads the cluster domain secret.
780 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
784 def _CheckInstanceDown(lu, instance, reason):
785 """Ensure that an instance is not running."""
786 if instance.admin_up:
787 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
788 (instance.name, reason), errors.ECODE_STATE)
790 pnode = instance.primary_node
791 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
792 ins_l.Raise("Can't contact node %s for instance information" % pnode,
793 prereq=True, ecode=errors.ECODE_ENVIRON)
795 if instance.name in ins_l.payload:
796 raise errors.OpPrereqError("Instance %s is running, %s" %
797 (instance.name, reason), errors.ECODE_STATE)
800 def _ExpandItemName(fn, name, kind):
801 """Expand an item name.
803 @param fn: the function to use for expansion
804 @param name: requested item name
805 @param kind: text description ('Node' or 'Instance')
806 @return: the resolved (full) name
807 @raise errors.OpPrereqError: if the item is not found
811 if full_name is None:
812 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
817 def _ExpandNodeName(cfg, name):
818 """Wrapper over L{_ExpandItemName} for nodes."""
819 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
822 def _ExpandInstanceName(cfg, name):
823 """Wrapper over L{_ExpandItemName} for instance."""
824 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
827 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
828 memory, vcpus, nics, disk_template, disks,
829 bep, hvp, hypervisor_name):
830 """Builds instance related env variables for hooks
832 This builds the hook environment from individual variables.
835 @param name: the name of the instance
836 @type primary_node: string
837 @param primary_node: the name of the instance's primary node
838 @type secondary_nodes: list
839 @param secondary_nodes: list of secondary nodes as strings
840 @type os_type: string
841 @param os_type: the name of the instance's OS
842 @type status: boolean
843 @param status: the should_run status of the instance
845 @param memory: the memory size of the instance
847 @param vcpus: the count of VCPUs the instance has
849 @param nics: list of tuples (ip, mac, mode, link) representing
850 the NICs the instance has
851 @type disk_template: string
852 @param disk_template: the disk template of the instance
854 @param disks: the list of (size, mode) pairs
856 @param bep: the backend parameters for the instance
858 @param hvp: the hypervisor parameters for the instance
859 @type hypervisor_name: string
860 @param hypervisor_name: the hypervisor for the instance
862 @return: the hook environment for this instance
871 "INSTANCE_NAME": name,
872 "INSTANCE_PRIMARY": primary_node,
873 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
874 "INSTANCE_OS_TYPE": os_type,
875 "INSTANCE_STATUS": str_status,
876 "INSTANCE_MEMORY": memory,
877 "INSTANCE_VCPUS": vcpus,
878 "INSTANCE_DISK_TEMPLATE": disk_template,
879 "INSTANCE_HYPERVISOR": hypervisor_name,
883 nic_count = len(nics)
884 for idx, (ip, mac, mode, link) in enumerate(nics):
887 env["INSTANCE_NIC%d_IP" % idx] = ip
888 env["INSTANCE_NIC%d_MAC" % idx] = mac
889 env["INSTANCE_NIC%d_MODE" % idx] = mode
890 env["INSTANCE_NIC%d_LINK" % idx] = link
891 if mode == constants.NIC_MODE_BRIDGED:
892 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
896 env["INSTANCE_NIC_COUNT"] = nic_count
899 disk_count = len(disks)
900 for idx, (size, mode) in enumerate(disks):
901 env["INSTANCE_DISK%d_SIZE" % idx] = size
902 env["INSTANCE_DISK%d_MODE" % idx] = mode
906 env["INSTANCE_DISK_COUNT"] = disk_count
908 for source, kind in [(bep, "BE"), (hvp, "HV")]:
909 for key, value in source.items():
910 env["INSTANCE_%s_%s" % (kind, key)] = value
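# Illustrative sketch of the resulting environment for a hypothetical
# single-NIC, single-disk instance (all values are examples only):
#
#   {
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#     ...
#   }
#
# The hooks runner later prefixes every key with "GANETI_".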
915 def _NICListToTuple(lu, nics):
916 """Build a list of nic information tuples.
918 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
919 value in LUInstanceQueryData.
921 @type lu: L{LogicalUnit}
922 @param lu: the logical unit on whose behalf we execute
923 @type nics: list of L{objects.NIC}
924 @param nics: list of nics to convert to hooks tuples
928 cluster = lu.cfg.GetClusterInfo()
932 filled_params = cluster.SimpleFillNIC(nic.nicparams)
933 mode = filled_params[constants.NIC_MODE]
934 link = filled_params[constants.NIC_LINK]
935 hooks_nics.append((ip, mac, mode, link))
939 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
940 """Builds instance related env variables for hooks from an object.
942 @type lu: L{LogicalUnit}
943 @param lu: the logical unit on whose behalf we execute
944 @type instance: L{objects.Instance}
945 @param instance: the instance for which we should build the
948 @param override: dictionary with key/values that will override
951 @return: the hook environment dictionary
954 cluster = lu.cfg.GetClusterInfo()
955 bep = cluster.FillBE(instance)
956 hvp = cluster.FillHV(instance)
958 'name': instance.name,
959 'primary_node': instance.primary_node,
960 'secondary_nodes': instance.secondary_nodes,
961 'os_type': instance.os,
962 'status': instance.admin_up,
963 'memory': bep[constants.BE_MEMORY],
964 'vcpus': bep[constants.BE_VCPUS],
965 'nics': _NICListToTuple(lu, instance.nics),
966 'disk_template': instance.disk_template,
967 'disks': [(disk.size, disk.mode) for disk in instance.disks],
970 'hypervisor_name': instance.hypervisor,
973 args.update(override)
974 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
977 def _AdjustCandidatePool(lu, exceptions):
978 """Adjust the candidate pool after node operations.
981 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
983 lu.LogInfo("Promoted nodes to master candidate role: %s",
984 utils.CommaJoin(node.name for node in mod_list))
985 for name in mod_list:
986 lu.context.ReaddNode(name)
987 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
989 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
993 def _DecideSelfPromotion(lu, exceptions=None):
994 """Decide whether I should promote myself as a master candidate.
997 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
998 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
999 # the new node will increase mc_max by one, so:
1000 mc_should = min(mc_should + 1, cp_size)
1001 return mc_now < mc_should
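# Worked example with hypothetical numbers: with candidate_pool_size = 10,
# three current candidates (mc_now = 3) and three desired (mc_should = 3),
# adding this node raises the desired count to min(3 + 1, 10) = 4, so
# 3 < 4 and the new node promotes itself; with a pool size of 3 the desired
# count stays capped at 3 and no promotion happens.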
1004 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1005 """Check that the bridges needed by a list of nics exist.
1008 cluster = lu.cfg.GetClusterInfo()
1009 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1010 brlist = [params[constants.NIC_LINK] for params in paramslist
1011 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1013 result = lu.rpc.call_bridges_exist(target_node, brlist)
1014 result.Raise("Error checking bridges on destination node '%s'" %
1015 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1018 def _CheckInstanceBridgesExist(lu, instance, node=None):
1019 """Check that the bridges needed by an instance exist.
1023 node = instance.primary_node
1024 _CheckNicsBridgesExist(lu, instance.nics, node)
1027 def _CheckOSVariant(os_obj, name):
1028 """Check whether an OS name conforms to the os variants specification.
1030 @type os_obj: L{objects.OS}
1031 @param os_obj: OS object to check
1033 @param name: OS name passed by the user, to check for validity
1036 if not os_obj.supported_variants:
1038 variant = objects.OS.GetVariant(name)
1040 raise errors.OpPrereqError("OS name must include a variant",
1043 if variant not in os_obj.supported_variants:
1044 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1047 def _GetNodeInstancesInner(cfg, fn):
1048 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1051 def _GetNodeInstances(cfg, node_name):
1052 """Returns a list of all primary and secondary instances on a node.
1056 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1059 def _GetNodePrimaryInstances(cfg, node_name):
1060 """Returns primary instances on a node.
1063 return _GetNodeInstancesInner(cfg,
1064 lambda inst: node_name == inst.primary_node)
1067 def _GetNodeSecondaryInstances(cfg, node_name):
1068 """Returns secondary instances on a node.
1071 return _GetNodeInstancesInner(cfg,
1072 lambda inst: node_name in inst.secondary_nodes)
1075 def _GetStorageTypeArgs(cfg, storage_type):
1076 """Returns the arguments for a storage type.
1079 # Special case for file storage
1080 if storage_type == constants.ST_FILE:
1081 # storage.FileStorage wants a list of storage directories
1082 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1087 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1090 for dev in instance.disks:
1091 cfg.SetDiskID(dev, node_name)
1093 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1094 result.Raise("Failed to get disk status from node %s" % node_name,
1095 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1097 for idx, bdev_status in enumerate(result.payload):
1098 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1104 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1105 """Check the sanity of iallocator and node arguments and use the
1106 cluster-wide iallocator if appropriate.
1108 Check that at most one of (iallocator, node) is specified. If none is
1109 specified, then the LU's opcode's iallocator slot is filled with the
1110 cluster-wide default iallocator.
1112 @type iallocator_slot: string
1113 @param iallocator_slot: the name of the opcode iallocator slot
1114 @type node_slot: string
1115 @param node_slot: the name of the opcode target node slot
1118 node = getattr(lu.op, node_slot, None)
1119 iallocator = getattr(lu.op, iallocator_slot, None)
1121 if node is not None and iallocator is not None:
1122 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1124 elif node is None and iallocator is None:
1125 default_iallocator = lu.cfg.GetDefaultIAllocator()
1126 if default_iallocator:
1127 setattr(lu.op, iallocator_slot, default_iallocator)
1129 raise errors.OpPrereqError("No iallocator or node given and no"
1130 " cluster-wide default iallocator found."
1131 " Please specify either an iallocator or a"
1132 " node, or set a cluster-wide default"
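# Illustrative sketch (slot names are examples only): an LU whose opcode has
# "iallocator" and "pnode" slots would typically call this helper from
# CheckArguments, leaving self.op.iallocator set to the cluster default when
# the user specified neither.
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "pnode")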
1136 class LUClusterPostInit(LogicalUnit):
1137 """Logical unit for running hooks after cluster initialization.
1140 HPATH = "cluster-init"
1141 HTYPE = constants.HTYPE_CLUSTER
1143 def BuildHooksEnv(self):
1148 "OP_TARGET": self.cfg.GetClusterName(),
1151 def BuildHooksNodes(self):
1152 """Build hooks nodes.
1155 return ([], [self.cfg.GetMasterNode()])
1157 def Exec(self, feedback_fn):
1164 class LUClusterDestroy(LogicalUnit):
1165 """Logical unit for destroying the cluster.
1168 HPATH = "cluster-destroy"
1169 HTYPE = constants.HTYPE_CLUSTER
1171 def BuildHooksEnv(self):
1176 "OP_TARGET": self.cfg.GetClusterName(),
1179 def BuildHooksNodes(self):
1180 """Build hooks nodes.
1185 def CheckPrereq(self):
1186 """Check prerequisites.
1188 This checks whether the cluster is empty.
1190 Any errors are signaled by raising errors.OpPrereqError.
1193 master = self.cfg.GetMasterNode()
1195 nodelist = self.cfg.GetNodeList()
1196 if len(nodelist) != 1 or nodelist[0] != master:
1197 raise errors.OpPrereqError("There are still %d node(s) in"
1198 " this cluster." % (len(nodelist) - 1),
1200 instancelist = self.cfg.GetInstanceList()
1202 raise errors.OpPrereqError("There are still %d instance(s) in"
1203 " this cluster." % len(instancelist),
1206 def Exec(self, feedback_fn):
1207 """Destroys the cluster.
1210 master = self.cfg.GetMasterNode()
1212 # Run post hooks on master node before it's removed
1213 _RunPostHook(self, master)
1215 result = self.rpc.call_node_stop_master(master, False)
1216 result.Raise("Could not disable the master role")
1221 def _VerifyCertificate(filename):
1222 """Verifies a certificate for LUClusterVerify.
1224 @type filename: string
1225 @param filename: Path to PEM file
1229 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1230 utils.ReadFile(filename))
1231 except Exception, err: # pylint: disable-msg=W0703
1232 return (LUClusterVerify.ETYPE_ERROR,
1233 "Failed to load X509 certificate %s: %s" % (filename, err))
1236 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1237 constants.SSL_CERT_EXPIRATION_ERROR)
1240 fnamemsg = "While verifying %s: %s" % (filename, msg)
1245 return (None, fnamemsg)
1246 elif errcode == utils.CERT_WARNING:
1247 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1248 elif errcode == utils.CERT_ERROR:
1249 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1251 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1254 class LUClusterVerify(LogicalUnit):
1255 """Verifies the cluster status.
1258 HPATH = "cluster-verify"
1259 HTYPE = constants.HTYPE_CLUSTER
1262 TCLUSTER = "cluster"
1264 TINSTANCE = "instance"
1266 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1267 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1268 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1269 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1270 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1271 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1272 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1273 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1274 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1275 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1276 ENODEDRBD = (TNODE, "ENODEDRBD")
1277 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1278 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1279 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1280 ENODEHV = (TNODE, "ENODEHV")
1281 ENODELVM = (TNODE, "ENODELVM")
1282 ENODEN1 = (TNODE, "ENODEN1")
1283 ENODENET = (TNODE, "ENODENET")
1284 ENODEOS = (TNODE, "ENODEOS")
1285 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1286 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1287 ENODERPC = (TNODE, "ENODERPC")
1288 ENODESSH = (TNODE, "ENODESSH")
1289 ENODEVERSION = (TNODE, "ENODEVERSION")
1290 ENODESETUP = (TNODE, "ENODESETUP")
1291 ENODETIME = (TNODE, "ENODETIME")
1292 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1294 ETYPE_FIELD = "code"
1295 ETYPE_ERROR = "ERROR"
1296 ETYPE_WARNING = "WARNING"
1298 _HOOKS_INDENT_RE = re.compile("^", re.M)
1300 class NodeImage(object):
1301 """A class representing the logical and physical status of a node.
1304 @ivar name: the node name to which this object refers
1305 @ivar volumes: a structure as returned from
1306 L{ganeti.backend.GetVolumeList} (runtime)
1307 @ivar instances: a list of running instances (runtime)
1308 @ivar pinst: list of configured primary instances (config)
1309 @ivar sinst: list of configured secondary instances (config)
1310 @ivar sbp: dictionary of {primary-node: list of instances} for all
1311 instances for which this node is secondary (config)
1312 @ivar mfree: free memory, as reported by hypervisor (runtime)
1313 @ivar dfree: free disk, as reported by the node (runtime)
1314 @ivar offline: the offline status (config)
1315 @type rpc_fail: boolean
1316 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1317 not whether the individual keys were correct) (runtime)
1318 @type lvm_fail: boolean
1319 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1320 @type hyp_fail: boolean
1321 @ivar hyp_fail: whether the RPC call didn't return the instance list
1322 @type ghost: boolean
1323 @ivar ghost: whether this is a known node or not (config)
1324 @type os_fail: boolean
1325 @ivar os_fail: whether the RPC call didn't return valid OS data
1327 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1328 @type vm_capable: boolean
1329 @ivar vm_capable: whether the node can host instances
1332 def __init__(self, offline=False, name=None, vm_capable=True):
1341 self.offline = offline
1342 self.vm_capable = vm_capable
1343 self.rpc_fail = False
1344 self.lvm_fail = False
1345 self.hyp_fail = False
1347 self.os_fail = False
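# Illustrative sketch with made-up values: Exec() builds one NodeImage per
# node and the _Verify*/_Update* helpers below fill in and consume its
# fields, roughly like this.
#
#   nimg = self.NodeImage(offline=node.offline, name=node.name,
#                         vm_capable=node.vm_capable)
#   nimg.pinst = ["inst1.example.com"]      # from the cluster configuration
#   nimg.instances = ["inst1.example.com"]  # from the node's RPC reply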
1350 def ExpandNames(self):
1351 self.needed_locks = {
1352 locking.LEVEL_NODE: locking.ALL_SET,
1353 locking.LEVEL_INSTANCE: locking.ALL_SET,
1355 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1357 def _Error(self, ecode, item, msg, *args, **kwargs):
1358 """Format an error message.
1360 Based on the opcode's error_codes parameter, either format a
1361 parseable error code, or a simpler error string.
1363 This must be called only from Exec and functions called from Exec.
1366 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1368 # first complete the msg
1371 # then format the whole message
1372 if self.op.error_codes:
1373 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1379 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1380 # and finally report it via the feedback_fn
1381 self._feedback_fn(" - %s" % msg)
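# Illustrative sketch of the two formats above (made-up values): with
# op.error_codes set, the message is machine-parseable, e.g.
#   "ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
# while the default format is meant for humans, e.g.
#   "ERROR: node node1.example.com: unable to check volume groups"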
1383 def _ErrorIf(self, cond, *args, **kwargs):
1384 """Log an error message if the passed condition is True.
1387 cond = bool(cond) or self.op.debug_simulate_errors
1389 self._Error(*args, **kwargs)
1390 # do not mark the operation as failed for WARN cases only
1391 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1392 self.bad = self.bad or cond
1394 def _VerifyNode(self, ninfo, nresult):
1395 """Perform some basic validation on data returned from a node.
1397 - check the result data structure is well formed and has all the
1399 - check ganeti version
1401 @type ninfo: L{objects.Node}
1402 @param ninfo: the node to check
1403 @param nresult: the results from the node
1405 @return: whether overall this call was successful (and we can expect
1406 reasonable values in the response)
1410 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1412 # main result, nresult should be a non-empty dict
1413 test = not nresult or not isinstance(nresult, dict)
1414 _ErrorIf(test, self.ENODERPC, node,
1415 "unable to verify node: no data returned")
1419 # compares ganeti version
1420 local_version = constants.PROTOCOL_VERSION
1421 remote_version = nresult.get("version", None)
1422 test = not (remote_version and
1423 isinstance(remote_version, (list, tuple)) and
1424 len(remote_version) == 2)
1425 _ErrorIf(test, self.ENODERPC, node,
1426 "connection to node returned invalid data")
1430 test = local_version != remote_version[0]
1431 _ErrorIf(test, self.ENODEVERSION, node,
1432 "incompatible protocol versions: master %s,"
1433 " node %s", local_version, remote_version[0])
1437 # node seems compatible, we can actually try to look into its results
1439 # full package version
1440 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1441 self.ENODEVERSION, node,
1442 "software version mismatch: master %s, node %s",
1443 constants.RELEASE_VERSION, remote_version[1],
1444 code=self.ETYPE_WARNING)
1446 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1447 if ninfo.vm_capable and isinstance(hyp_result, dict):
1448 for hv_name, hv_result in hyp_result.iteritems():
1449 test = hv_result is not None
1450 _ErrorIf(test, self.ENODEHV, node,
1451 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1453 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1454 if ninfo.vm_capable and isinstance(hvp_result, list):
1455 for item, hv_name, hv_result in hvp_result:
1456 _ErrorIf(True, self.ENODEHV, node,
1457 "hypervisor %s parameter verify failure (source %s): %s",
1458 hv_name, item, hv_result)
1460 test = nresult.get(constants.NV_NODESETUP,
1461 ["Missing NODESETUP results"])
1462 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1467 def _VerifyNodeTime(self, ninfo, nresult,
1468 nvinfo_starttime, nvinfo_endtime):
1469 """Check the node time.
1471 @type ninfo: L{objects.Node}
1472 @param ninfo: the node to check
1473 @param nresult: the remote results for the node
1474 @param nvinfo_starttime: the start time of the RPC call
1475 @param nvinfo_endtime: the end time of the RPC call
1479 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1481 ntime = nresult.get(constants.NV_TIME, None)
1483 ntime_merged = utils.MergeTime(ntime)
1484 except (ValueError, TypeError):
1485 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1488 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1489 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1490 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1491 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1495 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1496 "Node time diverges by at least %s from master node time",
1499 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1500 """Check the node LVM results.
1502 @type ninfo: L{objects.Node}
1503 @param ninfo: the node to check
1504 @param nresult: the remote results for the node
1505 @param vg_name: the configured VG name
1512 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514 # checks vg existence and size > 20G
1515 vglist = nresult.get(constants.NV_VGLIST, None)
1517 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1519 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1520 constants.MIN_VG_SIZE)
1521 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1524 pvlist = nresult.get(constants.NV_PVLIST, None)
1525 test = pvlist is None
1526 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1528 # check that ':' is not present in PV names, since it's a
1529 # special character for lvcreate (denotes the range of PEs to
1531 for _, pvname, owner_vg in pvlist:
1532 test = ":" in pvname
1533 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1534 " '%s' of VG '%s'", pvname, owner_vg)
1536 def _VerifyNodeNetwork(self, ninfo, nresult):
1537 """Check the node network connectivity.
1539 @type ninfo: L{objects.Node}
1540 @param ninfo: the node to check
1541 @param nresult: the remote results for the node
1545 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1547 test = constants.NV_NODELIST not in nresult
1548 _ErrorIf(test, self.ENODESSH, node,
1549 "node hasn't returned node ssh connectivity data")
1551 if nresult[constants.NV_NODELIST]:
1552 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1553 _ErrorIf(True, self.ENODESSH, node,
1554 "ssh communication with node '%s': %s", a_node, a_msg)
1556 test = constants.NV_NODENETTEST not in nresult
1557 _ErrorIf(test, self.ENODENET, node,
1558 "node hasn't returned node tcp connectivity data")
1560 if nresult[constants.NV_NODENETTEST]:
1561 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1563 _ErrorIf(True, self.ENODENET, node,
1564 "tcp communication with node '%s': %s",
1565 anode, nresult[constants.NV_NODENETTEST][anode])
1567 test = constants.NV_MASTERIP not in nresult
1568 _ErrorIf(test, self.ENODENET, node,
1569 "node hasn't returned node master IP reachability data")
1571 if not nresult[constants.NV_MASTERIP]:
1572 if node == self.master_node:
1573 msg = "the master node cannot reach the master IP (not configured?)"
1575 msg = "cannot reach the master IP"
1576 _ErrorIf(True, self.ENODENET, node, msg)
1578 def _VerifyInstance(self, instance, instanceconfig, node_image,
1580 """Verify an instance.
1582 This function checks to see if the required block devices are
1583 available on the instance's node.
1586 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1587 node_current = instanceconfig.primary_node
1589 node_vol_should = {}
1590 instanceconfig.MapLVsByNode(node_vol_should)
1592 for node in node_vol_should:
1593 n_img = node_image[node]
1594 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1595 # ignore missing volumes on offline or broken nodes
1597 for volume in node_vol_should[node]:
1598 test = volume not in n_img.volumes
1599 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1600 "volume %s missing on node %s", volume, node)
1602 if instanceconfig.admin_up:
1603 pri_img = node_image[node_current]
1604 test = instance not in pri_img.instances and not pri_img.offline
1605 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1606 "instance not running on its primary node %s",
1609 for node, n_img in node_image.items():
1610 if node != node_current:
1611 test = instance in n_img.instances
1612 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1613 "instance should not run on node %s", node)
1615 diskdata = [(nname, success, status, idx)
1616 for (nname, disks) in diskstatus.items()
1617 for idx, (success, status) in enumerate(disks)]
1619 for nname, success, bdev_status, idx in diskdata:
1620 # the 'ghost node' construction in Exec() ensures that we have a
1622 snode = node_image[nname]
1623 bad_snode = snode.ghost or snode.offline
1624 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1625 self.EINSTANCEFAULTYDISK, instance,
1626 "couldn't retrieve status for disk/%s on %s: %s",
1627 idx, nname, bdev_status)
1628 _ErrorIf((instanceconfig.admin_up and success and
1629 bdev_status.ldisk_status == constants.LDS_FAULTY),
1630 self.EINSTANCEFAULTYDISK, instance,
1631 "disk/%s on %s is faulty", idx, nname)
1633 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1634 """Verify if there are any unknown volumes in the cluster.
1636 The .os, .swap and backup volumes are ignored. All other volumes are
1637 reported as unknown.
1639 @type reserved: L{ganeti.utils.FieldSet}
1640 @param reserved: a FieldSet of reserved volume names
1643 for node, n_img in node_image.items():
1644 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1645 # skip non-healthy nodes
1647 for volume in n_img.volumes:
1648 test = ((node not in node_vol_should or
1649 volume not in node_vol_should[node]) and
1650 not reserved.Matches(volume))
1651 self._ErrorIf(test, self.ENODEORPHANLV, node,
1652 "volume %s is unknown", volume)
1654 def _VerifyOrphanInstances(self, instancelist, node_image):
1655 """Verify the list of running instances.
1657 This checks what instances are running but unknown to the cluster.
1660 for node, n_img in node_image.items():
1661 for o_inst in n_img.instances:
1662 test = o_inst not in instancelist
1663 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1664 "instance %s on node %s should not exist", o_inst, node)
1666 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1667 """Verify N+1 Memory Resilience.
1669 Check that if one single node dies we can still start all the
1670 instances it was primary for.
1673 cluster_info = self.cfg.GetClusterInfo()
1674 for node, n_img in node_image.items():
1675 # This code checks that every node which is now listed as
1676 # secondary has enough memory to host all instances it is
1677 # supposed to, should a single other node in the cluster fail.
1678 # FIXME: not ready for failover to an arbitrary node
1679 # FIXME: does not support file-backed instances
1680 # WARNING: we currently take into account down instances as well
1681 # as up ones, considering that even if they're down someone
1682 # might want to start them even in the event of a node failure.
1684 # we're skipping offline nodes from the N+1 warning, since
1685 # most likely we don't have good memory information from them;
1686 # we already list instances living on such nodes, and that's
1689 for prinode, instances in n_img.sbp.items():
1691 for instance in instances:
1692 bep = cluster_info.FillBE(instance_cfg[instance])
1693 if bep[constants.BE_AUTO_BALANCE]:
1694 needed_mem += bep[constants.BE_MEMORY]
1695 test = n_img.mfree < needed_mem
1696 self._ErrorIf(test, self.ENODEN1, node,
1697 "not enough memory to accommodate instance failovers"
1698 " should node %s fail (%dMiB needed, %dMiB available)",
1699 prinode, needed_mem, n_img.mfree)
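# Worked example with hypothetical numbers: if this node is secondary for
# two auto-balanced instances whose primary is node B, needing 2048 and
# 1024 MiB of memory, then needed_mem is 3072 MiB; with mfree = 2500 MiB
# the ENODEN1 error above is reported for prinode B.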
1702 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1703 (files_all, files_all_opt, files_mc, files_vm)):
1704 """Verifies file checksums collected from all nodes.
1706 @param errorif: Callback for reporting errors
1707 @param nodeinfo: List of L{objects.Node} objects
1708 @param master_node: Name of master node
1709 @param all_nvinfo: RPC results
1712 node_names = frozenset(node.name for node in nodeinfo)
1714 assert master_node in node_names
1715 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1716 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1717 "Found file listed in more than one file list"
1719 # Define functions determining which nodes to consider for a file
1720 file2nodefn = dict([(filename, fn)
1721 for (files, fn) in [(files_all, None),
1722 (files_all_opt, None),
1723 (files_mc, lambda node: (node.master_candidate or
1724 node.name == master_node)),
1725 (files_vm, lambda node: node.vm_capable)]
1726 for filename in files])
1728 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1730 for node in nodeinfo:
1731 nresult = all_nvinfo[node.name]
1733 if nresult.fail_msg or not nresult.payload:
1736 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1738 test = not (node_files and isinstance(node_files, dict))
1739 errorif(test, cls.ENODEFILECHECK, node.name,
1740 "Node did not return file checksum data")
1744 for (filename, checksum) in node_files.items():
1745 # Check if the file should be considered for a node
1746 fn = file2nodefn[filename]
1747 if fn is None or fn(node):
1748 fileinfo[filename].setdefault(checksum, set()).add(node.name)
1750 for (filename, checksums) in fileinfo.items():
1751 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1753 # Nodes having the file
1754 with_file = frozenset(node_name
1755 for nodes in fileinfo[filename].values()
1756 for node_name in nodes)
1758 # Nodes missing file
1759 missing_file = node_names - with_file
1761 if filename in files_all_opt:
1763 errorif(missing_file and missing_file != node_names,
1764 cls.ECLUSTERFILECHECK, None,
1765 "File %s is optional, but it must exist on all or no nodes (not"
1767 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1769 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1770 "File %s is missing from node(s) %s", filename,
1771 utils.CommaJoin(utils.NiceSort(missing_file)))
1773 # See if there are multiple versions of the file
1774 test = len(checksums) > 1
1776 variants = ["variant %s on %s" %
1777 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1778 for (idx, (checksum, nodes)) in
1779 enumerate(sorted(checksums.items()))]
1783 errorif(test, cls.ECLUSTERFILECHECK, None,
1784 "File %s found with %s different checksums (%s)",
1785 filename, len(checksums), "; ".join(variants))
1787 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1789 """Verifies the node DRBD status.
1791 @type ninfo: L{objects.Node}
1792 @param ninfo: the node to check
1793 @param nresult: the remote results for the node
1794 @param instanceinfo: the dict of instances
1795 @param drbd_helper: the configured DRBD usermode helper
1796 @param drbd_map: the DRBD map as returned by
1797 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1801 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1804 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1805 test = (helper_result is None)
1806 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1807 "no drbd usermode helper returned")
1809 status, payload = helper_result
1811 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1812 "drbd usermode helper check unsuccessful: %s", payload)
1813 test = status and (payload != drbd_helper)
1814 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1815 "wrong drbd usermode helper: %s", payload)
1817 # compute the DRBD minors
1819 for minor, instance in drbd_map[node].items():
1820 test = instance not in instanceinfo
1821 _ErrorIf(test, self.ECLUSTERCFG, None,
1822 "ghost instance '%s' in temporary DRBD map", instance)
1823 # ghost instance should not be running, but otherwise we
1824 # don't give double warnings (both ghost instance and
1825 # unallocated minor in use)
1827 node_drbd[minor] = (instance, False)
1829 instance = instanceinfo[instance]
1830 node_drbd[minor] = (instance.name, instance.admin_up)
1832 # and now check them
1833 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1834 test = not isinstance(used_minors, (tuple, list))
1835 _ErrorIf(test, self.ENODEDRBD, node,
1836 "cannot parse drbd status file: %s", str(used_minors))
1838 # we cannot check drbd status
1841 for minor, (iname, must_exist) in node_drbd.items():
1842 test = minor not in used_minors and must_exist
1843 _ErrorIf(test, self.ENODEDRBD, node,
1844 "drbd minor %d of instance %s is not active", minor, iname)
1845 for minor in used_minors:
1846 test = minor not in node_drbd
1847 _ErrorIf(test, self.ENODEDRBD, node,
1848 "unallocated drbd minor %d is in use", minor)
1850 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1851 """Builds the node OS structures.
1853 @type ninfo: L{objects.Node}
1854 @param ninfo: the node to check
1855 @param nresult: the remote results for the node
1856 @param nimg: the node image object
1860 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1862 remote_os = nresult.get(constants.NV_OSLIST, None)
1863 test = (not isinstance(remote_os, list) or
1864 not compat.all(isinstance(v, list) and len(v) == 7
1865 for v in remote_os))
1867 _ErrorIf(test, self.ENODEOS, node,
1868 "node hasn't returned valid OS data")
1877 for (name, os_path, status, diagnose,
1878 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1880 if name not in os_dict:
1883 # parameters is a list of lists instead of list of tuples due to
1884 # JSON lacking a real tuple type, fix it:
1885 parameters = [tuple(v) for v in parameters]
1886 os_dict[name].append((os_path, status, diagnose,
1887 set(variants), set(parameters), set(api_ver)))
1889 nimg.oslist = os_dict
1891 def _VerifyNodeOS(self, ninfo, nimg, base):
1892 """Verifies the node OS list.
1894 @type ninfo: L{objects.Node}
1895 @param ninfo: the node to check
1896 @param nimg: the node image object
1897 @param base: the 'template' node we match against (e.g. from the master)
1901 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1903 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1905 for os_name, os_data in nimg.oslist.items():
1906 assert os_data, "Empty OS status for OS %s?!" % os_name
1907 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1908 _ErrorIf(not f_status, self.ENODEOS, node,
1909 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1910 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1911 "OS '%s' has multiple entries (first one shadows the rest): %s",
1912 os_name, utils.CommaJoin([v[0] for v in os_data]))
1913 # this will be caught in the backend too
1914 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1915 and not f_var, self.ENODEOS, node,
1916 "OS %s with API at least %d does not declare any variant",
1917 os_name, constants.OS_API_V15)
1918 # comparisons with the 'base' image
1919 test = os_name not in base.oslist
1920 _ErrorIf(test, self.ENODEOS, node,
1921 "Extra OS %s not present on reference node (%s)",
1925 assert base.oslist[os_name], "Base node has empty OS status?"
1926 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1928 # base OS is invalid, skipping
1930 for kind, a, b in [("API version", f_api, b_api),
1931 ("variants list", f_var, b_var),
1932 ("parameters", f_param, b_param)]:
1933 _ErrorIf(a != b, self.ENODEOS, node,
1934 "OS %s %s differs from reference node %s: %s vs. %s",
1935 kind, os_name, base.name,
1936 utils.CommaJoin(a), utils.CommaJoin(b))
1938 # check any missing OSes
1939 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1940 _ErrorIf(missing, self.ENODEOS, node,
1941 "OSes present on reference node %s but missing on this node: %s",
1942 base.name, utils.CommaJoin(missing))
1944 def _VerifyOob(self, ninfo, nresult):
1945 """Verifies out of band functionality of a node.
1947 @type ninfo: L{objects.Node}
1948 @param ninfo: the node to check
1949 @param nresult: the remote results for the node
1953 # We just have to verify the paths on master and/or master candidates
1954 # as the oob helper is invoked on the master
1955 if ((ninfo.master_candidate or ninfo.master_capable) and
1956 constants.NV_OOB_PATHS in nresult):
1957 for path_result in nresult[constants.NV_OOB_PATHS]:
1958 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1960 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1961 """Verifies and updates the node volume data.
1963 This function will update a L{NodeImage}'s internal structures
1964 with data from the remote call.
1966 @type ninfo: L{objects.Node}
1967 @param ninfo: the node to check
1968 @param nresult: the remote results for the node
1969 @param nimg: the node image object
1970 @param vg_name: the configured VG name
1974 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1976 nimg.lvm_fail = True
1977 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1980 elif isinstance(lvdata, basestring):
1981 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1982 utils.SafeEncode(lvdata))
1983 elif not isinstance(lvdata, dict):
1984 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1986 nimg.volumes = lvdata
1987 nimg.lvm_fail = False
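# nimg.volumes now holds the LV listing returned by the node; roughly (the
# exact value format comes from the backend LV listing, names below are
# hypothetical):
#   nimg.volumes = {"xenvg/disk0.data": (size, inactive_flag, online_flag)}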
1989 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1990 """Verifies and updates the node instance list.
1992 If the listing was successful, then updates this node's instance
1993 list. Otherwise, it marks the RPC call as failed for the instance list check.
1996 @type ninfo: L{objects.Node}
1997 @param ninfo: the node to check
1998 @param nresult: the remote results for the node
1999 @param nimg: the node image object
2002 idata = nresult.get(constants.NV_INSTANCELIST, None)
2003 test = not isinstance(idata, list)
2004 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2005 " (instancelist): %s", utils.SafeEncode(str(idata)))
2007 nimg.hyp_fail = True
2009 nimg.instances = idata
2011 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2012 """Verifies and computes a node information map
2014 @type ninfo: L{objects.Node}
2015 @param ninfo: the node to check
2016 @param nresult: the remote results for the node
2017 @param nimg: the node image object
2018 @param vg_name: the configured VG name
2022 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2024 # try to read free memory (from the hypervisor)
2025 hv_info = nresult.get(constants.NV_HVINFO, None)
2026 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2027 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2030 nimg.mfree = int(hv_info["memory_free"])
2031 except (ValueError, TypeError):
2032 _ErrorIf(True, self.ENODERPC, node,
2033 "node returned invalid nodeinfo, check hypervisor")
2035 # FIXME: devise a free space model for file based instances as well
2036 if vg_name is not None:
2037 test = (constants.NV_VGLIST not in nresult or
2038 vg_name not in nresult[constants.NV_VGLIST])
2039 _ErrorIf(test, self.ENODELVM, node,
2040 "node didn't return data for the volume group '%s'"
2041 " - it is either missing or broken", vg_name)
2044 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2045 except (ValueError, TypeError):
2046 _ErrorIf(True, self.ENODERPC, node,
2047 "node returned invalid LVM info, check LVM status")
2049 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2050 """Gets per-disk status information for all instances.
2052 @type nodelist: list of strings
2053 @param nodelist: Node names
2054 @type node_image: dict of (name, L{objects.Node})
2055 @param node_image: Node objects
2056 @type instanceinfo: dict of (name, L{objects.Instance})
2057 @param instanceinfo: Instance objects
2058 @rtype: {instance: {node: [(success, payload)]}}
2059 @return: a dictionary of per-instance dictionaries with nodes as
2060 keys and disk information as values; the disk information is a
2061 list of tuples (success, payload)
2064 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2067 node_disks_devonly = {}
2068 diskless_instances = set()
2069 diskless = constants.DT_DISKLESS
2071 for nname in nodelist:
2072 node_instances = list(itertools.chain(node_image[nname].pinst,
2073 node_image[nname].sinst))
2074 diskless_instances.update(inst for inst in node_instances
2075 if instanceinfo[inst].disk_template == diskless)
2076 disks = [(inst, disk)
2077 for inst in node_instances
2078 for disk in instanceinfo[inst].disks]
2081 # No need to collect data
2084 node_disks[nname] = disks
2086 # Creating copies as SetDiskID below will modify the objects and that can
2087 # lead to incorrect data returned from nodes
2088 devonly = [dev.Copy() for (_, dev) in disks]
2091 self.cfg.SetDiskID(dev, nname)
2093 node_disks_devonly[nname] = devonly
2095 assert len(node_disks) == len(node_disks_devonly)
2097 # Collect data from all nodes with disks
2098 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2101 assert len(result) == len(node_disks)
2105 for (nname, nres) in result.items():
2106 disks = node_disks[nname]
2109 # No data from this node
2110 data = len(disks) * [(False, "node offline")]
2113 _ErrorIf(msg, self.ENODERPC, nname,
2114 "while getting disk information: %s", msg)
2116 # No data from this node
2117 data = len(disks) * [(False, msg)]
2120 for idx, i in enumerate(nres.payload):
2121 if isinstance(i, (tuple, list)) and len(i) == 2:
2124 logging.warning("Invalid result from node %s, entry %d: %s",
2126 data.append((False, "Invalid result from the remote node"))
2128 for ((inst, _), status) in zip(disks, data):
2129 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2131 # Add empty entries for diskless instances.
2132 for inst in diskless_instances:
2133 assert inst not in instdisk
2136 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2137 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2138 compat.all(isinstance(s, (tuple, list)) and
2139 len(s) == 2 for s in statuses)
2140 for inst, nnames in instdisk.items()
2141 for nname, statuses in nnames.items())
2142 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
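# A hypothetical instdisk for one DRBD instance with two disks, illustrating
# the structure documented above ({instance: {node: [(success, payload)]}}):
#   instdisk = {"inst1.example.com": {
#       "node1.example.com": [(True, mirror_status), (True, mirror_status)],
#       "node2.example.com": [(False, "node offline"), (False, "node offline")]}}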
2146 def _VerifyHVP(self, hvp_data):
2147 """Verifies locally the syntax of the hypervisor parameters.
2150 for item, hv_name, hv_params in hvp_data:
2151 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2154 hv_class = hypervisor.GetHypervisor(hv_name)
2155 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2156 hv_class.CheckParameterSyntax(hv_params)
2157 except errors.GenericError, err:
2158 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2160 def BuildHooksEnv(self):
2163 Cluster-Verify hooks are run only in the post phase; if they fail, their
2164 output is logged in the verify output and the verification fails.
2170 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2173 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2174 for node in cfg.GetAllNodesInfo().values())
2178 def BuildHooksNodes(self):
2179 """Build hooks nodes.
2182 return ([], self.cfg.GetNodeList())
2184 def Exec(self, feedback_fn):
2185 """Verify integrity of cluster, performing various test on nodes.
2188 # This method has too many local variables. pylint: disable-msg=R0914
2190 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2191 verbose = self.op.verbose
2192 self._feedback_fn = feedback_fn
2193 feedback_fn("* Verifying global settings")
2194 for msg in self.cfg.VerifyConfig():
2195 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2197 # Check the cluster certificates
2198 for cert_filename in constants.ALL_CERT_FILES:
2199 (errcode, msg) = _VerifyCertificate(cert_filename)
2200 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2202 vg_name = self.cfg.GetVGName()
2203 drbd_helper = self.cfg.GetDRBDHelper()
2204 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2205 cluster = self.cfg.GetClusterInfo()
2206 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2207 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2208 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2209 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2210 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2211 for iname in instancelist)
2212 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2213 i_non_redundant = [] # Non redundant instances
2214 i_non_a_balanced = [] # Non auto-balanced instances
2215 n_offline = 0 # Count of offline nodes
2216 n_drained = 0 # Count of nodes being drained
2217 node_vol_should = {}
2219 # FIXME: verify OS list
2222 filemap = _ComputeAncillaryFiles(cluster, False)
2224 # do local checksums
2225 master_node = self.master_node = self.cfg.GetMasterNode()
2226 master_ip = self.cfg.GetMasterIP()
2228 # Compute the set of hypervisor parameters
2230 for hv_name in hypervisors:
2231 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2232 for os_name, os_hvp in cluster.os_hvp.items():
2233 for hv_name, hv_params in os_hvp.items():
2236 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2237 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2238 # TODO: collapse identical parameter values in a single one
2239 for instance in instanceinfo.values():
2240 if not instance.hvparams:
2242 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2243 cluster.FillHV(instance)))
2244 # and verify them locally
2245 self._VerifyHVP(hvp_data)
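# At this point hvp_data is a list of (source, hypervisor, parameters)
# tuples covering the cluster defaults, per-OS overrides and per-instance
# overrides, e.g. (hypothetical values):
#   [("cluster", "kvm", {...}),
#    ("os debian-image", "kvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]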
2247 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2248 node_verify_param = {
2249 constants.NV_FILELIST:
2250 utils.UniqueSequence(filename
2251 for files in filemap
2252 for filename in files),
2253 constants.NV_NODELIST: [node.name for node in nodeinfo
2254 if not node.offline],
2255 constants.NV_HYPERVISOR: hypervisors,
2256 constants.NV_HVPARAMS: hvp_data,
2257 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2258 node.secondary_ip) for node in nodeinfo
2259 if not node.offline],
2260 constants.NV_INSTANCELIST: hypervisors,
2261 constants.NV_VERSION: None,
2262 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2263 constants.NV_NODESETUP: None,
2264 constants.NV_TIME: None,
2265 constants.NV_MASTERIP: (master_node, master_ip),
2266 constants.NV_OSLIST: None,
2267 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2270 if vg_name is not None:
2271 node_verify_param[constants.NV_VGLIST] = None
2272 node_verify_param[constants.NV_LVLIST] = vg_name
2273 node_verify_param[constants.NV_PVLIST] = [vg_name]
2274 node_verify_param[constants.NV_DRBDLIST] = None
2277 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2279 # Build our expected cluster state
2280 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2282 vm_capable=node.vm_capable))
2283 for node in nodeinfo)
2287 for node in nodeinfo:
2288 path = _SupportsOob(self.cfg, node)
2289 if path and path not in oob_paths:
2290 oob_paths.append(path)
2293 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2295 for instance in instancelist:
2296 inst_config = instanceinfo[instance]
2298 for nname in inst_config.all_nodes:
2299 if nname not in node_image:
2301 gnode = self.NodeImage(name=nname)
2303 node_image[nname] = gnode
2305 inst_config.MapLVsByNode(node_vol_should)
2307 pnode = inst_config.primary_node
2308 node_image[pnode].pinst.append(instance)
2310 for snode in inst_config.secondary_nodes:
2311 nimg = node_image[snode]
2312 nimg.sinst.append(instance)
2313 if pnode not in nimg.sbp:
2314 nimg.sbp[pnode] = []
2315 nimg.sbp[pnode].append(instance)
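# For a hypothetical two-node DRBD instance this fills the node images as:
#   node_image["node1"].pinst == ["inst1"]             # primary instances
#   node_image["node2"].sinst == ["inst1"]             # secondary instances
#   node_image["node2"].sbp   == {"node1": ["inst1"]}  # secondaries by primary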
2317 # At this point, we have the in-memory data structures complete,
2318 # except for the runtime information, which we'll gather next
2320 # Due to the way our RPC system works, exact response times cannot be
2321 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2322 # time before and after executing the request, we can at least have a time window.
2324 nvinfo_starttime = time.time()
2325 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2326 self.cfg.GetClusterName())
2327 nvinfo_endtime = time.time()
2329 all_drbd_map = self.cfg.ComputeDRBDMap()
2331 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2332 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2334 feedback_fn("* Verifying configuration file consistency")
2335 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2337 feedback_fn("* Verifying node status")
2341 for node_i in nodeinfo:
2343 nimg = node_image[node]
2347 feedback_fn("* Skipping offline node %s" % (node,))
2351 if node == master_node:
2353 elif node_i.master_candidate:
2354 ntype = "master candidate"
2355 elif node_i.drained:
2361 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2363 msg = all_nvinfo[node].fail_msg
2364 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2366 nimg.rpc_fail = True
2369 nresult = all_nvinfo[node].payload
2371 nimg.call_ok = self._VerifyNode(node_i, nresult)
2372 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2373 self._VerifyNodeNetwork(node_i, nresult)
2374 self._VerifyOob(node_i, nresult)
2377 self._VerifyNodeLVM(node_i, nresult, vg_name)
2378 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2381 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2382 self._UpdateNodeInstances(node_i, nresult, nimg)
2383 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2384 self._UpdateNodeOS(node_i, nresult, nimg)
2385 if not nimg.os_fail:
2386 if refos_img is None:
2388 self._VerifyNodeOS(node_i, nimg, refos_img)
2390 feedback_fn("* Verifying instance status")
2391 for instance in instancelist:
2393 feedback_fn("* Verifying instance %s" % instance)
2394 inst_config = instanceinfo[instance]
2395 self._VerifyInstance(instance, inst_config, node_image,
2397 inst_nodes_offline = []
2399 pnode = inst_config.primary_node
2400 pnode_img = node_image[pnode]
2401 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2402 self.ENODERPC, pnode, "instance %s, connection to"
2403 " primary node failed", instance)
2405 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2406 self.EINSTANCEBADNODE, instance,
2407 "instance is marked as running and lives on offline node %s",
2408 inst_config.primary_node)
2410 # If the instance is non-redundant we cannot survive losing its primary
2411 # node, so we are not N+1 compliant. On the other hand we have no disk
2412 # templates with more than one secondary so that situation is not well supported either.
2414 # FIXME: does not support file-backed instances
2415 if not inst_config.secondary_nodes:
2416 i_non_redundant.append(instance)
2418 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2419 instance, "instance has multiple secondary nodes: %s",
2420 utils.CommaJoin(inst_config.secondary_nodes),
2421 code=self.ETYPE_WARNING)
2423 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2424 pnode = inst_config.primary_node
2425 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2426 instance_groups = {}
2428 for node in instance_nodes:
2429 instance_groups.setdefault(nodeinfo_byname[node].group,
2433 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2434 # Sort so that we always list the primary node first.
2435 for group, nodes in sorted(instance_groups.items(),
2436 key=lambda (_, nodes): pnode in nodes,
2439 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2440 instance, "instance has primary and secondary nodes in"
2441 " different groups: %s", utils.CommaJoin(pretty_list),
2442 code=self.ETYPE_WARNING)
2444 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2445 i_non_a_balanced.append(instance)
2447 for snode in inst_config.secondary_nodes:
2448 s_img = node_image[snode]
2449 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2450 "instance %s, connection to secondary node failed", instance)
2453 inst_nodes_offline.append(snode)
2455 # warn that the instance lives on offline nodes
2456 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2457 "instance has offline secondary node(s) %s",
2458 utils.CommaJoin(inst_nodes_offline))
2459 # ... or ghost/non-vm_capable nodes
2460 for node in inst_config.all_nodes:
2461 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2462 "instance lives on ghost node %s", node)
2463 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2464 instance, "instance lives on non-vm_capable node %s", node)
2466 feedback_fn("* Verifying orphan volumes")
2467 reserved = utils.FieldSet(*cluster.reserved_lvs)
2468 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2470 feedback_fn("* Verifying orphan instances")
2471 self._VerifyOrphanInstances(instancelist, node_image)
2473 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2474 feedback_fn("* Verifying N+1 Memory redundancy")
2475 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2477 feedback_fn("* Other Notes")
2479 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2480 % len(i_non_redundant))
2482 if i_non_a_balanced:
2483 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2484 % len(i_non_a_balanced))
2487 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2490 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2494 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2495 """Analyze the post-hooks' result
2497 This method analyses the hook result, handles it, and sends some
2498 nicely-formatted feedback back to the user.
2500 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2501 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2502 @param hooks_results: the results of the multi-node hooks rpc call
2503 @param feedback_fn: function used send feedback back to the caller
2504 @param lu_result: previous Exec result
2505 @return: the new Exec result, based on the previous result
2509 # We only really run POST phase hooks, and are only interested in their results.
2511 if phase == constants.HOOKS_PHASE_POST:
2512 # Used to change hooks' output to proper indentation
2513 feedback_fn("* Hooks Results")
2514 assert hooks_results, "invalid result from hooks"
2516 for node_name in hooks_results:
2517 res = hooks_results[node_name]
2519 test = msg and not res.offline
2520 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2521 "Communication failure in hooks execution: %s", msg)
2522 if res.offline or msg:
2523 # No need to investigate payload if node is offline or gave an error.
2524 # manually override lu_result here as _ErrorIf only
2525 # overrides self.bad
2528 for script, hkr, output in res.payload:
2529 test = hkr == constants.HKR_FAIL
2530 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2531 "Script %s failed, output:", script)
2533 output = self._HOOKS_INDENT_RE.sub(' ', output)
2534 feedback_fn("%s" % output)
2540 class LUClusterVerifyDisks(NoHooksLU):
2541 """Verifies the cluster disks status.
2546 def ExpandNames(self):
2547 self.needed_locks = {
2548 locking.LEVEL_NODE: locking.ALL_SET,
2549 locking.LEVEL_INSTANCE: locking.ALL_SET,
2551 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2553 def Exec(self, feedback_fn):
2554 """Verify integrity of cluster disks.
2556 @rtype: tuple of three items
2557 @return: a tuple of (dict of node-to-node_error, list of instances
2558 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2562 result = res_nodes, res_instances, res_missing = {}, [], {}
2564 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2565 instances = self.cfg.GetAllInstancesInfo().values()
2568 for inst in instances:
2570 if not inst.admin_up:
2572 inst.MapLVsByNode(inst_lvs)
2573 # transform { iname: {node: [vol,],},} to {(node, vol): inst}
2574 for node, vol_list in inst_lvs.iteritems():
2575 for vol in vol_list:
2576 nv_dict[(node, vol)] = inst
2581 node_lvs = self.rpc.call_lv_list(nodes, [])
2582 for node, node_res in node_lvs.items():
2583 if node_res.offline:
2585 msg = node_res.fail_msg
2587 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2588 res_nodes[node] = msg
2591 lvs = node_res.payload
2592 for lv_name, (_, _, lv_online) in lvs.items():
2593 inst = nv_dict.pop((node, lv_name), None)
2594 if (not lv_online and inst is not None
2595 and inst.name not in res_instances):
2596 res_instances.append(inst.name)
2598 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2600 for key, inst in nv_dict.iteritems():
2601 if inst.name not in res_missing:
2602 res_missing[inst.name] = []
2603 res_missing[inst.name].append(key)
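# The three result components built above might, for example, end up as
# (hypothetical names):
#   res_nodes     = {"node3": "Error while listing LVs"}
#   res_instances = ["inst1"]                         # needs activate-disks
#   res_missing   = {"inst2": [("node1", "xenvg/disk0.data")]}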
2608 class LUClusterRepairDiskSizes(NoHooksLU):
2609 """Verifies the cluster disks sizes.
2614 def ExpandNames(self):
2615 if self.op.instances:
2616 self.wanted_names = []
2617 for name in self.op.instances:
2618 full_name = _ExpandInstanceName(self.cfg, name)
2619 self.wanted_names.append(full_name)
2620 self.needed_locks = {
2621 locking.LEVEL_NODE: [],
2622 locking.LEVEL_INSTANCE: self.wanted_names,
2624 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2626 self.wanted_names = None
2627 self.needed_locks = {
2628 locking.LEVEL_NODE: locking.ALL_SET,
2629 locking.LEVEL_INSTANCE: locking.ALL_SET,
2631 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2633 def DeclareLocks(self, level):
2634 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2635 self._LockInstancesNodes(primary_only=True)
2637 def CheckPrereq(self):
2638 """Check prerequisites.
2640 This only checks the optional instance list against the existing names.
2643 if self.wanted_names is None:
2644 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2646 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2647 in self.wanted_names]
2649 def _EnsureChildSizes(self, disk):
2650 """Ensure children of the disk have the needed disk size.
2652 This is valid mainly for DRBD8 and fixes an issue where the
2653 children have smaller disk size.
2655 @param disk: an L{ganeti.objects.Disk} object
2658 if disk.dev_type == constants.LD_DRBD8:
2659 assert disk.children, "Empty children for DRBD8?"
2660 fchild = disk.children[0]
2661 mismatch = fchild.size < disk.size
2663 self.LogInfo("Child disk has size %d, parent %d, fixing",
2664 fchild.size, disk.size)
2665 fchild.size = disk.size
2667 # and we recurse on this child only, not on the metadev
2668 return self._EnsureChildSizes(fchild) or mismatch
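# Hypothetical example: a DRBD8 disk of size 10240 whose data child has a
# recorded size of 10236 would have the child's recorded size set back to
# 10240 and True returned, so the caller knows the configuration must be
# written out again.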
2672 def Exec(self, feedback_fn):
2673 """Verify the size of cluster disks.
2676 # TODO: check child disks too
2677 # TODO: check differences in size between primary/secondary nodes
2679 for instance in self.wanted_instances:
2680 pnode = instance.primary_node
2681 if pnode not in per_node_disks:
2682 per_node_disks[pnode] = []
2683 for idx, disk in enumerate(instance.disks):
2684 per_node_disks[pnode].append((instance, idx, disk))
2687 for node, dskl in per_node_disks.items():
2688 newl = [v[2].Copy() for v in dskl]
2690 self.cfg.SetDiskID(dsk, node)
2691 result = self.rpc.call_blockdev_getsize(node, newl)
2693 self.LogWarning("Failure in blockdev_getsize call to node"
2694 " %s, ignoring", node)
2696 if len(result.payload) != len(dskl):
2697 logging.warning("Invalid result from node %s: len(dskl)=%d,"
2698 " result.payload=%s", node, len(dskl), result.payload)
2699 self.LogWarning("Invalid result from node %s, ignoring node results",
2702 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2704 self.LogWarning("Disk %d of instance %s did not return size"
2705 " information, ignoring", idx, instance.name)
2707 if not isinstance(size, (int, long)):
2708 self.LogWarning("Disk %d of instance %s did not return valid"
2709 " size information, ignoring", idx, instance.name)
2712 if size != disk.size:
2713 self.LogInfo("Disk %d of instance %s has mismatched size,"
2714 " correcting: recorded %d, actual %d", idx,
2715 instance.name, disk.size, size)
2717 self.cfg.Update(instance, feedback_fn)
2718 changed.append((instance.name, idx, size))
2719 if self._EnsureChildSizes(disk):
2720 self.cfg.Update(instance, feedback_fn)
2721 changed.append((instance.name, idx, disk.size))
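# Each entry appended to "changed" is an (instance_name, disk_index, new_size)
# tuple, e.g. ("inst1.example.com", 0, 10240) if disk 0 had a stale recorded
# size (hypothetical values).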
2725 class LUClusterRename(LogicalUnit):
2726 """Rename the cluster.
2729 HPATH = "cluster-rename"
2730 HTYPE = constants.HTYPE_CLUSTER
2732 def BuildHooksEnv(self):
2737 "OP_TARGET": self.cfg.GetClusterName(),
2738 "NEW_NAME": self.op.name,
2741 def BuildHooksNodes(self):
2742 """Build hooks nodes.
2745 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2747 def CheckPrereq(self):
2748 """Verify that the passed name is a valid one.
2751 hostname = netutils.GetHostname(name=self.op.name,
2752 family=self.cfg.GetPrimaryIPFamily())
2754 new_name = hostname.name
2755 self.ip = new_ip = hostname.ip
2756 old_name = self.cfg.GetClusterName()
2757 old_ip = self.cfg.GetMasterIP()
2758 if new_name == old_name and new_ip == old_ip:
2759 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2760 " cluster has changed",
2762 if new_ip != old_ip:
2763 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2764 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2765 " reachable on the network" %
2766 new_ip, errors.ECODE_NOTUNIQUE)
2768 self.op.name = new_name
2770 def Exec(self, feedback_fn):
2771 """Rename the cluster.
2774 clustername = self.op.name
2777 # shutdown the master IP
2778 master = self.cfg.GetMasterNode()
2779 result = self.rpc.call_node_stop_master(master, False)
2780 result.Raise("Could not disable the master role")
2783 cluster = self.cfg.GetClusterInfo()
2784 cluster.cluster_name = clustername
2785 cluster.master_ip = ip
2786 self.cfg.Update(cluster, feedback_fn)
2788 # update the known hosts file
2789 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2790 node_list = self.cfg.GetOnlineNodeList()
2792 node_list.remove(master)
2795 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2797 result = self.rpc.call_node_start_master(master, False, False)
2798 msg = result.fail_msg
2800 self.LogWarning("Could not re-enable the master role on"
2801 " the master, please restart manually: %s", msg)
2806 class LUClusterSetParams(LogicalUnit):
2807 """Change the parameters of the cluster.
2810 HPATH = "cluster-modify"
2811 HTYPE = constants.HTYPE_CLUSTER
2814 def CheckArguments(self):
2818 if self.op.uid_pool:
2819 uidpool.CheckUidPool(self.op.uid_pool)
2821 if self.op.add_uids:
2822 uidpool.CheckUidPool(self.op.add_uids)
2824 if self.op.remove_uids:
2825 uidpool.CheckUidPool(self.op.remove_uids)
2827 def ExpandNames(self):
2828 # FIXME: in the future maybe other cluster params won't require checking on
2829 # all nodes to be modified.
2830 self.needed_locks = {
2831 locking.LEVEL_NODE: locking.ALL_SET,
2833 self.share_locks[locking.LEVEL_NODE] = 1
2835 def BuildHooksEnv(self):
2840 "OP_TARGET": self.cfg.GetClusterName(),
2841 "NEW_VG_NAME": self.op.vg_name,
2844 def BuildHooksNodes(self):
2845 """Build hooks nodes.
2848 mn = self.cfg.GetMasterNode()
2851 def CheckPrereq(self):
2852 """Check prerequisites.
2854 This checks whether the given params don't conflict and
2855 if the given volume group is valid.
2858 if self.op.vg_name is not None and not self.op.vg_name:
2859 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2860 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2861 " instances exist", errors.ECODE_INVAL)
2863 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2864 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2865 raise errors.OpPrereqError("Cannot disable drbd helper while"
2866 " drbd-based instances exist",
2869 node_list = self.acquired_locks[locking.LEVEL_NODE]
2871 # if vg_name not None, checks given volume group on all nodes
2873 vglist = self.rpc.call_vg_list(node_list)
2874 for node in node_list:
2875 msg = vglist[node].fail_msg
2877 # ignoring down node
2878 self.LogWarning("Error while gathering data on node %s"
2879 " (ignoring node): %s", node, msg)
2881 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2883 constants.MIN_VG_SIZE)
2885 raise errors.OpPrereqError("Error on node '%s': %s" %
2886 (node, vgstatus), errors.ECODE_ENVIRON)
2888 if self.op.drbd_helper:
2889 # checks given drbd helper on all nodes
2890 helpers = self.rpc.call_drbd_helper(node_list)
2891 for node in node_list:
2892 ninfo = self.cfg.GetNodeInfo(node)
2894 self.LogInfo("Not checking drbd helper on offline node %s", node)
2896 msg = helpers[node].fail_msg
2898 raise errors.OpPrereqError("Error checking drbd helper on node"
2899 " '%s': %s" % (node, msg),
2900 errors.ECODE_ENVIRON)
2901 node_helper = helpers[node].payload
2902 if node_helper != self.op.drbd_helper:
2903 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2904 (node, node_helper), errors.ECODE_ENVIRON)
2906 self.cluster = cluster = self.cfg.GetClusterInfo()
2907 # validate params changes
2908 if self.op.beparams:
2909 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2910 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2912 if self.op.ndparams:
2913 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2914 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2916 # TODO: we need a more general way to handle resetting
2917 # cluster-level parameters to default values
2918 if self.new_ndparams["oob_program"] == "":
2919 self.new_ndparams["oob_program"] = \
2920 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2922 if self.op.nicparams:
2923 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2924 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2925 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2928 # check all instances for consistency
2929 for instance in self.cfg.GetAllInstancesInfo().values():
2930 for nic_idx, nic in enumerate(instance.nics):
2931 params_copy = copy.deepcopy(nic.nicparams)
2932 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2934 # check parameter syntax
2936 objects.NIC.CheckParameterSyntax(params_filled)
2937 except errors.ConfigurationError, err:
2938 nic_errors.append("Instance %s, nic/%d: %s" %
2939 (instance.name, nic_idx, err))
2941 # if we're moving instances to routed, check that they have an ip
2942 target_mode = params_filled[constants.NIC_MODE]
2943 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2944 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2945 (instance.name, nic_idx))
2947 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2948 "\n".join(nic_errors))
2950 # hypervisor list/parameters
2951 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2952 if self.op.hvparams:
2953 for hv_name, hv_dict in self.op.hvparams.items():
2954 if hv_name not in self.new_hvparams:
2955 self.new_hvparams[hv_name] = hv_dict
2957 self.new_hvparams[hv_name].update(hv_dict)
2959 # os hypervisor parameters
2960 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2962 for os_name, hvs in self.op.os_hvp.items():
2963 if os_name not in self.new_os_hvp:
2964 self.new_os_hvp[os_name] = hvs
2966 for hv_name, hv_dict in hvs.items():
2967 if hv_name not in self.new_os_hvp[os_name]:
2968 self.new_os_hvp[os_name][hv_name] = hv_dict
2970 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2973 self.new_osp = objects.FillDict(cluster.osparams, {})
2974 if self.op.osparams:
2975 for os_name, osp in self.op.osparams.items():
2976 if os_name not in self.new_osp:
2977 self.new_osp[os_name] = {}
2979 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2982 if not self.new_osp[os_name]:
2983 # we removed all parameters
2984 del self.new_osp[os_name]
2986 # check the parameter validity (remote check)
2987 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2988 os_name, self.new_osp[os_name])
2990 # changes to the hypervisor list
2991 if self.op.enabled_hypervisors is not None:
2992 self.hv_list = self.op.enabled_hypervisors
2993 for hv in self.hv_list:
2994 # if the hypervisor doesn't already exist in the cluster
2995 # hvparams, we initialize it to empty, and then (in both
2996 # cases) we make sure to fill the defaults, as we might not
2997 # have a complete defaults list if the hypervisor wasn't enabled before
2999 if hv not in new_hvp:
3001 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3002 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3004 self.hv_list = cluster.enabled_hypervisors
3006 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3007 # either the enabled list has changed, or the parameters have, validate
3008 for hv_name, hv_params in self.new_hvparams.items():
3009 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3010 (self.op.enabled_hypervisors and
3011 hv_name in self.op.enabled_hypervisors)):
3012 # either this is a new hypervisor, or its parameters have changed
3013 hv_class = hypervisor.GetHypervisor(hv_name)
3014 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3015 hv_class.CheckParameterSyntax(hv_params)
3016 _CheckHVParams(self, node_list, hv_name, hv_params)
3019 # no need to check any newly-enabled hypervisors, since the
3020 # defaults have already been checked in the above code-block
3021 for os_name, os_hvp in self.new_os_hvp.items():
3022 for hv_name, hv_params in os_hvp.items():
3023 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3024 # we need to fill in the new os_hvp on top of the actual hv_p
3025 cluster_defaults = self.new_hvparams.get(hv_name, {})
3026 new_osp = objects.FillDict(cluster_defaults, hv_params)
3027 hv_class = hypervisor.GetHypervisor(hv_name)
3028 hv_class.CheckParameterSyntax(new_osp)
3029 _CheckHVParams(self, node_list, hv_name, new_osp)
3031 if self.op.default_iallocator:
3032 alloc_script = utils.FindFile(self.op.default_iallocator,
3033 constants.IALLOCATOR_SEARCH_PATH,
3035 if alloc_script is None:
3036 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3037 " specified" % self.op.default_iallocator,
3040 def Exec(self, feedback_fn):
3041 """Change the parameters of the cluster.
3044 if self.op.vg_name is not None:
3045 new_volume = self.op.vg_name
3048 if new_volume != self.cfg.GetVGName():
3049 self.cfg.SetVGName(new_volume)
3051 feedback_fn("Cluster LVM configuration already in desired"
3052 " state, not changing")
3053 if self.op.drbd_helper is not None:
3054 new_helper = self.op.drbd_helper
3057 if new_helper != self.cfg.GetDRBDHelper():
3058 self.cfg.SetDRBDHelper(new_helper)
3060 feedback_fn("Cluster DRBD helper already in desired state,"
3062 if self.op.hvparams:
3063 self.cluster.hvparams = self.new_hvparams
3065 self.cluster.os_hvp = self.new_os_hvp
3066 if self.op.enabled_hypervisors is not None:
3067 self.cluster.hvparams = self.new_hvparams
3068 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3069 if self.op.beparams:
3070 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3071 if self.op.nicparams:
3072 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3073 if self.op.osparams:
3074 self.cluster.osparams = self.new_osp
3075 if self.op.ndparams:
3076 self.cluster.ndparams = self.new_ndparams
3078 if self.op.candidate_pool_size is not None:
3079 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3080 # we need to update the pool size here, otherwise the save will fail
3081 _AdjustCandidatePool(self, [])
3083 if self.op.maintain_node_health is not None:
3084 self.cluster.maintain_node_health = self.op.maintain_node_health
3086 if self.op.prealloc_wipe_disks is not None:
3087 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3089 if self.op.add_uids is not None:
3090 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3092 if self.op.remove_uids is not None:
3093 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3095 if self.op.uid_pool is not None:
3096 self.cluster.uid_pool = self.op.uid_pool
3098 if self.op.default_iallocator is not None:
3099 self.cluster.default_iallocator = self.op.default_iallocator
3101 if self.op.reserved_lvs is not None:
3102 self.cluster.reserved_lvs = self.op.reserved_lvs
3104 def helper_os(aname, mods, desc):
3106 lst = getattr(self.cluster, aname)
3107 for key, val in mods:
3108 if key == constants.DDM_ADD:
3110 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3113 elif key == constants.DDM_REMOVE:
3117 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3119 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3121 if self.op.hidden_os:
3122 helper_os("hidden_os", self.op.hidden_os, "hidden")
3124 if self.op.blacklisted_os:
3125 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3127 if self.op.master_netdev:
3128 master = self.cfg.GetMasterNode()
3129 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3130 self.cluster.master_netdev)
3131 result = self.rpc.call_node_stop_master(master, False)
3132 result.Raise("Could not disable the master ip")
3133 feedback_fn("Changing master_netdev from %s to %s" %
3134 (self.cluster.master_netdev, self.op.master_netdev))
3135 self.cluster.master_netdev = self.op.master_netdev
3137 self.cfg.Update(self.cluster, feedback_fn)
3139 if self.op.master_netdev:
3140 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3141 self.op.master_netdev)
3142 result = self.rpc.call_node_start_master(master, False, False)
3144 self.LogWarning("Could not re-enable the master ip on"
3145 " the master, please restart manually: %s",
3149 def _UploadHelper(lu, nodes, fname):
3150 """Helper for uploading a file and showing warnings.
3153 if os.path.exists(fname):
3154 result = lu.rpc.call_upload_file(nodes, fname)
3155 for to_node, to_result in result.items():
3156 msg = to_result.fail_msg
3158 msg = ("Copy of file %s to node %s failed: %s" %
3159 (fname, to_node, msg))
3160 lu.proc.LogWarning(msg)
3163 def _ComputeAncillaryFiles(cluster, redist):
3164 """Compute files external to Ganeti which need to be consistent.
3166 @type redist: boolean
3167 @param redist: Whether to include files which need to be redistributed
3170 # Compute files for all nodes
3172 constants.SSH_KNOWN_HOSTS_FILE,
3173 constants.CONFD_HMAC_KEY,
3174 constants.CLUSTER_DOMAIN_SECRET_FILE,
3178 files_all.update(constants.ALL_CERT_FILES)
3179 files_all.update(ssconf.SimpleStore().GetFileList())
3181 if cluster.modify_etc_hosts:
3182 files_all.add(constants.ETC_HOSTS)
3184 # Files which must either exist on all nodes or on none
3185 files_all_opt = set([
3186 constants.RAPI_USERS_FILE,
3189 # Files which should only be on master candidates
3192 files_mc.add(constants.CLUSTER_CONF_FILE)
3194 # Files which should only be on VM-capable nodes
3195 files_vm = set(filename
3196 for hv_name in cluster.enabled_hypervisors
3197 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3199 # Filenames must be unique
3200 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3201 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3202 "Found file listed in more than one file list"
3204 return (files_all, files_all_opt, files_mc, files_vm)
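# The four sets group ancillary files by where they must live: files_all on
# every node, files_all_opt on all nodes or on none, files_mc only on master
# candidates, and files_vm only on VM-capable nodes; for example
# constants.CONFD_HMAC_KEY ends up in files_all while hypervisor-specific
# files end up in files_vm.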
3207 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3208 """Distribute additional files which are part of the cluster configuration.
3210 ConfigWriter takes care of distributing the config and ssconf files, but
3211 there are more files which should be distributed to all nodes. This function
3212 makes sure those are copied.
3214 @param lu: calling logical unit
3215 @param additional_nodes: list of nodes not in the config to distribute to
3216 @type additional_vm: boolean
3217 @param additional_vm: whether the additional nodes are vm-capable or not
3220 # Gather target nodes
3221 cluster = lu.cfg.GetClusterInfo()
3222 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3224 online_nodes = lu.cfg.GetOnlineNodeList()
3225 vm_nodes = lu.cfg.GetVmCapableNodeList()
3227 if additional_nodes is not None:
3228 online_nodes.extend(additional_nodes)
3230 vm_nodes.extend(additional_nodes)
3232 # Never distribute to master node
3233 for nodelist in [online_nodes, vm_nodes]:
3234 if master_info.name in nodelist:
3235 nodelist.remove(master_info.name)
3238 (files_all, files_all_opt, files_mc, files_vm) = \
3239 _ComputeAncillaryFiles(cluster, True)
3241 # Never re-distribute configuration file from here
3242 assert not (constants.CLUSTER_CONF_FILE in files_all or
3243 constants.CLUSTER_CONF_FILE in files_vm)
3244 assert not files_mc, "Master candidates not handled in this function"
3247 (online_nodes, files_all),
3248 (online_nodes, files_all_opt),
3249 (vm_nodes, files_vm),
3253 for (node_list, files) in filemap:
3255 _UploadHelper(lu, node_list, fname)
3258 class LUClusterRedistConf(NoHooksLU):
3259 """Force the redistribution of cluster configuration.
3261 This is a very simple LU.
3266 def ExpandNames(self):
3267 self.needed_locks = {
3268 locking.LEVEL_NODE: locking.ALL_SET,
3270 self.share_locks[locking.LEVEL_NODE] = 1
3272 def Exec(self, feedback_fn):
3273 """Redistribute the configuration.
3276 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3277 _RedistributeAncillaryFiles(self)
3280 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3281 """Sleep and poll for an instance's disk to sync.
3284 if not instance.disks or disks is not None and not disks:
3287 disks = _ExpandCheckDisks(instance, disks)
3290 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3292 node = instance.primary_node
3295 lu.cfg.SetDiskID(dev, node)
3297 # TODO: Convert to utils.Retry
3300 degr_retries = 10 # in seconds, as we sleep 1 second each time
3304 cumul_degraded = False
3305 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3306 msg = rstats.fail_msg
3308 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3311 raise errors.RemoteError("Can't contact node %s for mirror data,"
3312 " aborting." % node)
3315 rstats = rstats.payload
3317 for i, mstat in enumerate(rstats):
3319 lu.LogWarning("Can't compute data for node %s/%s",
3320 node, disks[i].iv_name)
3323 cumul_degraded = (cumul_degraded or
3324 (mstat.is_degraded and mstat.sync_percent is None))
3325 if mstat.sync_percent is not None:
3327 if mstat.estimated_time is not None:
3328 rem_time = ("%s remaining (estimated)" %
3329 utils.FormatSeconds(mstat.estimated_time))
3330 max_time = mstat.estimated_time
3332 rem_time = "no time estimate"
3333 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3334 (disks[i].iv_name, mstat.sync_percent, rem_time))
3336 # if we're done but degraded, let's do a few small retries, to
3337 # make sure we see a stable and not transient situation; therefore
3338 # we force restart of the loop
3339 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3340 logging.info("Degraded disks found, %d retries left", degr_retries)
3348 time.sleep(min(60, max_time))
3351 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3352 return not cumul_degraded
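# The return value is True when, at the end of the wait, none of the checked
# disks is still degraded. A hypothetical caller might simply test it:
#   if not _WaitForSync(lu, instance):
#     lu.LogWarning("Some disks of %s are degraded", instance.name)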
3355 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3356 """Check that mirrors are not degraded.
3358 The ldisk parameter, if True, will change the test from the
3359 is_degraded attribute (which represents overall non-ok status for
3360 the device(s)) to the ldisk (representing the local storage status).
3363 lu.cfg.SetDiskID(dev, node)
3367 if on_primary or dev.AssembleOnSecondary():
3368 rstats = lu.rpc.call_blockdev_find(node, dev)
3369 msg = rstats.fail_msg
3371 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3373 elif not rstats.payload:
3374 lu.LogWarning("Can't find disk on node %s", node)
3378 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3380 result = result and not rstats.payload.is_degraded
3383 for child in dev.children:
3384 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3389 class LUOobCommand(NoHooksLU):
3390 """Logical unit for OOB handling.
3394 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3396 def CheckPrereq(self):
3397 """Check prerequisites.
3400 - the node exists in the configuration
3403 Any errors are signaled by raising errors.OpPrereqError.
3407 self.master_node = self.cfg.GetMasterNode()
3409 assert self.op.power_delay >= 0.0
3411 if self.op.node_names:
3412 if self.op.command in self._SKIP_MASTER:
3413 if self.master_node in self.op.node_names:
3414 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3415 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3417 if master_oob_handler:
3418 additional_text = ("Run '%s %s %s' if you want to operate on the"
3419 " master regardless") % (master_oob_handler,
3423 additional_text = "The master node does not support out-of-band"
3425 raise errors.OpPrereqError(("Operating on the master node %s is not"
3426 " allowed for %s\n%s") %
3427 (self.master_node, self.op.command,
3428 additional_text), errors.ECODE_INVAL)
3430 self.op.node_names = self.cfg.GetNodeList()
3431 if self.op.command in self._SKIP_MASTER:
3432 self.op.node_names.remove(self.master_node)
3434 if self.op.command in self._SKIP_MASTER:
3435 assert self.master_node not in self.op.node_names
3437 for node_name in self.op.node_names:
3438 node = self.cfg.GetNodeInfo(node_name)
3441 raise errors.OpPrereqError("Node %s not found" % node_name,
3444 self.nodes.append(node)
3446 if (not self.op.ignore_status and
3447 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3448 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3449 " not marked offline") % node_name,
3452 def ExpandNames(self):
3453 """Gather locks we need.
3456 if self.op.node_names:
3457 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3458 for name in self.op.node_names]
3459 lock_names = self.op.node_names
3461 lock_names = locking.ALL_SET
3463 self.needed_locks = {
3464 locking.LEVEL_NODE: lock_names,
3467 def Exec(self, feedback_fn):
3468 """Execute OOB and return result if we expect any.
3471 master_node = self.master_node
3474 for idx, node in enumerate(self.nodes):
3475 node_entry = [(constants.RS_NORMAL, node.name)]
3476 ret.append(node_entry)
3478 oob_program = _SupportsOob(self.cfg, node)
3481 node_entry.append((constants.RS_UNAVAIL, None))
3484 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3485 self.op.command, oob_program, node.name)
3486 result = self.rpc.call_run_oob(master_node, oob_program,
3487 self.op.command, node.name,
3491 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3492 node.name, result.fail_msg)
3493 node_entry.append((constants.RS_NODATA, None))
3496 self._CheckPayload(result)
3497 except errors.OpExecError, err:
3498 self.LogWarning("The payload returned by '%s' is not valid: %s",
3500 node_entry.append((constants.RS_NODATA, None))
3502 if self.op.command == constants.OOB_HEALTH:
3503 # For health we should log important events
3504 for item, status in result.payload:
3505 if status in [constants.OOB_STATUS_WARNING,
3506 constants.OOB_STATUS_CRITICAL]:
3507 self.LogWarning("On node '%s' item '%s' has status '%s'",
3508 node.name, item, status)
3510 if self.op.command == constants.OOB_POWER_ON:
3512 elif self.op.command == constants.OOB_POWER_OFF:
3513 node.powered = False
3514 elif self.op.command == constants.OOB_POWER_STATUS:
3515 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3516 if powered != node.powered:
3517 logging.warning(("Recorded power state (%s) of node '%s' does not"
3518 " match actual power state (%s)"), node.powered,
3521 # For configuration changing commands we should update the node
3522 if self.op.command in (constants.OOB_POWER_ON,
3523 constants.OOB_POWER_OFF):
3524 self.cfg.Update(node, feedback_fn)
3526 node_entry.append((constants.RS_NORMAL, result.payload))
3528 if (self.op.command == constants.OOB_POWER_ON and
3529 idx < len(self.nodes) - 1):
3530 time.sleep(self.op.power_delay)
3534 def _CheckPayload(self, result):
3535 """Checks if the payload is valid.
3537 @param result: RPC result
3538 @raises errors.OpExecError: If payload is not valid
3542 if self.op.command == constants.OOB_HEALTH:
3543 if not isinstance(result.payload, list):
3544 errs.append("command 'health' is expected to return a list but got %s" %
3545 type(result.payload))
3547 for item, status in result.payload:
3548 if status not in constants.OOB_STATUSES:
3549 errs.append("health item '%s' has invalid status '%s'" %
3552 if self.op.command == constants.OOB_POWER_STATUS:
3553 if not isinstance(result.payload, dict):
3554 errs.append("power-status is expected to return a dict but got %s" %
3555 type(result.payload))
3557 if self.op.command in [
3558 constants.OOB_POWER_ON,
3559 constants.OOB_POWER_OFF,
3560 constants.OOB_POWER_CYCLE,
3562 if result.payload is not None:
3563 errs.append("%s is expected to not return payload but got '%s'" %
3564 (self.op.command, result.payload))
3567 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3568 utils.CommaJoin(errs))
3570 class _OsQuery(_QueryBase):
3571 FIELDS = query.OS_FIELDS
3573 def ExpandNames(self, lu):
3574 # Lock all nodes in shared mode
3575 # Temporary removal of locks, should be reverted later
3576 # TODO: reintroduce locks when they are lighter-weight
3577 lu.needed_locks = {}
3578 #self.share_locks[locking.LEVEL_NODE] = 1
3579 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3581 # The following variables interact with _QueryBase._GetNames
3583 self.wanted = self.names
3585 self.wanted = locking.ALL_SET
3587 self.do_locking = self.use_locking
3589 def DeclareLocks(self, lu, level):
3593 def _DiagnoseByOS(rlist):
3594 """Remaps a per-node return list into an a per-os per-node dictionary
3596 @param rlist: a map with node names as keys and OS objects as values
3599 @return: a dictionary with osnames as keys and as value another
3600 map, with nodes as keys and tuples of (path, status, diagnose,
3601 variants, parameters, api_versions) as values, eg::
3603 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3604 (/srv/..., False, "invalid api")],
3605 "node2": [(/srv/..., True, "", [], [])]}
3610 # we build here the list of nodes that didn't fail the RPC (at RPC
3611 # level), so that nodes with a non-responding node daemon don't
3612 # make all OSes invalid
3613 good_nodes = [node_name for node_name in rlist
3614 if not rlist[node_name].fail_msg]
3615 for node_name, nr in rlist.items():
3616 if nr.fail_msg or not nr.payload:
3618 for (name, path, status, diagnose, variants,
3619 params, api_versions) in nr.payload:
3620 if name not in all_os:
3621 # build a list of nodes for this os containing empty lists
3622 # for each node in node_list
3624 for nname in good_nodes:
3625 all_os[name][nname] = []
3626 # convert params from [name, help] to (name, help)
3627 params = [tuple(v) for v in params]
3628 all_os[name][node_name].append((path, status, diagnose,
3629 variants, params, api_versions))
3632 def _GetQueryData(self, lu):
3633 """Computes the list of nodes and their attributes.
3636 # Locking is not used
3637 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3639 valid_nodes = [node.name
3640 for node in lu.cfg.GetAllNodesInfo().values()
3641 if not node.offline and node.vm_capable]
3642 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3643 cluster = lu.cfg.GetClusterInfo()
3647 for (os_name, os_data) in pol.items():
3648 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3649 hidden=(os_name in cluster.hidden_os),
3650 blacklisted=(os_name in cluster.blacklisted_os))
3654 api_versions = set()
3656 for idx, osl in enumerate(os_data.values()):
3657 info.valid = bool(info.valid and osl and osl[0][1])
3661 (node_variants, node_params, node_api) = osl[0][3:6]
3664 variants.update(node_variants)
3665 parameters.update(node_params)
3666 api_versions.update(node_api)
3668 # Filter out inconsistent values
3669 variants.intersection_update(node_variants)
3670 parameters.intersection_update(node_params)
3671 api_versions.intersection_update(node_api)
3673 info.variants = list(variants)
3674 info.parameters = list(parameters)
3675 info.api_versions = list(api_versions)
3677 data[os_name] = info
3679 # Prepare data in requested order
3680 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3684 class LUOsDiagnose(NoHooksLU):
3685 """Logical unit for OS diagnose/query.
3691 def _BuildFilter(fields, names):
3692 """Builds a filter for querying OSes.
3695 name_filter = qlang.MakeSimpleFilter("name", names)
3697 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3698 # respective field is not requested
3699 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3700 for fname in ["hidden", "blacklisted"]
3701 if fname not in fields]
3702 if "valid" not in fields:
3703 status_filter.append([qlang.OP_TRUE, "valid"])
3706 status_filter.insert(0, qlang.OP_AND)
3708 status_filter = None
3710 if name_filter and status_filter:
3711 return [qlang.OP_AND, name_filter, status_filter]
3715 return status_filter
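# For example, an OS query with no names and only the "name" output field
# would (sketch) produce a status-only filter such as:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]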
3717 def CheckArguments(self):
3718 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3719 self.op.output_fields, False)
3721 def ExpandNames(self):
3722 self.oq.ExpandNames(self)
3724 def Exec(self, feedback_fn):
3725 return self.oq.OldStyleQuery(self)
3728 class LUNodeRemove(LogicalUnit):
3729 """Logical unit for removing a node.
3732 HPATH = "node-remove"
3733 HTYPE = constants.HTYPE_NODE
3735 def BuildHooksEnv(self):
3738 This doesn't run on the target node in the pre phase as a failed
3739 node would then be impossible to remove.
3743 "OP_TARGET": self.op.node_name,
3744 "NODE_NAME": self.op.node_name,
3747 def BuildHooksNodes(self):
3748 """Build hooks nodes.
3751 all_nodes = self.cfg.GetNodeList()
3753 all_nodes.remove(self.op.node_name)
3755 logging.warning("Node '%s', which is about to be removed, was not found"
3756 " in the list of all nodes", self.op.node_name)
3757 return (all_nodes, all_nodes)
3759 def CheckPrereq(self):
3760 """Check prerequisites.
3763 - the node exists in the configuration
3764 - it does not have primary or secondary instances
3765 - it's not the master
3767 Any errors are signaled by raising errors.OpPrereqError.
3770 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3771 node = self.cfg.GetNodeInfo(self.op.node_name)
3772 assert node is not None
3774 instance_list = self.cfg.GetInstanceList()
3776 masternode = self.cfg.GetMasterNode()
3777 if node.name == masternode:
3778 raise errors.OpPrereqError("Node is the master node,"
3779 " you need to failover first.",
3782 for instance_name in instance_list:
3783 instance = self.cfg.GetInstanceInfo(instance_name)
3784 if node.name in instance.all_nodes:
3785 raise errors.OpPrereqError("Instance %s is still running on the node,"
3786 " please remove first." % instance_name,
3788 self.op.node_name = node.name
3791 def Exec(self, feedback_fn):
3792 """Removes the node from the cluster.
3796 logging.info("Stopping the node daemon and removing configs from node %s",
3799 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3801 # Promote nodes to master candidate as needed
3802 _AdjustCandidatePool(self, exceptions=[node.name])
3803 self.context.RemoveNode(node.name)
3805 # Run post hooks on the node before it's removed
3806 _RunPostHook(self, node.name)
3808 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3809 msg = result.fail_msg
3811 self.LogWarning("Errors encountered on the remote node while leaving"
3812 " the cluster: %s", msg)
3814 # Remove node from our /etc/hosts
3815 if self.cfg.GetClusterInfo().modify_etc_hosts:
3816 master_node = self.cfg.GetMasterNode()
3817 result = self.rpc.call_etc_hosts_modify(master_node,
3818 constants.ETC_HOSTS_REMOVE,
3820 result.Raise("Can't update hosts file with new host data")
3821 _RedistributeAncillaryFiles(self)
3824 class _NodeQuery(_QueryBase):
3825 FIELDS = query.NODE_FIELDS
3827 def ExpandNames(self, lu):
3828 lu.needed_locks = {}
3829 lu.share_locks[locking.LEVEL_NODE] = 1
3832 self.wanted = _GetWantedNodes(lu, self.names)
3834 self.wanted = locking.ALL_SET
3836 self.do_locking = (self.use_locking and
3837 query.NQ_LIVE in self.requested_data)
3840 # if we don't request only static fields, we need to lock the nodes
3841 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3843 def DeclareLocks(self, lu, level):
3846 def _GetQueryData(self, lu):
3847 """Computes the list of nodes and their attributes.
3850 all_info = lu.cfg.GetAllNodesInfo()
3852 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3854 # Gather data as requested
3855 if query.NQ_LIVE in self.requested_data:
3856 # filter out non-vm_capable nodes
3857 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3859 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3860 lu.cfg.GetHypervisorType())
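# keep only nodes that answered the RPC successfully and returned a payload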
3861 live_data = dict((name, nresult.payload)
3862 for (name, nresult) in node_data.items()
3863 if not nresult.fail_msg and nresult.payload)
3867 if query.NQ_INST in self.requested_data:
3868 node_to_primary = dict([(name, set()) for name in nodenames])
3869 node_to_secondary = dict([(name, set()) for name in nodenames])
3871 inst_data = lu.cfg.GetAllInstancesInfo()
3873 for inst in inst_data.values():
3874 if inst.primary_node in node_to_primary:
3875 node_to_primary[inst.primary_node].add(inst.name)
3876 for secnode in inst.secondary_nodes:
3877 if secnode in node_to_secondary:
3878 node_to_secondary[secnode].add(inst.name)
3880 node_to_primary = None
3881 node_to_secondary = None
3883 if query.NQ_OOB in self.requested_data:
3884 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3885 for name, node in all_info.iteritems())
3889 if query.NQ_GROUP in self.requested_data:
3890 groups = lu.cfg.GetAllNodeGroupsInfo()
3894 return query.NodeQueryData([all_info[name] for name in nodenames],
3895 live_data, lu.cfg.GetMasterNode(),
3896 node_to_primary, node_to_secondary, groups,
3897 oob_support, lu.cfg.GetClusterInfo())
3900 class LUNodeQuery(NoHooksLU):
3901 """Logical unit for querying nodes.
3904 # pylint: disable-msg=W0142
3907 def CheckArguments(self):
3908 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3909 self.op.output_fields, self.op.use_locking)
3911 def ExpandNames(self):
3912 self.nq.ExpandNames(self)
3914 def Exec(self, feedback_fn):
3915 return self.nq.OldStyleQuery(self)
3918 class LUNodeQueryvols(NoHooksLU):
3919 """Logical unit for getting volumes on node(s).
3923 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3924 _FIELDS_STATIC = utils.FieldSet("node")
3926 def CheckArguments(self):
3927 _CheckOutputFields(static=self._FIELDS_STATIC,
3928 dynamic=self._FIELDS_DYNAMIC,
3929 selected=self.op.output_fields)
3931 def ExpandNames(self):
3932 self.needed_locks = {}
3933 self.share_locks[locking.LEVEL_NODE] = 1
3934 if not self.op.nodes:
3935 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3937 self.needed_locks[locking.LEVEL_NODE] = \
3938 _GetWantedNodes(self, self.op.nodes)
3940 def Exec(self, feedback_fn):
3941 """Computes the list of volumes on the nodes and their attributes.
3944 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3945 volumes = self.rpc.call_node_volumes(nodenames)
3947 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3948 in self.cfg.GetInstanceList()]
3950 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3953 for node in nodenames:
3954 nresult = volumes[node]
3957 msg = nresult.fail_msg
3959 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3962 node_vols = nresult.payload[:]
3963 node_vols.sort(key=lambda vol: vol['dev'])
3965 for vol in node_vols:
3967 for field in self.op.output_fields:
3970 elif field == "phys":
3974 elif field == "name":
3976 elif field == "size":
3977 val = int(float(vol['size']))
3978 elif field == "instance":
3980 if node not in lv_by_node[inst]:
3982 if vol['name'] in lv_by_node[inst][node]:
3988 raise errors.ParameterError(field)
3989 node_output.append(str(val))
3991 output.append(node_output)
3996 class LUNodeQueryStorage(NoHooksLU):
3997 """Logical unit for getting information on storage units on node(s).
4000 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4003 def CheckArguments(self):
4004 _CheckOutputFields(static=self._FIELDS_STATIC,
4005 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4006 selected=self.op.output_fields)
4008 def ExpandNames(self):
4009 self.needed_locks = {}
4010 self.share_locks[locking.LEVEL_NODE] = 1
4013 self.needed_locks[locking.LEVEL_NODE] = \
4014 _GetWantedNodes(self, self.op.nodes)
4016 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4018 def Exec(self, feedback_fn):
4019 """Computes the list of storage units on the nodes and their attributes.
4022 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4024 # Always get name to sort by
4025 if constants.SF_NAME in self.op.output_fields:
4026 fields = self.op.output_fields[:]
4028 fields = [constants.SF_NAME] + self.op.output_fields
4030 # Never ask for node or type as it's only known to the LU
4031 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4032 while extra in fields:
4033 fields.remove(extra)
4035 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4036 name_idx = field_idx[constants.SF_NAME]
4038 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4039 data = self.rpc.call_storage_list(self.nodes,
4040 self.op.storage_type, st_args,
4041 self.op.name, fields)
4045 for node in utils.NiceSort(self.nodes):
4046 nresult = data[node]
4050 msg = nresult.fail_msg
4052 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4055 rows = dict([(row[name_idx], row) for row in nresult.payload])
4057 for name in utils.NiceSort(rows.keys()):
4062 for field in self.op.output_fields:
4063 if field == constants.SF_NODE:
4065 elif field == constants.SF_TYPE:
4066 val = self.op.storage_type
4067 elif field in field_idx:
4068 val = row[field_idx[field]]
4070 raise errors.ParameterError(field)
4079 class _InstanceQuery(_QueryBase):
4080 FIELDS = query.INSTANCE_FIELDS
4082 def ExpandNames(self, lu):
4083 lu.needed_locks = {}
4084 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4085 lu.share_locks[locking.LEVEL_NODE] = 1
4088 self.wanted = _GetWantedInstances(lu, self.names)
4090 self.wanted = locking.ALL_SET
4092 self.do_locking = (self.use_locking and
4093 query.IQ_LIVE in self.requested_data)
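# node/instance locks are only needed when live data was requested;
# static configuration data can be read without locking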
4095 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4096 lu.needed_locks[locking.LEVEL_NODE] = []
4097 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4099 def DeclareLocks(self, lu, level):
4100 if level == locking.LEVEL_NODE and self.do_locking:
4101 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4103 def _GetQueryData(self, lu):
4104 """Computes the list of instances and their attributes.
4107 cluster = lu.cfg.GetClusterInfo()
4108 all_info = lu.cfg.GetAllInstancesInfo()
4110 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4112 instance_list = [all_info[name] for name in instance_names]
4113 nodes = frozenset(itertools.chain(*(inst.all_nodes
4114 for inst in instance_list)))
4115 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4118 wrongnode_inst = set()
4120 # Gather data as requested
4121 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4123 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4125 result = node_data[name]
4127 # offline nodes will be in both lists
4128 assert result.fail_msg
4129 offline_nodes.append(name)
4131 bad_nodes.append(name)
4132 elif result.payload:
4133 for inst in result.payload:
4134 if inst in all_info:
4135 if all_info[inst].primary_node == name:
4136 live_data.update(result.payload)
4138 wrongnode_inst.add(inst)
4140 # orphan instance; we don't list it here as we don't
4141 # handle this case yet in the output of instance listing
4142 logging.warning("Orphan instance '%s' found on node %s",
4144 # else no instance is alive
4148 if query.IQ_DISKUSAGE in self.requested_data:
4149 disk_usage = dict((inst.name,
4150 _ComputeDiskSize(inst.disk_template,
4151 [{constants.IDISK_SIZE: disk.size}
4152 for disk in inst.disks]))
4153 for inst in instance_list)
4157 if query.IQ_CONSOLE in self.requested_data:
4159 for inst in instance_list:
4160 if inst.name in live_data:
4161 # Instance is running
4162 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4164 consinfo[inst.name] = None
4165 assert set(consinfo.keys()) == set(instance_names)
4169 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4170 disk_usage, offline_nodes, bad_nodes,
4171 live_data, wrongnode_inst, consinfo)
4174 class LUQuery(NoHooksLU):
4175 """Query for resources/items of a certain kind.
4178 # pylint: disable-msg=W0142
4181 def CheckArguments(self):
4182 qcls = _GetQueryImplementation(self.op.what)
4184 self.impl = qcls(self.op.filter, self.op.fields, False)
4186 def ExpandNames(self):
4187 self.impl.ExpandNames(self)
4189 def DeclareLocks(self, level):
4190 self.impl.DeclareLocks(self, level)
4192 def Exec(self, feedback_fn):
4193 return self.impl.NewStyleQuery(self)
4196 class LUQueryFields(NoHooksLU):
4197 """Query for resources/items of a certain kind.
4200 # pylint: disable-msg=W0142
4203 def CheckArguments(self):
4204 self.qcls = _GetQueryImplementation(self.op.what)
4206 def ExpandNames(self):
4207 self.needed_locks = {}
4209 def Exec(self, feedback_fn):
4210 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4213 class LUNodeModifyStorage(NoHooksLU):
4214 """Logical unit for modifying a storage volume on a node.
4219 def CheckArguments(self):
4220 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4222 storage_type = self.op.storage_type
4225 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4227 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4228 " modified" % storage_type,
4231 diff = set(self.op.changes.keys()) - modifiable
4233 raise errors.OpPrereqError("The following fields can not be modified for"
4234 " storage units of type '%s': %r" %
4235 (storage_type, list(diff)),
4238 def ExpandNames(self):
4239 self.needed_locks = {
4240 locking.LEVEL_NODE: self.op.node_name,
4243 def Exec(self, feedback_fn):
4244 """Computes the list of nodes and their attributes.
4247 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4248 result = self.rpc.call_storage_modify(self.op.node_name,
4249 self.op.storage_type, st_args,
4250 self.op.name, self.op.changes)
4251 result.Raise("Failed to modify storage unit '%s' on %s" %
4252 (self.op.name, self.op.node_name))
4255 class LUNodeAdd(LogicalUnit):
4256 """Logical unit for adding node to the cluster.
4260 HTYPE = constants.HTYPE_NODE
4261 _NFLAGS = ["master_capable", "vm_capable"]
4263 def CheckArguments(self):
4264 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4265 # validate/normalize the node name
4266 self.hostname = netutils.GetHostname(name=self.op.node_name,
4267 family=self.primary_ip_family)
4268 self.op.node_name = self.hostname.name
4269 if self.op.readd and self.op.group:
4270 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4271 " being readded", errors.ECODE_INVAL)
4273 def BuildHooksEnv(self):
4276 This will run on all nodes before, and on all nodes + the new node after.
4280 "OP_TARGET": self.op.node_name,
4281 "NODE_NAME": self.op.node_name,
4282 "NODE_PIP": self.op.primary_ip,
4283 "NODE_SIP": self.op.secondary_ip,
4284 "MASTER_CAPABLE": str(self.op.master_capable),
4285 "VM_CAPABLE": str(self.op.vm_capable),
4288 def BuildHooksNodes(self):
4289 """Build hooks nodes.
4292 # Exclude added node
4293 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4294 post_nodes = pre_nodes + [self.op.node_name, ]
4296 return (pre_nodes, post_nodes)
4298 def CheckPrereq(self):
4299 """Check prerequisites.
4302 - the new node is not already in the config
4304 - its parameters (single/dual homed) match the cluster
4306 Any errors are signaled by raising errors.OpPrereqError.
4310 hostname = self.hostname
4311 node = hostname.name
4312 primary_ip = self.op.primary_ip = hostname.ip
4313 if self.op.secondary_ip is None:
4314 if self.primary_ip_family == netutils.IP6Address.family:
4315 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4316 " IPv4 address must be given as secondary",
4318 self.op.secondary_ip = primary_ip
4320 secondary_ip = self.op.secondary_ip
4321 if not netutils.IP4Address.IsValid(secondary_ip):
4322 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4323 " address" % secondary_ip, errors.ECODE_INVAL)
4325 node_list = cfg.GetNodeList()
4326 if not self.op.readd and node in node_list:
4327 raise errors.OpPrereqError("Node %s is already in the configuration" %
4328 node, errors.ECODE_EXISTS)
4329 elif self.op.readd and node not in node_list:
4330 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4333 self.changed_primary_ip = False
4335 for existing_node_name in node_list:
4336 existing_node = cfg.GetNodeInfo(existing_node_name)
4338 if self.op.readd and node == existing_node_name:
4339 if existing_node.secondary_ip != secondary_ip:
4340 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4341 " address configuration as before",
4343 if existing_node.primary_ip != primary_ip:
4344 self.changed_primary_ip = True
4348 if (existing_node.primary_ip == primary_ip or
4349 existing_node.secondary_ip == primary_ip or
4350 existing_node.primary_ip == secondary_ip or
4351 existing_node.secondary_ip == secondary_ip):
4352 raise errors.OpPrereqError("New node ip address(es) conflict with"
4353 " existing node %s" % existing_node.name,
4354 errors.ECODE_NOTUNIQUE)
4356 # After this 'if' block, None is no longer a valid value for the
4357 # _capable op attributes
4359 old_node = self.cfg.GetNodeInfo(node)
4360 assert old_node is not None, "Can't retrieve locked node %s" % node
4361 for attr in self._NFLAGS:
4362 if getattr(self.op, attr) is None:
4363 setattr(self.op, attr, getattr(old_node, attr))
4365 for attr in self._NFLAGS:
4366 if getattr(self.op, attr) is None:
4367 setattr(self.op, attr, True)
4369 if self.op.readd and not self.op.vm_capable:
4370 pri, sec = cfg.GetNodeInstances(node)
4372 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4373 " flag set to false, but it already holds"
4374 " instances" % node,
4377 # check that the type of the node (single versus dual homed) is the
4378 # same as for the master
4379 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4380 master_singlehomed = myself.secondary_ip == myself.primary_ip
4381 newbie_singlehomed = secondary_ip == primary_ip
4382 if master_singlehomed != newbie_singlehomed:
4383 if master_singlehomed:
4384 raise errors.OpPrereqError("The master has no secondary ip but the"
4385 " new node has one",
4388 raise errors.OpPrereqError("The master has a secondary ip but the"
4389 " new node doesn't have one",
4392 # checks reachability
4393 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4394 raise errors.OpPrereqError("Node not reachable by ping",
4395 errors.ECODE_ENVIRON)
4397 if not newbie_singlehomed:
4398 # check reachability from my secondary ip to newbie's secondary ip
4399 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4400 source=myself.secondary_ip):
4401 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4402 " based ping to node daemon port",
4403 errors.ECODE_ENVIRON)
4410 if self.op.master_capable:
4411 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4413 self.master_candidate = False
4416 self.new_node = old_node
4418 node_group = cfg.LookupNodeGroup(self.op.group)
4419 self.new_node = objects.Node(name=node,
4420 primary_ip=primary_ip,
4421 secondary_ip=secondary_ip,
4422 master_candidate=self.master_candidate,
4423 offline=False, drained=False,
4426 if self.op.ndparams:
4427 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4429 def Exec(self, feedback_fn):
4430 """Adds the new node to the cluster.
4433 new_node = self.new_node
4434 node = new_node.name
4436 # We are adding a new node, so we assume it's powered
4437 new_node.powered = True
4439 # for re-adds, reset the offline/drained/master-candidate flags;
4440 # we need to reset here, otherwise offline would prevent RPC calls
4441 # later in the procedure; this also means that if the re-add
4442 # fails, we are left with a non-offlined, broken node
4444 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4445 self.LogInfo("Readding a node, the offline/drained flags were reset")
4446 # if we demote the node, we do cleanup later in the procedure
4447 new_node.master_candidate = self.master_candidate
4448 if self.changed_primary_ip:
4449 new_node.primary_ip = self.op.primary_ip
4451 # copy the master/vm_capable flags
4452 for attr in self._NFLAGS:
4453 setattr(new_node, attr, getattr(self.op, attr))
4455 # notify the user about any possible mc promotion
4456 if new_node.master_candidate:
4457 self.LogInfo("Node will be a master candidate")
4459 if self.op.ndparams:
4460 new_node.ndparams = self.op.ndparams
4462 new_node.ndparams = {}
4464 # check connectivity
4465 result = self.rpc.call_version([node])[node]
4466 result.Raise("Can't get version information from node %s" % node)
4467 if constants.PROTOCOL_VERSION == result.payload:
4468 logging.info("Communication to node %s fine, sw version %s match",
4469 node, result.payload)
4471 raise errors.OpExecError("Version mismatch master version %s,"
4472 " node version %s" %
4473 (constants.PROTOCOL_VERSION, result.payload))
4475 # Add node to our /etc/hosts, and add key to known_hosts
4476 if self.cfg.GetClusterInfo().modify_etc_hosts:
4477 master_node = self.cfg.GetMasterNode()
4478 result = self.rpc.call_etc_hosts_modify(master_node,
4479 constants.ETC_HOSTS_ADD,
4482 result.Raise("Can't update hosts file with new host data")
4484 if new_node.secondary_ip != new_node.primary_ip:
4485 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
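# verify ssh/hostname connectivity from the master to the new node before
# finalizing the add; a failure here aborts the operation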
4488 node_verify_list = [self.cfg.GetMasterNode()]
4489 node_verify_param = {
4490 constants.NV_NODELIST: [node],
4491 # TODO: do a node-net-test as well?
4494 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4495 self.cfg.GetClusterName())
4496 for verifier in node_verify_list:
4497 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4498 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4500 for failed in nl_payload:
4501 feedback_fn("ssh/hostname verification failed"
4502 " (checking from %s): %s" %
4503 (verifier, nl_payload[failed]))
4504 raise errors.OpExecError("ssh/hostname verification failed.")
4507 _RedistributeAncillaryFiles(self)
4508 self.context.ReaddNode(new_node)
4509 # make sure we redistribute the config
4510 self.cfg.Update(new_node, feedback_fn)
4511 # and make sure the new node will not have old files around
4512 if not new_node.master_candidate:
4513 result = self.rpc.call_node_demote_from_mc(new_node.name)
4514 msg = result.fail_msg
4516 self.LogWarning("Node failed to demote itself from master"
4517 " candidate status: %s" % msg)
4519 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4520 additional_vm=self.op.vm_capable)
4521 self.context.AddNode(new_node, self.proc.GetECId())
4524 class LUNodeSetParams(LogicalUnit):
4525 """Modifies the parameters of a node.
4527 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4528 to the node role (as _ROLE_*)
4529 @cvar _R2F: a dictionary from node role to tuples of flags
4530 @cvar _FLAGS: a list of attribute names corresponding to the flags
4533 HPATH = "node-modify"
4534 HTYPE = constants.HTYPE_NODE
4536 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4538 (True, False, False): _ROLE_CANDIDATE,
4539 (False, True, False): _ROLE_DRAINED,
4540 (False, False, True): _ROLE_OFFLINE,
4541 (False, False, False): _ROLE_REGULAR,
4543 _R2F = dict((v, k) for k, v in _F2R.items())
4544 _FLAGS = ["master_candidate", "drained", "offline"]
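# each key above is a (master_candidate, drained, offline) tuple, in the same
# order as _FLAGS; the all-False tuple corresponds to a regular node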
4546 def CheckArguments(self):
4547 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4548 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4549 self.op.master_capable, self.op.vm_capable,
4550 self.op.secondary_ip, self.op.ndparams]
4551 if all_mods.count(None) == len(all_mods):
4552 raise errors.OpPrereqError("Please pass at least one modification",
4554 if all_mods.count(True) > 1:
4555 raise errors.OpPrereqError("Can't set the node into more than one"
4556 " state at the same time",
4559 # Boolean value that tells us whether we might be demoting from MC
4560 self.might_demote = (self.op.master_candidate == False or
4561 self.op.offline == True or
4562 self.op.drained == True or
4563 self.op.master_capable == False)
4565 if self.op.secondary_ip:
4566 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4567 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4568 " address" % self.op.secondary_ip,
4571 self.lock_all = self.op.auto_promote and self.might_demote
4572 self.lock_instances = self.op.secondary_ip is not None
4574 def ExpandNames(self):
4576 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4578 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4580 if self.lock_instances:
4581 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4583 def DeclareLocks(self, level):
4584 # If we have locked all instances, before waiting to lock nodes, release
4585 # all the ones living on nodes unrelated to the current operation.
4586 if level == locking.LEVEL_NODE and self.lock_instances:
4587 instances_release = []
4589 self.affected_instances = []
4590 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4591 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4592 instance = self.context.cfg.GetInstanceInfo(instance_name)
4593 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4594 if i_mirrored and self.op.node_name in instance.all_nodes:
4595 instances_keep.append(instance_name)
4596 self.affected_instances.append(instance)
4598 instances_release.append(instance_name)
4599 if instances_release:
4600 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4601 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4603 def BuildHooksEnv(self):
4606 This runs on the master node.
4610 "OP_TARGET": self.op.node_name,
4611 "MASTER_CANDIDATE": str(self.op.master_candidate),
4612 "OFFLINE": str(self.op.offline),
4613 "DRAINED": str(self.op.drained),
4614 "MASTER_CAPABLE": str(self.op.master_capable),
4615 "VM_CAPABLE": str(self.op.vm_capable),
4618 def BuildHooksNodes(self):
4619 """Build hooks nodes.
4622 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4625 def CheckPrereq(self):
4626 """Check prerequisites.
4628 This only checks the instance list against the existing names.
4631 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4633 if (self.op.master_candidate is not None or
4634 self.op.drained is not None or
4635 self.op.offline is not None):
4636 # we can't change the master's node flags
4637 if self.op.node_name == self.cfg.GetMasterNode():
4638 raise errors.OpPrereqError("The master role can be changed"
4639 " only via master-failover",
4642 if self.op.master_candidate and not node.master_capable:
4643 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4644 " it a master candidate" % node.name,
4647 if self.op.vm_capable == False:
4648 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4650 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4651 " the vm_capable flag" % node.name,
4654 if node.master_candidate and self.might_demote and not self.lock_all:
4655 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4656 # check if after removing the current node, we're missing master candidates
4658 (mc_remaining, mc_should, _) = \
4659 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4660 if mc_remaining < mc_should:
4661 raise errors.OpPrereqError("Not enough master candidates, please"
4662 " pass auto promote option to allow"
4663 " promotion", errors.ECODE_STATE)
4665 self.old_flags = old_flags = (node.master_candidate,
4666 node.drained, node.offline)
4667 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4668 self.old_role = old_role = self._F2R[old_flags]
4670 # Check for ineffective changes
4671 for attr in self._FLAGS:
4672 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4673 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4674 setattr(self.op, attr, None)
4676 # Past this point, any flag change to False means a transition
4677 # away from the respective state, as only real changes are kept
4679 # TODO: We might query the real power state if it supports OOB
4680 if _SupportsOob(self.cfg, node):
4681 if self.op.offline is False and not (node.powered or
4682 self.op.powered == True):
4683 raise errors.OpPrereqError(("Please power on node %s first before you"
4684 " can reset offline state") %
4686 elif self.op.powered is not None:
4687 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4688 " which does not support out-of-band"
4689 " handling") % self.op.node_name)
4691 # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
4692 if (self.op.drained == False or self.op.offline == False or
4693 (self.op.master_capable and not node.master_capable)):
4694 if _DecideSelfPromotion(self):
4695 self.op.master_candidate = True
4696 self.LogInfo("Auto-promoting node to master candidate")
4698 # If we're no longer master capable, we'll demote ourselves from MC
4699 if self.op.master_capable == False and node.master_candidate:
4700 self.LogInfo("Demoting from master candidate")
4701 self.op.master_candidate = False
4704 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4705 if self.op.master_candidate:
4706 new_role = self._ROLE_CANDIDATE
4707 elif self.op.drained:
4708 new_role = self._ROLE_DRAINED
4709 elif self.op.offline:
4710 new_role = self._ROLE_OFFLINE
4711 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4712 # False is still in new flags, which means we're un-setting (the
4714 new_role = self._ROLE_REGULAR
4715 else: # no new flags, nothing, keep old role
4718 self.new_role = new_role
4720 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4721 # Trying to transition out of offline status
4722 result = self.rpc.call_version([node.name])[node.name]
4724 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4725 " to report its version: %s" %
4726 (node.name, result.fail_msg),
4729 self.LogWarning("Transitioning node from offline to online state"
4730 " without using re-add. Please make sure the node"
4733 if self.op.secondary_ip:
4734 # Ok even without locking, because this can't be changed by any LU
4735 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4736 master_singlehomed = master.secondary_ip == master.primary_ip
4737 if master_singlehomed and self.op.secondary_ip:
4738 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4739 " homed cluster", errors.ECODE_INVAL)
4742 if self.affected_instances:
4743 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4744 " node has instances (%s) configured"
4745 " to use it" % self.affected_instances)
4747 # On online nodes, check that no instances are running, and that
4748 # the node has the new ip and we can reach it.
4749 for instance in self.affected_instances:
4750 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4752 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4753 if master.name != node.name:
4754 # check reachability from master secondary ip to new secondary ip
4755 if not netutils.TcpPing(self.op.secondary_ip,
4756 constants.DEFAULT_NODED_PORT,
4757 source=master.secondary_ip):
4758 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4759 " based ping to node daemon port",
4760 errors.ECODE_ENVIRON)
4762 if self.op.ndparams:
4763 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4764 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4765 self.new_ndparams = new_ndparams
4767 def Exec(self, feedback_fn):
4772 old_role = self.old_role
4773 new_role = self.new_role
4777 if self.op.ndparams:
4778 node.ndparams = self.new_ndparams
4780 if self.op.powered is not None:
4781 node.powered = self.op.powered
4783 for attr in ["master_capable", "vm_capable"]:
4784 val = getattr(self.op, attr)
4786 setattr(node, attr, val)
4787 result.append((attr, str(val)))
4789 if new_role != old_role:
4790 # Tell the node to demote itself, if no longer MC and not offline
4791 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4792 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4794 self.LogWarning("Node failed to demote itself: %s", msg)
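# apply the new role by updating the individual flags, recording each change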
4796 new_flags = self._R2F[new_role]
4797 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4799 result.append((desc, str(nf)))
4800 (node.master_candidate, node.drained, node.offline) = new_flags
4802 # we locked all nodes, we adjust the CP before updating this node
4804 _AdjustCandidatePool(self, [node.name])
4806 if self.op.secondary_ip:
4807 node.secondary_ip = self.op.secondary_ip
4808 result.append(("secondary_ip", self.op.secondary_ip))
4810 # this will trigger configuration file update, if needed
4811 self.cfg.Update(node, feedback_fn)
4813 # this will trigger job queue propagation or cleanup if the mc
4815 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4816 self.context.ReaddNode(node)
4821 class LUNodePowercycle(NoHooksLU):
4822 """Powercycles a node.
4827 def CheckArguments(self):
4828 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4829 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4830 raise errors.OpPrereqError("The node is the master and the force"
4831 " parameter was not set",
4834 def ExpandNames(self):
4835 """Locking for PowercycleNode.
4837 This is a last-resort option and shouldn't block on other
4838 jobs. Therefore, we grab no locks.
4841 self.needed_locks = {}
4843 def Exec(self, feedback_fn):
4847 result = self.rpc.call_node_powercycle(self.op.node_name,
4848 self.cfg.GetHypervisorType())
4849 result.Raise("Failed to schedule the reboot")
4850 return result.payload
4853 class LUClusterQuery(NoHooksLU):
4854 """Query cluster configuration.
4859 def ExpandNames(self):
4860 self.needed_locks = {}
4862 def Exec(self, feedback_fn):
4863 """Return cluster config.
4866 cluster = self.cfg.GetClusterInfo()
4869 # Filter just for enabled hypervisors
4870 for os_name, hv_dict in cluster.os_hvp.items():
4871 os_hvp[os_name] = {}
4872 for hv_name, hv_params in hv_dict.items():
4873 if hv_name in cluster.enabled_hypervisors:
4874 os_hvp[os_name][hv_name] = hv_params
4876 # Convert ip_family to ip_version
4877 primary_ip_version = constants.IP4_VERSION
4878 if cluster.primary_ip_family == netutils.IP6Address.family:
4879 primary_ip_version = constants.IP6_VERSION
4882 "software_version": constants.RELEASE_VERSION,
4883 "protocol_version": constants.PROTOCOL_VERSION,
4884 "config_version": constants.CONFIG_VERSION,
4885 "os_api_version": max(constants.OS_API_VERSIONS),
4886 "export_version": constants.EXPORT_VERSION,
4887 "architecture": (platform.architecture()[0], platform.machine()),
4888 "name": cluster.cluster_name,
4889 "master": cluster.master_node,
4890 "default_hypervisor": cluster.enabled_hypervisors[0],
4891 "enabled_hypervisors": cluster.enabled_hypervisors,
4892 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4893 for hypervisor_name in cluster.enabled_hypervisors]),
4895 "beparams": cluster.beparams,
4896 "osparams": cluster.osparams,
4897 "nicparams": cluster.nicparams,
4898 "ndparams": cluster.ndparams,
4899 "candidate_pool_size": cluster.candidate_pool_size,
4900 "master_netdev": cluster.master_netdev,
4901 "volume_group_name": cluster.volume_group_name,
4902 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4903 "file_storage_dir": cluster.file_storage_dir,
4904 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4905 "maintain_node_health": cluster.maintain_node_health,
4906 "ctime": cluster.ctime,
4907 "mtime": cluster.mtime,
4908 "uuid": cluster.uuid,
4909 "tags": list(cluster.GetTags()),
4910 "uid_pool": cluster.uid_pool,
4911 "default_iallocator": cluster.default_iallocator,
4912 "reserved_lvs": cluster.reserved_lvs,
4913 "primary_ip_version": primary_ip_version,
4914 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4915 "hidden_os": cluster.hidden_os,
4916 "blacklisted_os": cluster.blacklisted_os,
4922 class LUClusterConfigQuery(NoHooksLU):
4923 """Return configuration values.
4927 _FIELDS_DYNAMIC = utils.FieldSet()
4928 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4929 "watcher_pause", "volume_group_name")
4931 def CheckArguments(self):
4932 _CheckOutputFields(static=self._FIELDS_STATIC,
4933 dynamic=self._FIELDS_DYNAMIC,
4934 selected=self.op.output_fields)
4936 def ExpandNames(self):
4937 self.needed_locks = {}
4939 def Exec(self, feedback_fn):
4940 """Dump a representation of the cluster config to the standard output.
4944 for field in self.op.output_fields:
4945 if field == "cluster_name":
4946 entry = self.cfg.GetClusterName()
4947 elif field == "master_node":
4948 entry = self.cfg.GetMasterNode()
4949 elif field == "drain_flag":
4950 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4951 elif field == "watcher_pause":
4952 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4953 elif field == "volume_group_name":
4954 entry = self.cfg.GetVGName()
4956 raise errors.ParameterError(field)
4957 values.append(entry)
4961 class LUInstanceActivateDisks(NoHooksLU):
4962 """Bring up an instance's disks.
4967 def ExpandNames(self):
4968 self._ExpandAndLockInstance()
4969 self.needed_locks[locking.LEVEL_NODE] = []
4970 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4972 def DeclareLocks(self, level):
4973 if level == locking.LEVEL_NODE:
4974 self._LockInstancesNodes()
4976 def CheckPrereq(self):
4977 """Check prerequisites.
4979 This checks that the instance is in the cluster.
4982 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4983 assert self.instance is not None, \
4984 "Cannot retrieve locked instance %s" % self.op.instance_name
4985 _CheckNodeOnline(self, self.instance.primary_node)
4987 def Exec(self, feedback_fn):
4988 """Activate the disks.
4991 disks_ok, disks_info = \
4992 _AssembleInstanceDisks(self, self.instance,
4993 ignore_size=self.op.ignore_size)
4995 raise errors.OpExecError("Cannot activate block devices")
5000 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5002 """Prepare the block devices for an instance.
5004 This sets up the block devices on all nodes.
5006 @type lu: L{LogicalUnit}
5007 @param lu: the logical unit on whose behalf we execute
5008 @type instance: L{objects.Instance}
5009 @param instance: the instance for whose disks we assemble
5010 @type disks: list of L{objects.Disk} or None
5011 @param disks: which disks to assemble (or all, if None)
5012 @type ignore_secondaries: boolean
5013 @param ignore_secondaries: if true, errors on secondary nodes
5014 won't result in an error return from the function
5015 @type ignore_size: boolean
5016 @param ignore_size: if true, the current known size of the disk
5017 will not be used during the disk activation, useful for cases
5018 when the size is wrong
5019 @return: False if the operation failed, otherwise a list of
5020 (host, instance_visible_name, node_visible_name)
5021 with the mapping from node devices to instance devices
5026 iname = instance.name
5027 disks = _ExpandCheckDisks(instance, disks)
5029 # With the two-pass mechanism we try to reduce the window of
5030 # opportunity for the race condition of switching DRBD to primary
5031 # before handshaking has occurred, but we do not eliminate it
5033 # The proper fix would be to wait (with some limits) until the
5034 # connection has been made and drbd transitions from WFConnection
5035 # into any other network-connected state (Connected, SyncTarget,
5038 # 1st pass, assemble on all nodes in secondary mode
5039 for idx, inst_disk in enumerate(disks):
5040 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5042 node_disk = node_disk.Copy()
5043 node_disk.UnsetSize()
5044 lu.cfg.SetDiskID(node_disk, node)
5045 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5046 msg = result.fail_msg
5048 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5049 " (is_primary=False, pass=1): %s",
5050 inst_disk.iv_name, node, msg)
5051 if not ignore_secondaries:
5054 # FIXME: race condition on drbd migration to primary
5056 # 2nd pass, do only the primary node
5057 for idx, inst_disk in enumerate(disks):
5060 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5061 if node != instance.primary_node:
5064 node_disk = node_disk.Copy()
5065 node_disk.UnsetSize()
5066 lu.cfg.SetDiskID(node_disk, node)
5067 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5068 msg = result.fail_msg
5070 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5071 " (is_primary=True, pass=2): %s",
5072 inst_disk.iv_name, node, msg)
5075 dev_path = result.payload
5077 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5079 # leave the disks configured for the primary node
5080 # this is a workaround that would be fixed better by
5081 # improving the logical/physical id handling
5083 lu.cfg.SetDiskID(disk, instance.primary_node)
5085 return disks_ok, device_info
5088 def _StartInstanceDisks(lu, instance, force):
5089 """Start the disks of an instance.
5092 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5093 ignore_secondaries=force)
5095 _ShutdownInstanceDisks(lu, instance)
5096 if force is not None and not force:
5097 lu.proc.LogWarning("", hint="If the message above refers to a"
5099 " you can retry the operation using '--force'.")
5100 raise errors.OpExecError("Disk consistency error")
5103 class LUInstanceDeactivateDisks(NoHooksLU):
5104 """Shutdown an instance's disks.
5109 def ExpandNames(self):
5110 self._ExpandAndLockInstance()
5111 self.needed_locks[locking.LEVEL_NODE] = []
5112 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5114 def DeclareLocks(self, level):
5115 if level == locking.LEVEL_NODE:
5116 self._LockInstancesNodes()
5118 def CheckPrereq(self):
5119 """Check prerequisites.
5121 This checks that the instance is in the cluster.
5124 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5125 assert self.instance is not None, \
5126 "Cannot retrieve locked instance %s" % self.op.instance_name
5128 def Exec(self, feedback_fn):
5129 """Deactivate the disks
5132 instance = self.instance
5134 _ShutdownInstanceDisks(self, instance)
5136 _SafeShutdownInstanceDisks(self, instance)
5139 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5140 """Shutdown block devices of an instance.
5142 This function checks if an instance is running, before calling
5143 _ShutdownInstanceDisks.
5146 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5147 _ShutdownInstanceDisks(lu, instance, disks=disks)
5150 def _ExpandCheckDisks(instance, disks):
5151 """Return the instance disks selected by the disks list
5153 @type disks: list of L{objects.Disk} or None
5154 @param disks: selected disks
5155 @rtype: list of L{objects.Disk}
5156 @return: selected instance disks to act on
5160 return instance.disks
5162 if not set(disks).issubset(instance.disks):
5163 raise errors.ProgrammerError("Can only act on disks belonging to the"
5168 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5169 """Shutdown block devices of an instance.
5171 This does the shutdown on all nodes of the instance.
5173 If the ignore_primary is false, errors on the primary node are
5178 disks = _ExpandCheckDisks(instance, disks)
5181 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5182 lu.cfg.SetDiskID(top_disk, node)
5183 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5184 msg = result.fail_msg
5186 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5187 disk.iv_name, node, msg)
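# errors on the primary node count unless ignore_primary is set; errors on
# secondary nodes count only if the node is not marked offline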
5188 if ((node == instance.primary_node and not ignore_primary) or
5189 (node != instance.primary_node and not result.offline)):
5194 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5195 """Checks if a node has enough free memory.
5197 This function checks if a given node has the needed amount of free
5198 memory. In case the node has less memory or we cannot get the
5199 information from the node, this function raises an OpPrereqError
5202 @type lu: C{LogicalUnit}
5203 @param lu: a logical unit from which we get configuration data
5205 @param node: the node to check
5206 @type reason: C{str}
5207 @param reason: string to use in the error message
5208 @type requested: C{int}
5209 @param requested: the amount of memory in MiB to check for
5210 @type hypervisor_name: C{str}
5211 @param hypervisor_name: the hypervisor to ask for memory stats
5212 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5213 we cannot check the node
5216 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5217 nodeinfo[node].Raise("Can't get data from node %s" % node,
5218 prereq=True, ecode=errors.ECODE_ENVIRON)
5219 free_mem = nodeinfo[node].payload.get('memory_free', None)
5220 if not isinstance(free_mem, int):
5221 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5222 " was '%s'" % (node, free_mem),
5223 errors.ECODE_ENVIRON)
5224 if requested > free_mem:
5225 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5226 " needed %s MiB, available %s MiB" %
5227 (node, reason, requested, free_mem),
5231 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5232 """Checks if nodes have enough free disk space in the all VGs.
5234 This function checks if all given nodes have the needed amount of
5235 free disk. In case any node has less disk or we cannot get the
5236 information from the node, this function raises an OpPrereqError
5239 @type lu: C{LogicalUnit}
5240 @param lu: a logical unit from which we get configuration data
5241 @type nodenames: C{list}
5242 @param nodenames: the list of node names to check
5243 @type req_sizes: C{dict}
5244 @param req_sizes: the hash of vg and corresponding amount of disk in
5246 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5247 or we cannot check the node
5250 for vg, req_size in req_sizes.items():
5251 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5254 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5255 """Checks if nodes have enough free disk space in the specified VG.
5257 This function checks if all given nodes have the needed amount of
5258 free disk. In case any node has less disk or we cannot get the
5259 information from the node, this function raises an OpPrereqError
5262 @type lu: C{LogicalUnit}
5263 @param lu: a logical unit from which we get configuration data
5264 @type nodenames: C{list}
5265 @param nodenames: the list of node names to check
5267 @param vg: the volume group to check
5268 @type requested: C{int}
5269 @param requested: the amount of disk in MiB to check for
5270 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5271 or we cannot check the node
5274 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5275 for node in nodenames:
5276 info = nodeinfo[node]
5277 info.Raise("Cannot get current information from node %s" % node,
5278 prereq=True, ecode=errors.ECODE_ENVIRON)
5279 vg_free = info.payload.get("vg_free", None)
5280 if not isinstance(vg_free, int):
5281 raise errors.OpPrereqError("Can't compute free disk space on node"
5282 " %s for vg %s, result was '%s'" %
5283 (node, vg, vg_free), errors.ECODE_ENVIRON)
5284 if requested > vg_free:
5285 raise errors.OpPrereqError("Not enough disk space on target node %s"
5286 " vg %s: required %d MiB, available %d MiB" %
5287 (node, vg, requested, vg_free),
5291 class LUInstanceStartup(LogicalUnit):
5292 """Starts an instance.
5295 HPATH = "instance-start"
5296 HTYPE = constants.HTYPE_INSTANCE
5299 def CheckArguments(self):
5301 if self.op.beparams:
5302 # fill the beparams dict
5303 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5305 def ExpandNames(self):
5306 self._ExpandAndLockInstance()
5308 def BuildHooksEnv(self):
5311 This runs on master, primary and secondary nodes of the instance.
5315 "FORCE": self.op.force,
5318 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5322 def BuildHooksNodes(self):
5323 """Build hooks nodes.
5326 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5329 def CheckPrereq(self):
5330 """Check prerequisites.
5332 This checks that the instance is in the cluster.
5335 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5336 assert self.instance is not None, \
5337 "Cannot retrieve locked instance %s" % self.op.instance_name
5340 if self.op.hvparams:
5341 # check hypervisor parameter syntax (locally)
5342 cluster = self.cfg.GetClusterInfo()
5343 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5344 filled_hvp = cluster.FillHV(instance)
5345 filled_hvp.update(self.op.hvparams)
5346 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5347 hv_type.CheckParameterSyntax(filled_hvp)
5348 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5350 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5352 if self.primary_offline and self.op.ignore_offline_nodes:
5353 self.proc.LogWarning("Ignoring offline primary node")
5355 if self.op.hvparams or self.op.beparams:
5356 self.proc.LogWarning("Overridden parameters are ignored")
5358 _CheckNodeOnline(self, instance.primary_node)
5360 bep = self.cfg.GetClusterInfo().FillBE(instance)
5362 # check bridges existence
5363 _CheckInstanceBridgesExist(self, instance)
5365 remote_info = self.rpc.call_instance_info(instance.primary_node,
5367 instance.hypervisor)
5368 remote_info.Raise("Error checking node %s" % instance.primary_node,
5369 prereq=True, ecode=errors.ECODE_ENVIRON)
5370 if not remote_info.payload: # not running already
5371 _CheckNodeFreeMemory(self, instance.primary_node,
5372 "starting instance %s" % instance.name,
5373 bep[constants.BE_MEMORY], instance.hypervisor)
5375 def Exec(self, feedback_fn):
5376 """Start the instance.
5379 instance = self.instance
5380 force = self.op.force
5382 self.cfg.MarkInstanceUp(instance.name)
5384 if self.primary_offline:
5385 assert self.op.ignore_offline_nodes
5386 self.proc.LogInfo("Primary node offline, marked instance as started")
5388 node_current = instance.primary_node
5390 _StartInstanceDisks(self, instance, force)
5392 result = self.rpc.call_instance_start(node_current, instance,
5393 self.op.hvparams, self.op.beparams)
5394 msg = result.fail_msg
5396 _ShutdownInstanceDisks(self, instance)
5397 raise errors.OpExecError("Could not start instance: %s" % msg)
5400 class LUInstanceReboot(LogicalUnit):
5401 """Reboot an instance.
5404 HPATH = "instance-reboot"
5405 HTYPE = constants.HTYPE_INSTANCE
5408 def ExpandNames(self):
5409 self._ExpandAndLockInstance()
5411 def BuildHooksEnv(self):
5414 This runs on master, primary and secondary nodes of the instance.
5418 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5419 "REBOOT_TYPE": self.op.reboot_type,
5420 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5423 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5427 def BuildHooksNodes(self):
5428 """Build hooks nodes.
5431 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5434 def CheckPrereq(self):
5435 """Check prerequisites.
5437 This checks that the instance is in the cluster.
5440 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5441 assert self.instance is not None, \
5442 "Cannot retrieve locked instance %s" % self.op.instance_name
5444 _CheckNodeOnline(self, instance.primary_node)
5446 # check bridges existence
5447 _CheckInstanceBridgesExist(self, instance)
5449 def Exec(self, feedback_fn):
5450 """Reboot the instance.
5453 instance = self.instance
5454 ignore_secondaries = self.op.ignore_secondaries
5455 reboot_type = self.op.reboot_type
5457 remote_info = self.rpc.call_instance_info(instance.primary_node,
5459 instance.hypervisor)
5460 remote_info.Raise("Error checking node %s" % instance.primary_node)
5461 instance_running = bool(remote_info.payload)
5463 node_current = instance.primary_node
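# soft/hard reboots of a running instance are delegated to the hypervisor;
# otherwise (full reboot, or instance not running) we stop and start it here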
5465 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5466 constants.INSTANCE_REBOOT_HARD]:
5467 for disk in instance.disks:
5468 self.cfg.SetDiskID(disk, node_current)
5469 result = self.rpc.call_instance_reboot(node_current, instance,
5471 self.op.shutdown_timeout)
5472 result.Raise("Could not reboot instance")
5474 if instance_running:
5475 result = self.rpc.call_instance_shutdown(node_current, instance,
5476 self.op.shutdown_timeout)
5477 result.Raise("Could not shutdown instance for full reboot")
5478 _ShutdownInstanceDisks(self, instance)
5480 self.LogInfo("Instance %s was already stopped, starting now",
5482 _StartInstanceDisks(self, instance, ignore_secondaries)
5483 result = self.rpc.call_instance_start(node_current, instance, None, None)
5484 msg = result.fail_msg
5486 _ShutdownInstanceDisks(self, instance)
5487 raise errors.OpExecError("Could not start instance for"
5488 " full reboot: %s" % msg)
5490 self.cfg.MarkInstanceUp(instance.name)
5493 class LUInstanceShutdown(LogicalUnit):
5494 """Shutdown an instance.
5497 HPATH = "instance-stop"
5498 HTYPE = constants.HTYPE_INSTANCE
5501 def ExpandNames(self):
5502 self._ExpandAndLockInstance()
5504 def BuildHooksEnv(self):
5507 This runs on master, primary and secondary nodes of the instance.
5510 env = _BuildInstanceHookEnvByObject(self, self.instance)
5511 env["TIMEOUT"] = self.op.timeout
5514 def BuildHooksNodes(self):
5515 """Build hooks nodes.
5518 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5521 def CheckPrereq(self):
5522 """Check prerequisites.
5524 This checks that the instance is in the cluster.
5527 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5528 assert self.instance is not None, \
5529 "Cannot retrieve locked instance %s" % self.op.instance_name
5531 self.primary_offline = \
5532 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5534 if self.primary_offline and self.op.ignore_offline_nodes:
5535 self.proc.LogWarning("Ignoring offline primary node")
5537 _CheckNodeOnline(self, self.instance.primary_node)
5539 def Exec(self, feedback_fn):
5540 """Shutdown the instance.
5543 instance = self.instance
5544 node_current = instance.primary_node
5545 timeout = self.op.timeout
5547 self.cfg.MarkInstanceDown(instance.name)
5549 if self.primary_offline:
5550 assert self.op.ignore_offline_nodes
5551 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5553 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5554 msg = result.fail_msg
5556 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5558 _ShutdownInstanceDisks(self, instance)
5561 class LUInstanceReinstall(LogicalUnit):
5562 """Reinstall an instance.
5565 HPATH = "instance-reinstall"
5566 HTYPE = constants.HTYPE_INSTANCE
5569 def ExpandNames(self):
5570 self._ExpandAndLockInstance()
5572 def BuildHooksEnv(self):
5575 This runs on master, primary and secondary nodes of the instance.
5578 return _BuildInstanceHookEnvByObject(self, self.instance)
5580 def BuildHooksNodes(self):
5581 """Build hooks nodes.
5584 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5587 def CheckPrereq(self):
5588 """Check prerequisites.
5590 This checks that the instance is in the cluster and is not running.
5593 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5594 assert instance is not None, \
5595 "Cannot retrieve locked instance %s" % self.op.instance_name
5596 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5597 " offline, cannot reinstall")
5598 for node in instance.secondary_nodes:
5599 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5600 " cannot reinstall")
5602 if instance.disk_template == constants.DT_DISKLESS:
5603 raise errors.OpPrereqError("Instance '%s' has no disks" %
5604 self.op.instance_name,
5606 _CheckInstanceDown(self, instance, "cannot reinstall")
5608 if self.op.os_type is not None:
5610 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5611 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5612 instance_os = self.op.os_type
5614 instance_os = instance.os
5616 nodelist = list(instance.all_nodes)
5618 if self.op.osparams:
5619 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5620 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5621 self.os_inst = i_osdict # the new dict (without defaults)
5625 self.instance = instance
5627 def Exec(self, feedback_fn):
5628 """Reinstall the instance.
5631 inst = self.instance
5633 if self.op.os_type is not None:
5634 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5635 inst.os = self.op.os_type
5636 # Write to configuration
5637 self.cfg.Update(inst, feedback_fn)
5639 _StartInstanceDisks(self, inst, None)
5641 feedback_fn("Running the instance OS create scripts...")
5642 # FIXME: pass debug option from opcode to backend
5643 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5644 self.op.debug_level,
5645 osparams=self.os_inst)
5646 result.Raise("Could not install OS for instance %s on node %s" %
5647 (inst.name, inst.primary_node))
5649 _ShutdownInstanceDisks(self, inst)
5652 class LUInstanceRecreateDisks(LogicalUnit):
5653 """Recreate an instance's missing disks.
5656 HPATH = "instance-recreate-disks"
5657 HTYPE = constants.HTYPE_INSTANCE
5660 def ExpandNames(self):
5661 self._ExpandAndLockInstance()
5663 def BuildHooksEnv(self):
5666 This runs on master, primary and secondary nodes of the instance.
5669 return _BuildInstanceHookEnvByObject(self, self.instance)
5671 def BuildHooksNodes(self):
5672 """Build hooks nodes.
5675 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5678 def CheckPrereq(self):
5679 """Check prerequisites.
5681 This checks that the instance is in the cluster and is not running.
5684 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5685 assert instance is not None, \
5686 "Cannot retrieve locked instance %s" % self.op.instance_name
5687 _CheckNodeOnline(self, instance.primary_node)
5689 if instance.disk_template == constants.DT_DISKLESS:
5690 raise errors.OpPrereqError("Instance '%s' has no disks" %
5691 self.op.instance_name, errors.ECODE_INVAL)
5692 _CheckInstanceDown(self, instance, "cannot recreate disks")
5694 if not self.op.disks:
5695 self.op.disks = range(len(instance.disks))
5697 for idx in self.op.disks:
5698 if idx >= len(instance.disks):
5699 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5702 self.instance = instance
5704 def Exec(self, feedback_fn):
5705 """Recreate the disks.
5709 for idx, _ in enumerate(self.instance.disks):
5710 if idx not in self.op.disks: # disk idx has not been passed in
5714 _CreateDisks(self, self.instance, to_skip=to_skip)
5717 class LUInstanceRename(LogicalUnit):
5718 """Rename an instance.
5721 HPATH = "instance-rename"
5722 HTYPE = constants.HTYPE_INSTANCE
5724 def CheckArguments(self):
5728 if self.op.ip_check and not self.op.name_check:
5729 # TODO: make the ip check more flexible and not depend on the name check
5730 raise errors.OpPrereqError("Cannot do ip check without a name check",
5733 def BuildHooksEnv(self):
5736 This runs on master, primary and secondary nodes of the instance.
5739 env = _BuildInstanceHookEnvByObject(self, self.instance)
5740 env["INSTANCE_NEW_NAME"] = self.op.new_name
5743 def BuildHooksNodes(self):
5744 """Build hooks nodes.
5747 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5750 def CheckPrereq(self):
5751 """Check prerequisites.
5753 This checks that the instance is in the cluster and is not running.
5756 self.op.instance_name = _ExpandInstanceName(self.cfg,
5757 self.op.instance_name)
5758 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5759 assert instance is not None
5760 _CheckNodeOnline(self, instance.primary_node)
5761 _CheckInstanceDown(self, instance, "cannot rename")
5762 self.instance = instance
5764 new_name = self.op.new_name
5765 if self.op.name_check:
5766 hostname = netutils.GetHostname(name=new_name)
5767 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5769 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5770 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5771 " same as given hostname '%s'") %
5772 (hostname.name, self.op.new_name),
5774 new_name = self.op.new_name = hostname.name
5775 if (self.op.ip_check and
5776 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5777 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5778 (hostname.ip, new_name),
5779 errors.ECODE_NOTUNIQUE)
5781 instance_list = self.cfg.GetInstanceList()
5782 if new_name in instance_list and new_name != instance.name:
5783 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5784 new_name, errors.ECODE_EXISTS)
5786 def Exec(self, feedback_fn):
5787 """Rename the instance.
5790 inst = self.instance
5791 old_name = inst.name
5793 rename_file_storage = False
5794 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5795 self.op.new_name != inst.name):
5796 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5797 rename_file_storage = True
5799 self.cfg.RenameInstance(inst.name, self.op.new_name)
5800 # Change the instance lock. This is definitely safe while we hold the BGL
5801 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5802 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5804 # re-read the instance from the configuration after rename
5805 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5807 if rename_file_storage:
5808 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5809 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5810 old_file_storage_dir,
5811 new_file_storage_dir)
5812 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5813 " (but the instance has been renamed in Ganeti)" %
5814 (inst.primary_node, old_file_storage_dir,
5815 new_file_storage_dir))
5817 _StartInstanceDisks(self, inst, None)
5819 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5820 old_name, self.op.debug_level)
5821 msg = result.fail_msg
5823 msg = ("Could not run OS rename script for instance %s on node %s"
5824 " (but the instance has been renamed in Ganeti): %s" %
5825 (inst.name, inst.primary_node, msg))
5826 self.proc.LogWarning(msg)
5828 _ShutdownInstanceDisks(self, inst)
5833 class LUInstanceRemove(LogicalUnit):
5834 """Remove an instance.
5837 HPATH = "instance-remove"
5838 HTYPE = constants.HTYPE_INSTANCE
5841 def ExpandNames(self):
5842 self._ExpandAndLockInstance()
5843 self.needed_locks[locking.LEVEL_NODE] = []
5844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5846 def DeclareLocks(self, level):
5847 if level == locking.LEVEL_NODE:
5848 self._LockInstancesNodes()
5850 def BuildHooksEnv(self):
5853 This runs on master, primary and secondary nodes of the instance.
5856 env = _BuildInstanceHookEnvByObject(self, self.instance)
5857 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5860 def BuildHooksNodes(self):
5861 """Build hooks nodes.
5864 nl = [self.cfg.GetMasterNode()]
5865 nl_post = list(self.instance.all_nodes) + nl
5866 return (nl, nl_post)
5868 def CheckPrereq(self):
5869 """Check prerequisites.
5871 This checks that the instance is in the cluster.
5874 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5875 assert self.instance is not None, \
5876 "Cannot retrieve locked instance %s" % self.op.instance_name
5878 def Exec(self, feedback_fn):
5879 """Remove the instance.
5882 instance = self.instance
5883 logging.info("Shutting down instance %s on node %s",
5884 instance.name, instance.primary_node)
5886 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5887 self.op.shutdown_timeout)
5888 msg = result.fail_msg
5890 if self.op.ignore_failures:
5891 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5893 raise errors.OpExecError("Could not shutdown instance %s on"
5895 (instance.name, instance.primary_node, msg))
5897 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5900 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5901 """Utility function to remove an instance.
5904 logging.info("Removing block devices for instance %s", instance.name)
5906 if not _RemoveDisks(lu, instance):
5907 if not ignore_failures:
5908 raise errors.OpExecError("Can't remove instance's disks")
5909 feedback_fn("Warning: can't remove instance's disks")
5911 logging.info("Removing instance %s out of cluster config", instance.name)
5913 lu.cfg.RemoveInstance(instance.name)
5915 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5916 "Instance lock removal conflict"
5918 # Remove lock for the instance
5919 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5922 class LUInstanceQuery(NoHooksLU):
5923 """Logical unit for querying instances.
5926 # pylint: disable-msg=W0142
5929 def CheckArguments(self):
5930 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5931 self.op.output_fields, self.op.use_locking)
5933 def ExpandNames(self):
5934 self.iq.ExpandNames(self)
5936 def DeclareLocks(self, level):
5937 self.iq.DeclareLocks(self, level)
5939 def Exec(self, feedback_fn):
5940 return self.iq.OldStyleQuery(self)
5943 class LUInstanceFailover(LogicalUnit):
5944 """Failover an instance.
5947 HPATH = "instance-failover"
5948 HTYPE = constants.HTYPE_INSTANCE
5951 def CheckArguments(self):
5952 """Check the arguments.
5955 self.iallocator = getattr(self.op, "iallocator", None)
5956 self.target_node = getattr(self.op, "target_node", None)
5958 def ExpandNames(self):
5959 self._ExpandAndLockInstance()
5961 if self.op.target_node is not None:
5962 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5964 self.needed_locks[locking.LEVEL_NODE] = []
5965 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5967 ignore_consistency = self.op.ignore_consistency
5968 shutdown_timeout = self.op.shutdown_timeout
5969 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5971 iallocator=self.op.iallocator,
5972 target_node=self.op.target_node,
5974 ignore_consistency=ignore_consistency,
5975 shutdown_timeout=shutdown_timeout)
5976 self.tasklets = [self._migrater]
5978 def DeclareLocks(self, level):
5979 if level == locking.LEVEL_NODE:
5980 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5981 if instance.disk_template in constants.DTS_EXT_MIRROR:
5982 if self.op.target_node is None:
5983 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5985 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5986 self.op.target_node]
5987 del self.recalculate_locks[locking.LEVEL_NODE]
5989 self._LockInstancesNodes()
5991 def BuildHooksEnv(self):
5994 This runs on master, primary and secondary nodes of the instance.
5997 instance = self._migrater.instance
5998 source_node = instance.primary_node
5999 target_node = self._migrater.target_node
6001 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6002 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6003 "OLD_PRIMARY": source_node,
6004 "NEW_PRIMARY": target_node,
6007 if instance.disk_template in constants.DTS_INT_MIRROR:
6008 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6009 env["NEW_SECONDARY"] = source_node
6011 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6013 env.update(_BuildInstanceHookEnvByObject(self, instance))
6017 def BuildHooksNodes(self):
6018 """Build hooks nodes.
6021 instance = self._migrater.instance
6022 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6023 return (nl, nl + [instance.primary_node])
6026 class LUInstanceMigrate(LogicalUnit):
6027 """Migrate an instance.
6029 This is migration without shutting down the instance, as opposed to
6030 failover, which shuts the instance down first.
6033 HPATH = "instance-migrate"
6034 HTYPE = constants.HTYPE_INSTANCE
6037 def ExpandNames(self):
6038 self._ExpandAndLockInstance()
6040 if self.op.target_node is not None:
6041 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6043 self.needed_locks[locking.LEVEL_NODE] = []
6044 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6046 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6047 cleanup=self.op.cleanup,
6048 iallocator=self.op.iallocator,
6049 target_node=self.op.target_node,
6051 fallback=self.op.allow_failover)
6052 self.tasklets = [self._migrater]
6054 def DeclareLocks(self, level):
6055 if level == locking.LEVEL_NODE:
6056 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6057 if instance.disk_template in constants.DTS_EXT_MIRROR:
6058 if self.op.target_node is None:
6059 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6061 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6062 self.op.target_node]
6063 del self.recalculate_locks[locking.LEVEL_NODE]
6065 self._LockInstancesNodes()
6067 def BuildHooksEnv(self):
6070 This runs on master, primary and secondary nodes of the instance.
6073 instance = self._migrater.instance
6074 source_node = instance.primary_node
6075 target_node = self._migrater.target_node
6076 env = _BuildInstanceHookEnvByObject(self, instance)
6078 "MIGRATE_LIVE": self._migrater.live,
6079 "MIGRATE_CLEANUP": self.op.cleanup,
6080 "OLD_PRIMARY": source_node,
6081 "NEW_PRIMARY": target_node,
6084 if instance.disk_template in constants.DTS_INT_MIRROR:
6085 env["OLD_SECONDARY"] = target_node
6086 env["NEW_SECONDARY"] = source_node
6088 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6092 def BuildHooksNodes(self):
6093 """Build hooks nodes.
6096 instance = self._migrater.instance
6097 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6098 return (nl, nl + [instance.primary_node])
6101 class LUInstanceMove(LogicalUnit):
6102 """Move an instance by data-copying.
6105 HPATH = "instance-move"
6106 HTYPE = constants.HTYPE_INSTANCE
6109 def ExpandNames(self):
6110 self._ExpandAndLockInstance()
6111 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6112 self.op.target_node = target_node
6113 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6114 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6116 def DeclareLocks(self, level):
6117 if level == locking.LEVEL_NODE:
6118 self._LockInstancesNodes(primary_only=True)
6120 def BuildHooksEnv(self):
6123 This runs on master, primary and secondary nodes of the instance.
6127 "TARGET_NODE": self.op.target_node,
6128 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6130 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6133 def BuildHooksNodes(self):
6134 """Build hooks nodes.
6138 self.cfg.GetMasterNode(),
6139 self.instance.primary_node,
6140 self.op.target_node,
6144 def CheckPrereq(self):
6145 """Check prerequisites.
6147 This checks that the instance is in the cluster.
6150 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6151 assert self.instance is not None, \
6152 "Cannot retrieve locked instance %s" % self.op.instance_name
6154 node = self.cfg.GetNodeInfo(self.op.target_node)
6155 assert node is not None, \
6156 "Cannot retrieve locked node %s" % self.op.target_node
6158 self.target_node = target_node = node.name
6160 if target_node == instance.primary_node:
6161 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6162 (instance.name, target_node),
6165 bep = self.cfg.GetClusterInfo().FillBE(instance)
6167 for idx, dsk in enumerate(instance.disks):
6168 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6169 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6170 " cannot copy" % idx, errors.ECODE_STATE)
6172 _CheckNodeOnline(self, target_node)
6173 _CheckNodeNotDrained(self, target_node)
6174 _CheckNodeVmCapable(self, target_node)
6176 if instance.admin_up:
6177 # check memory requirements on the target node
6178 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6179 instance.name, bep[constants.BE_MEMORY],
6180 instance.hypervisor)
6182 self.LogInfo("Not checking memory on the secondary node as"
6183 " instance will not be started")
6185 # check bridge existence
6186 _CheckInstanceBridgesExist(self, instance, node=target_node)
6188 def Exec(self, feedback_fn):
6189 """Move an instance.
6191 The move is done by shutting it down on its present node, copying
6192 the data over (slow) and starting it on the new node.
6195 instance = self.instance
6197 source_node = instance.primary_node
6198 target_node = self.target_node
6200 self.LogInfo("Shutting down instance %s on source node %s",
6201 instance.name, source_node)
6203 result = self.rpc.call_instance_shutdown(source_node, instance,
6204 self.op.shutdown_timeout)
6205 msg = result.fail_msg
6207 if self.op.ignore_consistency:
6208 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6209 " Proceeding anyway. Please make sure node"
6210 " %s is down. Error details: %s",
6211 instance.name, source_node, source_node, msg)
6213 raise errors.OpExecError("Could not shutdown instance %s on"
6215 (instance.name, source_node, msg))
6217 # create the target disks
6219 _CreateDisks(self, instance, target_node=target_node)
6220 except errors.OpExecError:
6221 self.LogWarning("Device creation failed, reverting...")
6223 _RemoveDisks(self, instance, target_node=target_node)
6225 self.cfg.ReleaseDRBDMinors(instance.name)
6228 cluster_name = self.cfg.GetClusterInfo().cluster_name
6231 # activate, get path, copy the data over
6232 for idx, disk in enumerate(instance.disks):
6233 self.LogInfo("Copying data for disk %d", idx)
6234 result = self.rpc.call_blockdev_assemble(target_node, disk,
6235 instance.name, True, idx)
6237 self.LogWarning("Can't assemble newly created disk %d: %s",
6238 idx, result.fail_msg)
6239 errs.append(result.fail_msg)
6241 dev_path = result.payload
6242 result = self.rpc.call_blockdev_export(source_node, disk,
6243 target_node, dev_path,
6246 self.LogWarning("Can't copy data over for disk %d: %s",
6247 idx, result.fail_msg)
6248 errs.append(result.fail_msg)
6252 self.LogWarning("Some disks failed to copy, aborting")
6254 _RemoveDisks(self, instance, target_node=target_node)
6256 self.cfg.ReleaseDRBDMinors(instance.name)
6257 raise errors.OpExecError("Errors during disk copy: %s" %
6260 instance.primary_node = target_node
6261 self.cfg.Update(instance, feedback_fn)
6263 self.LogInfo("Removing the disks on the original node")
6264 _RemoveDisks(self, instance, target_node=source_node)
6266 # Only start the instance if it's marked as up
6267 if instance.admin_up:
6268 self.LogInfo("Starting instance %s on node %s",
6269 instance.name, target_node)
6271 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6272 ignore_secondaries=True)
6274 _ShutdownInstanceDisks(self, instance)
6275 raise errors.OpExecError("Can't activate the instance's disks")
6277 result = self.rpc.call_instance_start(target_node, instance, None, None)
6278 msg = result.fail_msg
6280 _ShutdownInstanceDisks(self, instance)
6281 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6282 (instance.name, target_node, msg))
6285 class LUNodeMigrate(LogicalUnit):
6286 """Migrate all instances from a node.
6289 HPATH = "node-migrate"
6290 HTYPE = constants.HTYPE_NODE
6293 def CheckArguments(self):
6294 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6296 def ExpandNames(self):
6297 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6299 self.needed_locks = {}
6301 # Create tasklets for migrating all primary instances on this node
6305 self.lock_all_nodes = False
6307 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6308 logging.debug("Migrating instance %s", inst.name)
6309 names.append(inst.name)
6311 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6312 iallocator=self.op.iallocator,
6315 if inst.disk_template in constants.DTS_EXT_MIRROR:
6316 # We need to lock all nodes, as the iallocator will choose the
6317 # destination nodes afterwards
6318 self.lock_all_nodes = True
6320 self.tasklets = tasklets
6322 # Declare node locks
6323 if self.lock_all_nodes:
6324 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6326 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6329 # Declare instance locks
6330 self.needed_locks[locking.LEVEL_INSTANCE] = names
6332 def DeclareLocks(self, level):
6333 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6334 self._LockInstancesNodes()
6336 def BuildHooksEnv(self):
6339 This runs on the master, the primary and all the secondaries.
6343 "NODE_NAME": self.op.node_name,
6346 def BuildHooksNodes(self):
6347 """Build hooks nodes.
6350 nl = [self.cfg.GetMasterNode()]
6354 class TLMigrateInstance(Tasklet):
6355 """Tasklet class for instance migration.
6358 @ivar live: whether the migration will be done live or non-live;
6359 this variable is initialized only after CheckPrereq has run
6360 @type cleanup: boolean
6361 @ivar cleanup: Whether we are cleaning up after a failed migration
6362 @type iallocator: string
6363 @ivar iallocator: The iallocator used to determine target_node
6364 @type target_node: string
6365 @ivar target_node: If given, the target_node to reallocate the instance to
6366 @type failover: boolean
6367 @ivar failover: Whether operation results in failover or migration
6368 @type fallback: boolean
6369 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
6371 @type ignore_consistency: boolean
6372 @ivar ignore_consistency: Whether we should ignore consistency between the source and the target node
6374 @type shutdown_timeout: int
6375 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
6378 def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6379 target_node=None, failover=False, fallback=False,
6380 ignore_consistency=False,
6381 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6382 """Initializes this class.
6385 Tasklet.__init__(self, lu)
6388 self.instance_name = instance_name
6389 self.cleanup = cleanup
6390 self.live = False # will be overridden later
6391 self.iallocator = iallocator
6392 self.target_node = target_node
6393 self.failover = failover
6394 self.fallback = fallback
6395 self.ignore_consistency = ignore_consistency
6396 self.shutdown_timeout = shutdown_timeout
6398 def CheckPrereq(self):
6399 """Check prerequisites.
6401 This checks that the instance is in the cluster.
6404 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6405 instance = self.cfg.GetInstanceInfo(instance_name)
6406 assert instance is not None
6407 self.instance = instance
6409 if (not self.cleanup and not instance.admin_up and not self.failover and
6411 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6413 self.failover = True
6415 if instance.disk_template not in constants.DTS_MIRRORED:
6420 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6421 " %s" % (instance.disk_template, text),
6424 if instance.disk_template in constants.DTS_EXT_MIRROR:
6425 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6428 self._RunAllocator()
6430 # self.target_node is already populated, either directly or by the
6432 target_node = self.target_node
6434 if len(self.lu.tasklets) == 1:
6435 # It is safe to remove locks only when we're the only tasklet in the LU
6436 nodes_keep = [instance.primary_node, self.target_node]
6437 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6438 if node not in nodes_keep]
6439 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6440 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6443 secondary_nodes = instance.secondary_nodes
6444 if not secondary_nodes:
6445 raise errors.ConfigurationError("No secondary node but using"
6446 " %s disk template" %
6447 instance.disk_template)
6448 target_node = secondary_nodes[0]
6449 if self.iallocator or (self.target_node and
6450 self.target_node != target_node):
6452 text = "failed over"
6455 raise errors.OpPrereqError("Instances with disk template %s cannot"
6456 " be %s over to arbitrary nodes"
6457 " (neither an iallocator nor a target"
6458 " node can be passed)" %
6459 (text, instance.disk_template),
6462 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6464 # check memory requirements on the secondary node
6465 if not self.failover or instance.admin_up:
6466 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6467 instance.name, i_be[constants.BE_MEMORY],
6468 instance.hypervisor)
6470 self.lu.LogInfo("Not checking memory on the secondary node as"
6471 " instance will not be started")
6473 # check bridge existence
6474 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6476 if not self.cleanup:
6477 _CheckNodeNotDrained(self.lu, target_node)
6478 if not self.failover:
6479 result = self.rpc.call_instance_migratable(instance.primary_node,
6481 if result.fail_msg and self.fallback:
6482 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6484 self.failover = True
6486 result.Raise("Can't migrate, please use failover",
6487 prereq=True, ecode=errors.ECODE_STATE)
6489 assert not (self.failover and self.cleanup)
6491 def _RunAllocator(self):
6492 """Run the allocator based on input opcode.
6495 ial = IAllocator(self.cfg, self.rpc,
6496 mode=constants.IALLOCATOR_MODE_RELOC,
6497 name=self.instance_name,
6498 # TODO See why hail breaks with a single node below
6499 relocate_from=[self.instance.primary_node,
6500 self.instance.primary_node],
6503 ial.Run(self.iallocator)
6506 raise errors.OpPrereqError("Can't compute nodes using"
6507 " iallocator '%s': %s" %
6508 (self.iallocator, ial.info),
6510 if len(ial.result) != ial.required_nodes:
6511 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6512 " of nodes (%s), required %s" %
6513 (self.iallocator, len(ial.result),
6514 ial.required_nodes), errors.ECODE_FAULT)
6515 self.target_node = ial.result[0]
6516 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6517 self.instance_name, self.iallocator,
6518 utils.CommaJoin(ial.result))
6520 if not self.failover:
6521 if self.lu.op.live is not None and self.lu.op.mode is not None:
6522 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6523 " parameters are accepted",
6525 if self.lu.op.live is not None:
6527 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6529 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6530 # reset the 'live' parameter to None so that repeated
6531 # invocations of CheckPrereq do not raise an exception
6532 self.lu.op.live = None
6533 elif self.lu.op.mode is None:
6534 # read the default value from the hypervisor
6535 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6537 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6539 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6541 # Failover is never live
6544 def _WaitUntilSync(self):
6545 """Poll with custom rpc for disk sync.
6547 This uses our own step-based rpc call.
6550 self.feedback_fn("* wait until resync is done")
6554 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6556 self.instance.disks)
6558 for node, nres in result.items():
6559 nres.Raise("Cannot resync disks on node %s" % node)
6560 node_done, node_percent = nres.payload
6561 all_done = all_done and node_done
6562 if node_percent is not None:
6563 min_percent = min(min_percent, node_percent)
6565 if min_percent < 100:
6566 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6569 def _EnsureSecondary(self, node):
6570 """Demote a node to secondary.
6573 self.feedback_fn("* switching node %s to secondary mode" % node)
6575 for dev in self.instance.disks:
6576 self.cfg.SetDiskID(dev, node)
6578 result = self.rpc.call_blockdev_close(node, self.instance.name,
6579 self.instance.disks)
6580 result.Raise("Cannot change disk to secondary on node %s" % node)
6582 def _GoStandalone(self):
6583 """Disconnect from the network.
6586 self.feedback_fn("* changing into standalone mode")
6587 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6588 self.instance.disks)
6589 for node, nres in result.items():
6590 nres.Raise("Cannot disconnect disks on node %s" % node)
6592 def _GoReconnect(self, multimaster):
6593 """Reconnect to the network.
6599 msg = "single-master"
6600 self.feedback_fn("* changing disks into %s mode" % msg)
6601 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6602 self.instance.disks,
6603 self.instance.name, multimaster)
6604 for node, nres in result.items():
6605 nres.Raise("Cannot change disks config on node %s" % node)
6607 def _ExecCleanup(self):
6608 """Try to cleanup after a failed migration.
6610 The cleanup is done by:
6611 - check that the instance is running only on one node
6612 (and update the config if needed)
6613 - change disks on its secondary node to secondary
6614 - wait until disks are fully synchronized
6615 - disconnect from the network
6616 - change disks into single-master mode
6617 - wait again until disks are fully synchronized
6620 instance = self.instance
6621 target_node = self.target_node
6622 source_node = self.source_node
6624 # check running on only one node
6625 self.feedback_fn("* checking where the instance actually runs"
6626 " (if this hangs, the hypervisor might be in"
6628 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6629 for node, result in ins_l.items():
6630 result.Raise("Can't contact node %s" % node)
6632 runningon_source = instance.name in ins_l[source_node].payload
6633 runningon_target = instance.name in ins_l[target_node].payload
6635 if runningon_source and runningon_target:
6636 raise errors.OpExecError("Instance seems to be running on two nodes,"
6637 " or the hypervisor is confused. You will have"
6638 " to ensure manually that it runs only on one"
6639 " and restart this operation.")
6641 if not (runningon_source or runningon_target):
6642 raise errors.OpExecError("Instance does not seem to be running at all."
6643 " In this case, it's safer to repair by"
6644 " running 'gnt-instance stop' to ensure disk"
6645 " shutdown, and then restarting it.")
6647 if runningon_target:
6648 # the migration has actually succeeded, we need to update the config
6649 self.feedback_fn("* instance running on secondary node (%s),"
6650 " updating config" % target_node)
6651 instance.primary_node = target_node
6652 self.cfg.Update(instance, self.feedback_fn)
6653 demoted_node = source_node
6655 self.feedback_fn("* instance confirmed to be running on its"
6656 " primary node (%s)" % source_node)
6657 demoted_node = target_node
6659 if instance.disk_template in constants.DTS_INT_MIRROR:
6660 self._EnsureSecondary(demoted_node)
6662 self._WaitUntilSync()
6663 except errors.OpExecError:
6664 # we ignore errors here, since if the device is standalone, it
6665 # won't be able to sync
6667 self._GoStandalone()
6668 self._GoReconnect(False)
6669 self._WaitUntilSync()
6671 self.feedback_fn("* done")
6673 def _RevertDiskStatus(self):
6674 """Try to revert the disk status after a failed migration.
6677 target_node = self.target_node
6678 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6682 self._EnsureSecondary(target_node)
6683 self._GoStandalone()
6684 self._GoReconnect(False)
6685 self._WaitUntilSync()
6686 except errors.OpExecError, err:
6687 self.lu.LogWarning("Migration failed and I can't reconnect the"
6688 " drives: error '%s'\n"
6689 "Please look and recover the instance status" %
6692 def _AbortMigration(self):
6693 """Call the hypervisor code to abort a started migration.
6696 instance = self.instance
6697 target_node = self.target_node
6698 migration_info = self.migration_info
6700 abort_result = self.rpc.call_finalize_migration(target_node,
6704 abort_msg = abort_result.fail_msg
6706 logging.error("Aborting migration failed on target node %s: %s",
6707 target_node, abort_msg)
6708 # Don't raise an exception here, as we still have to try to revert the
6709 # disk status, even if this step failed.
6711 def _ExecMigration(self):
6712 """Migrate an instance.
6714 The migrate is done by:
6715 - change the disks into dual-master mode
6716 - wait until disks are fully synchronized again
6717 - migrate the instance
6718 - change disks on the new secondary node (the old primary) to secondary
6719 - wait until disks are fully synchronized
6720 - change disks into single-master mode
6723 instance = self.instance
6724 target_node = self.target_node
6725 source_node = self.source_node
6727 self.feedback_fn("* checking disk consistency between source and target")
6728 for dev in instance.disks:
6729 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6730 raise errors.OpExecError("Disk %s is degraded or not fully"
6731 " synchronized on target node,"
6732 " aborting migrate." % dev.iv_name)
6734 # First get the migration information from the remote node
6735 result = self.rpc.call_migration_info(source_node, instance)
6736 msg = result.fail_msg
6738 log_err = ("Failed fetching source migration information from %s: %s" %
6740 logging.error(log_err)
6741 raise errors.OpExecError(log_err)
6743 self.migration_info = migration_info = result.payload
6745 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6746 # Then switch the disks to master/master mode
6747 self._EnsureSecondary(target_node)
6748 self._GoStandalone()
6749 self._GoReconnect(True)
6750 self._WaitUntilSync()
6752 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6753 result = self.rpc.call_accept_instance(target_node,
6756 self.nodes_ip[target_node])
6758 msg = result.fail_msg
6760 logging.error("Instance pre-migration failed, trying to revert"
6761 " disk status: %s", msg)
6762 self.feedback_fn("Pre-migration failed, aborting")
6763 self._AbortMigration()
6764 self._RevertDiskStatus()
6765 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6766 (instance.name, msg))
6768 self.feedback_fn("* migrating instance to %s" % target_node)
6769 result = self.rpc.call_instance_migrate(source_node, instance,
6770 self.nodes_ip[target_node],
6772 msg = result.fail_msg
6774 logging.error("Instance migration failed, trying to revert"
6775 " disk status: %s", msg)
6776 self.feedback_fn("Migration failed, aborting")
6777 self._AbortMigration()
6778 self._RevertDiskStatus()
6779 raise errors.OpExecError("Could not migrate instance %s: %s" %
6780 (instance.name, msg))
6782 instance.primary_node = target_node
6783 # distribute new instance config to the other nodes
6784 self.cfg.Update(instance, self.feedback_fn)
6786 result = self.rpc.call_finalize_migration(target_node,
6790 msg = result.fail_msg
6792 logging.error("Instance migration succeeded, but finalization failed:"
6794 raise errors.OpExecError("Could not finalize instance migration: %s" %
6797 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6798 self._EnsureSecondary(source_node)
6799 self._WaitUntilSync()
6800 self._GoStandalone()
6801 self._GoReconnect(False)
6802 self._WaitUntilSync()
6804 self.feedback_fn("* done")
6806 def _ExecFailover(self):
6807 """Failover an instance.
6809 The failover is done by shutting it down on its present node and
6810 starting it on the secondary.
6813 instance = self.instance
6814 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6816 source_node = instance.primary_node
6817 target_node = self.target_node
6819 if instance.admin_up:
6820 self.feedback_fn("* checking disk consistency between source and target")
6821 for dev in instance.disks:
6822 # for drbd, these are drbd over lvm
6823 if not _CheckDiskConsistency(self, dev, target_node, False):
6824 if not self.ignore_consistency:
6825 raise errors.OpExecError("Disk %s is degraded on target node,"
6826 " aborting failover." % dev.iv_name)
6828 self.feedback_fn("* not checking disk consistency as instance is not"
6831 self.feedback_fn("* shutting down instance on source node")
6832 logging.info("Shutting down instance %s on node %s",
6833 instance.name, source_node)
6835 result = self.rpc.call_instance_shutdown(source_node, instance,
6836 self.shutdown_timeout)
6837 msg = result.fail_msg
6839 if self.ignore_consistency or primary_node.offline:
6840 self.lu.LogWarning("Could not shutdown instance %s on node %s."
6841 " Proceeding anyway. Please make sure node"
6842 " %s is down. Error details: %s",
6843 instance.name, source_node, source_node, msg)
6845 raise errors.OpExecError("Could not shutdown instance %s on"
6847 (instance.name, source_node, msg))
6849 self.feedback_fn("* deactivating the instance's disks on source node")
6850 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6851 raise errors.OpExecError("Can't shut down the instance's disks.")
6853 instance.primary_node = target_node
6854 # distribute new instance config to the other nodes
6855 self.cfg.Update(instance, self.feedback_fn)
6857 # Only start the instance if it's marked as up
6858 if instance.admin_up:
6859 self.feedback_fn("* activating the instance's disks on target node")
6860 logging.info("Starting instance %s on node %s",
6861 instance.name, target_node)
6863 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6864 ignore_secondaries=True)
6866 _ShutdownInstanceDisks(self, instance)
6867 raise errors.OpExecError("Can't activate the instance's disks")
6869 self.feedback_fn("* starting the instance on the target node")
6870 result = self.rpc.call_instance_start(target_node, instance, None, None)
6871 msg = result.fail_msg
6873 _ShutdownInstanceDisks(self, instance)
6874 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6875 (instance.name, target_node, msg))
6877 def Exec(self, feedback_fn):
6878 """Perform the migration.
6881 self.feedback_fn = feedback_fn
6882 self.source_node = self.instance.primary_node
6884 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6885 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6886 self.target_node = self.instance.secondary_nodes[0]
6887 # Otherwise self.target_node has been populated either
6888 # directly, or through an iallocator.
6890 self.all_nodes = [self.source_node, self.target_node]
6892 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6893 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6897 feedback_fn("Failover instance %s" % self.instance.name)
6898 self._ExecFailover()
6900 feedback_fn("Migrating instance %s" % self.instance.name)
6903 return self._ExecCleanup()
6905 return self._ExecMigration()
6908 def _CreateBlockDev(lu, node, instance, device, force_create,
6910 """Create a tree of block devices on a given node.
6912 If this device type has to be created on secondaries, create it and
6915 If not, just recurse to children keeping the same 'force' value.
6917 @param lu: the lu on whose behalf we execute
6918 @param node: the node on which to create the device
6919 @type instance: L{objects.Instance}
6920 @param instance: the instance which owns the device
6921 @type device: L{objects.Disk}
6922 @param device: the device to create
6923 @type force_create: boolean
6924 @param force_create: whether to force creation of this device; this
6925 will be changed to True whenever we find a device which has the
6926 CreateOnSecondary() attribute
6927 @param info: the extra 'metadata' we should attach to the device
6928 (this will be represented as an LVM tag)
6929 @type force_open: boolean
6930 @param force_open: this parameter will be passed to the
6931 L{backend.BlockdevCreate} function where it specifies
6932 whether we run on primary or not, and it affects both
6933 the child assembly and the device's own Open() execution
6936 if device.CreateOnSecondary():
6940 for child in device.children:
6941 _CreateBlockDev(lu, node, instance, child, force_create,
6944 if not force_create:
6947 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
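# Rough illustration of the flow above: children are created first, then the
# device itself; force_create starts out False on secondary nodes and is
# switched to True once a device in the tree reports CreateOnSecondary(), so
# devices for which it stays False are simply skipped.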
6950 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6951 """Create a single block device on a given node.
6953 This will not recurse over children of the device, so they must be created in advance.
6956 @param lu: the lu on whose behalf we execute
6957 @param node: the node on which to create the device
6958 @type instance: L{objects.Instance}
6959 @param instance: the instance which owns the device
6960 @type device: L{objects.Disk}
6961 @param device: the device to create
6962 @param info: the extra 'metadata' we should attach to the device
6963 (this will be represented as an LVM tag)
6964 @type force_open: boolean
6965 @param force_open: this parameter will be passed to the
6966 L{backend.BlockdevCreate} function where it specifies
6967 whether we run on primary or not, and it affects both
6968 the child assembly and the device's own Open() execution
6971 lu.cfg.SetDiskID(device, node)
6972 result = lu.rpc.call_blockdev_create(node, device, device.size,
6973 instance.name, force_open, info)
6974 result.Raise("Can't create block device %s on"
6975 " node %s for instance %s" % (device, node, instance.name))
6976 if device.physical_id is None:
6977 device.physical_id = result.payload
6980 def _GenerateUniqueNames(lu, exts):
6981 """Generate a suitable LV name.
6983 This will generate a logical volume name for the given instance.
6988 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6989 results.append("%s%s" % (new_id, val))
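# Illustrative example (the IDs below are made up): for exts == [".disk0", ".disk1"]
# this could return something like ["<uuid-1>.disk0", "<uuid-2>.disk1"], with a
# fresh unique ID requested from the configuration for each extension.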
6993 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6995 """Generate a drbd8 device complete with its children.
6998 port = lu.cfg.AllocatePort()
6999 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
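# The branch assembled below is a DRBD8 device whose children are the data LV
# (of the requested size) and a fixed 128 MB metadata LV; the TCP port and the
# shared secret are allocated from the cluster configuration, while the two
# DRBD minors are passed in by the caller.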
7000 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7001 logical_id=(vgname, names[0]))
7002 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7003 logical_id=(vgname, names[1]))
7004 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7005 logical_id=(primary, secondary, port,
7008 children=[dev_data, dev_meta],
7013 def _GenerateDiskTemplate(lu, template_name,
7014 instance_name, primary_node,
7015 secondary_nodes, disk_info,
7016 file_storage_dir, file_driver,
7017 base_index, feedback_fn):
7018 """Generate the entire disk layout for a given template type.
7021 #TODO: compute space requirements
7023 vgname = lu.cfg.GetVGName()
7024 disk_count = len(disk_info)
7026 if template_name == constants.DT_DISKLESS:
7028 elif template_name == constants.DT_PLAIN:
7029 if len(secondary_nodes) != 0:
7030 raise errors.ProgrammerError("Wrong template configuration")
7032 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7033 for i in range(disk_count)])
7034 for idx, disk in enumerate(disk_info):
7035 disk_index = idx + base_index
7036 vg = disk.get(constants.IDISK_VG, vgname)
7037 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7038 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7039 size=disk[constants.IDISK_SIZE],
7040 logical_id=(vg, names[idx]),
7041 iv_name="disk/%d" % disk_index,
7042 mode=disk[constants.IDISK_MODE])
7043 disks.append(disk_dev)
7044 elif template_name == constants.DT_DRBD8:
7045 if len(secondary_nodes) != 1:
7046 raise errors.ProgrammerError("Wrong template configuration")
7047 remote_node = secondary_nodes[0]
7048 minors = lu.cfg.AllocateDRBDMinor(
7049 [primary_node, remote_node] * len(disk_info), instance_name)
7052 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7053 for i in range(disk_count)]):
7054 names.append(lv_prefix + "_data")
7055 names.append(lv_prefix + "_meta")
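# Illustrative example (IDs made up): with two disks and base_index == 0 the
# names list becomes
#   ["<uuid-1>.disk0_data", "<uuid-1>.disk0_meta",
#    "<uuid-2>.disk1_data", "<uuid-2>.disk1_meta"]
# i.e. one data/meta LV pair per disk.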
7056 for idx, disk in enumerate(disk_info):
7057 disk_index = idx + base_index
7058 vg = disk.get(constants.IDISK_VG, vgname)
7059 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7060 disk[constants.IDISK_SIZE], vg,
7061 names[idx * 2:idx * 2 + 2],
7062 "disk/%d" % disk_index,
7063 minors[idx * 2], minors[idx * 2 + 1])
7064 disk_dev.mode = disk[constants.IDISK_MODE]
7065 disks.append(disk_dev)
7066 elif template_name == constants.DT_FILE:
7067 if len(secondary_nodes) != 0:
7068 raise errors.ProgrammerError("Wrong template configuration")
7070 opcodes.RequireFileStorage()
7072 for idx, disk in enumerate(disk_info):
7073 disk_index = idx + base_index
7074 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7075 size=disk[constants.IDISK_SIZE],
7076 iv_name="disk/%d" % disk_index,
7077 logical_id=(file_driver,
7078 "%s/disk%d" % (file_storage_dir,
7080 mode=disk[constants.IDISK_MODE])
7081 disks.append(disk_dev)
7082 elif template_name == constants.DT_SHARED_FILE:
7083 if len(secondary_nodes) != 0:
7084 raise errors.ProgrammerError("Wrong template configuration")
7086 opcodes.RequireSharedFileStorage()
7088 for idx, disk in enumerate(disk_info):
7089 disk_index = idx + base_index
7090 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7091 size=disk[constants.IDISK_SIZE],
7092 iv_name="disk/%d" % disk_index,
7093 logical_id=(file_driver,
7094 "%s/disk%d" % (file_storage_dir,
7096 mode=disk[constants.IDISK_MODE])
7097 disks.append(disk_dev)
7098 elif template_name == constants.DT_BLOCK:
7099 if len(secondary_nodes) != 0:
7100 raise errors.ProgrammerError("Wrong template configuration")
7102 for idx, disk in enumerate(disk_info):
7103 disk_index = idx + base_index
7104 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7105 size=disk[constants.IDISK_SIZE],
7106 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7107 disk[constants.IDISK_ADOPT]),
7108 iv_name="disk/%d" % disk_index,
7109 mode=disk[constants.IDISK_MODE])
7110 disks.append(disk_dev)
7113 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7117 def _GetInstanceInfoText(instance):
7118 """Compute the text that should be added to the disk's metadata.
7121 return "originstname+%s" % instance.name
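# Example: an instance named "web1.example.com" gets the text
# "originstname+web1.example.com" attached to its disks (as an LVM tag).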
7124 def _CalcEta(time_taken, written, total_size):
7125 """Calculates the ETA based on size written and total size.
7127 @param time_taken: The time taken so far
7128 @param written: amount written so far
7129 @param total_size: The total size of data to be written
7130 @return: The remaining time in seconds
7133 avg_time = time_taken / float(written)
7134 return (total_size - written) * avg_time
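# Hedged worked example (illustrative numbers, units as used by the caller):
# if 200 of 1000 units were written in 50 seconds, then
#   avg_time == 50 / 200.0 == 0.25 seconds per unit
# and the returned ETA is (1000 - 200) * 0.25 == 200.0 seconds.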
7137 def _WipeDisks(lu, instance):
7138 """Wipes instance disks.
7140 @type lu: L{LogicalUnit}
7141 @param lu: the logical unit on whose behalf we execute
7142 @type instance: L{objects.Instance}
7143 @param instance: the instance whose disks we should wipe
7144 @return: the success of the wipe
7147 node = instance.primary_node
7149 for device in instance.disks:
7150 lu.cfg.SetDiskID(device, node)
7152 logging.info("Pause sync of instance %s disks", instance.name)
7153 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7155 for idx, success in enumerate(result.payload):
7157 logging.warn("pause-sync of instance %s for disks %d failed",
7161 for idx, device in enumerate(instance.disks):
7162 lu.LogInfo("* Wiping disk %d", idx)
7163 logging.info("Wiping disk %d for instance %s, node %s",
7164 idx, instance.name, node)
7166 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7167 # MAX_WIPE_CHUNK at max
7168 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7169 constants.MIN_WIPE_CHUNK_PERCENT)
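# Hedged example (assuming MAX_WIPE_CHUNK == 1024 and MIN_WIPE_CHUNK_PERCENT == 10,
# which are only illustrative values here): a 10240 MB disk would be wiped in
# chunks of min(1024, 10240 / 100.0 * 10), i.e. 1024 MB at a time.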
7174 start_time = time.time()
7176 while offset < size:
7177 wipe_size = min(wipe_chunk_size, size - offset)
7178 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7179 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7180 (idx, offset, wipe_size))
7183 if now - last_output >= 60:
7184 eta = _CalcEta(now - start_time, offset, size)
7185 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7186 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7189 logging.info("Resume sync of instance %s disks", instance.name)
7191 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7193 for idx, success in enumerate(result.payload):
7195 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7196 " look at the status and troubleshoot the issue.", idx)
7197 logging.warn("resume-sync of instance %s for disks %d failed",
7201 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7202 """Create all disks for an instance.
7204 This abstracts away some work from AddInstance.
7206 @type lu: L{LogicalUnit}
7207 @param lu: the logical unit on whose behalf we execute
7208 @type instance: L{objects.Instance}
7209 @param instance: the instance whose disks we should create
7211 @param to_skip: list of indices to skip
7212 @type target_node: string
7213 @param target_node: if passed, overrides the target node for creation
7215 @return: the success of the creation
7218 info = _GetInstanceInfoText(instance)
7219 if target_node is None:
7220 pnode = instance.primary_node
7221 all_nodes = instance.all_nodes
7226 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7227 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7228 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7230 result.Raise("Failed to create directory '%s' on"
7231 " node %s" % (file_storage_dir, pnode))
7233 # Note: this needs to be kept in sync with adding of disks in
7234 # LUInstanceSetParams
7235 for idx, device in enumerate(instance.disks):
7236 if to_skip and idx in to_skip:
7238 logging.info("Creating volume %s for instance %s",
7239 device.iv_name, instance.name)
7241 for node in all_nodes:
7242 f_create = node == pnode
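# On the primary node both creation and opening are forced; on other nodes the
# device tree is only created where something reports CreateOnSecondary().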
7243 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7246 def _RemoveDisks(lu, instance, target_node=None):
7247 """Remove all disks for an instance.
7249 This abstracts away some work from `AddInstance()` and
7250 `RemoveInstance()`. Note that in case some of the devices couldn't
7251 be removed, the removal will continue with the other ones (compare
7252 with `_CreateDisks()`).
7254 @type lu: L{LogicalUnit}
7255 @param lu: the logical unit on whose behalf we execute
7256 @type instance: L{objects.Instance}
7257 @param instance: the instance whose disks we should remove
7258 @type target_node: string
7259 @param target_node: used to override the node on which to remove the disks
7261 @return: the success of the removal
7264 logging.info("Removing block devices for instance %s", instance.name)
7267 for device in instance.disks:
7269 edata = [(target_node, device)]
7271 edata = device.ComputeNodeTree(instance.primary_node)
7272 for node, disk in edata:
7273 lu.cfg.SetDiskID(disk, node)
7274 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7276 lu.LogWarning("Could not remove block device %s on node %s,"
7277 " continuing anyway: %s", device.iv_name, node, msg)
7280 if instance.disk_template == constants.DT_FILE:
7281 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7285 tgt = instance.primary_node
7286 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7288 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7289 file_storage_dir, instance.primary_node, result.fail_msg)
7295 def _ComputeDiskSizePerVG(disk_template, disks):
7296 """Compute disk size requirements in the volume group
7299 def _compute(disks, payload):
7300 """Universal algorithm.
7305 vgs[disk[constants.IDISK_VG]] = \
7306 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7310 # Required free disk space as a function of disk and swap space
7312 constants.DT_DISKLESS: {},
7313 constants.DT_PLAIN: _compute(disks, 0),
7314 # 128 MB are added for drbd metadata for each disk
7315 constants.DT_DRBD8: _compute(disks, 128),
7316 constants.DT_FILE: {},
7317 constants.DT_SHARED_FILE: {},
7320 if disk_template not in req_size_dict:
7321 raise errors.ProgrammerError("Disk template '%s' size requirement"
7322 " is unknown" % disk_template)
7324 return req_size_dict[disk_template]
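# Hedged worked example (keys abbreviated): for DT_DRBD8 with
#   disks == [{IDISK_VG: "xenvg", IDISK_SIZE: 1024},
#             {IDISK_VG: "xenvg", IDISK_SIZE: 2048}]
# the result is {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328},
# i.e. 128 MB of DRBD metadata per disk, summed per volume group.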
7327 def _ComputeDiskSize(disk_template, disks):
7328 """Compute disk size requirements in the volume group
7331 # Required free disk space as a function of disk and swap space
7333 constants.DT_DISKLESS: None,
7334 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7335 # 128 MB are added for drbd metadata for each disk
7336 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7337 constants.DT_FILE: None,
7338 constants.DT_SHARED_FILE: 0,
7339 constants.DT_BLOCK: 0,
7342 if disk_template not in req_size_dict:
7343 raise errors.ProgrammerError("Disk template '%s' size requirement"
7344 " is unknown" % disk_template)
7346 return req_size_dict[disk_template]
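# Hedged worked example: for DT_DRBD8 and two disks of 1024 and 2048 MB the
# required size is (1024 + 128) + (2048 + 128) == 3328 MB, while DT_PLAIN
# would need just 1024 + 2048 == 3072 MB.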
7349 def _FilterVmNodes(lu, nodenames):
7350 """Filters out non-vm_capable nodes from a list.
7352 @type lu: L{LogicalUnit}
7353 @param lu: the logical unit for which we check
7354 @type nodenames: list
7355 @param nodenames: the list of nodes on which we should check
7357 @return: the list of vm-capable nodes
7360 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7361 return [name for name in nodenames if name not in vm_nodes]
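# Note: GetNonVmCapableNodeList returns the nodes that are *not* vm_capable,
# so the "not in" test above keeps exactly the vm-capable ones.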
7364 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7365 """Hypervisor parameter validation.
7367 This function abstracts the hypervisor parameter validation to be
7368 used in both instance create and instance modify.
7370 @type lu: L{LogicalUnit}
7371 @param lu: the logical unit for which we check
7372 @type nodenames: list
7373 @param nodenames: the list of nodes on which we should check
7374 @type hvname: string
7375 @param hvname: the name of the hypervisor we should use
7376 @type hvparams: dict
7377 @param hvparams: the parameters which we need to check
7378 @raise errors.OpPrereqError: if the parameters are not valid
7381 nodenames = _FilterVmNodes(lu, nodenames)
7382 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7385 for node in nodenames:
7389 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7392 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7393 """OS parameters validation.
7395 @type lu: L{LogicalUnit}
7396 @param lu: the logical unit for which we check
7397 @type required: boolean
7398 @param required: whether the validation should fail if the OS is not
7400 @type nodenames: list
7401 @param nodenames: the list of nodes on which we should check
7402 @type osname: string
7403 @param osname: the name of the OS we should use
7404 @type osparams: dict
7405 @param osparams: the parameters which we need to check
7406 @raise errors.OpPrereqError: if the parameters are not valid
7409 nodenames = _FilterVmNodes(lu, nodenames)
7410 result = lu.rpc.call_os_validate(required, nodenames, osname,
7411 [constants.OS_VALIDATE_PARAMETERS],
7413 for node, nres in result.items():
7414 # we don't check for offline cases since this should be run only
7415 # against the master node and/or an instance's nodes
7416 nres.Raise("OS Parameters validation failed on node %s" % node)
7417 if not nres.payload:
7418 lu.LogInfo("OS %s not found on node %s, validation skipped",
7422 class LUInstanceCreate(LogicalUnit):
7423 """Create an instance.
7426 HPATH = "instance-add"
7427 HTYPE = constants.HTYPE_INSTANCE
7430 def CheckArguments(self):
7434 # do not require name_check to ease forward/backward compatibility
7436 if self.op.no_install and self.op.start:
7437 self.LogInfo("No-installation mode selected, disabling startup")
7438 self.op.start = False
7439 # validate/normalize the instance name
7440 self.op.instance_name = \
7441 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7443 if self.op.ip_check and not self.op.name_check:
7444 # TODO: make the ip check more flexible and not depend on the name check
7445 raise errors.OpPrereqError("Cannot do ip check without a name check",
7448 # check nics' parameter names
7449 for nic in self.op.nics:
7450 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7452 # check disks: parameter names and consistent adopt/no-adopt strategy
7453 has_adopt = has_no_adopt = False
7454 for disk in self.op.disks:
7455 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7456 if constants.IDISK_ADOPT in disk:
7460 if has_adopt and has_no_adopt:
7461 raise errors.OpPrereqError("Either all disks are adopted or none is",
7464 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7465 raise errors.OpPrereqError("Disk adoption is not supported for the"
7466 " '%s' disk template" %
7467 self.op.disk_template,
7469 if self.op.iallocator is not None:
7470 raise errors.OpPrereqError("Disk adoption not allowed with an"
7471 " iallocator script", errors.ECODE_INVAL)
7472 if self.op.mode == constants.INSTANCE_IMPORT:
7473 raise errors.OpPrereqError("Disk adoption not allowed for"
7474 " instance import", errors.ECODE_INVAL)
7476 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7477 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7478 " but no 'adopt' parameter given" %
7479 self.op.disk_template,
7482 self.adopt_disks = has_adopt
7484 # instance name verification
7485 if self.op.name_check:
7486 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7487 self.op.instance_name = self.hostname1.name
7488 # used in CheckPrereq for ip ping check
7489 self.check_ip = self.hostname1.ip
7491 self.check_ip = None
7493 # file storage checks
7494 if (self.op.file_driver and
7495 not self.op.file_driver in constants.FILE_DRIVER):
7496 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7497 self.op.file_driver, errors.ECODE_INVAL)
7499 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7500 raise errors.OpPrereqError("File storage directory path must not be absolute",
7503 ### Node/iallocator related checks
7504 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7506 if self.op.pnode is not None:
7507 if self.op.disk_template in constants.DTS_INT_MIRROR:
7508 if self.op.snode is None:
7509 raise errors.OpPrereqError("The networked disk templates need"
7510 " a mirror node", errors.ECODE_INVAL)
7512 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7514 self.op.snode = None
7516 self._cds = _GetClusterDomainSecret()
7518 if self.op.mode == constants.INSTANCE_IMPORT:
7519 # On import force_variant must be True, because if we forced it at
7520 # initial install, our only chance when importing it back is that it
7522 self.op.force_variant = True
7524 if self.op.no_install:
7525 self.LogInfo("No-installation mode has no effect during import")
7527 elif self.op.mode == constants.INSTANCE_CREATE:
7528 if self.op.os_type is None:
7529 raise errors.OpPrereqError("No guest OS specified",
7531 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7532 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7533 " installation" % self.op.os_type,
7535 if self.op.disk_template is None:
7536 raise errors.OpPrereqError("No disk template specified",
7539 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7540 # Check handshake to ensure both clusters have the same domain secret
7541 src_handshake = self.op.source_handshake
7542 if not src_handshake:
7543 raise errors.OpPrereqError("Missing source handshake",
7546 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7549 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7552 # Load and check source CA
7553 self.source_x509_ca_pem = self.op.source_x509_ca
7554 if not self.source_x509_ca_pem:
7555 raise errors.OpPrereqError("Missing source X509 CA",
7559 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7561 except OpenSSL.crypto.Error, err:
7562 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7563 (err, ), errors.ECODE_INVAL)
7565 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7566 if errcode is not None:
7567 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7570 self.source_x509_ca = cert
7572 src_instance_name = self.op.source_instance_name
7573 if not src_instance_name:
7574 raise errors.OpPrereqError("Missing source instance name",
7577 self.source_instance_name = \
7578 netutils.GetHostname(name=src_instance_name).name
7581 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7582 self.op.mode, errors.ECODE_INVAL)
7584 def ExpandNames(self):
7585 """ExpandNames for CreateInstance.
7587 Figure out the right locks for instance creation.
7590 self.needed_locks = {}
7592 instance_name = self.op.instance_name
7593 # this is just a preventive check, but someone might still add this
7594 # instance in the meantime, and creation will fail at lock-add time
7595 if instance_name in self.cfg.GetInstanceList():
7596 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7597 instance_name, errors.ECODE_EXISTS)
7599 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7601 if self.op.iallocator:
7602 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7604 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7605 nodelist = [self.op.pnode]
7606 if self.op.snode is not None:
7607 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7608 nodelist.append(self.op.snode)
7609 self.needed_locks[locking.LEVEL_NODE] = nodelist
7611 # in case of import lock the source node too
7612 if self.op.mode == constants.INSTANCE_IMPORT:
7613 src_node = self.op.src_node
7614 src_path = self.op.src_path
7616 if src_path is None:
7617 self.op.src_path = src_path = self.op.instance_name
7619 if src_node is None:
7620 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7621 self.op.src_node = None
7622 if os.path.isabs(src_path):
7623 raise errors.OpPrereqError("Importing an instance from an absolute"
7624 " path requires a source node option.",
7627 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7628 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7629 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7630 if not os.path.isabs(src_path):
7631 self.op.src_path = src_path = \
7632 utils.PathJoin(constants.EXPORT_DIR, src_path)
7634 def _RunAllocator(self):
7635 """Run the allocator based on input opcode.
7638 nics = [n.ToDict() for n in self.nics]
7639 ial = IAllocator(self.cfg, self.rpc,
7640 mode=constants.IALLOCATOR_MODE_ALLOC,
7641 name=self.op.instance_name,
7642 disk_template=self.op.disk_template,
7645 vcpus=self.be_full[constants.BE_VCPUS],
7646 mem_size=self.be_full[constants.BE_MEMORY],
7649 hypervisor=self.op.hypervisor,
7652 ial.Run(self.op.iallocator)
7655 raise errors.OpPrereqError("Can't compute nodes using"
7656 " iallocator '%s': %s" %
7657 (self.op.iallocator, ial.info),
7659 if len(ial.result) != ial.required_nodes:
7660 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7661 " of nodes (%s), required %s" %
7662 (self.op.iallocator, len(ial.result),
7663 ial.required_nodes), errors.ECODE_FAULT)
7664 self.op.pnode = ial.result[0]
7665 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7666 self.op.instance_name, self.op.iallocator,
7667 utils.CommaJoin(ial.result))
7668 if ial.required_nodes == 2:
7669 self.op.snode = ial.result[1]
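# Illustrative note (node names made up): for a mirrored disk template the
# allocator returns two node names, e.g. ["node1.example.com",
# "node2.example.com"]; result[0] becomes the primary node and, when
# required_nodes == 2, result[1] becomes the secondary.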
7671 def BuildHooksEnv(self):
7674 This runs on master, primary and secondary nodes of the instance.
7678 "ADD_MODE": self.op.mode,
7680 if self.op.mode == constants.INSTANCE_IMPORT:
7681 env["SRC_NODE"] = self.op.src_node
7682 env["SRC_PATH"] = self.op.src_path
7683 env["SRC_IMAGES"] = self.src_images
7685 env.update(_BuildInstanceHookEnv(
7686 name=self.op.instance_name,
7687 primary_node=self.op.pnode,
7688 secondary_nodes=self.secondaries,
7689 status=self.op.start,
7690 os_type=self.op.os_type,
7691 memory=self.be_full[constants.BE_MEMORY],
7692 vcpus=self.be_full[constants.BE_VCPUS],
7693 nics=_NICListToTuple(self, self.nics),
7694 disk_template=self.op.disk_template,
7695 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7696 for d in self.disks],
7699 hypervisor_name=self.op.hypervisor,
7704 def BuildHooksNodes(self):
7705 """Build hooks nodes.
7708 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7711 def _ReadExportInfo(self):
7712 """Reads the export information from disk.
7714 It will override the opcode source node and path with the actual
7715 information, if these two were not specified before.
7717 @return: the export information
7720 assert self.op.mode == constants.INSTANCE_IMPORT
7722 src_node = self.op.src_node
7723 src_path = self.op.src_path
7725 if src_node is None:
7726 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7727 exp_list = self.rpc.call_export_list(locked_nodes)
7729 for node in exp_list:
7730 if exp_list[node].fail_msg:
7732 if src_path in exp_list[node].payload:
7734 self.op.src_node = src_node = node
7735 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7739 raise errors.OpPrereqError("No export found for relative path %s" %
7740 src_path, errors.ECODE_INVAL)
7742 _CheckNodeOnline(self, src_node)
7743 result = self.rpc.call_export_info(src_node, src_path)
7744 result.Raise("No export or invalid export found in dir %s" % src_path)
7746 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7747 if not export_info.has_section(constants.INISECT_EXP):
7748 raise errors.ProgrammerError("Corrupted export config",
7749 errors.ECODE_ENVIRON)
7751 ei_version = export_info.get(constants.INISECT_EXP, "version")
7752 if (int(ei_version) != constants.EXPORT_VERSION):
7753 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7754 (ei_version, constants.EXPORT_VERSION),
7755 errors.ECODE_ENVIRON)
7758 def _ReadExportParams(self, einfo):
7759 """Use export parameters as defaults.
7761 In case the opcode doesn't specify (as in override) some instance
7762 parameters, then try to use them from the export information, if that declares them.
7766 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7768 if self.op.disk_template is None:
7769 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7770 self.op.disk_template = einfo.get(constants.INISECT_INS,
7773 raise errors.OpPrereqError("No disk template specified and the export"
7774 " is missing the disk_template information",
7777 if not self.op.disks:
7778 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7780 # TODO: import the disk iv_name too
7781 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7782 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7783 disks.append({constants.IDISK_SIZE: disk_sz})
7784 self.op.disks = disks
7786 raise errors.OpPrereqError("No disk info specified and the export"
7787 " is missing the disk information",
7790 if (not self.op.nics and
7791 einfo.has_option(constants.INISECT_INS, "nic_count")):
7793 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7795 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7796 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7801 if (self.op.hypervisor is None and
7802 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7803 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7804 if einfo.has_section(constants.INISECT_HYP):
7805 # use the export parameters but do not override the ones
7806 # specified by the user
7807 for name, value in einfo.items(constants.INISECT_HYP):
7808 if name not in self.op.hvparams:
7809 self.op.hvparams[name] = value
7811 if einfo.has_section(constants.INISECT_BEP):
7812 # use the parameters, without overriding
7813 for name, value in einfo.items(constants.INISECT_BEP):
7814 if name not in self.op.beparams:
7815 self.op.beparams[name] = value
7817 # try to read the parameters old style, from the main section
7818 for name in constants.BES_PARAMETERS:
7819 if (name not in self.op.beparams and
7820 einfo.has_option(constants.INISECT_INS, name)):
7821 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7823 if einfo.has_section(constants.INISECT_OSP):
7824 # use the parameters, without overriding
7825 for name, value in einfo.items(constants.INISECT_OSP):
7826 if name not in self.op.osparams:
7827 self.op.osparams[name] = value
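# Summary of the precedence implemented above: values given in the opcode
# always win; the export's INI sections only fill in hypervisor, backend and
# OS parameters (including old-style main-section backend parameters) that
# the user did not specify.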
7829 def _RevertToDefaults(self, cluster):
7830 """Revert the instance parameters to the default values.
7834 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7835 for name in self.op.hvparams.keys():
7836 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7837 del self.op.hvparams[name]
7839 be_defs = cluster.SimpleFillBE({})
7840 for name in self.op.beparams.keys():
7841 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7842 del self.op.beparams[name]
7844 nic_defs = cluster.SimpleFillNIC({})
7845 for nic in self.op.nics:
7846 for name in constants.NICS_PARAMETERS:
7847 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7850 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7851 for name in self.op.osparams.keys():
7852 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7853 del self.op.osparams[name]
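# Net effect: any parameter whose value matches the current cluster default is
# dropped from the opcode, so the new instance keeps following the cluster
# defaults instead of storing its own copy of them.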
7855 def CheckPrereq(self):
7856 """Check prerequisites.
7859 if self.op.mode == constants.INSTANCE_IMPORT:
7860 export_info = self._ReadExportInfo()
7861 self._ReadExportParams(export_info)
7863 if (not self.cfg.GetVGName() and
7864 self.op.disk_template not in constants.DTS_NOT_LVM):
7865 raise errors.OpPrereqError("Cluster does not support lvm-based"
7866 " instances", errors.ECODE_STATE)
7868 if self.op.hypervisor is None:
7869 self.op.hypervisor = self.cfg.GetHypervisorType()
7871 cluster = self.cfg.GetClusterInfo()
7872 enabled_hvs = cluster.enabled_hypervisors
7873 if self.op.hypervisor not in enabled_hvs:
7874 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7875 " cluster (%s)" % (self.op.hypervisor,
7876 ",".join(enabled_hvs)),
7879 # check hypervisor parameter syntax (locally)
7880 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7881 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7883 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7884 hv_type.CheckParameterSyntax(filled_hvp)
7885 self.hv_full = filled_hvp
7886 # check that we don't specify global parameters on an instance
7887 _CheckGlobalHvParams(self.op.hvparams)
7889 # fill and remember the beparams dict
7890 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7891 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7893 # build os parameters
7894 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7896 # now that hvp/bep are in final format, let's reset to defaults,
7898 if self.op.identify_defaults:
7899 self._RevertToDefaults(cluster)
7903 for idx, nic in enumerate(self.op.nics):
7904 nic_mode_req = nic.get(constants.INIC_MODE, None)
7905 nic_mode = nic_mode_req
7906 if nic_mode is None:
7907 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7909 # in routed mode, for the first nic, the default ip is 'auto'
7910 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7911 default_ip_mode = constants.VALUE_AUTO
7913 default_ip_mode = constants.VALUE_NONE
7915 # ip validity checks
7916 ip = nic.get(constants.INIC_IP, default_ip_mode)
7917 if ip is None or ip.lower() == constants.VALUE_NONE:
7919 elif ip.lower() == constants.VALUE_AUTO:
7920 if not self.op.name_check:
7921 raise errors.OpPrereqError("IP address set to auto but name checks"
7922 " have been skipped",
7924 nic_ip = self.hostname1.ip
7926 if not netutils.IPAddress.IsValid(ip):
7927 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7931 # TODO: check the ip address for uniqueness
7932 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7933 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7936 # MAC address verification
7937 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7938 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7939 mac = utils.NormalizeAndValidateMac(mac)
7942 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7943 except errors.ReservationError:
7944 raise errors.OpPrereqError("MAC address %s already in use"
7945 " in cluster" % mac,
7946 errors.ECODE_NOTUNIQUE)
7948 # Build nic parameters
7949 link = nic.get(constants.INIC_LINK, None)
7952 nicparams[constants.NIC_MODE] = nic_mode_req
7954 nicparams[constants.NIC_LINK] = link
7956 check_params = cluster.SimpleFillNIC(nicparams)
7957 objects.NIC.CheckParameterSyntax(check_params)
7958 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
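# At this point every requested NIC has been turned into an objects.NIC with a
# validated MAC (or the 'auto'/'generate' placeholder, resolved further below),
# the resolved IP and only the explicitly requested nicparams.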
7960 # disk checks/pre-build
7961 default_vg = self.cfg.GetVGName()
7963 for disk in self.op.disks:
7964 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7965 if mode not in constants.DISK_ACCESS_SET:
7966 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7967 mode, errors.ECODE_INVAL)
7968 size = disk.get(constants.IDISK_SIZE, None)
7970 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7973 except (TypeError, ValueError):
7974 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7977 constants.IDISK_SIZE: size,
7978 constants.IDISK_MODE: mode,
7979 constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7981 if constants.IDISK_ADOPT in disk:
7982 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7983 self.disks.append(new_disk)
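# Illustrative example of a normalized self.disks entry (values made up):
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# with constants.IDISK_ADOPT added when a volume or device is being adopted.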
7985 if self.op.mode == constants.INSTANCE_IMPORT:
7987 # Check that the new instance doesn't have fewer disks than the export
7988 instance_disks = len(self.disks)
7989 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7990 if instance_disks < export_disks:
7991 raise errors.OpPrereqError("Not enough disks to import."
7992 " (instance: %d, export: %d)" %
7993 (instance_disks, export_disks),
7997 for idx in range(export_disks):
7998 option = 'disk%d_dump' % idx
7999 if export_info.has_option(constants.INISECT_INS, option):
8000 # FIXME: are the old OSes, disk sizes, etc. useful?
8001 export_name = export_info.get(constants.INISECT_INS, option)
8002 image = utils.PathJoin(self.op.src_path, export_name)
8003 disk_images.append(image)
8005 disk_images.append(False)
8007 self.src_images = disk_images
8009 old_name = export_info.get(constants.INISECT_INS, 'name')
8011 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8012 except (TypeError, ValueError), err:
8013 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8014 " an integer: %s" % str(err),
8016 if self.op.instance_name == old_name:
8017 for idx, nic in enumerate(self.nics):
8018 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8019 nic_mac_ini = 'nic%d_mac' % idx
8020 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8022 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8024 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8025 if self.op.ip_check:
8026 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8027 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8028 (self.check_ip, self.op.instance_name),
8029 errors.ECODE_NOTUNIQUE)
8031 #### mac address generation
8032 # By generating the MAC address here, both the allocator and the hooks get
8033 # the real, final MAC address rather than the 'auto' or 'generate' value.
8034 # There is a race condition between the generation and the instance object
8035 # creation, which means that we know the mac is valid now, but we're not
8036 # sure it will be when we actually add the instance. If things go bad
8037 # adding the instance will abort because of a duplicate mac, and the
8038 # creation job will fail.
8039 for nic in self.nics:
8040 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8041 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8045 if self.op.iallocator is not None:
8046 self._RunAllocator()
8048 #### node related checks
8050 # check primary node
8051 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8052 assert self.pnode is not None, \
8053 "Cannot retrieve locked node %s" % self.op.pnode
8055 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8056 pnode.name, errors.ECODE_STATE)
8058 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8059 pnode.name, errors.ECODE_STATE)
8060 if not pnode.vm_capable:
8061 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8062 " '%s'" % pnode.name, errors.ECODE_STATE)
8064 self.secondaries = []
8066 # mirror node verification
8067 if self.op.disk_template in constants.DTS_INT_MIRROR:
8068 if self.op.snode == pnode.name:
8069 raise errors.OpPrereqError("The secondary node cannot be the"
8070 " primary node.", errors.ECODE_INVAL)
8071 _CheckNodeOnline(self, self.op.snode)
8072 _CheckNodeNotDrained(self, self.op.snode)
8073 _CheckNodeVmCapable(self, self.op.snode)
8074 self.secondaries.append(self.op.snode)
8076 nodenames = [pnode.name] + self.secondaries
8078 if not self.adopt_disks:
8079 # Check lv size requirements, if not adopting
8080 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8081 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8083 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8084 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8085 disk[constants.IDISK_ADOPT])
8086 for disk in self.disks])
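# Illustrative example (names made up): a disk spec containing
# {constants.IDISK_VG: "xenvg", constants.IDISK_ADOPT: "data-vol"} contributes
# the string "xenvg/data-vol" to all_lvs, so duplicated volumes collapse in
# the set and are caught by the length check below.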
8087 if len(all_lvs) != len(self.disks):
8088 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8090 for lv_name in all_lvs:
8092 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8093 # to ReserveLV use the same syntax
8094 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8095 except errors.ReservationError:
8096 raise errors.OpPrereqError("LV named %s used by another instance" %
8097 lv_name, errors.ECODE_NOTUNIQUE)
8099 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8100 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8102 node_lvs = self.rpc.call_lv_list([pnode.name],
8103 vg_names.payload.keys())[pnode.name]
8104 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8105 node_lvs = node_lvs.payload
8107 delta = all_lvs.difference(node_lvs.keys())
8109 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8110 utils.CommaJoin(delta),
8112 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8114 raise errors.OpPrereqError("Online logical volumes found, cannot"
8115 " adopt: %s" % utils.CommaJoin(online_lvs),
8117 # update the size of disk based on what is found
8118 for dsk in self.disks:
8119 dsk[constants.IDISK_SIZE] = \
8120 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8121 dsk[constants.IDISK_ADOPT])][0]))
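# The lv_list payload maps "vg/lv" names to a tuple whose first element is the
# volume size (in mebibytes, as elsewhere in Ganeti) and whose third element
# is the online flag checked above, so the adopted disk sizes recorded here
# reflect what actually exists on the node.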
8123 elif self.op.disk_template == constants.DT_BLOCK:
8124 # Normalize and de-duplicate device paths
8125 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8126 for disk in self.disks])
8127 if len(all_disks) != len(self.disks):
8128 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8130 baddisks = [d for d in all_disks
8131 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8133 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8134 " cannot be adopted" %
8135 (", ".join(baddisks),
8136 constants.ADOPTABLE_BLOCKDEV_ROOT),
8139 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8140 list(all_disks))[pnode.name]
8141 node_disks.Raise("Cannot get block device information from node %s" %
8143 node_disks = node_disks.payload
8144 delta = all_disks.difference(node_disks.keys())
8146 raise errors.OpPrereqError("Missing block device(s): %s" %
8147 utils.CommaJoin(delta),
8149 for dsk in self.disks:
8150 dsk[constants.IDISK_SIZE] = \
8151 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8153 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8155 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8156 # check OS parameters (remotely)
8157 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8159 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8161 # memory check on primary node
8163 _CheckNodeFreeMemory(self, self.pnode.name,
8164 "creating instance %s" % self.op.instance_name,
8165 self.be_full[constants.BE_MEMORY],
8168 self.dry_run_result = list(nodenames)
8170 def Exec(self, feedback_fn):
8171 """Create and add the instance to the cluster.
8174 instance = self.op.instance_name
8175 pnode_name = self.pnode.name
8177 ht_kind = self.op.hypervisor
8178 if ht_kind in constants.HTS_REQ_PORT:
8179 network_port = self.cfg.AllocatePort()
8183 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8184 # this is needed because os.path.join does not accept None arguments
8185 if self.op.file_storage_dir is None:
8186 string_file_storage_dir = ""
8188 string_file_storage_dir = self.op.file_storage_dir
8190 # build the full file storage dir path
8191 if self.op.disk_template == constants.DT_SHARED_FILE:
8192 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8194 get_fsd_fn = self.cfg.GetFileStorageDir
8196 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8197 string_file_storage_dir, instance)
8199 file_storage_dir = ""
8201 disks = _GenerateDiskTemplate(self,
8202 self.op.disk_template,
8203 instance, pnode_name,
8207 self.op.file_driver,
8211 iobj = objects.Instance(name=instance, os=self.op.os_type,
8212 primary_node=pnode_name,
8213 nics=self.nics, disks=disks,
8214 disk_template=self.op.disk_template,
8216 network_port=network_port,
8217 beparams=self.op.beparams,
8218 hvparams=self.op.hvparams,
8219 hypervisor=self.op.hypervisor,
8220 osparams=self.op.osparams,
8223 if self.adopt_disks:
8224 if self.op.disk_template == constants.DT_PLAIN:
8225 # rename LVs to the newly-generated names; we need to construct
8226 # 'fake' LV disks with the old data, plus the new unique_id
8227 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8229 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8230 rename_to.append(t_dsk.logical_id)
8231 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8232 self.cfg.SetDiskID(t_dsk, pnode_name)
8233 result = self.rpc.call_blockdev_rename(pnode_name,
8234 zip(tmp_disks, rename_to))
8235 result.Raise("Failed to rename adoped LVs")
8237 feedback_fn("* creating instance disks...")
8239 _CreateDisks(self, iobj)
8240 except errors.OpExecError:
8241 self.LogWarning("Device creation failed, reverting...")
8243 _RemoveDisks(self, iobj)
8245 self.cfg.ReleaseDRBDMinors(instance)
8248 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8249 feedback_fn("* wiping instance disks...")
8251 _WipeDisks(self, iobj)
8252 except errors.OpExecError:
8253 self.LogWarning("Device wiping failed, reverting...")
8255 _RemoveDisks(self, iobj)
8257 self.cfg.ReleaseDRBDMinors(instance)
8260 feedback_fn("adding instance %s to cluster config" % instance)
8262 self.cfg.AddInstance(iobj, self.proc.GetECId())
8264 # Declare that we don't want to remove the instance lock anymore, as we've
8265 # added the instance to the config
8266 del self.remove_locks[locking.LEVEL_INSTANCE]
8267 # Unlock all the nodes
8268 if self.op.mode == constants.INSTANCE_IMPORT:
8269 nodes_keep = [self.op.src_node]
8270 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8271 if node != self.op.src_node]
8272 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8273 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8275 self.context.glm.release(locking.LEVEL_NODE)
8276 del self.acquired_locks[locking.LEVEL_NODE]
8278 if self.op.wait_for_sync:
8279 disk_abort = not _WaitForSync(self, iobj)
8280 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8281 # make sure the disks are not degraded (still sync-ing is ok)
8283 feedback_fn("* checking mirrors status")
8284 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8289 _RemoveDisks(self, iobj)
8290 self.cfg.RemoveInstance(iobj.name)
8291 # Make sure the instance lock gets removed
8292 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8293 raise errors.OpExecError("There are some degraded disks for"
8296 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8297 if self.op.mode == constants.INSTANCE_CREATE:
8298 if not self.op.no_install:
8299 feedback_fn("* running the instance OS create scripts...")
8300 # FIXME: pass debug option from opcode to backend
8301 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8302 self.op.debug_level)
8303 result.Raise("Could not add os for instance %s"
8304 " on node %s" % (instance, pnode_name))
8306 elif self.op.mode == constants.INSTANCE_IMPORT:
8307 feedback_fn("* running the instance OS import scripts...")
8311 for idx, image in enumerate(self.src_images):
8315 # FIXME: pass debug option from opcode to backend
8316 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8317 constants.IEIO_FILE, (image, ),
8318 constants.IEIO_SCRIPT,
8319 (iobj.disks[idx], idx),
8321 transfers.append(dt)
8324 masterd.instance.TransferInstanceData(self, feedback_fn,
8325 self.op.src_node, pnode_name,
8326 self.pnode.secondary_ip,
8328 if not compat.all(import_result):
8329 self.LogWarning("Some disks for instance %s on node %s were not"
8330 " imported successfully" % (instance, pnode_name))
8332 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8333 feedback_fn("* preparing remote import...")
8334 # The source cluster will stop the instance before attempting to make a
8335 # connection. In some cases stopping an instance can take a long time,
8336 # hence the shutdown timeout is added to the connection timeout.
8337 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8338 self.op.source_shutdown_timeout)
8339 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8341 assert iobj.primary_node == self.pnode.name
8343 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8344 self.source_x509_ca,
8345 self._cds, timeouts)
8346 if not compat.all(disk_results):
8347 # TODO: Should the instance still be started, even if some disks
8348 # failed to import (valid for local imports, too)?
8349 self.LogWarning("Some disks for instance %s on node %s were not"
8350 " imported successfully" % (instance, pnode_name))
8352 # Run rename script on newly imported instance
8353 assert iobj.name == instance
8354 feedback_fn("Running rename script for %s" % instance)
8355 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8356 self.source_instance_name,
8357 self.op.debug_level)
8359 self.LogWarning("Failed to run rename script for %s on node"
8360 " %s: %s" % (instance, pnode_name, result.fail_msg))
8363 # also checked in the prereq part
8364 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8368 iobj.admin_up = True
8369 self.cfg.Update(iobj, feedback_fn)
8370 logging.info("Starting instance %s on node %s", instance, pnode_name)
8371 feedback_fn("* starting instance...")
8372 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8373 result.Raise("Could not start instance")
8375 return list(iobj.all_nodes)
8378 class LUInstanceConsole(NoHooksLU):
8379 """Connect to an instance's console.
8381 This is somewhat special in that it returns the command line that
8382 you need to run on the master node in order to connect to the console.
8388 def ExpandNames(self):
8389 self._ExpandAndLockInstance()
8391 def CheckPrereq(self):
8392 """Check prerequisites.
8394 This checks that the instance is in the cluster.
8397 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8398 assert self.instance is not None, \
8399 "Cannot retrieve locked instance %s" % self.op.instance_name
8400 _CheckNodeOnline(self, self.instance.primary_node)
8402 def Exec(self, feedback_fn):
8403 """Connect to the console of an instance
8406 instance = self.instance
8407 node = instance.primary_node
8409 node_insts = self.rpc.call_instance_list([node],
8410 [instance.hypervisor])[node]
8411 node_insts.Raise("Can't get node information from %s" % node)
8413 if instance.name not in node_insts.payload:
8414 if instance.admin_up:
8415 state = constants.INSTST_ERRORDOWN
8417 state = constants.INSTST_ADMINDOWN
8418 raise errors.OpExecError("Instance %s is not running (state %s)" %
8419 (instance.name, state))
8421 logging.debug("Connecting to console of %s on %s", instance.name, node)
8423 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8426 def _GetInstanceConsole(cluster, instance):
8427 """Returns console information for an instance.
8429 @type cluster: L{objects.Cluster}
8430 @type instance: L{objects.Instance}
8434 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8435 # beparams and hvparams are passed separately, to avoid editing the
8436 # instance and then saving the defaults in the instance itself.
8437 hvparams = cluster.FillHV(instance)
8438 beparams = cluster.FillBE(instance)
8439 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8441 assert console.instance == instance.name
8442 assert console.Validate()
8444 return console.ToDict()
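# The dictionary returned here is consumed by the client-side console code;
# its exact contents depend on the hypervisor's GetInstanceConsole()
# implementation (for example an SSH command line, or host/port details for a
# graphical console).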
8447 class LUInstanceReplaceDisks(LogicalUnit):
8448 """Replace the disks of an instance.
8451 HPATH = "mirrors-replace"
8452 HTYPE = constants.HTYPE_INSTANCE
8455 def CheckArguments(self):
8456 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8459 def ExpandNames(self):
8460 self._ExpandAndLockInstance()
8462 if self.op.iallocator is not None:
8463 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8465 elif self.op.remote_node is not None:
8466 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8467 self.op.remote_node = remote_node
8469 # Warning: do not remove the locking of the new secondary here
8470 # unless DRBD8.AddChildren is changed to work in parallel;
8471 # currently it doesn't since parallel invocations of
8472 # FindUnusedMinor will conflict
8473 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8474 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8477 self.needed_locks[locking.LEVEL_NODE] = []
8478 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8480 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8481 self.op.iallocator, self.op.remote_node,
8482 self.op.disks, False, self.op.early_release)
8484 self.tasklets = [self.replacer]
8486 def DeclareLocks(self, level):
8487 # If we're not already locking all nodes in the set we have to declare the
8488 # instance's primary/secondary nodes.
8489 if (level == locking.LEVEL_NODE and
8490 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8491 self._LockInstancesNodes()
8493 def BuildHooksEnv(self):
8496 This runs on the master, the primary and all the secondaries.
8499 instance = self.replacer.instance
8501 "MODE": self.op.mode,
8502 "NEW_SECONDARY": self.op.remote_node,
8503 "OLD_SECONDARY": instance.secondary_nodes[0],
8505 env.update(_BuildInstanceHookEnvByObject(self, instance))
8508 def BuildHooksNodes(self):
8509 """Build hooks nodes.
8512 instance = self.replacer.instance
8514 self.cfg.GetMasterNode(),
8515 instance.primary_node,
8517 if self.op.remote_node is not None:
8518 nl.append(self.op.remote_node)
8522 class TLReplaceDisks(Tasklet):
8523 """Replaces disks for an instance.
8525 Note: Locking is not within the scope of this class.
8528 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8529 disks, delay_iallocator, early_release):
8530 """Initializes this class.
8533 Tasklet.__init__(self, lu)
8536 self.instance_name = instance_name
8538 self.iallocator_name = iallocator_name
8539 self.remote_node = remote_node
8541 self.delay_iallocator = delay_iallocator
8542 self.early_release = early_release
8545 self.instance = None
8546 self.new_node = None
8547 self.target_node = None
8548 self.other_node = None
8549 self.remote_node_info = None
8550 self.node_secondary_ip = None
8553 def CheckArguments(mode, remote_node, iallocator):
8554 """Helper function for users of this class.
8557 # check for valid parameter combination
8558 if mode == constants.REPLACE_DISK_CHG:
8559 if remote_node is None and iallocator is None:
8560 raise errors.OpPrereqError("When changing the secondary either an"
8561 " iallocator script must be used or the"
8562 " new node given", errors.ECODE_INVAL)
8564 if remote_node is not None and iallocator is not None:
8565 raise errors.OpPrereqError("Give either the iallocator or the new"
8566 " secondary, not both", errors.ECODE_INVAL)
8568 elif remote_node is not None or iallocator is not None:
8569 # Not replacing the secondary
8570 raise errors.OpPrereqError("The iallocator and new node options can"
8571 " only be used when changing the"
8572 " secondary node", errors.ECODE_INVAL)
8575 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8576 """Compute a new secondary node using an IAllocator.
8579 ial = IAllocator(lu.cfg, lu.rpc,
8580 mode=constants.IALLOCATOR_MODE_RELOC,
8582 relocate_from=relocate_from)
8584 ial.Run(iallocator_name)
8587 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8588 " %s" % (iallocator_name, ial.info),
8591 if len(ial.result) != ial.required_nodes:
8592 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8593 " of nodes (%s), required %s" %
8595 len(ial.result), ial.required_nodes),
8598 remote_node_name = ial.result[0]
8600 lu.LogInfo("Selected new secondary for instance '%s': %s",
8601 instance_name, remote_node_name)
8603 return remote_node_name
8605 def _FindFaultyDisks(self, node_name):
8606 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8609 def _CheckDisksActivated(self, instance):
8610 """Checks if the instance disks are activated.
8612 @param instance: The instance to check disks
8613 @return: True if they are activated, False otherwise
8616 nodes = instance.all_nodes
8618 for idx, dev in enumerate(instance.disks):
8620 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8621 self.cfg.SetDiskID(dev, node)
8623 result = self.rpc.call_blockdev_find(node, dev)
8627 elif result.fail_msg or not result.payload:
8633 def CheckPrereq(self):
8634 """Check prerequisites.
8636 This checks that the instance is in the cluster.
8639 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8640 assert instance is not None, \
8641 "Cannot retrieve locked instance %s" % self.instance_name
8643 if instance.disk_template != constants.DT_DRBD8:
8644 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8645 " instances", errors.ECODE_INVAL)
8647 if len(instance.secondary_nodes) != 1:
8648 raise errors.OpPrereqError("The instance has a strange layout,"
8649 " expected one secondary but found %d" %
8650 len(instance.secondary_nodes),
8653 if not self.delay_iallocator:
8654 self._CheckPrereq2()
8656 def _CheckPrereq2(self):
8657 """Check prerequisites, second part.
8659 This function should always be part of CheckPrereq. It was separated and is
8660 now called from Exec because during node evacuation iallocator was only
8661 called with an unmodified cluster model, not taking planned changes into account.
8665 instance = self.instance
8666 secondary_node = instance.secondary_nodes[0]
8668 if self.iallocator_name is None:
8669 remote_node = self.remote_node
8671 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8672 instance.name, instance.secondary_nodes)
8674 if remote_node is not None:
8675 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8676 assert self.remote_node_info is not None, \
8677 "Cannot retrieve locked node %s" % remote_node
8679 self.remote_node_info = None
8681 if remote_node == self.instance.primary_node:
8682 raise errors.OpPrereqError("The specified node is the primary node of"
8683 " the instance.", errors.ECODE_INVAL)
8685 if remote_node == secondary_node:
8686 raise errors.OpPrereqError("The specified node is already the"
8687 " secondary node of the instance.",
8690 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8691 constants.REPLACE_DISK_CHG):
8692 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8695 if self.mode == constants.REPLACE_DISK_AUTO:
8696 if not self._CheckDisksActivated(instance):
8697 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8698 " first" % self.instance_name,
8700 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8701 faulty_secondary = self._FindFaultyDisks(secondary_node)
8703 if faulty_primary and faulty_secondary:
8704 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8705 " one node and can not be repaired"
8706 " automatically" % self.instance_name,
8710 self.disks = faulty_primary
8711 self.target_node = instance.primary_node
8712 self.other_node = secondary_node
8713 check_nodes = [self.target_node, self.other_node]
8714 elif faulty_secondary:
8715 self.disks = faulty_secondary
8716 self.target_node = secondary_node
8717 self.other_node = instance.primary_node
8718 check_nodes = [self.target_node, self.other_node]
8724 # Non-automatic modes
8725 if self.mode == constants.REPLACE_DISK_PRI:
8726 self.target_node = instance.primary_node
8727 self.other_node = secondary_node
8728 check_nodes = [self.target_node, self.other_node]
8730 elif self.mode == constants.REPLACE_DISK_SEC:
8731 self.target_node = secondary_node
8732 self.other_node = instance.primary_node
8733 check_nodes = [self.target_node, self.other_node]
8735 elif self.mode == constants.REPLACE_DISK_CHG:
8736 self.new_node = remote_node
8737 self.other_node = instance.primary_node
8738 self.target_node = secondary_node
8739 check_nodes = [self.new_node, self.other_node]
8741 _CheckNodeNotDrained(self.lu, remote_node)
8742 _CheckNodeVmCapable(self.lu, remote_node)
8744 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8745 assert old_node_info is not None
8746 if old_node_info.offline and not self.early_release:
8747 # doesn't make sense to delay the release
8748 self.early_release = True
8749 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8750 " early-release mode", secondary_node)
8753 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8756 # If not specified, all disks should be replaced
8758 self.disks = range(len(self.instance.disks))
8760 for node in check_nodes:
8761 _CheckNodeOnline(self.lu, node)
8763 # Check whether disks are valid
8764 for disk_idx in self.disks:
8765 instance.FindDisk(disk_idx)
8767 # Get secondary node IP addresses
8770 for node_name in [self.target_node, self.other_node, self.new_node]:
8771 if node_name is not None:
8772 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8774 self.node_secondary_ip = node_2nd_ip
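# node_secondary_ip now maps every node involved in the operation to its
# secondary (replication) IP; it is used later when disconnecting and
# re-attaching the DRBD network in the secondary-replacement path.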
8776 def Exec(self, feedback_fn):
8777 """Execute disk replacement.
8779 This dispatches the disk replacement to the appropriate handler.
8782 if self.delay_iallocator:
8783 self._CheckPrereq2()
8786 feedback_fn("No disks need replacement")
8789 feedback_fn("Replacing disk(s) %s for %s" %
8790 (utils.CommaJoin(self.disks), self.instance.name))
8792 activate_disks = (not self.instance.admin_up)
8794 # Activate the instance disks if we're replacing them on a down instance
8796 _StartInstanceDisks(self.lu, self.instance, True)
8799 # Should we replace the secondary node?
8800 if self.new_node is not None:
8801 fn = self._ExecDrbd8Secondary
8803 fn = self._ExecDrbd8DiskOnly
8805 return fn(feedback_fn)
8808 # Deactivate the instance disks if we're replacing them on a
8811 _SafeShutdownInstanceDisks(self.lu, self.instance)
8813 def _CheckVolumeGroup(self, nodes):
8814 self.lu.LogInfo("Checking volume groups")
8816 vgname = self.cfg.GetVGName()
8818 # Make sure volume group exists on all involved nodes
8819 results = self.rpc.call_vg_list(nodes)
8821 raise errors.OpExecError("Can't list volume groups on the nodes")
8825 res.Raise("Error checking node %s" % node)
8826 if vgname not in res.payload:
8827 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8830 def _CheckDisksExistence(self, nodes):
8831 # Check disk existence
8832 for idx, dev in enumerate(self.instance.disks):
8833 if idx not in self.disks:
8837 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8838 self.cfg.SetDiskID(dev, node)
8840 result = self.rpc.call_blockdev_find(node, dev)
8842 msg = result.fail_msg
8843 if msg or not result.payload:
8845 msg = "disk not found"
8846 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8849 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8850 for idx, dev in enumerate(self.instance.disks):
8851 if idx not in self.disks:
8854 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8857 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8859 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8860 " replace disks for instance %s" %
8861 (node_name, self.instance.name))
8863 def _CreateNewStorage(self, node_name):
8864 vgname = self.cfg.GetVGName()
8867 for idx, dev in enumerate(self.instance.disks):
8868 if idx not in self.disks:
8871 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8873 self.cfg.SetDiskID(dev, node_name)
8875 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8876 names = _GenerateUniqueNames(self.lu, lv_names)
8878 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8879 logical_id=(vgname, names[0]))
8880 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8881 logical_id=(vgname, names[1]))
8883 new_lvs = [lv_data, lv_meta]
8884 old_lvs = dev.children
8885 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
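# iv_names maps the instance-visible disk name (e.g. "disk/0") to a tuple of
# (drbd device, old LVs, new LVs); it drives the detach/rename/attach sequence
# in _ExecDrbd8DiskOnly and the later removal of the old storage.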
8887 # we pass force_create=True to force the LVM creation
8888 for new_lv in new_lvs:
8889 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8890 _GetInstanceInfoText(self.instance), False)
8894 def _CheckDevices(self, node_name, iv_names):
8895 for name, (dev, _, _) in iv_names.iteritems():
8896 self.cfg.SetDiskID(dev, node_name)
8898 result = self.rpc.call_blockdev_find(node_name, dev)
8900 msg = result.fail_msg
8901 if msg or not result.payload:
8903 msg = "disk not found"
8904 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8907 if result.payload.is_degraded:
8908 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8910 def _RemoveOldStorage(self, node_name, iv_names):
8911 for name, (_, old_lvs, _) in iv_names.iteritems():
8912 self.lu.LogInfo("Remove logical volumes for %s" % name)
8915 self.cfg.SetDiskID(lv, node_name)
8917 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8919 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8920 hint="remove unused LVs manually")
8922 def _ReleaseNodeLock(self, node_name):
8923 """Releases the lock for a given node."""
8924 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8926 def _ExecDrbd8DiskOnly(self, feedback_fn):
8927 """Replace a disk on the primary or secondary for DRBD 8.
8929 The algorithm for replace is quite complicated:
8931 1. for each disk to be replaced:
8933 1. create new LVs on the target node with unique names
8934 1. detach old LVs from the drbd device
8935 1. rename old LVs to name_replaced.<time_t>
8936 1. rename new LVs to old LVs
8937 1. attach the new LVs (with the old names now) to the drbd device
8939 1. wait for sync across all devices
8941 1. for each modified disk:
8943 1. remove old LVs (which have the name name_replaced.<time_t>)
8945 Failures are not very well handled.
8950 # Step: check device activation
8951 self.lu.LogStep(1, steps_total, "Check device existence")
8952 self._CheckDisksExistence([self.other_node, self.target_node])
8953 self._CheckVolumeGroup([self.target_node, self.other_node])
8955 # Step: check other node consistency
8956 self.lu.LogStep(2, steps_total, "Check peer consistency")
8957 self._CheckDisksConsistency(self.other_node,
8958 self.other_node == self.instance.primary_node,
8961 # Step: create new storage
8962 self.lu.LogStep(3, steps_total, "Allocate new storage")
8963 iv_names = self._CreateNewStorage(self.target_node)
8965 # Step: for each lv, detach+rename*2+attach
8966 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8967 for dev, old_lvs, new_lvs in iv_names.itervalues():
8968 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8970 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8972 result.Raise("Can't detach drbd from local storage on node"
8973 " %s for device %s" % (self.target_node, dev.iv_name))
8975 #cfg.Update(instance)
8977 # ok, we created the new LVs, so now we know we have the needed
8978 # storage; as such, we proceed on the target node to rename
8979 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8980 # using the assumption that logical_id == physical_id (which in
8981 # turn is the unique_id on that node)
8983 # FIXME(iustin): use a better name for the replaced LVs
8984 temp_suffix = int(time.time())
8985 ren_fn = lambda d, suff: (d.physical_id[0],
8986 d.physical_id[1] + "_replaced-%s" % suff)
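# Illustrative example of the rename (VG and LV names made up): an old LV with
# physical_id ("xenvg", "abc.disk0_data") becomes
# ("xenvg", "abc.disk0_data_replaced-1400000000"), freeing the original name
# for the freshly created replacement LV.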
8988 # Build the rename list based on what LVs exist on the node
8989 rename_old_to_new = []
8990 for to_ren in old_lvs:
8991 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8992 if not result.fail_msg and result.payload:
8994 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8996 self.lu.LogInfo("Renaming the old LVs on the target node")
8997 result = self.rpc.call_blockdev_rename(self.target_node,
8999 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9001 # Now we rename the new LVs to the old LVs
9002 self.lu.LogInfo("Renaming the new LVs on the target node")
9003 rename_new_to_old = [(new, old.physical_id)
9004 for old, new in zip(old_lvs, new_lvs)]
9005 result = self.rpc.call_blockdev_rename(self.target_node,
9007 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9009 for old, new in zip(old_lvs, new_lvs):
9010 new.logical_id = old.logical_id
9011 self.cfg.SetDiskID(new, self.target_node)
9013 for disk in old_lvs:
9014 disk.logical_id = ren_fn(disk, temp_suffix)
9015 self.cfg.SetDiskID(disk, self.target_node)
9017 # Now that the new lvs have the old name, we can add them to the device
9018 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9019 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9021 msg = result.fail_msg
9023 for new_lv in new_lvs:
9024 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9027 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9028 hint=("cleanup manually the unused logical"
9030 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9032 dev.children = new_lvs
9034 self.cfg.Update(self.instance, feedback_fn)
9037 if self.early_release:
9038 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9040 self._RemoveOldStorage(self.target_node, iv_names)
9041 # WARNING: we release both node locks here, do not do other RPCs
9042 # than WaitForSync to the primary node
9043 self._ReleaseNodeLock([self.target_node, self.other_node])
9046 # This can fail as the old devices are degraded and _WaitForSync
9047 # computes a combined result over all disks, so we don't check its return value
9048 self.lu.LogStep(cstep, steps_total, "Sync devices")
9050 _WaitForSync(self.lu, self.instance)
9052 # Check all devices manually
9053 self._CheckDevices(self.instance.primary_node, iv_names)
9055 # Step: remove old storage
9056 if not self.early_release:
9057 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9059 self._RemoveOldStorage(self.target_node, iv_names)
9061 def _ExecDrbd8Secondary(self, feedback_fn):
9062 """Replace the secondary node for DRBD 8.
9064 The algorithm for replace is quite complicated:
9065 - for all disks of the instance:
9066 - create new LVs on the new node with same names
9067 - shutdown the drbd device on the old secondary
9068 - disconnect the drbd network on the primary
9069 - create the drbd device on the new secondary
9070 - network attach the drbd on the primary, using an artifice:
9071 the drbd code for Attach() will connect to the network if it
9072 finds a device which is connected to the good local disks but not network enabled
9074 - wait for sync across all devices
9075 - remove all disks from the old secondary
9077 Failures are not very well handled.
9082 # Step: check device activation
9083 self.lu.LogStep(1, steps_total, "Check device existence")
9084 self._CheckDisksExistence([self.instance.primary_node])
9085 self._CheckVolumeGroup([self.instance.primary_node])
9087 # Step: check other node consistency
9088 self.lu.LogStep(2, steps_total, "Check peer consistency")
9089 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9091 # Step: create new storage
9092 self.lu.LogStep(3, steps_total, "Allocate new storage")
9093 for idx, dev in enumerate(self.instance.disks):
9094 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9095 (self.new_node, idx))
9096 # we pass force_create=True to force LVM creation
9097 for new_lv in dev.children:
9098 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9099 _GetInstanceInfoText(self.instance), False)
9101 # Step 4: drbd minors and drbd setup changes
9102 # after this, we must manually remove the drbd minors on both the
9103 # error and the success paths
9104 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9105 minors = self.cfg.AllocateDRBDMinor([self.new_node
9106 for dev in self.instance.disks],
9108 logging.debug("Allocated minors %r", minors)
9111 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9112 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9113 (self.new_node, idx))
9114 # create new devices on new_node; note that we create two IDs:
9115 # one without port, so the drbd will be activated without
9116 # networking information on the new node at this stage, and one
9117 # with network, for the later activation in step 4
9118 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9119 if self.instance.primary_node == o_node1:
9122 assert self.instance.primary_node == o_node2, "Three-node instance?"
9125 new_alone_id = (self.instance.primary_node, self.new_node, None,
9126 p_minor, new_minor, o_secret)
9127 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9128 p_minor, new_minor, o_secret)
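# For DRBD8 disks the logical_id is the 6-tuple unpacked above:
# (node_A, node_B, port, minor_A, minor_B, secret). new_alone_id replaces the
# port with None so the device is brought up standalone on the new node, while
# new_net_id keeps the port for the network attach performed later on.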
9130 iv_names[idx] = (dev, dev.children, new_net_id)
9131 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9133 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9134 logical_id=new_alone_id,
9135 children=dev.children,
9138 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9139 _GetInstanceInfoText(self.instance), False)
9140 except errors.GenericError:
9141 self.cfg.ReleaseDRBDMinors(self.instance.name)
9144 # We have new devices, shutdown the drbd on the old secondary
9145 for idx, dev in enumerate(self.instance.disks):
9146 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9147 self.cfg.SetDiskID(dev, self.target_node)
9148 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9150 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9151 "node: %s" % (idx, msg),
9152 hint=("Please cleanup this device manually as"
9153 " soon as possible"))
9155 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9156 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9157 self.node_secondary_ip,
9158 self.instance.disks)\
9159 [self.instance.primary_node]
9161 msg = result.fail_msg
9163 # detaches didn't succeed (unlikely)
9164 self.cfg.ReleaseDRBDMinors(self.instance.name)
9165 raise errors.OpExecError("Can't detach the disks from the network on"
9166 " old node: %s" % (msg,))
9168 # if we managed to detach at least one, we update all the disks of
9169 # the instance to point to the new secondary
9170 self.lu.LogInfo("Updating instance configuration")
9171 for dev, _, new_logical_id in iv_names.itervalues():
9172 dev.logical_id = new_logical_id
9173 self.cfg.SetDiskID(dev, self.instance.primary_node)
9175 self.cfg.Update(self.instance, feedback_fn)
9177 # and now perform the drbd attach
9178 self.lu.LogInfo("Attaching primary drbds to new secondary"
9179 " (standalone => connected)")
9180 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9182 self.node_secondary_ip,
9183 self.instance.disks,
9186 for to_node, to_result in result.items():
9187 msg = to_result.fail_msg
9189 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9191 hint=("please do a gnt-instance info to see the"
9192 " status of disks"))
9194 if self.early_release:
9195 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9197 self._RemoveOldStorage(self.target_node, iv_names)
9198 # WARNING: we release all node locks here, do not do other RPCs
9199 # than WaitForSync to the primary node
9200 self._ReleaseNodeLock([self.instance.primary_node,
9205 # This can fail as the old devices are degraded and _WaitForSync
9206 # computes a combined result over all disks, so we don't check its return value
9207 self.lu.LogStep(cstep, steps_total, "Sync devices")
9209 _WaitForSync(self.lu, self.instance)
9211 # Check all devices manually
9212 self._CheckDevices(self.instance.primary_node, iv_names)
9214 # Step: remove old storage
9215 if not self.early_release:
9216 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9217 self._RemoveOldStorage(self.target_node, iv_names)
9220 class LURepairNodeStorage(NoHooksLU):
9221 """Repairs the volume group on a node.
9226 def CheckArguments(self):
9227 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9229 storage_type = self.op.storage_type
9231 if (constants.SO_FIX_CONSISTENCY not in
9232 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9233 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9234 " repaired" % storage_type,
9237 def ExpandNames(self):
9238 self.needed_locks = {
9239 locking.LEVEL_NODE: [self.op.node_name],
9242 def _CheckFaultyDisks(self, instance, node_name):
9243 """Ensure faulty disks abort the opcode or at least warn."""
9245 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9247 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9248 " node '%s'" % (instance.name, node_name),
9250 except errors.OpPrereqError, err:
9251 if self.op.ignore_consistency:
9252 self.proc.LogWarning(str(err.args[0]))
9256 def CheckPrereq(self):
9257 """Check prerequisites.
9260 # Check whether any instance on this node has faulty disks
9261 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9262 if not inst.admin_up:
9264 check_nodes = set(inst.all_nodes)
9265 check_nodes.discard(self.op.node_name)
9266 for inst_node_name in check_nodes:
9267 self._CheckFaultyDisks(inst, inst_node_name)
9269 def Exec(self, feedback_fn):
9270 feedback_fn("Repairing storage unit '%s' on %s ..." %
9271 (self.op.name, self.op.node_name))
9273 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9274 result = self.rpc.call_storage_execute(self.op.node_name,
9275 self.op.storage_type, st_args,
9277 constants.SO_FIX_CONSISTENCY)
9278 result.Raise("Failed to repair storage unit '%s' on %s" %
9279 (self.op.name, self.op.node_name))
9282 class LUNodeEvacStrategy(NoHooksLU):
9283 """Computes the node evacuation strategy.
9288 def CheckArguments(self):
9289 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9291 def ExpandNames(self):
9292 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9293 self.needed_locks = locks = {}
9294 if self.op.remote_node is None:
9295 locks[locking.LEVEL_NODE] = locking.ALL_SET
9297 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9298 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9300 def Exec(self, feedback_fn):
9301 if self.op.remote_node is not None:
9303 for node in self.op.nodes:
9304 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9307 if i.primary_node == self.op.remote_node:
9308 raise errors.OpPrereqError("Node %s is the primary node of"
9309 " instance %s, cannot use it as"
9311 (self.op.remote_node, i.name),
9313 result.append([i.name, self.op.remote_node])
9315 ial = IAllocator(self.cfg, self.rpc,
9316 mode=constants.IALLOCATOR_MODE_MEVAC,
9317 evac_nodes=self.op.nodes)
9318 ial.Run(self.op.iallocator, validate=True)
9320 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9326 class LUInstanceGrowDisk(LogicalUnit):
9327 """Grow a disk of an instance.
9331 HTYPE = constants.HTYPE_INSTANCE
9334 def ExpandNames(self):
9335 self._ExpandAndLockInstance()
9336 self.needed_locks[locking.LEVEL_NODE] = []
9337 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9339 def DeclareLocks(self, level):
9340 if level == locking.LEVEL_NODE:
9341 self._LockInstancesNodes()
9343 def BuildHooksEnv(self):
9346 This runs on the master, the primary and all the secondaries.
9350 "DISK": self.op.disk,
9351 "AMOUNT": self.op.amount,
9353 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9356 def BuildHooksNodes(self):
9357 """Build hooks nodes.
9360 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9363 def CheckPrereq(self):
9364 """Check prerequisites.
9366 This checks that the instance is in the cluster.
9369 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9370 assert instance is not None, \
9371 "Cannot retrieve locked instance %s" % self.op.instance_name
9372 nodenames = list(instance.all_nodes)
9373 for node in nodenames:
9374 _CheckNodeOnline(self, node)
9376 self.instance = instance
9378 if instance.disk_template not in constants.DTS_GROWABLE:
9379 raise errors.OpPrereqError("Instance's disk layout does not support"
9380 " growing.", errors.ECODE_INVAL)
9382 self.disk = instance.FindDisk(self.op.disk)
9384 if instance.disk_template not in (constants.DT_FILE,
9385 constants.DT_SHARED_FILE):
9386 # TODO: check the free disk space for file-based disks, when that feature is implemented
9388 _CheckNodesFreeDiskPerVG(self, nodenames,
9389 self.disk.ComputeGrowth(self.op.amount))
9391 def Exec(self, feedback_fn):
9392 """Execute disk grow.
9395 instance = self.instance
9398 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9400 raise errors.OpExecError("Cannot activate block device to grow")
9402 for node in instance.all_nodes:
9403 self.cfg.SetDiskID(disk, node)
9404 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9405 result.Raise("Grow request failed to node %s" % node)
9407 # TODO: Rewrite code to work properly
9408 # DRBD goes into sync mode for a short amount of time after executing the
9409 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9410 # calling "resize" in sync mode fails. Sleeping for a short amount of
9411 # time is a work-around.
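# (sketch of the work-around described above, assuming the "time" module is
# imported at the top of this file; the exact delay is an assumption)
time.sleep(5)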
9414 disk.RecordGrow(self.op.amount)
9415 self.cfg.Update(instance, feedback_fn)
9416 if self.op.wait_for_sync:
9417 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9419 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9420 " status.\nPlease check the instance.")
9421 if not instance.admin_up:
9422 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9423 elif not instance.admin_up:
9424 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9425 " not supposed to be running because no wait for"
9426 " sync mode was requested.")
9429 class LUInstanceQueryData(NoHooksLU):
9430 """Query runtime instance data.
9435 def ExpandNames(self):
9436 self.needed_locks = {}
9438 # Use locking if requested or when non-static information is wanted
9439 if not (self.op.static or self.op.use_locking):
9440 self.LogWarning("Non-static data requested, locks need to be acquired")
9441 self.op.use_locking = True
9443 if self.op.instances or not self.op.use_locking:
9444 # Expand instance names right here
9445 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9447 # Will use acquired locks
9448 self.wanted_names = None
9450 if self.op.use_locking:
9451 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9453 if self.wanted_names is None:
9454 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9456 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9458 self.needed_locks[locking.LEVEL_NODE] = []
9459 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9460 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9462 def DeclareLocks(self, level):
9463 if self.op.use_locking and level == locking.LEVEL_NODE:
9464 self._LockInstancesNodes()
9466 def CheckPrereq(self):
9467 """Check prerequisites.
9469 This only checks the optional instance list against the existing names.
9472 if self.wanted_names is None:
9473 assert self.op.use_locking, "Locking was not used"
9474 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9476 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9477 for name in self.wanted_names]
9479 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9480 """Returns the status of a block device
9483 if self.op.static or not node:
9486 self.cfg.SetDiskID(dev, node)
9488 result = self.rpc.call_blockdev_find(node, dev)
9492 result.Raise("Can't compute disk status for %s" % instance_name)
9494 status = result.payload
9498 return (status.dev_path, status.major, status.minor,
9499 status.sync_percent, status.estimated_time,
9500 status.is_degraded, status.ldisk_status)
9502 def _ComputeDiskStatus(self, instance, snode, dev):
9503 """Compute block device status.
9506 if dev.dev_type in constants.LDS_DRBD:
9507 # we change the snode then (otherwise we use the one passed in)
9508 if dev.logical_id[0] == instance.primary_node:
9509 snode = dev.logical_id[1]
9511 snode = dev.logical_id[0]
9513 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9515 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9518 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9519 for child in dev.children]
9524 "iv_name": dev.iv_name,
9525 "dev_type": dev.dev_type,
9526 "logical_id": dev.logical_id,
9527 "physical_id": dev.physical_id,
9528 "pstatus": dev_pstatus,
9529 "sstatus": dev_sstatus,
9530 "children": dev_children,
9535 def Exec(self, feedback_fn):
9536 """Gather and return data"""
9539 cluster = self.cfg.GetClusterInfo()
9541 for instance in self.wanted_instances:
9542 if not self.op.static:
9543 remote_info = self.rpc.call_instance_info(instance.primary_node,
9545 instance.hypervisor)
9546 remote_info.Raise("Error checking node %s" % instance.primary_node)
9547 remote_info = remote_info.payload
9548 if remote_info and "state" in remote_info:
9551 remote_state = "down"
9554 if instance.admin_up:
9557 config_state = "down"
9559 disks = [self._ComputeDiskStatus(instance, None, device)
9560 for device in instance.disks]
9562 result[instance.name] = {
9563 "name": instance.name,
9564 "config_state": config_state,
9565 "run_state": remote_state,
9566 "pnode": instance.primary_node,
9567 "snodes": instance.secondary_nodes,
9569 # this happens to be the same format used for hooks
9570 "nics": _NICListToTuple(self, instance.nics),
9571 "disk_template": instance.disk_template,
9573 "hypervisor": instance.hypervisor,
9574 "network_port": instance.network_port,
9575 "hv_instance": instance.hvparams,
9576 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9577 "be_instance": instance.beparams,
9578 "be_actual": cluster.FillBE(instance),
9579 "os_instance": instance.osparams,
9580 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9581 "serial_no": instance.serial_no,
9582 "mtime": instance.mtime,
9583 "ctime": instance.ctime,
9584 "uuid": instance.uuid,
9590 class LUInstanceSetParams(LogicalUnit):
9591 """Modifies an instances's parameters.
9594 HPATH = "instance-modify"
9595 HTYPE = constants.HTYPE_INSTANCE
9598 def CheckArguments(self):
9599 if not (self.op.nics or self.op.disks or self.op.disk_template or
9600 self.op.hvparams or self.op.beparams or self.op.os_name):
9601 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9603 if self.op.hvparams:
9604 _CheckGlobalHvParams(self.op.hvparams)
9608 for disk_op, disk_dict in self.op.disks:
9609 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9610 if disk_op == constants.DDM_REMOVE:
9613 elif disk_op == constants.DDM_ADD:
9616 if not isinstance(disk_op, int):
9617 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9618 if not isinstance(disk_dict, dict):
9619 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9620 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9622 if disk_op == constants.DDM_ADD:
9623 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9624 if mode not in constants.DISK_ACCESS_SET:
9625 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9627 size = disk_dict.get(constants.IDISK_SIZE, None)
9629 raise errors.OpPrereqError("Required disk parameter size missing",
9633 except (TypeError, ValueError), err:
9634 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9635 str(err), errors.ECODE_INVAL)
9636 disk_dict[constants.IDISK_SIZE] = size
9638 # modification of disk
9639 if constants.IDISK_SIZE in disk_dict:
9640 raise errors.OpPrereqError("Disk size change not possible, use"
9641 " grow-disk", errors.ECODE_INVAL)
9643 if disk_addremove > 1:
9644 raise errors.OpPrereqError("Only one disk add or remove operation"
9645 " supported at a time", errors.ECODE_INVAL)
9647 if self.op.disks and self.op.disk_template is not None:
9648 raise errors.OpPrereqError("Disk template conversion and other disk"
9649 " changes not supported at the same time",
9652 if (self.op.disk_template and
9653 self.op.disk_template in constants.DTS_INT_MIRROR and
9654 self.op.remote_node is None):
9655 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9656 " one requires specifying a secondary node",
9661 for nic_op, nic_dict in self.op.nics:
9662 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9663 if nic_op == constants.DDM_REMOVE:
9666 elif nic_op == constants.DDM_ADD:
9669 if not isinstance(nic_op, int):
9670 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9671 if not isinstance(nic_dict, dict):
9672 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9673 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9675 # nic_dict should be a dict
9676 nic_ip = nic_dict.get(constants.INIC_IP, None)
9677 if nic_ip is not None:
9678 if nic_ip.lower() == constants.VALUE_NONE:
9679 nic_dict[constants.INIC_IP] = None
9681 if not netutils.IPAddress.IsValid(nic_ip):
9682 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9685 nic_bridge = nic_dict.get('bridge', None)
9686 nic_link = nic_dict.get(constants.INIC_LINK, None)
9687 if nic_bridge and nic_link:
9688 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9689 " at the same time", errors.ECODE_INVAL)
9690 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9691 nic_dict['bridge'] = None
9692 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9693 nic_dict[constants.INIC_LINK] = None
9695 if nic_op == constants.DDM_ADD:
9696 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9698 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9700 if constants.INIC_MAC in nic_dict:
9701 nic_mac = nic_dict[constants.INIC_MAC]
9702 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9703 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9705 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9706 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9707 " modifying an existing nic",
9710 if nic_addremove > 1:
9711 raise errors.OpPrereqError("Only one NIC add or remove operation"
9712 " supported at a time", errors.ECODE_INVAL)
9714 def ExpandNames(self):
9715 self._ExpandAndLockInstance()
9716 self.needed_locks[locking.LEVEL_NODE] = []
9717 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9719 def DeclareLocks(self, level):
9720 if level == locking.LEVEL_NODE:
9721 self._LockInstancesNodes()
9722 if self.op.disk_template and self.op.remote_node:
9723 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9724 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9726 def BuildHooksEnv(self):
9729 This runs on the master, primary and secondaries.
9733 if constants.BE_MEMORY in self.be_new:
9734 args['memory'] = self.be_new[constants.BE_MEMORY]
9735 if constants.BE_VCPUS in self.be_new:
9736 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9737 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9738 # information at all.
9741 nic_override = dict(self.op.nics)
9742 for idx, nic in enumerate(self.instance.nics):
9743 if idx in nic_override:
9744 this_nic_override = nic_override[idx]
9746 this_nic_override = {}
9747 if constants.INIC_IP in this_nic_override:
9748 ip = this_nic_override[constants.INIC_IP]
9751 if constants.INIC_MAC in this_nic_override:
9752 mac = this_nic_override[constants.INIC_MAC]
9755 if idx in self.nic_pnew:
9756 nicparams = self.nic_pnew[idx]
9758 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9759 mode = nicparams[constants.NIC_MODE]
9760 link = nicparams[constants.NIC_LINK]
9761 args['nics'].append((ip, mac, mode, link))
9762 if constants.DDM_ADD in nic_override:
9763 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9764 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9765 nicparams = self.nic_pnew[constants.DDM_ADD]
9766 mode = nicparams[constants.NIC_MODE]
9767 link = nicparams[constants.NIC_LINK]
9768 args['nics'].append((ip, mac, mode, link))
9769 elif constants.DDM_REMOVE in nic_override:
9770 del args['nics'][-1]
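# (at this point args["nics"] reflects the NIC list as it will look after the
#  requested changes, so the hooks environment describes the post-modification
#  state of the instance)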
9772 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9773 if self.op.disk_template:
9774 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9778 def BuildHooksNodes(self):
9779 """Build hooks nodes.
9782 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9785 def CheckPrereq(self):
9786 """Check prerequisites.
9788 This only checks the instance list against the existing names.
9791 # checking the new params on the primary/secondary nodes
9793 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9794 cluster = self.cluster = self.cfg.GetClusterInfo()
9795 assert self.instance is not None, \
9796 "Cannot retrieve locked instance %s" % self.op.instance_name
9797 pnode = instance.primary_node
9798 nodelist = list(instance.all_nodes)
9801 if self.op.os_name and not self.op.force:
9802 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9803 self.op.force_variant)
9804 instance_os = self.op.os_name
9806 instance_os = instance.os
9808 if self.op.disk_template:
9809 if instance.disk_template == self.op.disk_template:
9810 raise errors.OpPrereqError("Instance already has disk template %s" %
9811 instance.disk_template, errors.ECODE_INVAL)
9813 if (instance.disk_template,
9814 self.op.disk_template) not in self._DISK_CONVERSIONS:
9815 raise errors.OpPrereqError("Unsupported disk template conversion from"
9816 " %s to %s" % (instance.disk_template,
9817 self.op.disk_template),
9819 _CheckInstanceDown(self, instance, "cannot change disk template")
9820 if self.op.disk_template in constants.DTS_INT_MIRROR:
9821 if self.op.remote_node == pnode:
9822 raise errors.OpPrereqError("Given new secondary node %s is the same"
9823 " as the primary node of the instance" %
9824 self.op.remote_node, errors.ECODE_STATE)
9825 _CheckNodeOnline(self, self.op.remote_node)
9826 _CheckNodeNotDrained(self, self.op.remote_node)
9827 # FIXME: here we assume that the old instance type is DT_PLAIN
9828 assert instance.disk_template == constants.DT_PLAIN
9829 disks = [{constants.IDISK_SIZE: d.size,
9830 constants.IDISK_VG: d.logical_id[0]}
9831 for d in instance.disks]
9832 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9833 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9835 # hvparams processing
9836 if self.op.hvparams:
9837 hv_type = instance.hypervisor
9838 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9839 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9840 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9843 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9844 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9845 self.hv_new = hv_new # the new actual values
9846 self.hv_inst = i_hvdict # the new dict (without defaults)
9848 self.hv_new = self.hv_inst = {}
9850 # beparams processing
9851 if self.op.beparams:
9852 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9854 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9855 be_new = cluster.SimpleFillBE(i_bedict)
9856 self.be_new = be_new # the new actual values
9857 self.be_inst = i_bedict # the new dict (without defaults)
9859 self.be_new = self.be_inst = {}
9861 # osparams processing
9862 if self.op.osparams:
9863 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9864 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9865 self.os_inst = i_osdict # the new dict (without defaults)
9871 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9872 mem_check_list = [pnode]
9873 if be_new[constants.BE_AUTO_BALANCE]:
9874 # either we changed auto_balance to yes or it was from before
9875 mem_check_list.extend(instance.secondary_nodes)
9876 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9877 instance.hypervisor)
9878 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9879 instance.hypervisor)
9880 pninfo = nodeinfo[pnode]
9881 msg = pninfo.fail_msg
9883 # Assume the primary node is unreachable and go ahead
9884 self.warn.append("Can't get info from primary node %s: %s" %
9886 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9887 self.warn.append("Node data from primary node %s doesn't contain"
9888 " free memory information" % pnode)
9889 elif instance_info.fail_msg:
9890 self.warn.append("Can't get instance runtime information: %s" %
9891 instance_info.fail_msg)
9893 if instance_info.payload:
9894 current_mem = int(instance_info.payload['memory'])
9896 # Assume instance not running
9897 # (there is a slight race condition here, but it's not very probable,
9898 # and we have no other way to check)
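# (assumed fallback matching the comment above: treat the instance as
#  currently using no memory)
current_mem = 0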
9900 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9901 pninfo.payload['memory_free'])
9903 raise errors.OpPrereqError("This change will prevent the instance"
9904 " from starting, due to %d MB of memory"
9905 " missing on its primary node" % miss_mem,
9908 if be_new[constants.BE_AUTO_BALANCE]:
9909 for node, nres in nodeinfo.items():
9910 if node not in instance.secondary_nodes:
9914 self.warn.append("Can't get info from secondary node %s: %s" %
9916 elif not isinstance(nres.payload.get('memory_free', None), int):
9917 self.warn.append("Secondary node %s didn't return free"
9918 " memory information" % node)
9919 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9920 self.warn.append("Not enough memory to failover instance to"
9921 " secondary node %s" % node)
9926 for nic_op, nic_dict in self.op.nics:
9927 if nic_op == constants.DDM_REMOVE:
9928 if not instance.nics:
9929 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9932 if nic_op != constants.DDM_ADD:
9934 if not instance.nics:
9935 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9936 " no NICs" % nic_op,
9938 if nic_op < 0 or nic_op >= len(instance.nics):
9939 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9941 (nic_op, len(instance.nics) - 1),
9943 old_nic_params = instance.nics[nic_op].nicparams
9944 old_nic_ip = instance.nics[nic_op].ip
9949 update_params_dict = dict([(key, nic_dict[key])
9950 for key in constants.NICS_PARAMETERS
9951 if key in nic_dict])
9953 if 'bridge' in nic_dict:
9954 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9956 new_nic_params = _GetUpdatedParams(old_nic_params,
9958 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9959 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9960 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9961 self.nic_pinst[nic_op] = new_nic_params
9962 self.nic_pnew[nic_op] = new_filled_nic_params
9963 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9965 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9966 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9967 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9969 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9971 self.warn.append(msg)
9973 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9974 if new_nic_mode == constants.NIC_MODE_ROUTED:
9975 if constants.INIC_IP in nic_dict:
9976 nic_ip = nic_dict[constants.INIC_IP]
9980 raise errors.OpPrereqError('Cannot set the nic ip to None'
9981 ' on a routed nic', errors.ECODE_INVAL)
9982 if constants.INIC_MAC in nic_dict:
9983 nic_mac = nic_dict[constants.INIC_MAC]
9985 raise errors.OpPrereqError('Cannot set the nic mac to None',
9987 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9988 # otherwise generate the mac
9989 nic_dict[constants.INIC_MAC] = \
9990 self.cfg.GenerateMAC(self.proc.GetECId())
9992 # or validate/reserve the current one
9994 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9995 except errors.ReservationError:
9996 raise errors.OpPrereqError("MAC address %s already in use"
9997 " in cluster" % nic_mac,
9998 errors.ECODE_NOTUNIQUE)
10001 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10002 raise errors.OpPrereqError("Disk operations not supported for"
10003 " diskless instances",
10004 errors.ECODE_INVAL)
10005 for disk_op, _ in self.op.disks:
10006 if disk_op == constants.DDM_REMOVE:
10007 if len(instance.disks) == 1:
10008 raise errors.OpPrereqError("Cannot remove the last disk of"
10009 " an instance", errors.ECODE_INVAL)
10010 _CheckInstanceDown(self, instance, "cannot remove disks")
10012 if (disk_op == constants.DDM_ADD and
10013 len(instance.disks) >= constants.MAX_DISKS):
10014 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10015 " add more" % constants.MAX_DISKS,
10016 errors.ECODE_STATE)
10017 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10019 if disk_op < 0 or disk_op >= len(instance.disks):
10020 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10022 (disk_op, len(instance.disks)),
10023 errors.ECODE_INVAL)
10027 def _ConvertPlainToDrbd(self, feedback_fn):
10028 """Converts an instance from plain to drbd.
10031 feedback_fn("Converting template to drbd")
10032 instance = self.instance
10033 pnode = instance.primary_node
10034 snode = self.op.remote_node
10036 # create a fake disk info for _GenerateDiskTemplate
10037 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10038 for d in instance.disks]
10039 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10040 instance.name, pnode, [snode],
10041 disk_info, None, None, 0, feedback_fn)
10042 info = _GetInstanceInfoText(instance)
10043 feedback_fn("Creating aditional volumes...")
10044 # first, create the missing data and meta devices
10045 for disk in new_disks:
10046 # unfortunately this is... not too nice
10047 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10049 for child in disk.children:
10050 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10051 # at this stage, all new LVs have been created, we can rename the
10053 feedback_fn("Renaming original volumes...")
10054 rename_list = [(o, n.children[0].logical_id)
10055 for (o, n) in zip(instance.disks, new_disks)]
10056 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10057 result.Raise("Failed to rename original LVs")
10059 feedback_fn("Initializing DRBD devices...")
10060 # all child devices are in place, we can now create the DRBD devices
10061 for disk in new_disks:
10062 for node in [pnode, snode]:
10063 f_create = node == pnode
10064 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10066 # at this point, the instance has been modified
10067 instance.disk_template = constants.DT_DRBD8
10068 instance.disks = new_disks
10069 self.cfg.Update(instance, feedback_fn)
10071 # disks are created, waiting for sync
10072 disk_abort = not _WaitForSync(self, instance)
10074 raise errors.OpExecError("There are some degraded disks for"
10075 " this instance, please cleanup manually")
10077 def _ConvertDrbdToPlain(self, feedback_fn):
10078 """Converts an instance from drbd to plain.
10081 instance = self.instance
10082 assert len(instance.secondary_nodes) == 1
10083 pnode = instance.primary_node
10084 snode = instance.secondary_nodes[0]
10085 feedback_fn("Converting template to plain")
10087 old_disks = instance.disks
10088 new_disks = [d.children[0] for d in old_disks]
10090 # copy over size and mode
10091 for parent, child in zip(old_disks, new_disks):
10092 child.size = parent.size
10093 child.mode = parent.mode
10095 # update instance structure
10096 instance.disks = new_disks
10097 instance.disk_template = constants.DT_PLAIN
10098 self.cfg.Update(instance, feedback_fn)
10100 feedback_fn("Removing volumes on the secondary node...")
10101 for disk in old_disks:
10102 self.cfg.SetDiskID(disk, snode)
10103 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10105 self.LogWarning("Could not remove block device %s on node %s,"
10106 " continuing anyway: %s", disk.iv_name, snode, msg)
10108 feedback_fn("Removing unneeded volumes on the primary node...")
10109 for idx, disk in enumerate(old_disks):
10110 meta = disk.children[1]
10111 self.cfg.SetDiskID(meta, pnode)
10112 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10114 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10115 " continuing anyway: %s", idx, pnode, msg)
10117 def Exec(self, feedback_fn):
10118 """Modifies an instance.
10120 All parameters take effect only at the next restart of the instance.
10123 # Process here the warnings from CheckPrereq, as we don't have a
10124 # feedback_fn there.
10125 for warn in self.warn:
10126 feedback_fn("WARNING: %s" % warn)
10129 instance = self.instance
10131 for disk_op, disk_dict in self.op.disks:
10132 if disk_op == constants.DDM_REMOVE:
10133 # remove the last disk
10134 device = instance.disks.pop()
10135 device_idx = len(instance.disks)
10136 for node, disk in device.ComputeNodeTree(instance.primary_node):
10137 self.cfg.SetDiskID(disk, node)
10138 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10140 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10141 " continuing anyway", device_idx, node, msg)
10142 result.append(("disk/%d" % device_idx, "remove"))
10143 elif disk_op == constants.DDM_ADD:
10145 if instance.disk_template in (constants.DT_FILE,
10146 constants.DT_SHARED_FILE):
10147 file_driver, file_path = instance.disks[0].logical_id
10148 file_path = os.path.dirname(file_path)
10150 file_driver = file_path = None
10151 disk_idx_base = len(instance.disks)
10152 new_disk = _GenerateDiskTemplate(self,
10153 instance.disk_template,
10154 instance.name, instance.primary_node,
10155 instance.secondary_nodes,
10159 disk_idx_base, feedback_fn)[0]
10160 instance.disks.append(new_disk)
10161 info = _GetInstanceInfoText(instance)
10163 logging.info("Creating volume %s for instance %s",
10164 new_disk.iv_name, instance.name)
10165 # Note: this needs to be kept in sync with _CreateDisks
10167 for node in instance.all_nodes:
10168 f_create = node == instance.primary_node
10170 _CreateBlockDev(self, node, instance, new_disk,
10171 f_create, info, f_create)
10172 except errors.OpExecError, err:
10173 self.LogWarning("Failed to create volume %s (%s) on"
10175 new_disk.iv_name, new_disk, node, err)
10176 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10177 (new_disk.size, new_disk.mode)))
10179 # change a given disk
10180 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10181 result.append(("disk.mode/%d" % disk_op,
10182 disk_dict[constants.IDISK_MODE]))
10184 if self.op.disk_template:
10185 r_shut = _ShutdownInstanceDisks(self, instance)
10187 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10188 " proceed with disk template conversion")
10189 mode = (instance.disk_template, self.op.disk_template)
10191 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10193 self.cfg.ReleaseDRBDMinors(instance.name)
10195 result.append(("disk_template", self.op.disk_template))
10198 for nic_op, nic_dict in self.op.nics:
10199 if nic_op == constants.DDM_REMOVE:
10200 # remove the last nic
10201 del instance.nics[-1]
10202 result.append(("nic.%d" % len(instance.nics), "remove"))
10203 elif nic_op == constants.DDM_ADD:
10204 # mac and bridge should be set by now
10205 mac = nic_dict[constants.INIC_MAC]
10206 ip = nic_dict.get(constants.INIC_IP, None)
10207 nicparams = self.nic_pinst[constants.DDM_ADD]
10208 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10209 instance.nics.append(new_nic)
10210 result.append(("nic.%d" % (len(instance.nics) - 1),
10211 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10212 (new_nic.mac, new_nic.ip,
10213 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10214 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10217 for key in (constants.INIC_MAC, constants.INIC_IP):
10218 if key in nic_dict:
10219 setattr(instance.nics[nic_op], key, nic_dict[key])
10220 if nic_op in self.nic_pinst:
10221 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10222 for key, val in nic_dict.iteritems():
10223 result.append(("nic.%s/%d" % (key, nic_op), val))
10226 if self.op.hvparams:
10227 instance.hvparams = self.hv_inst
10228 for key, val in self.op.hvparams.iteritems():
10229 result.append(("hv/%s" % key, val))
10232 if self.op.beparams:
10233 instance.beparams = self.be_inst
10234 for key, val in self.op.beparams.iteritems():
10235 result.append(("be/%s" % key, val))
10238 if self.op.os_name:
10239 instance.os = self.op.os_name
10242 if self.op.osparams:
10243 instance.osparams = self.os_inst
10244 for key, val in self.op.osparams.iteritems():
10245 result.append(("os/%s" % key, val))
10247 self.cfg.Update(instance, feedback_fn)
10251 _DISK_CONVERSIONS = {
10252 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10253 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
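# (only the plain<->drbd8 conversions above are supported; any other pair is
#  rejected in CheckPrereq)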
10257 class LUBackupQuery(NoHooksLU):
10258 """Query the exports list
10263 def ExpandNames(self):
10264 self.needed_locks = {}
10265 self.share_locks[locking.LEVEL_NODE] = 1
10266 if not self.op.nodes:
10267 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10269 self.needed_locks[locking.LEVEL_NODE] = \
10270 _GetWantedNodes(self, self.op.nodes)
10272 def Exec(self, feedback_fn):
10273 """Compute the list of all the exported system images.
10276 @return: a dictionary with the structure node->(export-list)
10277 where export-list is a list of the instances exported on that node
10281 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10282 rpcresult = self.rpc.call_export_list(self.nodes)
10284 for node in rpcresult:
10285 if rpcresult[node].fail_msg:
10286 result[node] = False
10288 result[node] = rpcresult[node].payload
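# illustrative result shape (node names here are hypothetical):
#   {"node1.example.com": ["inst1.export1", ...], "node2.example.com": False}
# where False marks a node whose export list could not be retrieved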
10293 class LUBackupPrepare(NoHooksLU):
10294 """Prepares an instance for an export and returns useful information.
10299 def ExpandNames(self):
10300 self._ExpandAndLockInstance()
10302 def CheckPrereq(self):
10303 """Check prerequisites.
10306 instance_name = self.op.instance_name
10308 self.instance = self.cfg.GetInstanceInfo(instance_name)
10309 assert self.instance is not None, \
10310 "Cannot retrieve locked instance %s" % self.op.instance_name
10311 _CheckNodeOnline(self, self.instance.primary_node)
10313 self._cds = _GetClusterDomainSecret()
10315 def Exec(self, feedback_fn):
10316 """Prepares an instance for an export.
10319 instance = self.instance
10321 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10322 salt = utils.GenerateSecret(8)
10324 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10325 result = self.rpc.call_x509_cert_create(instance.primary_node,
10326 constants.RIE_CERT_VALIDITY)
10327 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10329 (name, cert_pem) = result.payload
10331 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10335 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10336 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10338 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10344 class LUBackupExport(LogicalUnit):
10345 """Export an instance to an image in the cluster.
10348 HPATH = "instance-export"
10349 HTYPE = constants.HTYPE_INSTANCE
10352 def CheckArguments(self):
10353 """Check the arguments.
10356 self.x509_key_name = self.op.x509_key_name
10357 self.dest_x509_ca_pem = self.op.destination_x509_ca
10359 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10360 if not self.x509_key_name:
10361 raise errors.OpPrereqError("Missing X509 key name for encryption",
10362 errors.ECODE_INVAL)
10364 if not self.dest_x509_ca_pem:
10365 raise errors.OpPrereqError("Missing destination X509 CA",
10366 errors.ECODE_INVAL)
10368 def ExpandNames(self):
10369 self._ExpandAndLockInstance()
10371 # Lock all nodes for local exports
10372 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10373 # FIXME: lock only instance primary and destination node
10375 # Sad but true, for now we have to lock all nodes, as we don't know where
10376 # the previous export might be, and in this LU we search for it and
10377 # remove it from its current node. In the future we could fix this by:
10378 # - making a tasklet to search (share-lock all), then create the
10379 # new one, then another tasklet to remove the old one afterwards
10380 # - removing the removal operation altogether
10381 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10383 def DeclareLocks(self, level):
10384 """Last minute lock declaration."""
10385 # All nodes are locked anyway, so nothing to do here.
10387 def BuildHooksEnv(self):
10388 """Build hooks env.
10390 This will run on the master, primary node and target node.
10394 "EXPORT_MODE": self.op.mode,
10395 "EXPORT_NODE": self.op.target_node,
10396 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10397 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10398 # TODO: Generic function for boolean env variables
10399 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10402 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10406 def BuildHooksNodes(self):
10407 """Build hooks nodes.
10410 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10412 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10413 nl.append(self.op.target_node)
10417 def CheckPrereq(self):
10418 """Check prerequisites.
10420 This checks that the instance and node names are valid.
10423 instance_name = self.op.instance_name
10425 self.instance = self.cfg.GetInstanceInfo(instance_name)
10426 assert self.instance is not None, \
10427 "Cannot retrieve locked instance %s" % self.op.instance_name
10428 _CheckNodeOnline(self, self.instance.primary_node)
10430 if (self.op.remove_instance and self.instance.admin_up and
10431 not self.op.shutdown):
10432 raise errors.OpPrereqError("Can not remove instance without shutting it"
10435 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10436 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10437 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10438 assert self.dst_node is not None
10440 _CheckNodeOnline(self, self.dst_node.name)
10441 _CheckNodeNotDrained(self, self.dst_node.name)
10444 self.dest_disk_info = None
10445 self.dest_x509_ca = None
10447 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10448 self.dst_node = None
10450 if len(self.op.target_node) != len(self.instance.disks):
10451 raise errors.OpPrereqError(("Received destination information for %s"
10452 " disks, but instance %s has %s disks") %
10453 (len(self.op.target_node), instance_name,
10454 len(self.instance.disks)),
10455 errors.ECODE_INVAL)
10457 cds = _GetClusterDomainSecret()
10459 # Check X509 key name
10461 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10462 except (TypeError, ValueError), err:
10463 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10465 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10466 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10467 errors.ECODE_INVAL)
10469 # Load and verify CA
10471 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10472 except OpenSSL.crypto.Error, err:
10473 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10474 (err, ), errors.ECODE_INVAL)
10476 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10477 if errcode is not None:
10478 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10479 (msg, ), errors.ECODE_INVAL)
10481 self.dest_x509_ca = cert
10483 # Verify target information
10485 for idx, disk_data in enumerate(self.op.target_node):
10487 (host, port, magic) = \
10488 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10489 except errors.GenericError, err:
10490 raise errors.OpPrereqError("Target info for disk %s: %s" %
10491 (idx, err), errors.ECODE_INVAL)
10493 disk_info.append((host, port, magic))
10495 assert len(disk_info) == len(self.op.target_node)
10496 self.dest_disk_info = disk_info
10499 raise errors.ProgrammerError("Unhandled export mode %r" %
10502 # instance disk type verification
10503 # TODO: Implement export support for file-based disks
10504 for disk in self.instance.disks:
10505 if disk.dev_type == constants.LD_FILE:
10506 raise errors.OpPrereqError("Export not supported for instances with"
10507 " file-based disks", errors.ECODE_INVAL)
10509 def _CleanupExports(self, feedback_fn):
10510 """Removes exports of current instance from all other nodes.
10512 If an instance in a cluster with nodes A..D was exported to node C, its
10513 exports will be removed from the nodes A, B and D.
10516 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10518 nodelist = self.cfg.GetNodeList()
10519 nodelist.remove(self.dst_node.name)
10521 # on one-node clusters nodelist will be empty after the removal
10522 # if we proceed the backup would be removed because OpBackupQuery
10523 # substitutes an empty list with the full cluster node list.
10524 iname = self.instance.name
10526 feedback_fn("Removing old exports for instance %s" % iname)
10527 exportlist = self.rpc.call_export_list(nodelist)
10528 for node in exportlist:
10529 if exportlist[node].fail_msg:
10531 if iname in exportlist[node].payload:
10532 msg = self.rpc.call_export_remove(node, iname).fail_msg
10534 self.LogWarning("Could not remove older export for instance %s"
10535 " on node %s: %s", iname, node, msg)
10537 def Exec(self, feedback_fn):
10538 """Export an instance to an image in the cluster.
10541 assert self.op.mode in constants.EXPORT_MODES
10543 instance = self.instance
10544 src_node = instance.primary_node
10546 if self.op.shutdown:
10547 # shutdown the instance, but not the disks
10548 feedback_fn("Shutting down instance %s" % instance.name)
10549 result = self.rpc.call_instance_shutdown(src_node, instance,
10550 self.op.shutdown_timeout)
10551 # TODO: Maybe ignore failures if ignore_remove_failures is set
10552 result.Raise("Could not shutdown instance %s on"
10553 " node %s" % (instance.name, src_node))
10555 # set the disks ID correctly since call_instance_start needs the
10556 # correct drbd minor to create the symlinks
10557 for disk in instance.disks:
10558 self.cfg.SetDiskID(disk, src_node)
10560 activate_disks = (not instance.admin_up)
10563 # Activate the instance disks if we're exporting a stopped instance
10564 feedback_fn("Activating disks for %s" % instance.name)
10565 _StartInstanceDisks(self, instance, None)
10568 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10571 helper.CreateSnapshots()
10573 if (self.op.shutdown and instance.admin_up and
10574 not self.op.remove_instance):
10575 assert not activate_disks
10576 feedback_fn("Starting instance %s" % instance.name)
10577 result = self.rpc.call_instance_start(src_node, instance, None, None)
10578 msg = result.fail_msg
10580 feedback_fn("Failed to start instance: %s" % msg)
10581 _ShutdownInstanceDisks(self, instance)
10582 raise errors.OpExecError("Could not start instance: %s" % msg)
10584 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10585 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10586 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10587 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10588 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10590 (key_name, _, _) = self.x509_key_name
10593 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10596 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10597 key_name, dest_ca_pem,
10602 # Check for backwards compatibility
10603 assert len(dresults) == len(instance.disks)
10604 assert compat.all(isinstance(i, bool) for i in dresults), \
10605 "Not all results are boolean: %r" % dresults
10609 feedback_fn("Deactivating disks for %s" % instance.name)
10610 _ShutdownInstanceDisks(self, instance)
10612 if not (compat.all(dresults) and fin_resu):
10615 failures.append("export finalization")
10616 if not compat.all(dresults):
10617 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10619 failures.append("disk export: disk(s) %s" % fdsk)
10621 raise errors.OpExecError("Export failed, errors in %s" %
10622 utils.CommaJoin(failures))
10624 # At this point, the export was successful, we can cleanup/finish
10626 # Remove instance if requested
10627 if self.op.remove_instance:
10628 feedback_fn("Removing instance %s" % instance.name)
10629 _RemoveInstance(self, feedback_fn, instance,
10630 self.op.ignore_remove_failures)
10632 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10633 self._CleanupExports(feedback_fn)
10635 return fin_resu, dresults
10638 class LUBackupRemove(NoHooksLU):
10639 """Remove exports related to the named instance.
10644 def ExpandNames(self):
10645 self.needed_locks = {}
10646 # We need all nodes to be locked in order for RemoveExport to work, but we
10647 # don't need to lock the instance itself, as nothing will happen to it (and
10648 # we can remove exports also for a removed instance)
10649 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10651 def Exec(self, feedback_fn):
10652 """Remove any export.
10655 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10656 # If the instance was not found we'll try with the name that was passed in.
10657 # This will only work if it was an FQDN, though.
10659 if not instance_name:
10661 instance_name = self.op.instance_name
10663 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10664 exportlist = self.rpc.call_export_list(locked_nodes)
10666 for node in exportlist:
10667 msg = exportlist[node].fail_msg
10669 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10671 if instance_name in exportlist[node].payload:
10673 result = self.rpc.call_export_remove(node, instance_name)
10674 msg = result.fail_msg
10676 logging.error("Could not remove export for instance %s"
10677 " on node %s: %s", instance_name, node, msg)
10679 if fqdn_warn and not found:
10680 feedback_fn("Export not found. If trying to remove an export belonging"
10681 " to a deleted instance please use its Fully Qualified"
10685 class LUGroupAdd(LogicalUnit):
10686 """Logical unit for creating node groups.
10689 HPATH = "group-add"
10690 HTYPE = constants.HTYPE_GROUP
10693 def ExpandNames(self):
10694 # We need the new group's UUID here so that we can create and acquire the
10695 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10696 # that it should not check whether the UUID exists in the configuration.
10697 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10698 self.needed_locks = {}
10699 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10701 def CheckPrereq(self):
10702 """Check prerequisites.
10704 This checks that the given group name is not an existing node group
10709 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10710 except errors.OpPrereqError:
10713 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10714 " node group (UUID: %s)" %
10715 (self.op.group_name, existing_uuid),
10716 errors.ECODE_EXISTS)
10718 if self.op.ndparams:
10719 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10721 def BuildHooksEnv(self):
10722 """Build hooks env.
10726 "GROUP_NAME": self.op.group_name,
10729 def BuildHooksNodes(self):
10730 """Build hooks nodes.
10733 mn = self.cfg.GetMasterNode()
10734 return ([mn], [mn])
10736 def Exec(self, feedback_fn):
10737 """Add the node group to the cluster.
10740 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10741 uuid=self.group_uuid,
10742 alloc_policy=self.op.alloc_policy,
10743 ndparams=self.op.ndparams)
10745 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10746 del self.remove_locks[locking.LEVEL_NODEGROUP]
10749 class LUGroupAssignNodes(NoHooksLU):
10750 """Logical unit for assigning nodes to groups.
10755 def ExpandNames(self):
10756 # These raise errors.OpPrereqError on their own:
10757 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10758 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10760 # We want to lock all the affected nodes and groups. We have readily
10761 # available the list of nodes, and the *destination* group. To gather the
10762 # list of "source" groups, we need to fetch node information.
10763 self.node_data = self.cfg.GetAllNodesInfo()
10764 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10765 affected_groups.add(self.group_uuid)
10767 self.needed_locks = {
10768 locking.LEVEL_NODEGROUP: list(affected_groups),
10769 locking.LEVEL_NODE: self.op.nodes,
10772 def CheckPrereq(self):
10773 """Check prerequisites.
10776 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10777 instance_data = self.cfg.GetAllInstancesInfo()
10779 if self.group is None:
10780 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10781 (self.op.group_name, self.group_uuid))
10783 (new_splits, previous_splits) = \
10784 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10785 for node in self.op.nodes],
10786 self.node_data, instance_data)
10789 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10791 if not self.op.force:
10792 raise errors.OpExecError("The following instances get split by this"
10793 " change and --force was not given: %s" %
10796 self.LogWarning("This operation will split the following instances: %s",
10799 if previous_splits:
10800 self.LogWarning("In addition, these already-split instances continue"
10801 " to be spit across groups: %s",
10802 utils.CommaJoin(utils.NiceSort(previous_splits)))
10804 def Exec(self, feedback_fn):
10805 """Assign nodes to a new group.
10808 for node in self.op.nodes:
10809 self.node_data[node].group = self.group_uuid
10811 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10814 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10815 """Check for split instances after a node assignment.
10817 This method considers a series of node assignments as an atomic operation,
10818 and returns information about split instances after applying the set of
10821 In particular, it returns information about newly split instances, and
10822 instances that were already split, and remain so after the change.
10824 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10827 @type changes: list of (node_name, new_group_uuid) pairs.
10828 @param changes: list of node assignments to consider.
10829 @param node_data: a dict with data for all nodes
10830 @param instance_data: a dict with all instances to consider
10831 @rtype: a two-tuple
10832 @return: a list of instances that were previously okay and become split as a
10833 consequence of this change, and a list of instances that were previously
10834 split and that this change does not fix.
10837 changed_nodes = dict((node, group) for node, group in changes
10838 if node_data[node].group != group)
10840 all_split_instances = set()
10841 previously_split_instances = set()
10843 def InstanceNodes(instance):
10844 return [instance.primary_node] + list(instance.secondary_nodes)
10846 for inst in instance_data.values():
10847 if inst.disk_template not in constants.DTS_INT_MIRROR:
10850 instance_nodes = InstanceNodes(inst)
10852 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10853 previously_split_instances.add(inst.name)
10855 if len(set(changed_nodes.get(node, node_data[node].group)
10856 for node in instance_nodes)) > 1:
10857 all_split_instances.add(inst.name)
10859 return (list(all_split_instances - previously_split_instances),
10860 list(previously_split_instances & all_split_instances))
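# Illustrative (hypothetical) use of the helper above: moving "node2" into group
# "uuid-B" while its DRBD peer "node1" stays in "uuid-A" makes any instance
# mirrored across the two nodes show up in the first (newly split) list, while
# instances that already spanned two groups before the change end up in the
# second list.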
10863 class _GroupQuery(_QueryBase):
10864 FIELDS = query.GROUP_FIELDS
10866 def ExpandNames(self, lu):
10867 lu.needed_locks = {}
10869 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10870 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10873 self.wanted = [name_to_uuid[name]
10874 for name in utils.NiceSort(name_to_uuid.keys())]
10876 # Accept names to be either names or UUIDs.
10879 all_uuid = frozenset(self._all_groups.keys())
10881 for name in self.names:
10882 if name in all_uuid:
10883 self.wanted.append(name)
10884 elif name in name_to_uuid:
10885 self.wanted.append(name_to_uuid[name])
10887 missing.append(name)
10890 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10891 errors.ECODE_NOENT)
10893 def DeclareLocks(self, lu, level):
10896 def _GetQueryData(self, lu):
10897 """Computes the list of node groups and their attributes.
10900 do_nodes = query.GQ_NODE in self.requested_data
10901 do_instances = query.GQ_INST in self.requested_data
10903 group_to_nodes = None
10904 group_to_instances = None
10906 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10907 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10908 # latter GetAllInstancesInfo() is not enough, for we have to go through
10909 # instance->node. Hence, we will need to process nodes even if we only need
10910 # instance information.
10911 if do_nodes or do_instances:
10912 all_nodes = lu.cfg.GetAllNodesInfo()
10913 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10916 for node in all_nodes.values():
10917 if node.group in group_to_nodes:
10918 group_to_nodes[node.group].append(node.name)
10919 node_to_group[node.name] = node.group
10922 all_instances = lu.cfg.GetAllInstancesInfo()
10923 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10925 for instance in all_instances.values():
10926 node = instance.primary_node
10927 if node in node_to_group:
10928 group_to_instances[node_to_group[node]].append(instance.name)
10931 # Do not pass on node information if it was not requested.
10932 group_to_nodes = None
10934 return query.GroupQueryData([self._all_groups[uuid]
10935 for uuid in self.wanted],
10936 group_to_nodes, group_to_instances)
10939 class LUGroupQuery(NoHooksLU):
10940 """Logical unit for querying node groups.
10945 def CheckArguments(self):
10946 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10947 self.op.output_fields, False)
10949 def ExpandNames(self):
10950 self.gq.ExpandNames(self)
10952 def Exec(self, feedback_fn):
10953 return self.gq.OldStyleQuery(self)
10956 class LUGroupSetParams(LogicalUnit):
10957 """Modifies the parameters of a node group.
10960 HPATH = "group-modify"
10961 HTYPE = constants.HTYPE_GROUP
10964 def CheckArguments(self):
10967 self.op.alloc_policy,
10970 if all_changes.count(None) == len(all_changes):
10971 raise errors.OpPrereqError("Please pass at least one modification",
10972 errors.ECODE_INVAL)
10974 def ExpandNames(self):
10975 # This raises errors.OpPrereqError on its own:
10976 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10978 self.needed_locks = {
10979 locking.LEVEL_NODEGROUP: [self.group_uuid],
10982 def CheckPrereq(self):
10983 """Check prerequisites.
10986 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10988 if self.group is None:
10989 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10990 (self.op.group_name, self.group_uuid))
10992 if self.op.ndparams:
10993 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10994 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10995 self.new_ndparams = new_ndparams
10997 def BuildHooksEnv(self):
10998 """Build hooks env.
11002 "GROUP_NAME": self.op.group_name,
11003 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11006 def BuildHooksNodes(self):
11007 """Build hooks nodes.
11010 mn = self.cfg.GetMasterNode()
11011 return ([mn], [mn])
11013 def Exec(self, feedback_fn):
11014 """Modifies the node group.
11019 if self.op.ndparams:
11020 self.group.ndparams = self.new_ndparams
11021 result.append(("ndparams", str(self.group.ndparams)))
11023 if self.op.alloc_policy:
11024 self.group.alloc_policy = self.op.alloc_policy
11026 self.cfg.Update(self.group, feedback_fn)
11031 class LUGroupRemove(LogicalUnit):
11032 HPATH = "group-remove"
11033 HTYPE = constants.HTYPE_GROUP
11036 def ExpandNames(self):
11037 # This raises errors.OpPrereqError on its own:
11038 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11039 self.needed_locks = {
11040 locking.LEVEL_NODEGROUP: [self.group_uuid],
11043 def CheckPrereq(self):
11044 """Check prerequisites.
11046 This checks that the given group name exists as a node group, that it is
11047 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
11051 # Verify that the group is empty.
11052 group_nodes = [node.name
11053 for node in self.cfg.GetAllNodesInfo().values()
11054 if node.group == self.group_uuid]
11057 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11059 (self.op.group_name,
11060 utils.CommaJoin(utils.NiceSort(group_nodes))),
11061 errors.ECODE_STATE)
11063 # Verify the cluster would not be left group-less.
11064 if len(self.cfg.GetNodeGroupList()) == 1:
11065 raise errors.OpPrereqError("Group '%s' is the only group,"
11066 " cannot be removed" %
11067 self.op.group_name,
11068 errors.ECODE_STATE)
11070 def BuildHooksEnv(self):
11071 """Build hooks env.
11075 "GROUP_NAME": self.op.group_name,
11078 def BuildHooksNodes(self):
11079 """Build hooks nodes.
11082 mn = self.cfg.GetMasterNode()
11083 return ([mn], [mn])
11085 def Exec(self, feedback_fn):
11086 """Remove the node group.
11090 self.cfg.RemoveNodeGroup(self.group_uuid)
11091 except errors.ConfigurationError:
11092 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11093 (self.op.group_name, self.group_uuid))
11095 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11098 class LUGroupRename(LogicalUnit):
11099 HPATH = "group-rename"
11100 HTYPE = constants.HTYPE_GROUP
11103 def ExpandNames(self):
11104 # This raises errors.OpPrereqError on its own:
11105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11107 self.needed_locks = {
11108 locking.LEVEL_NODEGROUP: [self.group_uuid],
11111 def CheckPrereq(self):
11112 """Check prerequisites.
11114 Ensures requested new name is not yet used.
11118 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11119 except errors.OpPrereqError:
11122 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11123 " node group (UUID: %s)" %
11124 (self.op.new_name, new_name_uuid),
11125 errors.ECODE_EXISTS)
11127 def BuildHooksEnv(self):
11128 """Build hooks env.
11132 "OLD_NAME": self.op.group_name,
11133 "NEW_NAME": self.op.new_name,
11136 def BuildHooksNodes(self):
11137 """Build hooks nodes.
11140 mn = self.cfg.GetMasterNode()
11142 all_nodes = self.cfg.GetAllNodesInfo()
11143 all_nodes.pop(mn, None)
11146 run_nodes.extend(node.name for node in all_nodes.values()
11147 if node.group == self.group_uuid)
11149 return (run_nodes, run_nodes)
11151 def Exec(self, feedback_fn):
11152 """Rename the node group.
11155 group = self.cfg.GetNodeGroup(self.group_uuid)
11158 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11159 (self.op.group_name, self.group_uuid))
11161 group.name = self.op.new_name
11162 self.cfg.Update(group, feedback_fn)
11164 return self.op.new_name
11167 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11168 """Generic tags LU.
11170 This is an abstract class which is the parent of all the other tags LUs.
11173 def ExpandNames(self):
11174 self.group_uuid = None
11175 self.needed_locks = {}
11176 if self.op.kind == constants.TAG_NODE:
11177 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11178 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11179 elif self.op.kind == constants.TAG_INSTANCE:
11180 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11181 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11182 elif self.op.kind == constants.TAG_NODEGROUP:
11183 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11185 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11186 # not possible to acquire the BGL based on opcode parameters)
11188 def CheckPrereq(self):
    """Check prerequisites.

    """
11192 if self.op.kind == constants.TAG_CLUSTER:
11193 self.target = self.cfg.GetClusterInfo()
11194 elif self.op.kind == constants.TAG_NODE:
11195 self.target = self.cfg.GetNodeInfo(self.op.name)
11196 elif self.op.kind == constants.TAG_INSTANCE:
11197 self.target = self.cfg.GetInstanceInfo(self.op.name)
11198 elif self.op.kind == constants.TAG_NODEGROUP:
11199 self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11202 str(self.op.kind), errors.ECODE_INVAL)
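# Note (illustrative, not part of the original code): once TagsLU.CheckPrereq
# has run, subclasses only need to work with self.target, which is a taggable
# object.  LUTagsGet below is the simplest example:
#
#   def Exec(self, feedback_fn):
#     return list(self.target.GetTags())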
11205 class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
11211 def ExpandNames(self):
11212 TagsLU.ExpandNames(self)
11214 # Share locks as this is only a read operation
11215 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11217 def Exec(self, feedback_fn):
    """Returns the tag list.

    """
11221 return list(self.target.GetTags())
11224 class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
11230 def ExpandNames(self):
11231 self.needed_locks = {}
11233 def CheckPrereq(self):
11234 """Check prerequisites.
    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
11241 except re.error, err:
11242 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11243 (self.op.pattern, err), errors.ECODE_INVAL)
11245 def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
11251 ilist = cfg.GetAllInstancesInfo().values()
11252 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11253 nlist = cfg.GetAllNodesInfo().values()
11254 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11255 tgts.extend(("/nodegroup/%s" % n.name, n)
11256 for n in cfg.GetAllNodeGroupsInfo().values())
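    # Each match is reported as a (path, tag) pair; the path prefix tells the
    # caller which kind of object carried the tag, e.g. (made-up values):
    #   ("/instances/inst1.example.com", "web")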
    results = []
    for path, target in tgts:
11259 for tag in target.GetTags():
11260 if self.re.search(tag):
          results.append((path, tag))
    return results
11265 class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
11271 def CheckPrereq(self):
11272 """Check prerequisites.
    This checks the type and length of the tag name and value.

    """
11277 TagsLU.CheckPrereq(self)
11278 for tag in self.op.tags:
11279 objects.TaggableObject.ValidateTag(tag)
11281 def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
11287 self.target.AddTag(tag)
11288 except errors.TagError, err:
11289 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11290 self.cfg.Update(self.target, feedback_fn)
11293 class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
11299 def CheckPrereq(self):
11300 """Check prerequisites.
    This checks that we have the given tag.

    """
11305 TagsLU.CheckPrereq(self)
11306 for tag in self.op.tags:
11307 objects.TaggableObject.ValidateTag(tag)
11308 del_tags = frozenset(self.op.tags)
11309 cur_tags = self.target.GetTags()
11311 diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11314 raise errors.OpPrereqError("Tag(s) %s not found" %
11315 (utils.CommaJoin(diff_names), ),
11316 errors.ECODE_NOENT)
11318 def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
11322 for tag in self.op.tags:
11323 self.target.RemoveTag(tag)
11324 self.cfg.Update(self.target, feedback_fn)
11327 class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
11336 def ExpandNames(self):
11337 """Expand names and set required locks.
    This expands the node list, if any.

    """
11342 self.needed_locks = {}
11343 if self.op.on_nodes:
11344 # _GetWantedNodes can be used here, but is not always appropriate to use
11345 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11346 # more information.
11347 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11348 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11350 def _TestDelay(self):
    """Do the actual sleep.

    """
11354 if self.op.on_master:
11355 if not utils.TestDelay(self.op.duration):
11356 raise errors.OpExecError("Error during master delay test")
11357 if self.op.on_nodes:
11358 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11359 for node, node_result in result.items():
11360 node_result.Raise("Failure during rpc call to node %s" % node)
11362 def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
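  # Note (illustrative): with repeat=3 the loop above logs iterations 0/2,
  # 1/2 and 2/2, i.e. the delay itself runs self.op.repeat times.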
11375 class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
11381 # Must be lower than default timeout for WaitForJobChange to see whether it
11382 # notices changed jobs
11383 _CLIENT_CONNECT_TIMEOUT = 20.0
11384 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
11388 """Opens a Unix socket and waits for another program to connect.
    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")
11403 logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)
        # Send details to client
        cb(tmpsock)
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)
    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
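  # Sketch of the handshake implemented above (descriptive only): the socket
  # path is handed to the client through the callback, the client connects
  # within _CLIENT_CONNECT_TIMEOUT, the temporary directory is removed, and
  # the LU then waits up to _CLIENT_CONFIRM_TIMEOUT for the client to close
  # its end as confirmation.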
11436 def _SendNotification(self, test, arg, sockname):
11437 """Sends a notification to the client.
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
11446 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11448 def _Notify(self, prereq, test, arg):
11449 """Notifies the client of a test.
11452 @param prereq: Whether this is a prereq-phase test
11454 @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
11467 def CheckArguments(self):
11468 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11469 self.expandnames_calls = 0
11471 def ExpandNames(self):
11472 checkargs_calls = getattr(self, "checkargs_calls", 0)
11473 if checkargs_calls < 1:
11474 raise errors.ProgrammerError("CheckArguments was not called")
11476 self.expandnames_calls += 1
11478 if self.op.notify_waitlock:
11479 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11481 self.LogInfo("Expanding names")
11483 # Get lock on master node (just to get a lock, not for a particular reason)
11484 self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
11488 def Exec(self, feedback_fn):
11489 if self.expandnames_calls < 1:
11490 raise errors.ProgrammerError("ExpandNames was not called")
11492 if self.op.notify_exec:
11493 self._Notify(False, constants.JQT_EXEC, None)
11495 self.LogInfo("Executing")
11497 if self.op.log_messages:
11498 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11499 for idx, msg in enumerate(self.op.log_messages):
11500 self.LogInfo("Sending log message %s", idx + 1)
11501 feedback_fn(constants.JQT_MSGPREFIX + msg)
11502 # Report how many test messages have been sent
11503 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
11511 class IAllocator(object):
11512 """IAllocator framework.
  An IAllocator instance has four sets of attributes:
11515 - cfg that is needed to query the cluster
11516 - input data (all members of the _KEYS class attribute are required)
11517 - four buffer attributes (in|out_data|text), that represent the
11518 input (to the external script) in text and data structure format,
11519 and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
11525 # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
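  # Illustrative only (made-up values): an allocation request is built by
  # passing every key in _ALLO_KEYS as a keyword argument, e.g.:
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", mem_size=512, disks=[],
  #                    disk_template=constants.DT_PLAIN, os="dummy-os",
  #                    tags=[], nics=[], vcpus=1, hypervisor=None)
  #   ial.Run("hail")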
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
11541 self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.mem_size = self.disks = self.disk_template = None
11545 self.os = self.tags = self.nics = self.vcpus = None
11546 self.hypervisor = None
11547 self.relocate_from = None
11549 self.evac_nodes = None
11551 self.required_nodes = None
11552 # init result fields
11553 self.success = self.info = self.result = None
11554 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11555 keyset = self._ALLO_KEYS
11556 fn = self._AddNewInstance
11557 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11558 keyset = self._RELO_KEYS
11559 fn = self._AddRelocateInstance
11560 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11561 keyset = self._EVAC_KEYS
11562 fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11565 " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
11568 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11569 " IAllocator" % key)
11570 setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
11574 raise errors.ProgrammerError("Missing input parameter '%s' to"
11575 " IAllocator" % key)
11576 self._BuildInputData(fn)
11578 def _ComputeClusterData(self):
11579 """Compute the generic allocator input data.
    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    data = {
      "version": constants.IALLOCATOR_VERSION,
11589 "cluster_name": cfg.GetClusterName(),
11590 "cluster_tags": list(cluster_info.GetTags()),
11591 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
11594 ninfo = cfg.GetAllNodesInfo()
11595 iinfo = cfg.GetAllInstancesInfo().values()
11596 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11599 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11601 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11602 hypervisor_name = self.hypervisor
11603 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11604 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11605 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11606 hypervisor_name = cluster_info.enabled_hypervisors[0]
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
11614 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11616 config_ndata = self._ComputeBasicNodeData(ninfo)
11617 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11618 i_list, config_ndata)
11619 assert len(data["nodes"]) == len(ninfo), \
11620 "Incomplete node data computed"
11622 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11624 self.in_data = data
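  # For reference, the dict built above carries the keys "version",
  # "cluster_name", "cluster_tags", "enabled_hypervisors", "nodegroups",
  # "nodes" and "instances"; _BuildInputData later adds the per-request
  # "request" entry before serializing everything to self.in_text.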
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data."""
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng
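  # The resulting mapping is keyed by group UUID, e.g. (illustrative values):
  #   {"uuid-1234": {"name": "default", "alloc_policy": "preferred"}}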
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)
    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
11651 "tags": list(ninfo.GetTags()),
11652 "primary_ip": ninfo.primary_ip,
11653 "secondary_ip": ninfo.secondary_ip,
11654 "offline": ninfo.offline,
11655 "drained": ninfo.drained,
11656 "master_candidate": ninfo.master_candidate,
11657 "group": ninfo.group,
11658 "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }
11662 node_results[ninfo.name] = pnr
11664 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config
    """
11674 # make a copy of the current dict
11675 node_results = dict(node_results)
11676 for nname, nresult in node_data.items():
11677 assert nname in node_results, "Missing basic data for node %s" % nname
11678 ninfo = node_cfg[nname]
11680 if not (ninfo.offline or ninfo.drained):
11681 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
11686 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11687 'vg_size', 'vg_free', 'cpu_total']:
11688 if attr not in remote_info:
11689 raise errors.OpExecError("Node '%s' didn't return attribute"
11690 " '%s'" % (nname, attr))
11691 if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
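        # Worked example for the adjustment below (illustrative numbers): a
        # primary instance with BE_MEMORY=1024 that currently uses only 512 MB
        # makes this node report 512 MB less free memory, so the allocator
        # treats the difference as reserved even though it is not in use yet.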
11695 # compute memory used by primary instances
11696 i_p_mem = i_p_up_mem = 0
11697 for iinfo, beinfo in i_list:
11698 if iinfo.primary_node == nname:
11699 i_p_mem += beinfo[constants.BE_MEMORY]
11700 if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11704 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11705 remote_info['memory_free'] -= max(0, i_mem_diff)
            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]
        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
11713 "reserved_memory": remote_info['memory_dom0'],
11714 "free_memory": remote_info['memory_free'],
11715 "total_disk": remote_info['vg_size'],
11716 "free_disk": remote_info['vg_free'],
11717 "total_cpus": remote_info['cpu_total'],
11718 "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
11722 node_results[nname] = pnr_dyn
11724 return node_results
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data."""
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
11735 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
11741 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11742 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11743 nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
11746 "admin_up": iinfo.admin_up,
11747 "vcpus": beinfo[constants.BE_VCPUS],
11748 "memory": beinfo[constants.BE_MEMORY],
11750 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
11753 constants.IDISK_MODE: dsk.mode}
11754 for dsk in iinfo.disks],
11755 "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
11760 instance_data[iinfo.name] = pir
11762 return instance_data
11764 def _AddNewInstance(self):
11765 """Add new instance data to allocator structure.
11767 This in combination with _AllocatorGetClusterData will create the
11768 correct structure needed as input for the allocator.
    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11776 if self.disk_template in constants.DTS_INT_MIRROR:
11777 self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "os": self.os,
      "tags": self.tags,
      "disk_template": self.disk_template,
11785 "vcpus": self.vcpus,
11786 "memory": self.mem_size,
11787 "disks": self.disks,
11788 "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
11794 def _AddRelocateInstance(self):
11795 """Add relocate instance data to allocator structure.
11797 This in combination with _IAllocatorGetClusterData will create the
11798 correct structure needed as input for the allocator.
    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
11805 if instance is None:
11806 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11807 " IAllocator" % self.name)
11809 if instance.disk_template not in constants.DTS_MIRRORED:
11810 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11811 errors.ECODE_INVAL)
11813 if instance.disk_template in constants.DTS_INT_MIRROR and \
11814 len(instance.secondary_nodes) != 1:
11815 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11816 errors.ECODE_STATE)
11818 self.required_nodes = 1
11819 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11820 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
    request = {
      "name": self.name,
      "disk_space_total": disk_space,
11825 "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
11830 def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes,
      }
    return request
11839 def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
11847 self.in_data["request"] = request
11849 self.in_text = serializer.Dump(self.in_data)
11851 def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
11856 call_fn = self.rpc.call_iallocator_runner
11858 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11859 result.Raise("Failure while running the iallocator script")
11861 self.out_text = result.payload
    if validate:
      self._ValidateResult()
11865 def _ValidateResult(self):
11866 """Process the allocator results.
11868 This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
11874 except Exception, err:
11875 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11877 if not isinstance(rdict, dict):
11878 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11880 # TODO: remove backwards compatiblity in later versions
11881 if "nodes" in rdict and "result" not in rdict:
11882 rdict["result"] = rdict["nodes"]
11885 for key in "success", "info", "result":
11886 if key not in rdict:
11887 raise errors.OpExecError("Can't parse iallocator results:"
11888 " missing key '%s'" % key)
11889 setattr(self, key, rdict[key])
11891 if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
11895 if self.mode == constants.IALLOCATOR_MODE_RELOC:
11896 assert self.relocate_from is not None
11897 assert self.required_nodes == 1
11899 node2group = dict((name, ndata["group"])
11900 for (name, ndata) in self.in_data["nodes"].items())
11902 fn = compat.partial(self._NodesToGroups, node2group,
11903 self.in_data["nodegroups"])
11905 request_groups = fn(self.relocate_from)
11906 result_groups = fn(rdict["result"])
11908 if result_groups != request_groups:
11909 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11910 " differ from original groups (%s)" %
11911 (utils.CommaJoin(result_groups),
11912 utils.CommaJoin(request_groups)))
11914 self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
11918 """Returns a list of unique group names for a list of nodes.
11920 @type node2group: dict
11921 @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)
11947 return sorted(result)
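  # Illustrative example (made-up data):
  #   IAllocator._NodesToGroups({"node1": "uuid-a"},
  #                             {"uuid-a": {"name": "default"}},
  #                             ["node1", "unknown-node"]) == ["default"]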
11950 class LUTestAllocator(NoHooksLU):
11951 """Run allocator tests.
  This LU runs the allocator tests.

  """
11956 def CheckPrereq(self):
11957 """Check prerequisites.
    This checks the opcode parameters depending on the direction and mode test.

    """
11962 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11963 for attr in ["mem_size", "disks", "disk_template",
11964 "os", "tags", "nics", "vcpus"]:
11965 if not hasattr(self.op, attr):
11966 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11967 attr, errors.ECODE_INVAL)
11968 iname = self.cfg.ExpandInstanceName(self.op.name)
11969 if iname is not None:
11970 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11971 iname, errors.ECODE_EXISTS)
11972 if not isinstance(self.op.nics, list):
11973 raise errors.OpPrereqError("Invalid parameter 'nics'",
11974 errors.ECODE_INVAL)
11975 if not isinstance(self.op.disks, list):
11976 raise errors.OpPrereqError("Invalid parameter 'disks'",
11977 errors.ECODE_INVAL)
11978 for row in self.op.disks:
11979 if (not isinstance(row, dict) or
11980 "size" not in row or
11981 not isinstance(row["size"], int) or
11982 "mode" not in row or
11983 row["mode"] not in ['r', 'w']):
11984 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11985 " parameter", errors.ECODE_INVAL)
11986 if self.op.hypervisor is None:
11987 self.op.hypervisor = self.cfg.GetHypervisorType()
11988 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11989 fname = _ExpandInstanceName(self.cfg, self.op.name)
11990 self.op.name = fname
11991 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11992 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11993 if not hasattr(self.op, "evac_nodes"):
11994 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11995 " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11998 self.op.mode, errors.ECODE_INVAL)
12000 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12001 if self.op.allocator is None:
12002 raise errors.OpPrereqError("Missing allocator name",
12003 errors.ECODE_INVAL)
12004 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12005 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12006 self.op.direction, errors.ECODE_INVAL)
12008 def Exec(self, feedback_fn):
    """Run the allocator test.

    """
12012 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
12017 disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
12025 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12026 ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
12031 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12032 ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
12037 " LUTestAllocator.Exec", self.op.mode)
12039 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12040 result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
12047 #: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }
12055 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12058 def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12068 errors.ECODE_INVAL)
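# Illustrative only: code handling a query opcode would pick the right
# implementation class through the helper above, e.g.:
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   # an unknown resource name raises OpPrereqError instead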