# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module

import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 """Data container for LU results with jobs.
80 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82 contained in the C{jobs} attribute and include the job IDs in the opcode
86 def __init__(self, jobs, **kwargs):
87 """Initializes this class.
89 Additional return values can be specified as keyword arguments.
91 @type jobs: list of lists of L{opcode.OpCode}
92 @param jobs: A list of lists of opcode objects


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.cfg = context.cfg
    self.context = context
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
164 """Returns the SshRunner object
168 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
171 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
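
  # Example (hypothetical LU, for illustration only): a minimal concurrent
  # ExpandNames for an LU acting on a single instance, using the helper
  # methods defined further below in this class:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  # DeclareLocks would then call self._LockInstancesNodes() when invoked for
  # locking.LEVEL_NODE (see _LockInstancesNodes below).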

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
439 """Tasklet base class.
441 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
442 they can mix legacy code with tasklets. Locking needs to be done in the LU,
443 tasklets know nothing about locks.
445 Subclasses must follow these rules:
446 - Implement CheckPrereq
450 def __init__(self, lu):
457 def CheckPrereq(self):
458 """Check prerequisites for this tasklets.
460 This method should check whether the prerequisites for the execution of
461 this tasklet are fulfilled. It can do internode communication, but it
462 should be idempotent - no cluster or system changes are allowed.
464 The method should raise errors.OpPrereqError in case something is not
465 fulfilled. Its return value is ignored.
467 This method should also update all parameters to their canonical form if it
468 hasn't been done before.
473 def Exec(self, feedback_fn):
474 """Execute the tasklet.
476 This method should implement the actual work. It should raise
477 errors.OpExecError for failures that are somewhat dealt with in code, or
481 raise NotImplementedError
485 """Base for query utility classes.
488 #: Attribute holding field definitions
491 def __init__(self, filter_, fields, use_locking):
492 """Initializes this class.
495 self.use_locking = use_locking
497 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
499 self.requested_data = self.query.RequestedData()
500 self.names = self.query.RequestedNames()
502 # Sort only if no names were requested
503 self.sort_by_name = not self.names
505 self.do_locking = None
508 def _GetNames(self, lu, all_names, lock_level):
509 """Helper function to determine names asked for in the query.
513 names = lu.acquired_locks[lock_level]
517 if self.wanted == locking.ALL_SET:
518 assert not self.names
519 # caller didn't specify names, so ordering is not important
520 return utils.NiceSort(names)
522 # caller specified names and we must keep the same order
524 assert not self.do_locking or lu.acquired_locks[lock_level]
526 missing = set(self.wanted).difference(names)
528 raise errors.OpExecError("Some items were removed before retrieving"
529 " their data: %s" % missing)
531 # Return expanded names
534 def ExpandNames(self, lu):
535 """Expand names for this query.
537 See L{LogicalUnit.ExpandNames}.
540 raise NotImplementedError()
542 def DeclareLocks(self, lu, level):
543 """Declare locks for this query.
545 See L{LogicalUnit.DeclareLocks}.
548 raise NotImplementedError()
550 def _GetQueryData(self, lu):
551 """Collects all data for this query.
553 @return: Query data object
556 raise NotImplementedError()
558 def NewStyleQuery(self, lu):
559 """Collect data and execute query.
562 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
563 sort_by_name=self.sort_by_name)
565 def OldStyleQuery(self, lu):
566 """Collect data and execute query.
569 return self.query.OldStyleQuery(self._GetQueryData(lu),
570 sort_by_name=self.sort_by_name)
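

# Example (hypothetical subclass, for illustration only): a concrete query
# class built on the base class above generally provides a FIELDS definition
# from the query module and fills in the name expansion and data gathering:
#
#   class _ExampleNodeQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self._GetNames(lu, lu.cfg.GetNodeList(),
#                                    locking.LEVEL_NODE)
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       # gather node objects and return the query data object here
#       ...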


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found
      in the cluster

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
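

# Example (hypothetical values, for illustration only): given the semantics
# documented above, a call such as
#
#   _GetUpdatedParams({"mem": 512, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#
# returns {"vcpus": 4}: "mem" is reset to its default (removed from the
# per-object dictionary) while "vcpus" is overridden with the new value.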


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @param override: dictionary with key/values that will override
      our values
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
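

# Example (hypothetical numbers, for illustration only): with a
# candidate_pool_size of 10 and 3 current master candidates out of 5 that
# should exist, adding a node bumps the target to min(5 + 1, 10) = 6; since
# 3 < 6, _DecideSelfPromotion returns True and the new node promotes itself.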


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
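

# Example (hypothetical OS names, for illustration only): variants are
# encoded in the OS name itself, e.g. "debootstrap+wheezy" names the "wheezy"
# variant of the "debootstrap" OS. For an OS that declares
# supported_variants, passing a bare "debootstrap" to the check above raises
# "OS name must include a variant".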


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # known node object
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
      "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
                        for (files, fn) in [(files_all, None),
                                            (files_all_opt, None),
                                            (files_mc, lambda node: (node.master_candidate or
                                                                     node.name == master_node)),
                                            (files_vm, lambda node: node.vm_capable)]
                        for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
1890 def _VerifyNodeOS(self, ninfo, nimg, base):
1891 """Verifies the node OS list.
1893 @type ninfo: L{objects.Node}
1894 @param ninfo: the node to check
1895 @param nimg: the node image object
1896 @param base: the 'template' node we match against (e.g. from the master)
1900 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1902 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1904 for os_name, os_data in nimg.oslist.items():
1905 assert os_data, "Empty OS status for OS %s?!" % os_name
1906 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1907 _ErrorIf(not f_status, self.ENODEOS, node,
1908 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1909 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1910 "OS '%s' has multiple entries (first one shadows the rest): %s",
1911 os_name, utils.CommaJoin([v[0] for v in os_data]))
1912 # this will be caught in the backend too
1913 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1914 and not f_var, self.ENODEOS, node,
1915 "OS %s with API at least %d does not declare any variant",
1916 os_name, constants.OS_API_V15)
1917 # comparisons with the 'base' image
1918 test = os_name not in base.oslist
1919 _ErrorIf(test, self.ENODEOS, node,
1920 "Extra OS %s not present on reference node (%s)",
1924 assert base.oslist[os_name], "Base node has empty OS status?"
1925 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1927 # base OS is invalid, skipping
1929 for kind, a, b in [("API version", f_api, b_api),
1930 ("variants list", f_var, b_var),
1931 ("parameters", f_param, b_param)]:
1932 _ErrorIf(a != b, self.ENODEOS, node,
1933 "OS %s %s differs from reference node %s: %s vs. %s",
1934 kind, os_name, base.name,
1935 utils.CommaJoin(a), utils.CommaJoin(b))
1937 # check any missing OSes
1938 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1939 _ErrorIf(missing, self.ENODEOS, node,
1940 "OSes present on reference node %s but missing on this node: %s",
1941 base.name, utils.CommaJoin(missing))
1943 def _VerifyOob(self, ninfo, nresult):
1944 """Verifies out of band functionality of a node.
1946 @type ninfo: L{objects.Node}
1947 @param ninfo: the node to check
1948 @param nresult: the remote results for the node
1952 # We just have to verify the paths on master and/or master candidates
1953 # as the oob helper is invoked on the master
1954 if ((ninfo.master_candidate or ninfo.master_capable) and
1955 constants.NV_OOB_PATHS in nresult):
1956 for path_result in nresult[constants.NV_OOB_PATHS]:
1957 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1959 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1960 """Verifies and updates the node volume data.
1962 This function will update a L{NodeImage}'s internal structures
1963 with data from the remote call.
1965 @type ninfo: L{objects.Node}
1966 @param ninfo: the node to check
1967 @param nresult: the remote results for the node
1968 @param nimg: the node image object
1969 @param vg_name: the configured VG name
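On success C{nimg.volumes} receives the LV data returned by the node and
C{nimg.lvm_fail} is cleared.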
1973 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1975 nimg.lvm_fail = True
1976 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1979 elif isinstance(lvdata, basestring):
1980 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1981 utils.SafeEncode(lvdata))
1982 elif not isinstance(lvdata, dict):
1983 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1985 nimg.volumes = lvdata
1986 nimg.lvm_fail = False
1988 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1989 """Verifies and updates the node instance list.
1991 If the listing was successful, then updates this node's instance
1992 list. Otherwise, it marks the RPC call as failed for the instance list.
1995 @type ninfo: L{objects.Node}
1996 @param ninfo: the node to check
1997 @param nresult: the remote results for the node
1998 @param nimg: the node image object
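On success C{nimg.instances} receives the instance list reported by the node;
on a failed RPC call C{nimg.hyp_fail} is set instead.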
2001 idata = nresult.get(constants.NV_INSTANCELIST, None)
2002 test = not isinstance(idata, list)
2003 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2004 " (instancelist): %s", utils.SafeEncode(str(idata)))
2006 nimg.hyp_fail = True
2008 nimg.instances = idata
2010 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2011 """Verifies and computes a node information map
2013 @type ninfo: L{objects.Node}
2014 @param ninfo: the node to check
2015 @param nresult: the remote results for the node
2016 @param nimg: the node image object
2017 @param vg_name: the configured VG name
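On success this fills C{nimg.mfree} with the free memory reported by the
hypervisor and, if a volume group is configured, C{nimg.dfree} with the free
space reported for that volume group.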
2021 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2023 # try to read free memory (from the hypervisor)
2024 hv_info = nresult.get(constants.NV_HVINFO, None)
2025 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2026 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2029 nimg.mfree = int(hv_info["memory_free"])
2030 except (ValueError, TypeError):
2031 _ErrorIf(True, self.ENODERPC, node,
2032 "node returned invalid nodeinfo, check hypervisor")
2034 # FIXME: devise a free space model for file based instances as well
2035 if vg_name is not None:
2036 test = (constants.NV_VGLIST not in nresult or
2037 vg_name not in nresult[constants.NV_VGLIST])
2038 _ErrorIf(test, self.ENODELVM, node,
2039 "node didn't return data for the volume group '%s'"
2040 " - it is either missing or broken", vg_name)
2043 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2044 except (ValueError, TypeError):
2045 _ErrorIf(True, self.ENODERPC, node,
2046 "node returned invalid LVM info, check LVM status")
2048 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2049 """Gets per-disk status information for all instances.
2051 @type nodelist: list of strings
2052 @param nodelist: Node names
2053 @type node_image: dict of (name, L{objects.Node})
2054 @param node_image: Node objects
2055 @type instanceinfo: dict of (name, L{objects.Instance})
2056 @param instanceinfo: Instance objects
2057 @rtype: {instance: {node: [(success, payload)]}}
2058 @return: a dictionary of per-instance dictionaries with nodes as
2059 keys and disk information as values; the disk information is a
2060 list of tuples (success, payload)
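for example (instance and node names purely illustrative)::

  {"instance1.example.com": {"node1.example.com": [(True, payload)],
                             "node2.example.com": [(False, "node offline")]}}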
2063 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2066 node_disks_devonly = {}
2067 diskless_instances = set()
2068 diskless = constants.DT_DISKLESS
2070 for nname in nodelist:
2071 node_instances = list(itertools.chain(node_image[nname].pinst,
2072 node_image[nname].sinst))
2073 diskless_instances.update(inst for inst in node_instances
2074 if instanceinfo[inst].disk_template == diskless)
2075 disks = [(inst, disk)
2076 for inst in node_instances
2077 for disk in instanceinfo[inst].disks]
2080 # No need to collect data
2083 node_disks[nname] = disks
2085 # Creating copies as SetDiskID below will modify the objects and that can
2086 # lead to incorrect data returned from nodes
2087 devonly = [dev.Copy() for (_, dev) in disks]
2090 self.cfg.SetDiskID(dev, nname)
2092 node_disks_devonly[nname] = devonly
2094 assert len(node_disks) == len(node_disks_devonly)
2096 # Collect data from all nodes with disks
2097 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2100 assert len(result) == len(node_disks)
2104 for (nname, nres) in result.items():
2105 disks = node_disks[nname]
2108 # No data from this node
2109 data = len(disks) * [(False, "node offline")]
2112 _ErrorIf(msg, self.ENODERPC, nname,
2113 "while getting disk information: %s", msg)
2115 # No data from this node
2116 data = len(disks) * [(False, msg)]
2119 for idx, i in enumerate(nres.payload):
2120 if isinstance(i, (tuple, list)) and len(i) == 2:
2123 logging.warning("Invalid result from node %s, entry %d: %s",
2125 data.append((False, "Invalid result from the remote node"))
2127 for ((inst, _), status) in zip(disks, data):
2128 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2130 # Add empty entries for diskless instances.
2131 for inst in diskless_instances:
2132 assert inst not in instdisk
2135 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2136 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2137 compat.all(isinstance(s, (tuple, list)) and
2138 len(s) == 2 for s in statuses)
2139 for inst, nnames in instdisk.items()
2140 for nname, statuses in nnames.items())
2141 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2145 def _VerifyHVP(self, hvp_data):
2146 """Verifies locally the syntax of the hypervisor parameters.
2149 for item, hv_name, hv_params in hvp_data:
2150 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2153 hv_class = hypervisor.GetHypervisor(hv_name)
2154 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2155 hv_class.CheckParameterSyntax(hv_params)
2156 except errors.GenericError, err:
2157 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2159 def BuildHooksEnv(self):
2162 Cluster-Verify hooks just run in the post phase and their failure causes
2163 the output to be logged in the verify output and the verification to fail.
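The environment contains the cluster-wide tags and, for every node, a
C{NODE_TAGS_<name>} entry with that node's tags.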
2169 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2172 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2173 for node in cfg.GetAllNodesInfo().values())
2177 def BuildHooksNodes(self):
2178 """Build hooks nodes.
2181 return ([], self.cfg.GetNodeList())
2183 def Exec(self, feedback_fn):
2184 """Verify integrity of cluster, performing various test on nodes.
2187 # This method has too many local variables. pylint: disable-msg=R0914
2189 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2190 verbose = self.op.verbose
2191 self._feedback_fn = feedback_fn
2192 feedback_fn("* Verifying global settings")
2193 for msg in self.cfg.VerifyConfig():
2194 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2196 # Check the cluster certificates
2197 for cert_filename in constants.ALL_CERT_FILES:
2198 (errcode, msg) = _VerifyCertificate(cert_filename)
2199 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2201 vg_name = self.cfg.GetVGName()
2202 drbd_helper = self.cfg.GetDRBDHelper()
2203 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2204 cluster = self.cfg.GetClusterInfo()
2205 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2206 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2207 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2208 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2209 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2210 for iname in instancelist)
2211 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2212 i_non_redundant = [] # Non redundant instances
2213 i_non_a_balanced = [] # Non auto-balanced instances
2214 n_offline = 0 # Count of offline nodes
2215 n_drained = 0 # Count of nodes being drained
2216 node_vol_should = {}
2218 # FIXME: verify OS list
2221 filemap = _ComputeAncillaryFiles(cluster, False)
2223 # do local checksums
2224 master_node = self.master_node = self.cfg.GetMasterNode()
2225 master_ip = self.cfg.GetMasterIP()
2227 # Compute the set of hypervisor parameters
2229 for hv_name in hypervisors:
2230 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2231 for os_name, os_hvp in cluster.os_hvp.items():
2232 for hv_name, hv_params in os_hvp.items():
2235 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2236 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2237 # TODO: collapse identical parameter values into a single one
2238 for instance in instanceinfo.values():
2239 if not instance.hvparams:
2241 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2242 cluster.FillHV(instance)))
2243 # and verify them locally
2244 self._VerifyHVP(hvp_data)
2246 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2247 node_verify_param = {
2248 constants.NV_FILELIST:
2249 utils.UniqueSequence(filename
2250 for files in filemap
2251 for filename in files),
2252 constants.NV_NODELIST: [node.name for node in nodeinfo
2253 if not node.offline],
2254 constants.NV_HYPERVISOR: hypervisors,
2255 constants.NV_HVPARAMS: hvp_data,
2256 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2257 node.secondary_ip) for node in nodeinfo
2258 if not node.offline],
2259 constants.NV_INSTANCELIST: hypervisors,
2260 constants.NV_VERSION: None,
2261 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2262 constants.NV_NODESETUP: None,
2263 constants.NV_TIME: None,
2264 constants.NV_MASTERIP: (master_node, master_ip),
2265 constants.NV_OSLIST: None,
2266 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2269 if vg_name is not None:
2270 node_verify_param[constants.NV_VGLIST] = None
2271 node_verify_param[constants.NV_LVLIST] = vg_name
2272 node_verify_param[constants.NV_PVLIST] = [vg_name]
2273 node_verify_param[constants.NV_DRBDLIST] = None
2276 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2278 # Build our expected cluster state
2279 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2281 vm_capable=node.vm_capable))
2282 for node in nodeinfo)
2286 for node in nodeinfo:
2287 path = _SupportsOob(self.cfg, node)
2288 if path and path not in oob_paths:
2289 oob_paths.append(path)
2292 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2294 for instance in instancelist:
2295 inst_config = instanceinfo[instance]
2297 for nname in inst_config.all_nodes:
2298 if nname not in node_image:
2300 gnode = self.NodeImage(name=nname)
2302 node_image[nname] = gnode
2304 inst_config.MapLVsByNode(node_vol_should)
2306 pnode = inst_config.primary_node
2307 node_image[pnode].pinst.append(instance)
2309 for snode in inst_config.secondary_nodes:
2310 nimg = node_image[snode]
2311 nimg.sinst.append(instance)
2312 if pnode not in nimg.sbp:
2313 nimg.sbp[pnode] = []
2314 nimg.sbp[pnode].append(instance)
2316 # At this point, we have the in-memory data structures complete,
2317 # except for the runtime information, which we'll gather next
2319 # Due to the way our RPC system works, exact response times cannot be
2320 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2321 # time before and after executing the request, we can at least have a time window.
2323 nvinfo_starttime = time.time()
2324 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2325 self.cfg.GetClusterName())
2326 nvinfo_endtime = time.time()
2328 all_drbd_map = self.cfg.ComputeDRBDMap()
2330 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2331 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2333 feedback_fn("* Verifying configuration file consistency")
2334 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2336 feedback_fn("* Verifying node status")
2340 for node_i in nodeinfo:
2342 nimg = node_image[node]
2346 feedback_fn("* Skipping offline node %s" % (node,))
2350 if node == master_node:
2352 elif node_i.master_candidate:
2353 ntype = "master candidate"
2354 elif node_i.drained:
2360 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2362 msg = all_nvinfo[node].fail_msg
2363 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2365 nimg.rpc_fail = True
2368 nresult = all_nvinfo[node].payload
2370 nimg.call_ok = self._VerifyNode(node_i, nresult)
2371 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2372 self._VerifyNodeNetwork(node_i, nresult)
2373 self._VerifyOob(node_i, nresult)
2376 self._VerifyNodeLVM(node_i, nresult, vg_name)
2377 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2380 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2381 self._UpdateNodeInstances(node_i, nresult, nimg)
2382 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2383 self._UpdateNodeOS(node_i, nresult, nimg)
2384 if not nimg.os_fail:
2385 if refos_img is None:
2387 self._VerifyNodeOS(node_i, nimg, refos_img)
2389 feedback_fn("* Verifying instance status")
2390 for instance in instancelist:
2392 feedback_fn("* Verifying instance %s" % instance)
2393 inst_config = instanceinfo[instance]
2394 self._VerifyInstance(instance, inst_config, node_image,
2396 inst_nodes_offline = []
2398 pnode = inst_config.primary_node
2399 pnode_img = node_image[pnode]
2400 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2401 self.ENODERPC, pnode, "instance %s, connection to"
2402 " primary node failed", instance)
2404 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2405 self.EINSTANCEBADNODE, instance,
2406 "instance is marked as running and lives on offline node %s",
2407 inst_config.primary_node)
2409 # If the instance is non-redundant we cannot survive losing its primary
2410 # node, so we are not N+1 compliant. On the other hand we have no disk
2411 templates with more than one secondary so that situation is not well supported either.
2413 # FIXME: does not support file-backed instances
2414 if not inst_config.secondary_nodes:
2415 i_non_redundant.append(instance)
2417 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2418 instance, "instance has multiple secondary nodes: %s",
2419 utils.CommaJoin(inst_config.secondary_nodes),
2420 code=self.ETYPE_WARNING)
2422 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2423 pnode = inst_config.primary_node
2424 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2425 instance_groups = {}
2427 for node in instance_nodes:
2428 instance_groups.setdefault(nodeinfo_byname[node].group,
2432 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2433 # Sort so that we always list the primary node first.
2434 for group, nodes in sorted(instance_groups.items(),
2435 key=lambda (_, nodes): pnode in nodes,
2438 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2439 instance, "instance has primary and secondary nodes in"
2440 " different groups: %s", utils.CommaJoin(pretty_list),
2441 code=self.ETYPE_WARNING)
2443 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2444 i_non_a_balanced.append(instance)
2446 for snode in inst_config.secondary_nodes:
2447 s_img = node_image[snode]
2448 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2449 "instance %s, connection to secondary node failed", instance)
2452 inst_nodes_offline.append(snode)
2454 # warn that the instance lives on offline nodes
2455 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2456 "instance has offline secondary node(s) %s",
2457 utils.CommaJoin(inst_nodes_offline))
2458 # ... or ghost/non-vm_capable nodes
2459 for node in inst_config.all_nodes:
2460 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2461 "instance lives on ghost node %s", node)
2462 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2463 instance, "instance lives on non-vm_capable node %s", node)
2465 feedback_fn("* Verifying orphan volumes")
2466 reserved = utils.FieldSet(*cluster.reserved_lvs)
2467 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2469 feedback_fn("* Verifying orphan instances")
2470 self._VerifyOrphanInstances(instancelist, node_image)
2472 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2473 feedback_fn("* Verifying N+1 Memory redundancy")
2474 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2476 feedback_fn("* Other Notes")
2478 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2479 % len(i_non_redundant))
2481 if i_non_a_balanced:
2482 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2483 % len(i_non_a_balanced))
2486 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2489 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2493 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2494 """Analyze the post-hooks' result
2496 This method analyses the hook result, handles it, and sends some
2497 nicely-formatted feedback back to the user.
2499 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2500 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2501 @param hooks_results: the results of the multi-node hooks rpc call
2502 @param feedback_fn: function used to send feedback back to the caller
2503 @param lu_result: previous Exec result
2504 @return: the new Exec result, based on the previous result
2508 # We only really run POST phase hooks, and are only interested in their results
2510 if phase == constants.HOOKS_PHASE_POST:
2511 # Used to change hooks' output to proper indentation
2512 feedback_fn("* Hooks Results")
2513 assert hooks_results, "invalid result from hooks"
2515 for node_name in hooks_results:
2516 res = hooks_results[node_name]
2518 test = msg and not res.offline
2519 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2520 "Communication failure in hooks execution: %s", msg)
2521 if res.offline or msg:
2522 # No need to investigate payload if node is offline or gave an error.
2523 # override manually lu_result here as _ErrorIf only
2524 # overrides self.bad
2527 for script, hkr, output in res.payload:
2528 test = hkr == constants.HKR_FAIL
2529 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2530 "Script %s failed, output:", script)
2532 output = self._HOOKS_INDENT_RE.sub(' ', output)
2533 feedback_fn("%s" % output)
2539 class LUClusterVerifyDisks(NoHooksLU):
2540 """Verifies the cluster disks status.
2545 def ExpandNames(self):
2546 self.needed_locks = {
2547 locking.LEVEL_NODE: locking.ALL_SET,
2548 locking.LEVEL_INSTANCE: locking.ALL_SET,
2550 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2552 def Exec(self, feedback_fn):
2553 """Verify integrity of cluster disks.
2555 @rtype: tuple of three items
2556 @return: a tuple of (dict of node-to-node_error, list of instances
2557 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2561 result = res_nodes, res_instances, res_missing = {}, [], {}
2563 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2564 instances = self.cfg.GetAllInstancesInfo().values()
2567 for inst in instances:
2569 if not inst.admin_up:
2571 inst.MapLVsByNode(inst_lvs)
2572 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2573 for node, vol_list in inst_lvs.iteritems():
2574 for vol in vol_list:
2575 nv_dict[(node, vol)] = inst
2580 node_lvs = self.rpc.call_lv_list(nodes, [])
2581 for node, node_res in node_lvs.items():
2582 if node_res.offline:
2584 msg = node_res.fail_msg
2586 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2587 res_nodes[node] = msg
2590 lvs = node_res.payload
2591 for lv_name, (_, _, lv_online) in lvs.items():
2592 inst = nv_dict.pop((node, lv_name), None)
2593 if (not lv_online and inst is not None
2594 and inst.name not in res_instances):
2595 res_instances.append(inst.name)
2597 # any leftover items in nv_dict are missing LVs, let's arrange the
2599 for key, inst in nv_dict.iteritems():
2600 if inst.name not in res_missing:
2601 res_missing[inst.name] = []
2602 res_missing[inst.name].append(key)
2607 class LUClusterRepairDiskSizes(NoHooksLU):
2608 """Verifies the cluster disks sizes.
2613 def ExpandNames(self):
2614 if self.op.instances:
2615 self.wanted_names = []
2616 for name in self.op.instances:
2617 full_name = _ExpandInstanceName(self.cfg, name)
2618 self.wanted_names.append(full_name)
2619 self.needed_locks = {
2620 locking.LEVEL_NODE: [],
2621 locking.LEVEL_INSTANCE: self.wanted_names,
2623 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2625 self.wanted_names = None
2626 self.needed_locks = {
2627 locking.LEVEL_NODE: locking.ALL_SET,
2628 locking.LEVEL_INSTANCE: locking.ALL_SET,
2630 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2632 def DeclareLocks(self, level):
2633 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2634 self._LockInstancesNodes(primary_only=True)
2636 def CheckPrereq(self):
2637 """Check prerequisites.
2639 This only checks the optional instance list against the existing names.
2642 if self.wanted_names is None:
2643 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2645 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2646 in self.wanted_names]
2648 def _EnsureChildSizes(self, disk):
2649 """Ensure children of the disk have the needed disk size.
2651 This is valid mainly for DRBD8 and fixes an issue where the
2652 children have a smaller disk size.
2654 @param disk: an L{ganeti.objects.Disk} object
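@return: True if a child disk size had to be adjusted (in which case the
    configuration needs to be updated), False otherwise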
2657 if disk.dev_type == constants.LD_DRBD8:
2658 assert disk.children, "Empty children for DRBD8?"
2659 fchild = disk.children[0]
2660 mismatch = fchild.size < disk.size
2662 self.LogInfo("Child disk has size %d, parent %d, fixing",
2663 fchild.size, disk.size)
2664 fchild.size = disk.size
2666 # and we recurse on this child only, not on the metadev
2667 return self._EnsureChildSizes(fchild) or mismatch
2671 def Exec(self, feedback_fn):
2672 """Verify the size of cluster disks.
2675 # TODO: check child disks too
2676 # TODO: check differences in size between primary/secondary nodes
2678 for instance in self.wanted_instances:
2679 pnode = instance.primary_node
2680 if pnode not in per_node_disks:
2681 per_node_disks[pnode] = []
2682 for idx, disk in enumerate(instance.disks):
2683 per_node_disks[pnode].append((instance, idx, disk))
2686 for node, dskl in per_node_disks.items():
2687 newl = [v[2].Copy() for v in dskl]
2689 self.cfg.SetDiskID(dsk, node)
2690 result = self.rpc.call_blockdev_getsize(node, newl)
2692 self.LogWarning("Failure in blockdev_getsize call to node"
2693 " %s, ignoring", node)
2695 if len(result.payload) != len(dskl):
2696 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2697 " result.payload=%s", node, len(dskl), result.payload)
2698 self.LogWarning("Invalid result from node %s, ignoring node results",
2701 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2703 self.LogWarning("Disk %d of instance %s did not return size"
2704 " information, ignoring", idx, instance.name)
2706 if not isinstance(size, (int, long)):
2707 self.LogWarning("Disk %d of instance %s did not return valid"
2708 " size information, ignoring", idx, instance.name)
2711 if size != disk.size:
2712 self.LogInfo("Disk %d of instance %s has mismatched size,"
2713 " correcting: recorded %d, actual %d", idx,
2714 instance.name, disk.size, size)
2716 self.cfg.Update(instance, feedback_fn)
2717 changed.append((instance.name, idx, size))
2718 if self._EnsureChildSizes(disk):
2719 self.cfg.Update(instance, feedback_fn)
2720 changed.append((instance.name, idx, disk.size))
2724 class LUClusterRename(LogicalUnit):
2725 """Rename the cluster.
2728 HPATH = "cluster-rename"
2729 HTYPE = constants.HTYPE_CLUSTER
2731 def BuildHooksEnv(self):
2736 "OP_TARGET": self.cfg.GetClusterName(),
2737 "NEW_NAME": self.op.name,
2740 def BuildHooksNodes(self):
2741 """Build hooks nodes.
2744 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2746 def CheckPrereq(self):
2747 """Verify that the passed name is a valid one.
2750 hostname = netutils.GetHostname(name=self.op.name,
2751 family=self.cfg.GetPrimaryIPFamily())
2753 new_name = hostname.name
2754 self.ip = new_ip = hostname.ip
2755 old_name = self.cfg.GetClusterName()
2756 old_ip = self.cfg.GetMasterIP()
2757 if new_name == old_name and new_ip == old_ip:
2758 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2759 " cluster has changed",
2761 if new_ip != old_ip:
2762 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2763 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2764 " reachable on the network" %
2765 new_ip, errors.ECODE_NOTUNIQUE)
2767 self.op.name = new_name
2769 def Exec(self, feedback_fn):
2770 """Rename the cluster.
2773 clustername = self.op.name
2776 # shutdown the master IP
2777 master = self.cfg.GetMasterNode()
2778 result = self.rpc.call_node_stop_master(master, False)
2779 result.Raise("Could not disable the master role")
2782 cluster = self.cfg.GetClusterInfo()
2783 cluster.cluster_name = clustername
2784 cluster.master_ip = ip
2785 self.cfg.Update(cluster, feedback_fn)
2787 # update the known hosts file
2788 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2789 node_list = self.cfg.GetOnlineNodeList()
2791 node_list.remove(master)
2794 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2796 result = self.rpc.call_node_start_master(master, False, False)
2797 msg = result.fail_msg
2799 self.LogWarning("Could not re-enable the master role on"
2800 " the master, please restart manually: %s", msg)
2805 class LUClusterSetParams(LogicalUnit):
2806 """Change the parameters of the cluster.
2809 HPATH = "cluster-modify"
2810 HTYPE = constants.HTYPE_CLUSTER
2813 def CheckArguments(self):
2817 if self.op.uid_pool:
2818 uidpool.CheckUidPool(self.op.uid_pool)
2820 if self.op.add_uids:
2821 uidpool.CheckUidPool(self.op.add_uids)
2823 if self.op.remove_uids:
2824 uidpool.CheckUidPool(self.op.remove_uids)
2826 def ExpandNames(self):
2827 # FIXME: in the future maybe other cluster params won't require checking on
2828 # all nodes to be modified.
2829 self.needed_locks = {
2830 locking.LEVEL_NODE: locking.ALL_SET,
2832 self.share_locks[locking.LEVEL_NODE] = 1
2834 def BuildHooksEnv(self):
2839 "OP_TARGET": self.cfg.GetClusterName(),
2840 "NEW_VG_NAME": self.op.vg_name,
2843 def BuildHooksNodes(self):
2844 """Build hooks nodes.
2847 mn = self.cfg.GetMasterNode()
2850 def CheckPrereq(self):
2851 """Check prerequisites.
2853 This checks whether the given params don't conflict and
2854 if the given volume group is valid.
2857 if self.op.vg_name is not None and not self.op.vg_name:
2858 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2859 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2860 " instances exist", errors.ECODE_INVAL)
2862 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2863 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2864 raise errors.OpPrereqError("Cannot disable drbd helper while"
2865 " drbd-based instances exist",
2868 node_list = self.acquired_locks[locking.LEVEL_NODE]
2870 # if vg_name not None, checks given volume group on all nodes
2872 vglist = self.rpc.call_vg_list(node_list)
2873 for node in node_list:
2874 msg = vglist[node].fail_msg
2876 # ignoring down node
2877 self.LogWarning("Error while gathering data on node %s"
2878 " (ignoring node): %s", node, msg)
2880 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2882 constants.MIN_VG_SIZE)
2884 raise errors.OpPrereqError("Error on node '%s': %s" %
2885 (node, vgstatus), errors.ECODE_ENVIRON)
2887 if self.op.drbd_helper:
2888 # checks given drbd helper on all nodes
2889 helpers = self.rpc.call_drbd_helper(node_list)
2890 for node in node_list:
2891 ninfo = self.cfg.GetNodeInfo(node)
2893 self.LogInfo("Not checking drbd helper on offline node %s", node)
2895 msg = helpers[node].fail_msg
2897 raise errors.OpPrereqError("Error checking drbd helper on node"
2898 " '%s': %s" % (node, msg),
2899 errors.ECODE_ENVIRON)
2900 node_helper = helpers[node].payload
2901 if node_helper != self.op.drbd_helper:
2902 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2903 (node, node_helper), errors.ECODE_ENVIRON)
2905 self.cluster = cluster = self.cfg.GetClusterInfo()
2906 # validate params changes
2907 if self.op.beparams:
2908 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2909 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2911 if self.op.ndparams:
2912 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2913 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2915 if self.op.nicparams:
2916 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2917 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2918 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2921 # check all instances for consistency
2922 for instance in self.cfg.GetAllInstancesInfo().values():
2923 for nic_idx, nic in enumerate(instance.nics):
2924 params_copy = copy.deepcopy(nic.nicparams)
2925 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2927 # check parameter syntax
2929 objects.NIC.CheckParameterSyntax(params_filled)
2930 except errors.ConfigurationError, err:
2931 nic_errors.append("Instance %s, nic/%d: %s" %
2932 (instance.name, nic_idx, err))
2934 # if we're moving instances to routed, check that they have an ip
2935 target_mode = params_filled[constants.NIC_MODE]
2936 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2937 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2938 (instance.name, nic_idx))
2940 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2941 "\n".join(nic_errors))
2943 # hypervisor list/parameters
2944 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2945 if self.op.hvparams:
2946 for hv_name, hv_dict in self.op.hvparams.items():
2947 if hv_name not in self.new_hvparams:
2948 self.new_hvparams[hv_name] = hv_dict
2950 self.new_hvparams[hv_name].update(hv_dict)
2952 # os hypervisor parameters
2953 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2955 for os_name, hvs in self.op.os_hvp.items():
2956 if os_name not in self.new_os_hvp:
2957 self.new_os_hvp[os_name] = hvs
2959 for hv_name, hv_dict in hvs.items():
2960 if hv_name not in self.new_os_hvp[os_name]:
2961 self.new_os_hvp[os_name][hv_name] = hv_dict
2963 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2966 self.new_osp = objects.FillDict(cluster.osparams, {})
2967 if self.op.osparams:
2968 for os_name, osp in self.op.osparams.items():
2969 if os_name not in self.new_osp:
2970 self.new_osp[os_name] = {}
2972 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2975 if not self.new_osp[os_name]:
2976 # we removed all parameters
2977 del self.new_osp[os_name]
2979 # check the parameter validity (remote check)
2980 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2981 os_name, self.new_osp[os_name])
2983 # changes to the hypervisor list
2984 if self.op.enabled_hypervisors is not None:
2985 self.hv_list = self.op.enabled_hypervisors
2986 for hv in self.hv_list:
2987 # if the hypervisor doesn't already exist in the cluster
2988 # hvparams, we initialize it to empty, and then (in both
2989 # cases) we make sure to fill the defaults, as we might not
2990 # have a complete defaults list if the hypervisor wasn't enabled before
2992 if hv not in new_hvp:
2994 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2995 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2997 self.hv_list = cluster.enabled_hypervisors
2999 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3000 # either the enabled list has changed, or the parameters have, validate
3001 for hv_name, hv_params in self.new_hvparams.items():
3002 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3003 (self.op.enabled_hypervisors and
3004 hv_name in self.op.enabled_hypervisors)):
3005 # either this is a new hypervisor, or its parameters have changed
3006 hv_class = hypervisor.GetHypervisor(hv_name)
3007 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3008 hv_class.CheckParameterSyntax(hv_params)
3009 _CheckHVParams(self, node_list, hv_name, hv_params)
3012 # no need to check any newly-enabled hypervisors, since the
3013 # defaults have already been checked in the above code-block
3014 for os_name, os_hvp in self.new_os_hvp.items():
3015 for hv_name, hv_params in os_hvp.items():
3016 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3017 # we need to fill in the new os_hvp on top of the actual hv_p
3018 cluster_defaults = self.new_hvparams.get(hv_name, {})
3019 new_osp = objects.FillDict(cluster_defaults, hv_params)
3020 hv_class = hypervisor.GetHypervisor(hv_name)
3021 hv_class.CheckParameterSyntax(new_osp)
3022 _CheckHVParams(self, node_list, hv_name, new_osp)
3024 if self.op.default_iallocator:
3025 alloc_script = utils.FindFile(self.op.default_iallocator,
3026 constants.IALLOCATOR_SEARCH_PATH,
3028 if alloc_script is None:
3029 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3030 " specified" % self.op.default_iallocator,
3033 def Exec(self, feedback_fn):
3034 """Change the parameters of the cluster.
3037 if self.op.vg_name is not None:
3038 new_volume = self.op.vg_name
3041 if new_volume != self.cfg.GetVGName():
3042 self.cfg.SetVGName(new_volume)
3044 feedback_fn("Cluster LVM configuration already in desired"
3045 " state, not changing")
3046 if self.op.drbd_helper is not None:
3047 new_helper = self.op.drbd_helper
3050 if new_helper != self.cfg.GetDRBDHelper():
3051 self.cfg.SetDRBDHelper(new_helper)
3053 feedback_fn("Cluster DRBD helper already in desired state,"
3055 if self.op.hvparams:
3056 self.cluster.hvparams = self.new_hvparams
3058 self.cluster.os_hvp = self.new_os_hvp
3059 if self.op.enabled_hypervisors is not None:
3060 self.cluster.hvparams = self.new_hvparams
3061 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3062 if self.op.beparams:
3063 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3064 if self.op.nicparams:
3065 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3066 if self.op.osparams:
3067 self.cluster.osparams = self.new_osp
3068 if self.op.ndparams:
3069 self.cluster.ndparams = self.new_ndparams
3071 if self.op.candidate_pool_size is not None:
3072 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3073 # we need to update the pool size here, otherwise the save will fail
3074 _AdjustCandidatePool(self, [])
3076 if self.op.maintain_node_health is not None:
3077 self.cluster.maintain_node_health = self.op.maintain_node_health
3079 if self.op.prealloc_wipe_disks is not None:
3080 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3082 if self.op.add_uids is not None:
3083 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3085 if self.op.remove_uids is not None:
3086 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3088 if self.op.uid_pool is not None:
3089 self.cluster.uid_pool = self.op.uid_pool
3091 if self.op.default_iallocator is not None:
3092 self.cluster.default_iallocator = self.op.default_iallocator
3094 if self.op.reserved_lvs is not None:
3095 self.cluster.reserved_lvs = self.op.reserved_lvs
3097 def helper_os(aname, mods, desc):
3099 lst = getattr(self.cluster, aname)
3100 for key, val in mods:
3101 if key == constants.DDM_ADD:
3103 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3106 elif key == constants.DDM_REMOVE:
3110 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3112 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3114 if self.op.hidden_os:
3115 helper_os("hidden_os", self.op.hidden_os, "hidden")
3117 if self.op.blacklisted_os:
3118 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3120 if self.op.master_netdev:
3121 master = self.cfg.GetMasterNode()
3122 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3123 self.cluster.master_netdev)
3124 result = self.rpc.call_node_stop_master(master, False)
3125 result.Raise("Could not disable the master ip")
3126 feedback_fn("Changing master_netdev from %s to %s" %
3127 (self.cluster.master_netdev, self.op.master_netdev))
3128 self.cluster.master_netdev = self.op.master_netdev
3130 self.cfg.Update(self.cluster, feedback_fn)
3132 if self.op.master_netdev:
3133 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3134 self.op.master_netdev)
3135 result = self.rpc.call_node_start_master(master, False, False)
3137 self.LogWarning("Could not re-enable the master ip on"
3138 " the master, please restart manually: %s",
3142 def _UploadHelper(lu, nodes, fname):
3143 """Helper for uploading a file and showing warnings.
3146 if os.path.exists(fname):
3147 result = lu.rpc.call_upload_file(nodes, fname)
3148 for to_node, to_result in result.items():
3149 msg = to_result.fail_msg
3151 msg = ("Copy of file %s to node %s failed: %s" %
3152 (fname, to_node, msg))
3153 lu.proc.LogWarning(msg)
3156 def _ComputeAncillaryFiles(cluster, redist):
3157 """Compute files external to Ganeti which need to be consistent.
3159 @type redist: boolean
3160 @param redist: Whether to include files which need to be redistributed
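@rtype: tuple
@return: (files_all, files_all_opt, files_mc, files_vm) sets of file names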
3163 # Compute files for all nodes
3165 constants.SSH_KNOWN_HOSTS_FILE,
3166 constants.CONFD_HMAC_KEY,
3167 constants.CLUSTER_DOMAIN_SECRET_FILE,
3171 files_all.update(constants.ALL_CERT_FILES)
3172 files_all.update(ssconf.SimpleStore().GetFileList())
3174 if cluster.modify_etc_hosts:
3175 files_all.add(constants.ETC_HOSTS)
3177 # Files which must either exist on all nodes or on none
3178 files_all_opt = set([
3179 constants.RAPI_USERS_FILE,
3182 # Files which should only be on master candidates
3185 files_mc.add(constants.CLUSTER_CONF_FILE)
3187 # Files which should only be on VM-capable nodes
3188 files_vm = set(filename
3189 for hv_name in cluster.enabled_hypervisors
3190 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3192 # Filenames must be unique
3193 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3194 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3195 "Found file listed in more than one file list"
3197 return (files_all, files_all_opt, files_mc, files_vm)
3200 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3201 """Distribute additional files which are part of the cluster configuration.
3203 ConfigWriter takes care of distributing the config and ssconf files, but
3204 there are more files which should be distributed to all nodes. This function
3205 makes sure those are copied.
3207 @param lu: calling logical unit
3208 @param additional_nodes: list of nodes not in the config to distribute to
3209 @type additional_vm: boolean
3210 @param additional_vm: whether the additional nodes are vm-capable or not
3213 # Gather target nodes
3214 cluster = lu.cfg.GetClusterInfo()
3215 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3217 online_nodes = lu.cfg.GetOnlineNodeList()
3218 vm_nodes = lu.cfg.GetVmCapableNodeList()
3220 if additional_nodes is not None:
3221 online_nodes.extend(additional_nodes)
3223 vm_nodes.extend(additional_nodes)
3225 # Never distribute to master node
3226 for nodelist in [online_nodes, vm_nodes]:
3227 if master_info.name in nodelist:
3228 nodelist.remove(master_info.name)
3231 (files_all, files_all_opt, files_mc, files_vm) = \
3232 _ComputeAncillaryFiles(cluster, True)
3234 # Never re-distribute configuration file from here
3235 assert not (constants.CLUSTER_CONF_FILE in files_all or
3236 constants.CLUSTER_CONF_FILE in files_vm)
3237 assert not files_mc, "Master candidates not handled in this function"
3240 (online_nodes, files_all),
3241 (online_nodes, files_all_opt),
3242 (vm_nodes, files_vm),
3246 for (node_list, files) in filemap:
3248 _UploadHelper(lu, node_list, fname)
3251 class LUClusterRedistConf(NoHooksLU):
3252 """Force the redistribution of cluster configuration.
3254 This is a very simple LU.
3259 def ExpandNames(self):
3260 self.needed_locks = {
3261 locking.LEVEL_NODE: locking.ALL_SET,
3263 self.share_locks[locking.LEVEL_NODE] = 1
3265 def Exec(self, feedback_fn):
3266 """Redistribute the configuration.
3269 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3270 _RedistributeAncillaryFiles(self)
3273 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3274 """Sleep and poll for an instance's disk to sync.
3277 if not instance.disks or disks is not None and not disks:
3280 disks = _ExpandCheckDisks(instance, disks)
3283 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3285 node = instance.primary_node
3288 lu.cfg.SetDiskID(dev, node)
3290 # TODO: Convert to utils.Retry
3293 degr_retries = 10 # in seconds, as we sleep 1 second each time
3297 cumul_degraded = False
3298 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3299 msg = rstats.fail_msg
3301 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3304 raise errors.RemoteError("Can't contact node %s for mirror data,"
3305 " aborting." % node)
3308 rstats = rstats.payload
3310 for i, mstat in enumerate(rstats):
3312 lu.LogWarning("Can't compute data for node %s/%s",
3313 node, disks[i].iv_name)
3316 cumul_degraded = (cumul_degraded or
3317 (mstat.is_degraded and mstat.sync_percent is None))
3318 if mstat.sync_percent is not None:
3320 if mstat.estimated_time is not None:
3321 rem_time = ("%s remaining (estimated)" %
3322 utils.FormatSeconds(mstat.estimated_time))
3323 max_time = mstat.estimated_time
3325 rem_time = "no time estimate"
3326 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3327 (disks[i].iv_name, mstat.sync_percent, rem_time))
3329 # if we're done but degraded, let's do a few small retries, to
3330 # make sure we see a stable and not transient situation; therefore
3331 # we force restart of the loop
3332 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3333 logging.info("Degraded disks found, %d retries left", degr_retries)
3341 time.sleep(min(60, max_time))
3344 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3345 return not cumul_degraded
3348 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3349 """Check that mirrors are not degraded.
3351 The ldisk parameter, if True, will change the test from the
3352 is_degraded attribute (which represents overall non-ok status for
3353 the device(s)) to the ldisk (representing the local storage status).
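@return: True if the device (and, recursively, its children) looks consistent
    on the given node, False otherwise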
3356 lu.cfg.SetDiskID(dev, node)
3360 if on_primary or dev.AssembleOnSecondary():
3361 rstats = lu.rpc.call_blockdev_find(node, dev)
3362 msg = rstats.fail_msg
3364 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3366 elif not rstats.payload:
3367 lu.LogWarning("Can't find disk on node %s", node)
3371 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3373 result = result and not rstats.payload.is_degraded
3376 for child in dev.children:
3377 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3382 class LUOobCommand(NoHooksLU):
3383 """Logical unit for OOB handling.
3387 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3389 def CheckPrereq(self):
3390 """Check prerequisites.
3393 - the node exists in the configuration
3396 Any errors are signaled by raising errors.OpPrereqError.
3400 self.master_node = self.cfg.GetMasterNode()
3402 assert self.op.power_delay >= 0.0
3404 if self.op.node_names:
3405 if self.op.command in self._SKIP_MASTER:
3406 if self.master_node in self.op.node_names:
3407 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3408 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3410 if master_oob_handler:
3411 additional_text = ("Run '%s %s %s' if you want to operate on the"
3412 " master regardless") % (master_oob_handler,
3416 additional_text = "The master node does not support out-of-band"
3418 raise errors.OpPrereqError(("Operating on the master node %s is not"
3419 " allowed for %s\n%s") %
3420 (self.master_node, self.op.command,
3421 additional_text), errors.ECODE_INVAL)
3423 self.op.node_names = self.cfg.GetNodeList()
3424 if self.op.command in self._SKIP_MASTER:
3425 self.op.node_names.remove(self.master_node)
3427 if self.op.command in self._SKIP_MASTER:
3428 assert self.master_node not in self.op.node_names
3430 for node_name in self.op.node_names:
3431 node = self.cfg.GetNodeInfo(node_name)
3434 raise errors.OpPrereqError("Node %s not found" % node_name,
3437 self.nodes.append(node)
3439 if (not self.op.ignore_status and
3440 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3441 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3442 " not marked offline") % node_name,
3445 def ExpandNames(self):
3446 """Gather locks we need.
3449 if self.op.node_names:
3450 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3451 for name in self.op.node_names]
3452 lock_names = self.op.node_names
3454 lock_names = locking.ALL_SET
3456 self.needed_locks = {
3457 locking.LEVEL_NODE: lock_names,
3460 def Exec(self, feedback_fn):
3461 """Execute OOB and return result if we expect any.
3464 master_node = self.master_node
3467 for idx, node in enumerate(self.nodes):
3468 node_entry = [(constants.RS_NORMAL, node.name)]
3469 ret.append(node_entry)
3471 oob_program = _SupportsOob(self.cfg, node)
3474 node_entry.append((constants.RS_UNAVAIL, None))
3477 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3478 self.op.command, oob_program, node.name)
3479 result = self.rpc.call_run_oob(master_node, oob_program,
3480 self.op.command, node.name,
3484 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3485 node.name, result.fail_msg)
3486 node_entry.append((constants.RS_NODATA, None))
3489 self._CheckPayload(result)
3490 except errors.OpExecError, err:
3491 self.LogWarning("The payload returned by '%s' is not valid: %s",
3493 node_entry.append((constants.RS_NODATA, None))
3495 if self.op.command == constants.OOB_HEALTH:
3496 # For health we should log important events
3497 for item, status in result.payload:
3498 if status in [constants.OOB_STATUS_WARNING,
3499 constants.OOB_STATUS_CRITICAL]:
3500 self.LogWarning("On node '%s' item '%s' has status '%s'",
3501 node.name, item, status)
3503 if self.op.command == constants.OOB_POWER_ON:
3505 elif self.op.command == constants.OOB_POWER_OFF:
3506 node.powered = False
3507 elif self.op.command == constants.OOB_POWER_STATUS:
3508 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3509 if powered != node.powered:
3510 logging.warning(("Recorded power state (%s) of node '%s' does not"
3511 " match actual power state (%s)"), node.powered,
3514 # For configuration changing commands we should update the node
3515 if self.op.command in (constants.OOB_POWER_ON,
3516 constants.OOB_POWER_OFF):
3517 self.cfg.Update(node, feedback_fn)
3519 node_entry.append((constants.RS_NORMAL, result.payload))
3521 if (self.op.command == constants.OOB_POWER_ON and
3522 idx < len(self.nodes) - 1):
3523 time.sleep(self.op.power_delay)
3527 def _CheckPayload(self, result):
3528 """Checks if the payload is valid.
3530 @param result: RPC result
3531 @raises errors.OpExecError: If payload is not valid
3535 if self.op.command == constants.OOB_HEALTH:
3536 if not isinstance(result.payload, list):
3537 errs.append("command 'health' is expected to return a list but got %s" %
3538 type(result.payload))
3540 for item, status in result.payload:
3541 if status not in constants.OOB_STATUSES:
3542 errs.append("health item '%s' has invalid status '%s'" %
3545 if self.op.command == constants.OOB_POWER_STATUS:
3546 if not isinstance(result.payload, dict):
3547 errs.append("power-status is expected to return a dict but got %s" %
3548 type(result.payload))
3550 if self.op.command in [
3551 constants.OOB_POWER_ON,
3552 constants.OOB_POWER_OFF,
3553 constants.OOB_POWER_CYCLE,
3555 if result.payload is not None:
3556 errs.append("%s is expected to not return payload but got '%s'" %
3557 (self.op.command, result.payload))
3560 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3561 utils.CommaJoin(errs))
3563 class _OsQuery(_QueryBase):
3564 FIELDS = query.OS_FIELDS
3566 def ExpandNames(self, lu):
3567 # Lock all nodes in shared mode
3568 # Temporary removal of locks, should be reverted later
3569 # TODO: reintroduce locks when they are lighter-weight
3570 lu.needed_locks = {}
3571 #self.share_locks[locking.LEVEL_NODE] = 1
3572 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3574 # The following variables interact with _QueryBase._GetNames
3576 self.wanted = self.names
3578 self.wanted = locking.ALL_SET
3580 self.do_locking = self.use_locking
3582 def DeclareLocks(self, lu, level):
3586 def _DiagnoseByOS(rlist):
3587 """Remaps a per-node return list into an a per-os per-node dictionary
3589 @param rlist: a map with node names as keys and OS objects as values
3592 @return: a dictionary with osnames as keys and as value another
3593 map, with nodes as keys and tuples of (path, status, diagnose,
3594 variants, parameters, api_versions) as values, eg::
3596 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3597 (/srv/..., False, "invalid api")],
3598 "node2": [(/srv/..., True, "", [], [])]}
3603 # we build here the list of nodes that didn't fail the RPC (at RPC
3604 # level), so that nodes with a non-responding node daemon don't
3605 # make all OSes invalid
3606 good_nodes = [node_name for node_name in rlist
3607 if not rlist[node_name].fail_msg]
3608 for node_name, nr in rlist.items():
3609 if nr.fail_msg or not nr.payload:
3611 for (name, path, status, diagnose, variants,
3612 params, api_versions) in nr.payload:
3613 if name not in all_os:
3614 # build a list of nodes for this os containing empty lists
3615 # for each node in node_list
3617 for nname in good_nodes:
3618 all_os[name][nname] = []
3619 # convert params from [name, help] to (name, help)
3620 params = [tuple(v) for v in params]
3621 all_os[name][node_name].append((path, status, diagnose,
3622 variants, params, api_versions))
3625 def _GetQueryData(self, lu):
3626 """Computes the list of nodes and their attributes.
3629 # Locking is not used
3630 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3632 valid_nodes = [node.name
3633 for node in lu.cfg.GetAllNodesInfo().values()
3634 if not node.offline and node.vm_capable]
3635 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3636 cluster = lu.cfg.GetClusterInfo()
3640 for (os_name, os_data) in pol.items():
3641 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3642 hidden=(os_name in cluster.hidden_os),
3643 blacklisted=(os_name in cluster.blacklisted_os))
3647 api_versions = set()
3649 for idx, osl in enumerate(os_data.values()):
3650 info.valid = bool(info.valid and osl and osl[0][1])
3654 (node_variants, node_params, node_api) = osl[0][3:6]
3657 variants.update(node_variants)
3658 parameters.update(node_params)
3659 api_versions.update(node_api)
3661 # Filter out inconsistent values
3662 variants.intersection_update(node_variants)
3663 parameters.intersection_update(node_params)
3664 api_versions.intersection_update(node_api)
3666 info.variants = list(variants)
3667 info.parameters = list(parameters)
3668 info.api_versions = list(api_versions)
3670 data[os_name] = info
3672 # Prepare data in requested order
3673 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3677 class LUOsDiagnose(NoHooksLU):
3678 """Logical unit for OS diagnose/query.
3684 def _BuildFilter(fields, names):
3685 """Builds a filter for querying OSes.
3688 name_filter = qlang.MakeSimpleFilter("name", names)
3690 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3691 # respective field is not requested
3692 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3693 for fname in ["hidden", "blacklisted"]
3694 if fname not in fields]
3695 if "valid" not in fields:
3696 status_filter.append([qlang.OP_TRUE, "valid"])
3699 status_filter.insert(0, qlang.OP_AND)
3701 status_filter = None
3703 if name_filter and status_filter:
3704 return [qlang.OP_AND, name_filter, status_filter]
3708 return status_filter
3710 def CheckArguments(self):
3711 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3712 self.op.output_fields, False)
3714 def ExpandNames(self):
3715 self.oq.ExpandNames(self)
3717 def Exec(self, feedback_fn):
3718 return self.oq.OldStyleQuery(self)
3721 class LUNodeRemove(LogicalUnit):
3722 """Logical unit for removing a node.
3725 HPATH = "node-remove"
3726 HTYPE = constants.HTYPE_NODE
3728 def BuildHooksEnv(self):
3731 This doesn't run on the target node in the pre phase as a failed
3732 node would then be impossible to remove.
3736 "OP_TARGET": self.op.node_name,
3737 "NODE_NAME": self.op.node_name,
3740 def BuildHooksNodes(self):
3741 """Build hooks nodes.
3744 all_nodes = self.cfg.GetNodeList()
3746 all_nodes.remove(self.op.node_name)
3748 logging.warning("Node '%s', which is about to be removed, was not found"
3749 " in the list of all nodes", self.op.node_name)
3750 return (all_nodes, all_nodes)
3752 def CheckPrereq(self):
3753 """Check prerequisites.
3756 - the node exists in the configuration
3757 - it does not have primary or secondary instances
3758 - it's not the master
3760 Any errors are signaled by raising errors.OpPrereqError.
3763 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3764 node = self.cfg.GetNodeInfo(self.op.node_name)
3765 assert node is not None
3767 instance_list = self.cfg.GetInstanceList()
3769 masternode = self.cfg.GetMasterNode()
3770 if node.name == masternode:
3771 raise errors.OpPrereqError("Node is the master node,"
3772 " you need to failover first.",
3775 for instance_name in instance_list:
3776 instance = self.cfg.GetInstanceInfo(instance_name)
3777 if node.name in instance.all_nodes:
3778 raise errors.OpPrereqError("Instance %s is still running on the node,"
3779 " please remove first." % instance_name,
3781 self.op.node_name = node.name
3784 def Exec(self, feedback_fn):
3785 """Removes the node from the cluster.
3789 logging.info("Stopping the node daemon and removing configs from node %s",
3792 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3794 # Promote nodes to master candidate as needed
3795 _AdjustCandidatePool(self, exceptions=[node.name])
3796 self.context.RemoveNode(node.name)
3798 # Run post hooks on the node before it's removed
3799 _RunPostHook(self, node.name)
3801 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3802 msg = result.fail_msg
3804 self.LogWarning("Errors encountered on the remote node while leaving"
3805 " the cluster: %s", msg)
3807 # Remove node from our /etc/hosts
3808 if self.cfg.GetClusterInfo().modify_etc_hosts:
3809 master_node = self.cfg.GetMasterNode()
3810 result = self.rpc.call_etc_hosts_modify(master_node,
3811 constants.ETC_HOSTS_REMOVE,
3813 result.Raise("Can't update hosts file with new host data")
3814 _RedistributeAncillaryFiles(self)
3817 class _NodeQuery(_QueryBase):
3818 FIELDS = query.NODE_FIELDS
3820 def ExpandNames(self, lu):
3821 lu.needed_locks = {}
3822 lu.share_locks[locking.LEVEL_NODE] = 1
3825 self.wanted = _GetWantedNodes(lu, self.names)
3827 self.wanted = locking.ALL_SET
3829 self.do_locking = (self.use_locking and
3830 query.NQ_LIVE in self.requested_data)
3833 # if we don't request only static fields, we need to lock the nodes
3834 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3836 def DeclareLocks(self, lu, level):
3839 def _GetQueryData(self, lu):
3840 """Computes the list of nodes and their attributes.
3843 all_info = lu.cfg.GetAllNodesInfo()
3845 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3847 # Gather data as requested
3848 if query.NQ_LIVE in self.requested_data:
3849 # filter out non-vm_capable nodes
3850 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3852 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3853 lu.cfg.GetHypervisorType())
3854 live_data = dict((name, nresult.payload)
3855 for (name, nresult) in node_data.items()
3856 if not nresult.fail_msg and nresult.payload)
3860 if query.NQ_INST in self.requested_data:
3861 node_to_primary = dict([(name, set()) for name in nodenames])
3862 node_to_secondary = dict([(name, set()) for name in nodenames])
3864 inst_data = lu.cfg.GetAllInstancesInfo()
3866 for inst in inst_data.values():
3867 if inst.primary_node in node_to_primary:
3868 node_to_primary[inst.primary_node].add(inst.name)
3869 for secnode in inst.secondary_nodes:
3870 if secnode in node_to_secondary:
3871 node_to_secondary[secnode].add(inst.name)
3873 node_to_primary = None
3874 node_to_secondary = None
3876 if query.NQ_OOB in self.requested_data:
3877 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3878 for name, node in all_info.iteritems())
3882 if query.NQ_GROUP in self.requested_data:
3883 groups = lu.cfg.GetAllNodeGroupsInfo()
3887 return query.NodeQueryData([all_info[name] for name in nodenames],
3888 live_data, lu.cfg.GetMasterNode(),
3889 node_to_primary, node_to_secondary, groups,
3890 oob_support, lu.cfg.GetClusterInfo())
3893 class LUNodeQuery(NoHooksLU):
3894 """Logical unit for querying nodes.
3897 # pylint: disable-msg=W0142
3900 def CheckArguments(self):
3901 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3902 self.op.output_fields, self.op.use_locking)
3904 def ExpandNames(self):
3905 self.nq.ExpandNames(self)
3907 def Exec(self, feedback_fn):
3908 return self.nq.OldStyleQuery(self)
3911 class LUNodeQueryvols(NoHooksLU):
3912 """Logical unit for getting volumes on node(s).
3916 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3917 _FIELDS_STATIC = utils.FieldSet("node")
3919 def CheckArguments(self):
3920 _CheckOutputFields(static=self._FIELDS_STATIC,
3921 dynamic=self._FIELDS_DYNAMIC,
3922 selected=self.op.output_fields)
3924 def ExpandNames(self):
3925 self.needed_locks = {}
3926 self.share_locks[locking.LEVEL_NODE] = 1
3927 if not self.op.nodes:
3928 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3930 self.needed_locks[locking.LEVEL_NODE] = \
3931 _GetWantedNodes(self, self.op.nodes)
3933 def Exec(self, feedback_fn):
3934 """Computes the list of nodes and their attributes.
3937 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3938 volumes = self.rpc.call_node_volumes(nodenames)
3940 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3941 in self.cfg.GetInstanceList()]
3943 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3946 for node in nodenames:
3947 nresult = volumes[node]
3950 msg = nresult.fail_msg
3952 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3955 node_vols = nresult.payload[:]
3956 node_vols.sort(key=lambda vol: vol['dev'])
3958 for vol in node_vols:
3960 for field in self.op.output_fields:
3963 elif field == "phys":
3967 elif field == "name":
3969 elif field == "size":
3970 val = int(float(vol['size']))
3971 elif field == "instance":
3973 if node not in lv_by_node[inst]:
3975 if vol['name'] in lv_by_node[inst][node]:
3981 raise errors.ParameterError(field)
3982 node_output.append(str(val))
3984 output.append(node_output)
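# Hedged illustration of the result shape: each row appended to "output" above
# holds one string per requested field for a single logical volume, so with
# output_fields=["node", "name", "size"] a row might look like
# ["node1.example.com", "disk0", "10240"] (the values here are invented purely
# for illustration).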
3989 class LUNodeQueryStorage(NoHooksLU):
3990 """Logical unit for getting information on storage units on node(s).
3993 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3996 def CheckArguments(self):
3997 _CheckOutputFields(static=self._FIELDS_STATIC,
3998 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3999 selected=self.op.output_fields)
4001 def ExpandNames(self):
4002 self.needed_locks = {}
4003 self.share_locks[locking.LEVEL_NODE] = 1
4006 self.needed_locks[locking.LEVEL_NODE] = \
4007 _GetWantedNodes(self, self.op.nodes)
4009 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4011 def Exec(self, feedback_fn):
4012 """Computes the list of nodes and their attributes.
4015 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4017 # Always get name to sort by
4018 if constants.SF_NAME in self.op.output_fields:
4019 fields = self.op.output_fields[:]
4021 fields = [constants.SF_NAME] + self.op.output_fields
4023 # Never ask for node or type as it's only known to the LU
4024 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4025 while extra in fields:
4026 fields.remove(extra)
4028 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4029 name_idx = field_idx[constants.SF_NAME]
4031 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4032 data = self.rpc.call_storage_list(self.nodes,
4033 self.op.storage_type, st_args,
4034 self.op.name, fields)
4038 for node in utils.NiceSort(self.nodes):
4039 nresult = data[node]
4043 msg = nresult.fail_msg
4045 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4048 rows = dict([(row[name_idx], row) for row in nresult.payload])
4050 for name in utils.NiceSort(rows.keys()):
4055 for field in self.op.output_fields:
4056 if field == constants.SF_NODE:
4058 elif field == constants.SF_TYPE:
4059 val = self.op.storage_type
4060 elif field in field_idx:
4061 val = row[field_idx[field]]
4063 raise errors.ParameterError(field)
4072 class _InstanceQuery(_QueryBase):
4073 FIELDS = query.INSTANCE_FIELDS
4075 def ExpandNames(self, lu):
4076 lu.needed_locks = {}
4077 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4078 lu.share_locks[locking.LEVEL_NODE] = 1
4081 self.wanted = _GetWantedInstances(lu, self.names)
4083 self.wanted = locking.ALL_SET
4085 self.do_locking = (self.use_locking and
4086 query.IQ_LIVE in self.requested_data)
4088 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4089 lu.needed_locks[locking.LEVEL_NODE] = []
4090 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4092 def DeclareLocks(self, lu, level):
4093 if level == locking.LEVEL_NODE and self.do_locking:
4094 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4096 def _GetQueryData(self, lu):
4097 """Computes the list of instances and their attributes.
4100 cluster = lu.cfg.GetClusterInfo()
4101 all_info = lu.cfg.GetAllInstancesInfo()
4103 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4105 instance_list = [all_info[name] for name in instance_names]
4106 nodes = frozenset(itertools.chain(*(inst.all_nodes
4107 for inst in instance_list)))
4108 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4111 wrongnode_inst = set()
4113 # Gather data as requested
4114 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4116 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4118 result = node_data[name]
4120 # offline nodes will be in both lists
4121 assert result.fail_msg
4122 offline_nodes.append(name)
4124 bad_nodes.append(name)
4125 elif result.payload:
4126 for inst in result.payload:
4127 if all_info[inst].primary_node == name:
4128 live_data.update(result.payload)
4130 wrongnode_inst.add(inst)
4131 # else no instance is alive
4135 if query.IQ_DISKUSAGE in self.requested_data:
4136 disk_usage = dict((inst.name,
4137 _ComputeDiskSize(inst.disk_template,
4138 [{constants.IDISK_SIZE: disk.size}
4139 for disk in inst.disks]))
4140 for inst in instance_list)
4144 if query.IQ_CONSOLE in self.requested_data:
4146 for inst in instance_list:
4147 if inst.name in live_data:
4148 # Instance is running
4149 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4151 consinfo[inst.name] = None
4152 assert set(consinfo.keys()) == set(instance_names)
4156 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4157 disk_usage, offline_nodes, bad_nodes,
4158 live_data, wrongnode_inst, consinfo)
4161 class LUQuery(NoHooksLU):
4162 """Query for resources/items of a certain kind.
4165 # pylint: disable-msg=W0142
4168 def CheckArguments(self):
4169 qcls = _GetQueryImplementation(self.op.what)
4171 self.impl = qcls(self.op.filter, self.op.fields, False)
4173 def ExpandNames(self):
4174 self.impl.ExpandNames(self)
4176 def DeclareLocks(self, level):
4177 self.impl.DeclareLocks(self, level)
4179 def Exec(self, feedback_fn):
4180 return self.impl.NewStyleQuery(self)
4183 class LUQueryFields(NoHooksLU):
4184 """Query for resources/items of a certain kind.
4187 # pylint: disable-msg=W0142
4190 def CheckArguments(self):
4191 self.qcls = _GetQueryImplementation(self.op.what)
4193 def ExpandNames(self):
4194 self.needed_locks = {}
4196 def Exec(self, feedback_fn):
4197 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4200 class LUNodeModifyStorage(NoHooksLU):
4201 """Logical unit for modifying a storage volume on a node.
4206 def CheckArguments(self):
4207 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4209 storage_type = self.op.storage_type
4212 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4214 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
4215 " modified" % storage_type,
4218 diff = set(self.op.changes.keys()) - modifiable
4220 raise errors.OpPrereqError("The following fields cannot be modified for"
4221 " storage units of type '%s': %r" %
4222 (storage_type, list(diff)),
4225 def ExpandNames(self):
4226 self.needed_locks = {
4227 locking.LEVEL_NODE: self.op.node_name,
4230 def Exec(self, feedback_fn):
4231 """Computes the list of nodes and their attributes.
4234 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4235 result = self.rpc.call_storage_modify(self.op.node_name,
4236 self.op.storage_type, st_args,
4237 self.op.name, self.op.changes)
4238 result.Raise("Failed to modify storage unit '%s' on %s" %
4239 (self.op.name, self.op.node_name))
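# Hedged example of the CheckArguments logic above: assuming
# constants.MODIFIABLE_STORAGE_FIELDS maps LVM physical volumes ("lvm-pv") to
# just the "allocatable" field, an opcode with storage_type="lvm-pv" and
# changes={"allocatable": False} would pass the check, while
# changes={"size": 1024} would raise OpPrereqError because "size" is not in
# the modifiable set.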
4242 class LUNodeAdd(LogicalUnit):
4243 """Logical unit for adding node to the cluster.
4247 HTYPE = constants.HTYPE_NODE
4248 _NFLAGS = ["master_capable", "vm_capable"]
4250 def CheckArguments(self):
4251 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4252 # validate/normalize the node name
4253 self.hostname = netutils.GetHostname(name=self.op.node_name,
4254 family=self.primary_ip_family)
4255 self.op.node_name = self.hostname.name
4256 if self.op.readd and self.op.group:
4257 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4258 " being readded", errors.ECODE_INVAL)
4260 def BuildHooksEnv(self):
4263 This will run on all nodes before, and on all nodes + the new node after.
4267 "OP_TARGET": self.op.node_name,
4268 "NODE_NAME": self.op.node_name,
4269 "NODE_PIP": self.op.primary_ip,
4270 "NODE_SIP": self.op.secondary_ip,
4271 "MASTER_CAPABLE": str(self.op.master_capable),
4272 "VM_CAPABLE": str(self.op.vm_capable),
4275 def BuildHooksNodes(self):
4276 """Build hooks nodes.
4279 # Exclude added node
4280 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4281 post_nodes = pre_nodes + [self.op.node_name, ]
4283 return (pre_nodes, post_nodes)
4285 def CheckPrereq(self):
4286 """Check prerequisites.
4289 - the new node is not already in the config
4291 - its parameters (single/dual homed) match the cluster
4293 Any errors are signaled by raising errors.OpPrereqError.
4297 hostname = self.hostname
4298 node = hostname.name
4299 primary_ip = self.op.primary_ip = hostname.ip
4300 if self.op.secondary_ip is None:
4301 if self.primary_ip_family == netutils.IP6Address.family:
4302 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4303 " IPv4 address must be given as secondary",
4305 self.op.secondary_ip = primary_ip
4307 secondary_ip = self.op.secondary_ip
4308 if not netutils.IP4Address.IsValid(secondary_ip):
4309 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4310 " address" % secondary_ip, errors.ECODE_INVAL)
4312 node_list = cfg.GetNodeList()
4313 if not self.op.readd and node in node_list:
4314 raise errors.OpPrereqError("Node %s is already in the configuration" %
4315 node, errors.ECODE_EXISTS)
4316 elif self.op.readd and node not in node_list:
4317 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4320 self.changed_primary_ip = False
4322 for existing_node_name in node_list:
4323 existing_node = cfg.GetNodeInfo(existing_node_name)
4325 if self.op.readd and node == existing_node_name:
4326 if existing_node.secondary_ip != secondary_ip:
4327 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4328 " address configuration as before",
4330 if existing_node.primary_ip != primary_ip:
4331 self.changed_primary_ip = True
4335 if (existing_node.primary_ip == primary_ip or
4336 existing_node.secondary_ip == primary_ip or
4337 existing_node.primary_ip == secondary_ip or
4338 existing_node.secondary_ip == secondary_ip):
4339 raise errors.OpPrereqError("New node ip address(es) conflict with"
4340 " existing node %s" % existing_node.name,
4341 errors.ECODE_NOTUNIQUE)
4343 # After this 'if' block, None is no longer a valid value for the
4344 # _capable op attributes
4346 old_node = self.cfg.GetNodeInfo(node)
4347 assert old_node is not None, "Can't retrieve locked node %s" % node
4348 for attr in self._NFLAGS:
4349 if getattr(self.op, attr) is None:
4350 setattr(self.op, attr, getattr(old_node, attr))
4352 for attr in self._NFLAGS:
4353 if getattr(self.op, attr) is None:
4354 setattr(self.op, attr, True)
4356 if self.op.readd and not self.op.vm_capable:
4357 pri, sec = cfg.GetNodeInstances(node)
4359 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4360 " flag set to false, but it already holds"
4361 " instances" % node,
4364 # check that the type of the node (single versus dual homed) is the
4365 # same as for the master
4366 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4367 master_singlehomed = myself.secondary_ip == myself.primary_ip
4368 newbie_singlehomed = secondary_ip == primary_ip
4369 if master_singlehomed != newbie_singlehomed:
4370 if master_singlehomed:
4371 raise errors.OpPrereqError("The master has no secondary ip but the"
4372 " new node has one",
4375 raise errors.OpPrereqError("The master has a secondary ip but the"
4376 " new node doesn't have one",
4379 # checks reachability
4380 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4381 raise errors.OpPrereqError("Node not reachable by ping",
4382 errors.ECODE_ENVIRON)
4384 if not newbie_singlehomed:
4385 # check reachability from my secondary ip to newbie's secondary ip
4386 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4387 source=myself.secondary_ip):
4388 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4389 " based ping to node daemon port",
4390 errors.ECODE_ENVIRON)
4397 if self.op.master_capable:
4398 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4400 self.master_candidate = False
4403 self.new_node = old_node
4405 node_group = cfg.LookupNodeGroup(self.op.group)
4406 self.new_node = objects.Node(name=node,
4407 primary_ip=primary_ip,
4408 secondary_ip=secondary_ip,
4409 master_candidate=self.master_candidate,
4410 offline=False, drained=False,
4413 if self.op.ndparams:
4414 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4416 def Exec(self, feedback_fn):
4417 """Adds the new node to the cluster.
4420 new_node = self.new_node
4421 node = new_node.name
4423 # We are adding a new node, so we assume it's powered
4424 new_node.powered = True
4426 # for re-adds, reset the offline/drained/master-candidate flags;
4427 # we need to reset here, otherwise offline would prevent RPC calls
4428 # later in the procedure; this also means that if the re-add
4429 # fails, we are left with a non-offlined, broken node
4431 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4432 self.LogInfo("Readding a node, the offline/drained flags were reset")
4433 # if we demote the node, we do cleanup later in the procedure
4434 new_node.master_candidate = self.master_candidate
4435 if self.changed_primary_ip:
4436 new_node.primary_ip = self.op.primary_ip
4438 # copy the master/vm_capable flags
4439 for attr in self._NFLAGS:
4440 setattr(new_node, attr, getattr(self.op, attr))
4442 # notify the user about any possible mc promotion
4443 if new_node.master_candidate:
4444 self.LogInfo("Node will be a master candidate")
4446 if self.op.ndparams:
4447 new_node.ndparams = self.op.ndparams
4449 new_node.ndparams = {}
4451 # check connectivity
4452 result = self.rpc.call_version([node])[node]
4453 result.Raise("Can't get version information from node %s" % node)
4454 if constants.PROTOCOL_VERSION == result.payload:
4455 logging.info("Communication to node %s fine, sw version %s match",
4456 node, result.payload)
4458 raise errors.OpExecError("Version mismatch master version %s,"
4459 " node version %s" %
4460 (constants.PROTOCOL_VERSION, result.payload))
4462 # Add node to our /etc/hosts, and add key to known_hosts
4463 if self.cfg.GetClusterInfo().modify_etc_hosts:
4464 master_node = self.cfg.GetMasterNode()
4465 result = self.rpc.call_etc_hosts_modify(master_node,
4466 constants.ETC_HOSTS_ADD,
4469 result.Raise("Can't update hosts file with new host data")
4471 if new_node.secondary_ip != new_node.primary_ip:
4472 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4475 node_verify_list = [self.cfg.GetMasterNode()]
4476 node_verify_param = {
4477 constants.NV_NODELIST: [node],
4478 # TODO: do a node-net-test as well?
4481 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4482 self.cfg.GetClusterName())
4483 for verifier in node_verify_list:
4484 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4485 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4487 for failed in nl_payload:
4488 feedback_fn("ssh/hostname verification failed"
4489 " (checking from %s): %s" %
4490 (verifier, nl_payload[failed]))
4491 raise errors.OpExecError("ssh/hostname verification failed.")
4494 _RedistributeAncillaryFiles(self)
4495 self.context.ReaddNode(new_node)
4496 # make sure we redistribute the config
4497 self.cfg.Update(new_node, feedback_fn)
4498 # and make sure the new node will not have old files around
4499 if not new_node.master_candidate:
4500 result = self.rpc.call_node_demote_from_mc(new_node.name)
4501 msg = result.fail_msg
4503 self.LogWarning("Node failed to demote itself from master"
4504 " candidate status: %s" % msg)
4506 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4507 additional_vm=self.op.vm_capable)
4508 self.context.AddNode(new_node, self.proc.GetECId())
4511 class LUNodeSetParams(LogicalUnit):
4512 """Modifies the parameters of a node.
4514 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4515 to the node role (as _ROLE_*)
4516 @cvar _R2F: a dictionary from node role to tuples of flags
4517 @cvar _FLAGS: a list of attribute names corresponding to the flags
4520 HPATH = "node-modify"
4521 HTYPE = constants.HTYPE_NODE
4523 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4525 (True, False, False): _ROLE_CANDIDATE,
4526 (False, True, False): _ROLE_DRAINED,
4527 (False, False, True): _ROLE_OFFLINE,
4528 (False, False, False): _ROLE_REGULAR,
4530 _R2F = dict((v, k) for k, v in _F2R.items())
4531 _FLAGS = ["master_candidate", "drained", "offline"]
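# For illustration: a node whose flags are (master_candidate=True,
# drained=False, offline=False) maps to _ROLE_CANDIDATE, while
# (False, False, False) maps to _ROLE_REGULAR; _R2F simply inverts the mapping
# so that a target role can be converted back into the three boolean flags.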
4533 def CheckArguments(self):
4534 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4535 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4536 self.op.master_capable, self.op.vm_capable,
4537 self.op.secondary_ip, self.op.ndparams]
4538 if all_mods.count(None) == len(all_mods):
4539 raise errors.OpPrereqError("Please pass at least one modification",
4541 if all_mods.count(True) > 1:
4542 raise errors.OpPrereqError("Can't set the node into more than one"
4543 " state at the same time",
4546 # Boolean value that tells us whether we might be demoting from MC
4547 self.might_demote = (self.op.master_candidate == False or
4548 self.op.offline == True or
4549 self.op.drained == True or
4550 self.op.master_capable == False)
4552 if self.op.secondary_ip:
4553 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4554 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4555 " address" % self.op.secondary_ip,
4558 self.lock_all = self.op.auto_promote and self.might_demote
4559 self.lock_instances = self.op.secondary_ip is not None
4561 def ExpandNames(self):
4563 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4565 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4567 if self.lock_instances:
4568 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4570 def DeclareLocks(self, level):
4571 # If we have locked all instances, before waiting to lock nodes, release
4572 # all the ones living on nodes unrelated to the current operation.
4573 if level == locking.LEVEL_NODE and self.lock_instances:
4574 instances_release = []
4576 self.affected_instances = []
4577 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4578 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4579 instance = self.context.cfg.GetInstanceInfo(instance_name)
4580 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4581 if i_mirrored and self.op.node_name in instance.all_nodes:
4582 instances_keep.append(instance_name)
4583 self.affected_instances.append(instance)
4585 instances_release.append(instance_name)
4586 if instances_release:
4587 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4588 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4590 def BuildHooksEnv(self):
4593 This runs on the master node.
4597 "OP_TARGET": self.op.node_name,
4598 "MASTER_CANDIDATE": str(self.op.master_candidate),
4599 "OFFLINE": str(self.op.offline),
4600 "DRAINED": str(self.op.drained),
4601 "MASTER_CAPABLE": str(self.op.master_capable),
4602 "VM_CAPABLE": str(self.op.vm_capable),
4605 def BuildHooksNodes(self):
4606 """Build hooks nodes.
4609 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4612 def CheckPrereq(self):
4613 """Check prerequisites.
4615 This only checks the instance list against the existing names.
4618 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4620 if (self.op.master_candidate is not None or
4621 self.op.drained is not None or
4622 self.op.offline is not None):
4623 # we can't change the master's node flags
4624 if self.op.node_name == self.cfg.GetMasterNode():
4625 raise errors.OpPrereqError("The master role can be changed"
4626 " only via master-failover",
4629 if self.op.master_candidate and not node.master_capable:
4630 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4631 " it a master candidate" % node.name,
4634 if self.op.vm_capable == False:
4635 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4637 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4638 " the vm_capable flag" % node.name,
4641 if node.master_candidate and self.might_demote and not self.lock_all:
4642 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4643 # check if after removing the current node, we're missing master candidates
4645 (mc_remaining, mc_should, _) = \
4646 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4647 if mc_remaining < mc_should:
4648 raise errors.OpPrereqError("Not enough master candidates, please"
4649 " pass auto promote option to allow"
4650 " promotion", errors.ECODE_STATE)
4652 self.old_flags = old_flags = (node.master_candidate,
4653 node.drained, node.offline)
4654 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4655 self.old_role = old_role = self._F2R[old_flags]
4657 # Check for ineffective changes
4658 for attr in self._FLAGS:
4659 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4660 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4661 setattr(self.op, attr, None)
4663 # Past this point, any flag change to False means a transition
4664 # away from the respective state, as only real changes are kept
4666 # TODO: We might query the real power state if it supports OOB
4667 if _SupportsOob(self.cfg, node):
4668 if self.op.offline is False and not (node.powered or
4669 self.op.powered == True):
4670 raise errors.OpPrereqError(("Please power on node %s first before you"
4671 " can reset offline state") %
4673 elif self.op.powered is not None:
4674 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4675 " which does not support out-of-band"
4676 " handling") % self.op.node_name)
4678 # If we're being deofflined/drained, we'll MC ourself if needed
4679 if (self.op.drained == False or self.op.offline == False or
4680 (self.op.master_capable and not node.master_capable)):
4681 if _DecideSelfPromotion(self):
4682 self.op.master_candidate = True
4683 self.LogInfo("Auto-promoting node to master candidate")
4685 # If we're no longer master capable, we'll demote ourselves from MC
4686 if self.op.master_capable == False and node.master_candidate:
4687 self.LogInfo("Demoting from master candidate")
4688 self.op.master_candidate = False
4691 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4692 if self.op.master_candidate:
4693 new_role = self._ROLE_CANDIDATE
4694 elif self.op.drained:
4695 new_role = self._ROLE_DRAINED
4696 elif self.op.offline:
4697 new_role = self._ROLE_OFFLINE
4698 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4699 # False is still in new flags, which means we're un-setting (the current) flags
4701 new_role = self._ROLE_REGULAR
4702 else: # no new flags, nothing, keep old role
4705 self.new_role = new_role
4707 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4708 # Trying to transition out of offline status
4709 result = self.rpc.call_version([node.name])[node.name]
4711 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4712 " to report its version: %s" %
4713 (node.name, result.fail_msg),
4716 self.LogWarning("Transitioning node from offline to online state"
4717 " without using re-add. Please make sure the node"
4720 if self.op.secondary_ip:
4721 # Ok even without locking, because this can't be changed by any LU
4722 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4723 master_singlehomed = master.secondary_ip == master.primary_ip
4724 if master_singlehomed and self.op.secondary_ip:
4725 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4726 " homed cluster", errors.ECODE_INVAL)
4729 if self.affected_instances:
4730 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4731 " node has instances (%s) configured"
4732 " to use it" % self.affected_instances)
4734 # On online nodes, check that no instances are running, and that
4735 # the node has the new ip and we can reach it.
4736 for instance in self.affected_instances:
4737 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4739 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4740 if master.name != node.name:
4741 # check reachability from master secondary ip to new secondary ip
4742 if not netutils.TcpPing(self.op.secondary_ip,
4743 constants.DEFAULT_NODED_PORT,
4744 source=master.secondary_ip):
4745 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4746 " based ping to node daemon port",
4747 errors.ECODE_ENVIRON)
4749 if self.op.ndparams:
4750 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4751 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4752 self.new_ndparams = new_ndparams
4754 def Exec(self, feedback_fn):
4759 old_role = self.old_role
4760 new_role = self.new_role
4764 if self.op.ndparams:
4765 node.ndparams = self.new_ndparams
4767 if self.op.powered is not None:
4768 node.powered = self.op.powered
4770 for attr in ["master_capable", "vm_capable"]:
4771 val = getattr(self.op, attr)
4773 setattr(node, attr, val)
4774 result.append((attr, str(val)))
4776 if new_role != old_role:
4777 # Tell the node to demote itself, if no longer MC and not offline
4778 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4779 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4781 self.LogWarning("Node failed to demote itself: %s", msg)
4783 new_flags = self._R2F[new_role]
4784 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4786 result.append((desc, str(nf)))
4787 (node.master_candidate, node.drained, node.offline) = new_flags
4789 # we locked all nodes, we adjust the CP before updating this node
4791 _AdjustCandidatePool(self, [node.name])
4793 if self.op.secondary_ip:
4794 node.secondary_ip = self.op.secondary_ip
4795 result.append(("secondary_ip", self.op.secondary_ip))
4797 # this will trigger configuration file update, if needed
4798 self.cfg.Update(node, feedback_fn)
4800 # this will trigger job queue propagation or cleanup if the mc flag changed
4802 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4803 self.context.ReaddNode(node)
4808 class LUNodePowercycle(NoHooksLU):
4809 """Powercycles a node.
4814 def CheckArguments(self):
4815 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4816 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4817 raise errors.OpPrereqError("The node is the master and the force"
4818 " parameter was not set",
4821 def ExpandNames(self):
4822 """Locking for PowercycleNode.
4824 This is a last-resort option and shouldn't block on other
4825 jobs. Therefore, we grab no locks.
4828 self.needed_locks = {}
4830 def Exec(self, feedback_fn):
4834 result = self.rpc.call_node_powercycle(self.op.node_name,
4835 self.cfg.GetHypervisorType())
4836 result.Raise("Failed to schedule the reboot")
4837 return result.payload
4840 class LUClusterQuery(NoHooksLU):
4841 """Query cluster configuration.
4846 def ExpandNames(self):
4847 self.needed_locks = {}
4849 def Exec(self, feedback_fn):
4850 """Return cluster config.
4853 cluster = self.cfg.GetClusterInfo()
4856 # Filter just for enabled hypervisors
4857 for os_name, hv_dict in cluster.os_hvp.items():
4858 os_hvp[os_name] = {}
4859 for hv_name, hv_params in hv_dict.items():
4860 if hv_name in cluster.enabled_hypervisors:
4861 os_hvp[os_name][hv_name] = hv_params
4863 # Convert ip_family to ip_version
4864 primary_ip_version = constants.IP4_VERSION
4865 if cluster.primary_ip_family == netutils.IP6Address.family:
4866 primary_ip_version = constants.IP6_VERSION
4869 "software_version": constants.RELEASE_VERSION,
4870 "protocol_version": constants.PROTOCOL_VERSION,
4871 "config_version": constants.CONFIG_VERSION,
4872 "os_api_version": max(constants.OS_API_VERSIONS),
4873 "export_version": constants.EXPORT_VERSION,
4874 "architecture": (platform.architecture()[0], platform.machine()),
4875 "name": cluster.cluster_name,
4876 "master": cluster.master_node,
4877 "default_hypervisor": cluster.enabled_hypervisors[0],
4878 "enabled_hypervisors": cluster.enabled_hypervisors,
4879 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4880 for hypervisor_name in cluster.enabled_hypervisors]),
4882 "beparams": cluster.beparams,
4883 "osparams": cluster.osparams,
4884 "nicparams": cluster.nicparams,
4885 "ndparams": cluster.ndparams,
4886 "candidate_pool_size": cluster.candidate_pool_size,
4887 "master_netdev": cluster.master_netdev,
4888 "volume_group_name": cluster.volume_group_name,
4889 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4890 "file_storage_dir": cluster.file_storage_dir,
4891 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4892 "maintain_node_health": cluster.maintain_node_health,
4893 "ctime": cluster.ctime,
4894 "mtime": cluster.mtime,
4895 "uuid": cluster.uuid,
4896 "tags": list(cluster.GetTags()),
4897 "uid_pool": cluster.uid_pool,
4898 "default_iallocator": cluster.default_iallocator,
4899 "reserved_lvs": cluster.reserved_lvs,
4900 "primary_ip_version": primary_ip_version,
4901 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4902 "hidden_os": cluster.hidden_os,
4903 "blacklisted_os": cluster.blacklisted_os,
4909 class LUClusterConfigQuery(NoHooksLU):
4910 """Return configuration values.
4914 _FIELDS_DYNAMIC = utils.FieldSet()
4915 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4916 "watcher_pause", "volume_group_name")
4918 def CheckArguments(self):
4919 _CheckOutputFields(static=self._FIELDS_STATIC,
4920 dynamic=self._FIELDS_DYNAMIC,
4921 selected=self.op.output_fields)
4923 def ExpandNames(self):
4924 self.needed_locks = {}
4926 def Exec(self, feedback_fn):
4927 """Dump a representation of the cluster config to the standard output.
4931 for field in self.op.output_fields:
4932 if field == "cluster_name":
4933 entry = self.cfg.GetClusterName()
4934 elif field == "master_node":
4935 entry = self.cfg.GetMasterNode()
4936 elif field == "drain_flag":
4937 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4938 elif field == "watcher_pause":
4939 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4940 elif field == "volume_group_name":
4941 entry = self.cfg.GetVGName()
4943 raise errors.ParameterError(field)
4944 values.append(entry)
4948 class LUInstanceActivateDisks(NoHooksLU):
4949 """Bring up an instance's disks.
4954 def ExpandNames(self):
4955 self._ExpandAndLockInstance()
4956 self.needed_locks[locking.LEVEL_NODE] = []
4957 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4959 def DeclareLocks(self, level):
4960 if level == locking.LEVEL_NODE:
4961 self._LockInstancesNodes()
4963 def CheckPrereq(self):
4964 """Check prerequisites.
4966 This checks that the instance is in the cluster.
4969 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4970 assert self.instance is not None, \
4971 "Cannot retrieve locked instance %s" % self.op.instance_name
4972 _CheckNodeOnline(self, self.instance.primary_node)
4974 def Exec(self, feedback_fn):
4975 """Activate the disks.
4978 disks_ok, disks_info = \
4979 _AssembleInstanceDisks(self, self.instance,
4980 ignore_size=self.op.ignore_size)
4982 raise errors.OpExecError("Cannot activate block devices")
4987 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4989 """Prepare the block devices for an instance.
4991 This sets up the block devices on all nodes.
4993 @type lu: L{LogicalUnit}
4994 @param lu: the logical unit on whose behalf we execute
4995 @type instance: L{objects.Instance}
4996 @param instance: the instance for whose disks we assemble
4997 @type disks: list of L{objects.Disk} or None
4998 @param disks: which disks to assemble (or all, if None)
4999 @type ignore_secondaries: boolean
5000 @param ignore_secondaries: if true, errors on secondary nodes
5001 won't result in an error return from the function
5002 @type ignore_size: boolean
5003 @param ignore_size: if true, the current known size of the disk
5004 will not be used during the disk activation, useful for cases
5005 when the size is wrong
5006 @return: False if the operation failed, otherwise a list of
5007 (host, instance_visible_name, node_visible_name)
5008 with the mapping from node devices to instance devices
5013 iname = instance.name
5014 disks = _ExpandCheckDisks(instance, disks)
5016 # With the two-pass mechanism we try to reduce the window of
5017 # opportunity for the race condition of switching DRBD to primary
5018 # before the handshake has occurred, but we do not eliminate it
5020 # The proper fix would be to wait (with some limits) until the
5021 # connection has been made and drbd transitions from WFConnection
5022 # into any other network-connected state (Connected, SyncTarget,
5025 # 1st pass, assemble on all nodes in secondary mode
5026 for idx, inst_disk in enumerate(disks):
5027 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5029 node_disk = node_disk.Copy()
5030 node_disk.UnsetSize()
5031 lu.cfg.SetDiskID(node_disk, node)
5032 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5033 msg = result.fail_msg
5035 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5036 " (is_primary=False, pass=1): %s",
5037 inst_disk.iv_name, node, msg)
5038 if not ignore_secondaries:
5041 # FIXME: race condition on drbd migration to primary
5043 # 2nd pass, do only the primary node
5044 for idx, inst_disk in enumerate(disks):
5047 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5048 if node != instance.primary_node:
5051 node_disk = node_disk.Copy()
5052 node_disk.UnsetSize()
5053 lu.cfg.SetDiskID(node_disk, node)
5054 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5055 msg = result.fail_msg
5057 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5058 " (is_primary=True, pass=2): %s",
5059 inst_disk.iv_name, node, msg)
5062 dev_path = result.payload
5064 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5066 # leave the disks configured for the primary node
5067 # this is a workaround that would be fixed better by
5068 # improving the logical/physical id handling
5070 lu.cfg.SetDiskID(disk, instance.primary_node)
5072 return disks_ok, device_info
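# Minimal usage sketch (mirrors LUInstanceActivateDisks.Exec above, shown only
# as an example):
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance,
#                                                 ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")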
5075 def _StartInstanceDisks(lu, instance, force):
5076 """Start the disks of an instance.
5079 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5080 ignore_secondaries=force)
5082 _ShutdownInstanceDisks(lu, instance)
5083 if force is not None and not force:
5084 lu.proc.LogWarning("", hint="If the message above refers to a"
5086 " you can retry the operation using '--force'.")
5087 raise errors.OpExecError("Disk consistency error")
5090 class LUInstanceDeactivateDisks(NoHooksLU):
5091 """Shutdown an instance's disks.
5096 def ExpandNames(self):
5097 self._ExpandAndLockInstance()
5098 self.needed_locks[locking.LEVEL_NODE] = []
5099 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5101 def DeclareLocks(self, level):
5102 if level == locking.LEVEL_NODE:
5103 self._LockInstancesNodes()
5105 def CheckPrereq(self):
5106 """Check prerequisites.
5108 This checks that the instance is in the cluster.
5111 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5112 assert self.instance is not None, \
5113 "Cannot retrieve locked instance %s" % self.op.instance_name
5115 def Exec(self, feedback_fn):
5116 """Deactivate the disks
5119 instance = self.instance
5121 _ShutdownInstanceDisks(self, instance)
5123 _SafeShutdownInstanceDisks(self, instance)
5126 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5127 """Shutdown block devices of an instance.
5129 This function checks if an instance is running, before calling
5130 _ShutdownInstanceDisks.
5133 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5134 _ShutdownInstanceDisks(lu, instance, disks=disks)
5137 def _ExpandCheckDisks(instance, disks):
5138 """Return the instance disks selected by the disks list
5140 @type disks: list of L{objects.Disk} or None
5141 @param disks: selected disks
5142 @rtype: list of L{objects.Disk}
5143 @return: selected instance disks to act on
5147 return instance.disks
5149 if not set(disks).issubset(instance.disks):
5150 raise errors.ProgrammerError("Can only act on disks belonging to the"
5155 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5156 """Shutdown block devices of an instance.
5158 This does the shutdown on all nodes of the instance.
5160 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
5165 disks = _ExpandCheckDisks(instance, disks)
5168 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5169 lu.cfg.SetDiskID(top_disk, node)
5170 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5171 msg = result.fail_msg
5173 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5174 disk.iv_name, node, msg)
5175 if ((node == instance.primary_node and not ignore_primary) or
5176 (node != instance.primary_node and not result.offline)):
5181 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5182 """Checks if a node has enough free memory.
5184 This function checks if a given node has the needed amount of free
5185 memory. In case the node has less memory or we cannot get the
5186 information from the node, this function raises an OpPrereqError
5189 @type lu: C{LogicalUnit}
5190 @param lu: a logical unit from which we get configuration data
5192 @param node: the node to check
5193 @type reason: C{str}
5194 @param reason: string to use in the error message
5195 @type requested: C{int}
5196 @param requested: the amount of memory in MiB to check for
5197 @type hypervisor_name: C{str}
5198 @param hypervisor_name: the hypervisor to ask for memory stats
5199 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5200 we cannot check the node
5203 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5204 nodeinfo[node].Raise("Can't get data from node %s" % node,
5205 prereq=True, ecode=errors.ECODE_ENVIRON)
5206 free_mem = nodeinfo[node].payload.get('memory_free', None)
5207 if not isinstance(free_mem, int):
5208 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5209 " was '%s'" % (node, free_mem),
5210 errors.ECODE_ENVIRON)
5211 if requested > free_mem:
5212 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5213 " needed %s MiB, available %s MiB" %
5214 (node, reason, requested, free_mem),
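# Hedged usage sketch, mirroring the instance-startup path further below:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# where bep is the instance's filled beparams dict.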
5218 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5219 """Checks if nodes have enough free disk space in the all VGs.
5221 This function checks if all given nodes have the needed amount of
5222 free disk. In case any node has less disk or we cannot get the
5223 information from the node, this function raises an OpPrereqError
5226 @type lu: C{LogicalUnit}
5227 @param lu: a logical unit from which we get configuration data
5228 @type nodenames: C{list}
5229 @param nodenames: the list of node names to check
5230 @type req_sizes: C{dict}
5231 @param req_sizes: the hash of vg and corresponding amount of disk in
5233 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5234 or we cannot check the node
5237 for vg, req_size in req_sizes.items():
5238 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
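# Illustrative (hypothetical) shape of req_sizes: a dict mapping volume group
# names to the required free space in MiB, e.g. {"xenvg": 10240} to require
# 10 GiB of free space in the VG "xenvg" on every node in nodenames.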
5241 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5242 """Checks if nodes have enough free disk space in the specified VG.
5244 This function checks if all given nodes have the needed amount of
5245 free disk. In case any node has less disk or we cannot get the
5246 information from the node, this function raises an OpPrereqError
5249 @type lu: C{LogicalUnit}
5250 @param lu: a logical unit from which we get configuration data
5251 @type nodenames: C{list}
5252 @param nodenames: the list of node names to check
5254 @param vg: the volume group to check
5255 @type requested: C{int}
5256 @param requested: the amount of disk in MiB to check for
5257 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5258 or we cannot check the node
5261 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5262 for node in nodenames:
5263 info = nodeinfo[node]
5264 info.Raise("Cannot get current information from node %s" % node,
5265 prereq=True, ecode=errors.ECODE_ENVIRON)
5266 vg_free = info.payload.get("vg_free", None)
5267 if not isinstance(vg_free, int):
5268 raise errors.OpPrereqError("Can't compute free disk space on node"
5269 " %s for vg %s, result was '%s'" %
5270 (node, vg, vg_free), errors.ECODE_ENVIRON)
5271 if requested > vg_free:
5272 raise errors.OpPrereqError("Not enough disk space on target node %s"
5273 " vg %s: required %d MiB, available %d MiB" %
5274 (node, vg, requested, vg_free),
5278 class LUInstanceStartup(LogicalUnit):
5279 """Starts an instance.
5282 HPATH = "instance-start"
5283 HTYPE = constants.HTYPE_INSTANCE
5286 def CheckArguments(self):
5288 if self.op.beparams:
5289 # fill the beparams dict
5290 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5292 def ExpandNames(self):
5293 self._ExpandAndLockInstance()
5295 def BuildHooksEnv(self):
5298 This runs on master, primary and secondary nodes of the instance.
5302 "FORCE": self.op.force,
5305 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5309 def BuildHooksNodes(self):
5310 """Build hooks nodes.
5313 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5316 def CheckPrereq(self):
5317 """Check prerequisites.
5319 This checks that the instance is in the cluster.
5322 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5323 assert self.instance is not None, \
5324 "Cannot retrieve locked instance %s" % self.op.instance_name
5327 if self.op.hvparams:
5328 # check hypervisor parameter syntax (locally)
5329 cluster = self.cfg.GetClusterInfo()
5330 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5331 filled_hvp = cluster.FillHV(instance)
5332 filled_hvp.update(self.op.hvparams)
5333 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5334 hv_type.CheckParameterSyntax(filled_hvp)
5335 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5337 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5339 if self.primary_offline and self.op.ignore_offline_nodes:
5340 self.proc.LogWarning("Ignoring offline primary node")
5342 if self.op.hvparams or self.op.beparams:
5343 self.proc.LogWarning("Overridden parameters are ignored")
5345 _CheckNodeOnline(self, instance.primary_node)
5347 bep = self.cfg.GetClusterInfo().FillBE(instance)
5349 # check bridges existence
5350 _CheckInstanceBridgesExist(self, instance)
5352 remote_info = self.rpc.call_instance_info(instance.primary_node,
5354 instance.hypervisor)
5355 remote_info.Raise("Error checking node %s" % instance.primary_node,
5356 prereq=True, ecode=errors.ECODE_ENVIRON)
5357 if not remote_info.payload: # not running already
5358 _CheckNodeFreeMemory(self, instance.primary_node,
5359 "starting instance %s" % instance.name,
5360 bep[constants.BE_MEMORY], instance.hypervisor)
5362 def Exec(self, feedback_fn):
5363 """Start the instance.
5366 instance = self.instance
5367 force = self.op.force
5369 self.cfg.MarkInstanceUp(instance.name)
5371 if self.primary_offline:
5372 assert self.op.ignore_offline_nodes
5373 self.proc.LogInfo("Primary node offline, marked instance as started")
5375 node_current = instance.primary_node
5377 _StartInstanceDisks(self, instance, force)
5379 result = self.rpc.call_instance_start(node_current, instance,
5380 self.op.hvparams, self.op.beparams)
5381 msg = result.fail_msg
5383 _ShutdownInstanceDisks(self, instance)
5384 raise errors.OpExecError("Could not start instance: %s" % msg)
5387 class LUInstanceReboot(LogicalUnit):
5388 """Reboot an instance.
5391 HPATH = "instance-reboot"
5392 HTYPE = constants.HTYPE_INSTANCE
5395 def ExpandNames(self):
5396 self._ExpandAndLockInstance()
5398 def BuildHooksEnv(self):
5401 This runs on master, primary and secondary nodes of the instance.
5405 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5406 "REBOOT_TYPE": self.op.reboot_type,
5407 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5410 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5414 def BuildHooksNodes(self):
5415 """Build hooks nodes.
5418 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5421 def CheckPrereq(self):
5422 """Check prerequisites.
5424 This checks that the instance is in the cluster.
5427 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5428 assert self.instance is not None, \
5429 "Cannot retrieve locked instance %s" % self.op.instance_name
5431 _CheckNodeOnline(self, instance.primary_node)
5433 # check bridges existence
5434 _CheckInstanceBridgesExist(self, instance)
5436 def Exec(self, feedback_fn):
5437 """Reboot the instance.
5440 instance = self.instance
5441 ignore_secondaries = self.op.ignore_secondaries
5442 reboot_type = self.op.reboot_type
5444 remote_info = self.rpc.call_instance_info(instance.primary_node,
5446 instance.hypervisor)
5447 remote_info.Raise("Error checking node %s" % instance.primary_node)
5448 instance_running = bool(remote_info.payload)
5450 node_current = instance.primary_node
5452 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5453 constants.INSTANCE_REBOOT_HARD]:
5454 for disk in instance.disks:
5455 self.cfg.SetDiskID(disk, node_current)
5456 result = self.rpc.call_instance_reboot(node_current, instance,
5458 self.op.shutdown_timeout)
5459 result.Raise("Could not reboot instance")
5461 if instance_running:
5462 result = self.rpc.call_instance_shutdown(node_current, instance,
5463 self.op.shutdown_timeout)
5464 result.Raise("Could not shutdown instance for full reboot")
5465 _ShutdownInstanceDisks(self, instance)
5467 self.LogInfo("Instance %s was already stopped, starting now",
5469 _StartInstanceDisks(self, instance, ignore_secondaries)
5470 result = self.rpc.call_instance_start(node_current, instance, None, None)
5471 msg = result.fail_msg
5473 _ShutdownInstanceDisks(self, instance)
5474 raise errors.OpExecError("Could not start instance for"
5475 " full reboot: %s" % msg)
5477 self.cfg.MarkInstanceUp(instance.name)
5480 class LUInstanceShutdown(LogicalUnit):
5481 """Shutdown an instance.
5484 HPATH = "instance-stop"
5485 HTYPE = constants.HTYPE_INSTANCE
5488 def ExpandNames(self):
5489 self._ExpandAndLockInstance()
5491 def BuildHooksEnv(self):
5494 This runs on master, primary and secondary nodes of the instance.
5497 env = _BuildInstanceHookEnvByObject(self, self.instance)
5498 env["TIMEOUT"] = self.op.timeout
5501 def BuildHooksNodes(self):
5502 """Build hooks nodes.
5505 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5508 def CheckPrereq(self):
5509 """Check prerequisites.
5511 This checks that the instance is in the cluster.
5514 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5515 assert self.instance is not None, \
5516 "Cannot retrieve locked instance %s" % self.op.instance_name
5518 self.primary_offline = \
5519 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5521 if self.primary_offline and self.op.ignore_offline_nodes:
5522 self.proc.LogWarning("Ignoring offline primary node")
5524 _CheckNodeOnline(self, self.instance.primary_node)
5526 def Exec(self, feedback_fn):
5527 """Shutdown the instance.
5530 instance = self.instance
5531 node_current = instance.primary_node
5532 timeout = self.op.timeout
5534 self.cfg.MarkInstanceDown(instance.name)
5536 if self.primary_offline:
5537 assert self.op.ignore_offline_nodes
5538 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5540 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5541 msg = result.fail_msg
5543 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5545 _ShutdownInstanceDisks(self, instance)
5548 class LUInstanceReinstall(LogicalUnit):
5549 """Reinstall an instance.
5552 HPATH = "instance-reinstall"
5553 HTYPE = constants.HTYPE_INSTANCE
5556 def ExpandNames(self):
5557 self._ExpandAndLockInstance()
5559 def BuildHooksEnv(self):
5562 This runs on master, primary and secondary nodes of the instance.
5565 return _BuildInstanceHookEnvByObject(self, self.instance)
5567 def BuildHooksNodes(self):
5568 """Build hooks nodes.
5571 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5574 def CheckPrereq(self):
5575 """Check prerequisites.
5577 This checks that the instance is in the cluster and is not running.
5580 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5581 assert instance is not None, \
5582 "Cannot retrieve locked instance %s" % self.op.instance_name
5583 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5584 " offline, cannot reinstall")
5585 for node in instance.secondary_nodes:
5586 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5587 " cannot reinstall")
5589 if instance.disk_template == constants.DT_DISKLESS:
5590 raise errors.OpPrereqError("Instance '%s' has no disks" %
5591 self.op.instance_name,
5593 _CheckInstanceDown(self, instance, "cannot reinstall")
5595 if self.op.os_type is not None:
5597 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5598 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5599 instance_os = self.op.os_type
5601 instance_os = instance.os
5603 nodelist = list(instance.all_nodes)
5605 if self.op.osparams:
5606 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5607 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5608 self.os_inst = i_osdict # the new dict (without defaults)
5612 self.instance = instance
5614 def Exec(self, feedback_fn):
5615 """Reinstall the instance.
5618 inst = self.instance
5620 if self.op.os_type is not None:
5621 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5622 inst.os = self.op.os_type
5623 # Write to configuration
5624 self.cfg.Update(inst, feedback_fn)
5626 _StartInstanceDisks(self, inst, None)
5628 feedback_fn("Running the instance OS create scripts...")
5629 # FIXME: pass debug option from opcode to backend
5630 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5631 self.op.debug_level,
5632 osparams=self.os_inst)
5633 result.Raise("Could not install OS for instance %s on node %s" %
5634 (inst.name, inst.primary_node))
5636 _ShutdownInstanceDisks(self, inst)
5639 class LUInstanceRecreateDisks(LogicalUnit):
5640 """Recreate an instance's missing disks.
5643 HPATH = "instance-recreate-disks"
5644 HTYPE = constants.HTYPE_INSTANCE
5647 def ExpandNames(self):
5648 self._ExpandAndLockInstance()
5650 def BuildHooksEnv(self):
5653 This runs on master, primary and secondary nodes of the instance.
5656 return _BuildInstanceHookEnvByObject(self, self.instance)
5658 def BuildHooksNodes(self):
5659 """Build hooks nodes.
5662 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5665 def CheckPrereq(self):
5666 """Check prerequisites.
5668 This checks that the instance is in the cluster and is not running.
5671 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672 assert instance is not None, \
5673 "Cannot retrieve locked instance %s" % self.op.instance_name
5674 _CheckNodeOnline(self, instance.primary_node)
5676 if instance.disk_template == constants.DT_DISKLESS:
5677 raise errors.OpPrereqError("Instance '%s' has no disks" %
5678 self.op.instance_name, errors.ECODE_INVAL)
5679 _CheckInstanceDown(self, instance, "cannot recreate disks")
5681 if not self.op.disks:
5682 self.op.disks = range(len(instance.disks))
5684 for idx in self.op.disks:
5685 if idx >= len(instance.disks):
5686 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5689 self.instance = instance
5691 def Exec(self, feedback_fn):
5692 """Recreate the disks.
5696 for idx, _ in enumerate(self.instance.disks):
5697 if idx not in self.op.disks: # disk idx has not been passed in
5701 _CreateDisks(self, self.instance, to_skip=to_skip)
5704 class LUInstanceRename(LogicalUnit):
5705 """Rename an instance.
5708 HPATH = "instance-rename"
5709 HTYPE = constants.HTYPE_INSTANCE
5711 def CheckArguments(self):
5715 if self.op.ip_check and not self.op.name_check:
5716 # TODO: make the ip check more flexible and not depend on the name check
5717 raise errors.OpPrereqError("Cannot do ip check without a name check",
5720 def BuildHooksEnv(self):
5723 This runs on master, primary and secondary nodes of the instance.
5726 env = _BuildInstanceHookEnvByObject(self, self.instance)
5727 env["INSTANCE_NEW_NAME"] = self.op.new_name
5730 def BuildHooksNodes(self):
5731 """Build hooks nodes.
5734 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5737 def CheckPrereq(self):
5738 """Check prerequisites.
5740 This checks that the instance is in the cluster and is not running.
5743 self.op.instance_name = _ExpandInstanceName(self.cfg,
5744 self.op.instance_name)
5745 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5746 assert instance is not None
5747 _CheckNodeOnline(self, instance.primary_node)
5748 _CheckInstanceDown(self, instance, "cannot rename")
5749 self.instance = instance
5751 new_name = self.op.new_name
5752 if self.op.name_check:
5753 hostname = netutils.GetHostname(name=new_name)
5754 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5755 hostname.name)
5756 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5757 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5758 " same as given hostname '%s'") %
5759 (hostname.name, self.op.new_name),
5760 errors.ECODE_INVAL)
5761 new_name = self.op.new_name = hostname.name
5762 if (self.op.ip_check and
5763 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5764 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5765 (hostname.ip, new_name),
5766 errors.ECODE_NOTUNIQUE)
5768 instance_list = self.cfg.GetInstanceList()
5769 if new_name in instance_list and new_name != instance.name:
5770 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5771 new_name, errors.ECODE_EXISTS)
5773 def Exec(self, feedback_fn):
5774 """Rename the instance.
5777 inst = self.instance
5778 old_name = inst.name
5780 rename_file_storage = False
5781 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5782 self.op.new_name != inst.name):
5783 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5784 rename_file_storage = True
5786 self.cfg.RenameInstance(inst.name, self.op.new_name)
5787 # Change the instance lock. This is definitely safe while we hold the BGL
5788 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5789 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5791 # re-read the instance from the configuration after rename
5792 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5794 if rename_file_storage:
5795 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5796 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5797 old_file_storage_dir,
5798 new_file_storage_dir)
5799 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5800 " (but the instance has been renamed in Ganeti)" %
5801 (inst.primary_node, old_file_storage_dir,
5802 new_file_storage_dir))
5804 _StartInstanceDisks(self, inst, None)
5805 try:
5806 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5807 old_name, self.op.debug_level)
5808 msg = result.fail_msg
5809 if msg:
5810 msg = ("Could not run OS rename script for instance %s on node %s"
5811 " (but the instance has been renamed in Ganeti): %s" %
5812 (inst.name, inst.primary_node, msg))
5813 self.proc.LogWarning(msg)
5814 finally:
5815 _ShutdownInstanceDisks(self, inst)
5817 return inst.name
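# Illustrative sketch only (not used by this module): what a client-side
# opcode for the rename handled above might look like. The opcode class and
# field names are assumptions inferred from the parameters this LU reads
# (self.op.new_name, self.op.name_check, self.op.ip_check); consult
# opcodes.py for the authoritative definition.
def _ExampleInstanceRenameOpcode():  # hypothetical helper, illustration only
  return opcodes.OpInstanceRename(instance_name="inst1.example.com",
                                  new_name="inst2.example.com",
                                  name_check=True, ip_check=False)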
5820 class LUInstanceRemove(LogicalUnit):
5821 """Remove an instance.
5824 HPATH = "instance-remove"
5825 HTYPE = constants.HTYPE_INSTANCE
5828 def ExpandNames(self):
5829 self._ExpandAndLockInstance()
5830 self.needed_locks[locking.LEVEL_NODE] = []
5831 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5833 def DeclareLocks(self, level):
5834 if level == locking.LEVEL_NODE:
5835 self._LockInstancesNodes()
5837 def BuildHooksEnv(self):
5840 This runs on master, primary and secondary nodes of the instance.
5843 env = _BuildInstanceHookEnvByObject(self, self.instance)
5844 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5845 return env
5847 def BuildHooksNodes(self):
5848 """Build hooks nodes.
5851 nl = [self.cfg.GetMasterNode()]
5852 nl_post = list(self.instance.all_nodes) + nl
5853 return (nl, nl_post)
5855 def CheckPrereq(self):
5856 """Check prerequisites.
5858 This checks that the instance is in the cluster.
5861 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5862 assert self.instance is not None, \
5863 "Cannot retrieve locked instance %s" % self.op.instance_name
5865 def Exec(self, feedback_fn):
5866 """Remove the instance.
5869 instance = self.instance
5870 logging.info("Shutting down instance %s on node %s",
5871 instance.name, instance.primary_node)
5873 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5874 self.op.shutdown_timeout)
5875 msg = result.fail_msg
5876 if msg:
5877 if self.op.ignore_failures:
5878 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5879 else:
5880 raise errors.OpExecError("Could not shutdown instance %s on"
5881 " node %s: %s" %
5882 (instance.name, instance.primary_node, msg))
5884 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5887 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5888 """Utility function to remove an instance.
5891 logging.info("Removing block devices for instance %s", instance.name)
5893 if not _RemoveDisks(lu, instance):
5894 if not ignore_failures:
5895 raise errors.OpExecError("Can't remove instance's disks")
5896 feedback_fn("Warning: can't remove instance's disks")
5898 logging.info("Removing instance %s out of cluster config", instance.name)
5900 lu.cfg.RemoveInstance(instance.name)
5902 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5903 "Instance lock removal conflict"
5905 # Remove lock for the instance
5906 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5909 class LUInstanceQuery(NoHooksLU):
5910 """Logical unit for querying instances.
5913 # pylint: disable-msg=W0142
5916 def CheckArguments(self):
5917 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5918 self.op.output_fields, self.op.use_locking)
5920 def ExpandNames(self):
5921 self.iq.ExpandNames(self)
5923 def DeclareLocks(self, level):
5924 self.iq.DeclareLocks(self, level)
5926 def Exec(self, feedback_fn):
5927 return self.iq.OldStyleQuery(self)
5930 class LUInstanceFailover(LogicalUnit):
5931 """Failover an instance.
5934 HPATH = "instance-failover"
5935 HTYPE = constants.HTYPE_INSTANCE
5938 def CheckArguments(self):
5939 """Check the arguments.
5942 self.iallocator = getattr(self.op, "iallocator", None)
5943 self.target_node = getattr(self.op, "target_node", None)
5945 def ExpandNames(self):
5946 self._ExpandAndLockInstance()
5948 if self.op.target_node is not None:
5949 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5951 self.needed_locks[locking.LEVEL_NODE] = []
5952 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5954 ignore_consistency = self.op.ignore_consistency
5955 shutdown_timeout = self.op.shutdown_timeout
5956 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5957 cleanup=False,
5958 iallocator=self.op.iallocator,
5959 target_node=self.op.target_node,
5960 failover=True,
5961 ignore_consistency=ignore_consistency,
5962 shutdown_timeout=shutdown_timeout)
5963 self.tasklets = [self._migrater]
5965 def DeclareLocks(self, level):
5966 if level == locking.LEVEL_NODE:
5967 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5968 if instance.disk_template in constants.DTS_EXT_MIRROR:
5969 if self.op.target_node is None:
5970 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5971 else:
5972 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5973 self.op.target_node]
5974 del self.recalculate_locks[locking.LEVEL_NODE]
5975 else:
5976 self._LockInstancesNodes()
5978 def BuildHooksEnv(self):
5981 This runs on master, primary and secondary nodes of the instance.
5984 instance = self._migrater.instance
5985 source_node = instance.primary_node
5986 target_node = self._migrater.target_node
5987 env = {
5988 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5989 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5990 "OLD_PRIMARY": source_node,
5991 "NEW_PRIMARY": target_node,
5992 }
5994 if instance.disk_template in constants.DTS_INT_MIRROR:
5995 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5996 env["NEW_SECONDARY"] = source_node
5997 else:
5998 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6000 env.update(_BuildInstanceHookEnvByObject(self, instance))
6002 return env
6004 def BuildHooksNodes(self):
6005 """Build hooks nodes.
6008 instance = self._migrater.instance
6009 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6010 return (nl, nl + [instance.primary_node])
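# For orientation, a hedged sketch of the extra hook variables the failover
# LU above ends up exporting for a DRBD-backed instance; the node names and
# the timeout below are invented, only the keys follow BuildHooksEnv. This
# helper is hypothetical and not used by the module itself.
def _ExampleFailoverHookEnv():
  return {
    "IGNORE_CONSISTENCY": False,
    "SHUTDOWN_TIMEOUT": constants.DEFAULT_SHUTDOWN_TIMEOUT,
    "OLD_PRIMARY": "node1.example.com",
    "NEW_PRIMARY": "node2.example.com",
    "OLD_SECONDARY": "node2.example.com",
    "NEW_SECONDARY": "node1.example.com",
    }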
6013 class LUInstanceMigrate(LogicalUnit):
6014 """Migrate an instance.
6016 This is migration without shutting down, compared to the failover,
6017 which is done with shutdown.
6020 HPATH = "instance-migrate"
6021 HTYPE = constants.HTYPE_INSTANCE
6024 def ExpandNames(self):
6025 self._ExpandAndLockInstance()
6027 if self.op.target_node is not None:
6028 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6030 self.needed_locks[locking.LEVEL_NODE] = []
6031 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6033 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6034 cleanup=self.op.cleanup,
6035 iallocator=self.op.iallocator,
6036 target_node=self.op.target_node,
6037 failover=False,
6038 fallback=self.op.allow_failover)
6039 self.tasklets = [self._migrater]
6041 def DeclareLocks(self, level):
6042 if level == locking.LEVEL_NODE:
6043 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6044 if instance.disk_template in constants.DTS_EXT_MIRROR:
6045 if self.op.target_node is None:
6046 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6047 else:
6048 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6049 self.op.target_node]
6050 del self.recalculate_locks[locking.LEVEL_NODE]
6051 else:
6052 self._LockInstancesNodes()
6054 def BuildHooksEnv(self):
6057 This runs on master, primary and secondary nodes of the instance.
6060 instance = self._migrater.instance
6061 source_node = instance.primary_node
6062 target_node = self._migrater.target_node
6063 env = _BuildInstanceHookEnvByObject(self, instance)
6064 env.update({
6065 "MIGRATE_LIVE": self._migrater.live,
6066 "MIGRATE_CLEANUP": self.op.cleanup,
6067 "OLD_PRIMARY": source_node,
6068 "NEW_PRIMARY": target_node,
6069 })
6071 if instance.disk_template in constants.DTS_INT_MIRROR:
6072 env["OLD_SECONDARY"] = target_node
6073 env["NEW_SECONDARY"] = source_node
6074 else:
6075 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6077 return env
6079 def BuildHooksNodes(self):
6080 """Build hooks nodes.
6083 instance = self._migrater.instance
6084 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6085 return (nl, nl + [instance.primary_node])
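# Both LUInstanceFailover and LUInstanceMigrate delegate the actual work to
# TLMigrateInstance; only the flags differ (failover=True plus
# ignore_consistency for a failover, failover=False plus cleanup/fallback for
# a migration). A minimal sketch of the two call shapes, assuming "lu" is the
# owning LogicalUnit; this helper is hypothetical and for illustration only.
def _ExampleMigrationTasklets(lu):
  failover_tl = TLMigrateInstance(lu, "inst1.example.com", cleanup=False,
                                  failover=True, ignore_consistency=False)
  migrate_tl = TLMigrateInstance(lu, "inst1.example.com", cleanup=False,
                                 failover=False, fallback=True)
  return (failover_tl, migrate_tl)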
6088 class LUInstanceMove(LogicalUnit):
6089 """Move an instance by data-copying.
6092 HPATH = "instance-move"
6093 HTYPE = constants.HTYPE_INSTANCE
6096 def ExpandNames(self):
6097 self._ExpandAndLockInstance()
6098 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6099 self.op.target_node = target_node
6100 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6101 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6103 def DeclareLocks(self, level):
6104 if level == locking.LEVEL_NODE:
6105 self._LockInstancesNodes(primary_only=True)
6107 def BuildHooksEnv(self):
6110 This runs on master, primary and secondary nodes of the instance.
6113 env = {
6114 "TARGET_NODE": self.op.target_node,
6115 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6116 }
6117 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6118 return env
6120 def BuildHooksNodes(self):
6121 """Build hooks nodes.
6123 """
6124 nl = [
6125 self.cfg.GetMasterNode(),
6126 self.instance.primary_node,
6127 self.op.target_node,
6128 ]
6129 return (nl, nl)
6131 def CheckPrereq(self):
6132 """Check prerequisites.
6134 This checks that the instance is in the cluster.
6137 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6138 assert self.instance is not None, \
6139 "Cannot retrieve locked instance %s" % self.op.instance_name
6141 node = self.cfg.GetNodeInfo(self.op.target_node)
6142 assert node is not None, \
6143 "Cannot retrieve locked node %s" % self.op.target_node
6145 self.target_node = target_node = node.name
6147 if target_node == instance.primary_node:
6148 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6149 (instance.name, target_node),
6152 bep = self.cfg.GetClusterInfo().FillBE(instance)
6154 for idx, dsk in enumerate(instance.disks):
6155 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6156 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6157 " cannot copy" % idx, errors.ECODE_STATE)
6159 _CheckNodeOnline(self, target_node)
6160 _CheckNodeNotDrained(self, target_node)
6161 _CheckNodeVmCapable(self, target_node)
6163 if instance.admin_up:
6164 # check memory requirements on the secondary node
6165 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6166 instance.name, bep[constants.BE_MEMORY],
6167 instance.hypervisor)
6168 else:
6169 self.LogInfo("Not checking memory on the secondary node as"
6170 " instance will not be started")
6172 # check bridge existance
6173 _CheckInstanceBridgesExist(self, instance, node=target_node)
6175 def Exec(self, feedback_fn):
6176 """Move an instance.
6178 The move is done by shutting it down on its present node, copying
6179 the data over (slow) and starting it on the new node.
6182 instance = self.instance
6184 source_node = instance.primary_node
6185 target_node = self.target_node
6187 self.LogInfo("Shutting down instance %s on source node %s",
6188 instance.name, source_node)
6190 result = self.rpc.call_instance_shutdown(source_node, instance,
6191 self.op.shutdown_timeout)
6192 msg = result.fail_msg
6193 if msg:
6194 if self.op.ignore_consistency:
6195 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6196 " Proceeding anyway. Please make sure node"
6197 " %s is down. Error details: %s",
6198 instance.name, source_node, source_node, msg)
6199 else:
6200 raise errors.OpExecError("Could not shutdown instance %s on"
6201 " node %s: %s" %
6202 (instance.name, source_node, msg))
6204 # create the target disks
6205 try:
6206 _CreateDisks(self, instance, target_node=target_node)
6207 except errors.OpExecError:
6208 self.LogWarning("Device creation failed, reverting...")
6209 try:
6210 _RemoveDisks(self, instance, target_node=target_node)
6211 finally:
6212 self.cfg.ReleaseDRBDMinors(instance.name)
6213 raise
6215 cluster_name = self.cfg.GetClusterInfo().cluster_name
6217 errs = []
6218 # activate, get path, copy the data over
6219 for idx, disk in enumerate(instance.disks):
6220 self.LogInfo("Copying data for disk %d", idx)
6221 result = self.rpc.call_blockdev_assemble(target_node, disk,
6222 instance.name, True, idx)
6223 if result.fail_msg:
6224 self.LogWarning("Can't assemble newly created disk %d: %s",
6225 idx, result.fail_msg)
6226 errs.append(result.fail_msg)
6227 break
6228 dev_path = result.payload
6229 result = self.rpc.call_blockdev_export(source_node, disk,
6230 target_node, dev_path,
6231 cluster_name)
6232 if result.fail_msg:
6233 self.LogWarning("Can't copy data over for disk %d: %s",
6234 idx, result.fail_msg)
6235 errs.append(result.fail_msg)
6236 break
6238 if errs:
6239 self.LogWarning("Some disks failed to copy, aborting")
6240 try:
6241 _RemoveDisks(self, instance, target_node=target_node)
6242 finally:
6243 self.cfg.ReleaseDRBDMinors(instance.name)
6244 raise errors.OpExecError("Errors during disk copy: %s" %
6245 (",".join(errs),))
6247 instance.primary_node = target_node
6248 self.cfg.Update(instance, feedback_fn)
6250 self.LogInfo("Removing the disks on the original node")
6251 _RemoveDisks(self, instance, target_node=source_node)
6253 # Only start the instance if it's marked as up
6254 if instance.admin_up:
6255 self.LogInfo("Starting instance %s on node %s",
6256 instance.name, target_node)
6258 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6259 ignore_secondaries=True)
6260 if not disks_ok:
6261 _ShutdownInstanceDisks(self, instance)
6262 raise errors.OpExecError("Can't activate the instance's disks")
6264 result = self.rpc.call_instance_start(target_node, instance, None, None)
6265 msg = result.fail_msg
6266 if msg:
6267 _ShutdownInstanceDisks(self, instance)
6268 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6269 (instance.name, target_node, msg))
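# The data move above boils down to, per disk: assemble the freshly created
# disk on the target node, then stream the source disk into it over the
# network. A condensed sketch of that inner step, assuming "lu", "instance",
# "disk", the node names and "cluster_name" are set up as in
# LUInstanceMove.Exec; this helper is hypothetical and for illustration only.
def _ExampleCopyOneDisk(lu, instance, disk, idx, source_node, target_node,
                        cluster_name):
  result = lu.rpc.call_blockdev_assemble(target_node, disk,
                                         instance.name, True, idx)
  result.Raise("Can't assemble disk %d on node %s" % (idx, target_node))
  dev_path = result.payload
  result = lu.rpc.call_blockdev_export(source_node, disk, target_node,
                                       dev_path, cluster_name)
  result.Raise("Can't copy disk %d to node %s" % (idx, target_node))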
6272 class LUNodeMigrate(LogicalUnit):
6273 """Migrate all instances from a node.
6276 HPATH = "node-migrate"
6277 HTYPE = constants.HTYPE_NODE
6280 def CheckArguments(self):
6281 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6283 def ExpandNames(self):
6284 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6286 self.needed_locks = {}
6288 # Create tasklets for migrating instances for all instances on this node
6292 self.lock_all_nodes = False
6294 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6295 logging.debug("Migrating instance %s", inst.name)
6296 names.append(inst.name)
6298 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6299 iallocator=self.op.iallocator,
6302 if inst.disk_template in constants.DTS_EXT_MIRROR:
6303 # We need to lock all nodes, as the iallocator will choose the
6304 # destination nodes afterwards
6305 self.lock_all_nodes = True
6307 self.tasklets = tasklets
6309 # Declare node locks
6310 if self.lock_all_nodes:
6311 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6313 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6314 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6316 # Declare instance locks
6317 self.needed_locks[locking.LEVEL_INSTANCE] = names
6319 def DeclareLocks(self, level):
6320 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6321 self._LockInstancesNodes()
6323 def BuildHooksEnv(self):
6326 This runs on the master, the primary and all the secondaries.
6330 "NODE_NAME": self.op.node_name,
6333 def BuildHooksNodes(self):
6334 """Build hooks nodes.
6337 nl = [self.cfg.GetMasterNode()]
6341 class TLMigrateInstance(Tasklet):
6342 """Tasklet class for instance migration.
6345 @ivar live: whether the migration will be done live or non-live;
6346 this variable is initialized only after CheckPrereq has run
6347 @type cleanup: boolean
6348 @ivar cleanup: Whether we clean up from a failed migration
6349 @type iallocator: string
6350 @ivar iallocator: The iallocator used to determine target_node
6351 @type target_node: string
6352 @ivar target_node: If given, the target_node to reallocate the instance to
6353 @type failover: boolean
6354 @ivar failover: Whether operation results in failover or migration
6355 @type fallback: boolean
6356 @ivar fallback: Whether fallback to failover is allowed if migration not
6358 @type ignore_consistency: boolean
6359 @ivar ignore_consistency: Whether we should ignore consistency between source
6361 @type shutdown_timeout: int
6362 @ivar shutdown_timeout: In case of failover timeout of the shutdown
6365 def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6366 target_node=None, failover=False, fallback=False,
6367 ignore_consistency=False,
6368 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6369 """Initializes this class.
6372 Tasklet.__init__(self, lu)
6375 self.instance_name = instance_name
6376 self.cleanup = cleanup
6377 self.live = False # will be overridden later
6378 self.iallocator = iallocator
6379 self.target_node = target_node
6380 self.failover = failover
6381 self.fallback = fallback
6382 self.ignore_consistency = ignore_consistency
6383 self.shutdown_timeout = shutdown_timeout
6385 def CheckPrereq(self):
6386 """Check prerequisites.
6388 This checks that the instance is in the cluster.
6391 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6392 instance = self.cfg.GetInstanceInfo(instance_name)
6393 assert instance is not None
6394 self.instance = instance
6396 if (not self.cleanup and not instance.admin_up and not self.failover and
6397 self.fallback):
6398 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6399 " to failover")
6400 self.failover = True
6402 if instance.disk_template not in constants.DTS_MIRRORED:
6403 if self.failover:
6404 text = "failovers"
6405 else:
6406 text = "migrations"
6407 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6408 " %s" % (instance.disk_template, text),
6409 errors.ECODE_STATE)
6411 if instance.disk_template in constants.DTS_EXT_MIRROR:
6412 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6414 if self.iallocator:
6415 self._RunAllocator()
6416 else:
6417 # self.target_node is already populated, either directly or by the
6418 # iallocator run
6419 target_node = self.target_node
6421 if len(self.lu.tasklets) == 1:
6422 # It is safe to remove locks only when we're the only tasklet in the LU
6423 nodes_keep = [instance.primary_node, self.target_node]
6424 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6425 if node not in nodes_keep]
6426 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6427 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6429 else:
6430 secondary_nodes = instance.secondary_nodes
6431 if not secondary_nodes:
6432 raise errors.ConfigurationError("No secondary node but using"
6433 " %s disk template" %
6434 instance.disk_template)
6435 target_node = secondary_nodes[0]
6436 if self.iallocator or (self.target_node and
6437 self.target_node != target_node):
6438 if self.failover:
6439 text = "failed over"
6440 else:
6441 text = "migrated"
6442 raise errors.OpPrereqError("Instances with disk template %s cannot"
6443 " be %s over to arbitrary nodes"
6444 " (neither an iallocator nor a target"
6445 " node can be passed)" %
6446 (instance.disk_template, text),
6447 errors.ECODE_INVAL)
6449 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6451 # check memory requirements on the secondary node
6452 if not self.failover or instance.admin_up:
6453 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6454 instance.name, i_be[constants.BE_MEMORY],
6455 instance.hypervisor)
6456 else:
6457 self.lu.LogInfo("Not checking memory on the secondary node as"
6458 " instance will not be started")
6460 # check bridge existance
6461 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6463 if not self.cleanup:
6464 _CheckNodeNotDrained(self.lu, target_node)
6465 if not self.failover:
6466 result = self.rpc.call_instance_migratable(instance.primary_node,
6467 instance)
6468 if result.fail_msg and self.fallback:
6469 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6470 " failover")
6471 self.failover = True
6472 else:
6473 result.Raise("Can't migrate, please use failover",
6474 prereq=True, ecode=errors.ECODE_STATE)
6476 assert not (self.failover and self.cleanup)
6478 def _RunAllocator(self):
6479 """Run the allocator based on input opcode.
6482 ial = IAllocator(self.cfg, self.rpc,
6483 mode=constants.IALLOCATOR_MODE_RELOC,
6484 name=self.instance_name,
6485 # TODO See why hail breaks with a single node below
6486 relocate_from=[self.instance.primary_node,
6487 self.instance.primary_node],
6488 )
6490 ial.Run(self.iallocator)
6492 if not ial.success:
6493 raise errors.OpPrereqError("Can't compute nodes using"
6494 " iallocator '%s': %s" %
6495 (self.iallocator, ial.info),
6496 errors.ECODE_NORES)
6497 if len(ial.result) != ial.required_nodes:
6498 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6499 " of nodes (%s), required %s" %
6500 (self.iallocator, len(ial.result),
6501 ial.required_nodes), errors.ECODE_FAULT)
6502 self.target_node = ial.result[0]
6503 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6504 self.instance_name, self.iallocator,
6505 utils.CommaJoin(ial.result))
6507 if not self.failover:
6508 if self.lu.op.live is not None and self.lu.op.mode is not None:
6509 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6510 " parameters are accepted",
6511 errors.ECODE_INVAL)
6512 if self.lu.op.live is not None:
6513 if self.lu.op.live:
6514 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6515 else:
6516 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6517 # reset the 'live' parameter to None so that repeated
6518 # invocations of CheckPrereq do not raise an exception
6519 self.lu.op.live = None
6520 elif self.lu.op.mode is None:
6521 # read the default value from the hypervisor
6522 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6523 skip_globals=False)
6524 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6526 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6527 else:
6528 # Failover is never live
6529 self.live = False
6531 def _WaitUntilSync(self):
6532 """Poll with custom rpc for disk sync.
6534 This uses our own step-based rpc call.
6536 """
6537 self.feedback_fn("* wait until resync is done")
6538 all_done = False
6539 while not all_done:
6540 all_done = True
6541 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6542 self.nodes_ip,
6543 self.instance.disks)
6544 min_percent = 100
6545 for node, nres in result.items():
6546 nres.Raise("Cannot resync disks on node %s" % node)
6547 node_done, node_percent = nres.payload
6548 all_done = all_done and node_done
6549 if node_percent is not None:
6550 min_percent = min(min_percent, node_percent)
6551 if not all_done:
6552 if min_percent < 100:
6553 self.feedback_fn("   - progress: %.1f%%" % min_percent)
6554 time.sleep(2)
6556 def _EnsureSecondary(self, node):
6557 """Demote a node to secondary.
6560 self.feedback_fn("* switching node %s to secondary mode" % node)
6562 for dev in self.instance.disks:
6563 self.cfg.SetDiskID(dev, node)
6565 result = self.rpc.call_blockdev_close(node, self.instance.name,
6566 self.instance.disks)
6567 result.Raise("Cannot change disk to secondary on node %s" % node)
6569 def _GoStandalone(self):
6570 """Disconnect from the network.
6573 self.feedback_fn("* changing into standalone mode")
6574 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6575 self.instance.disks)
6576 for node, nres in result.items():
6577 nres.Raise("Cannot disconnect disks node %s" % node)
6579 def _GoReconnect(self, multimaster):
6580 """Reconnect to the network.
6582 """
6583 if multimaster:
6584 msg = "dual-master"
6585 else:
6586 msg = "single-master"
6587 self.feedback_fn("* changing disks into %s mode" % msg)
6588 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6589 self.instance.disks,
6590 self.instance.name, multimaster)
6591 for node, nres in result.items():
6592 nres.Raise("Cannot change disks config on node %s" % node)
6594 def _ExecCleanup(self):
6595 """Try to cleanup after a failed migration.
6597 The cleanup is done by:
6598 - check that the instance is running only on one node
6599 (and update the config if needed)
6600 - change disks on its secondary node to secondary
6601 - wait until disks are fully synchronized
6602 - disconnect from the network
6603 - change disks into single-master mode
6604 - wait again until disks are fully synchronized
6607 instance = self.instance
6608 target_node = self.target_node
6609 source_node = self.source_node
6611 # check running on only one node
6612 self.feedback_fn("* checking where the instance actually runs"
6613 " (if this hangs, the hypervisor might be in"
6615 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6616 for node, result in ins_l.items():
6617 result.Raise("Can't contact node %s" % node)
6619 runningon_source = instance.name in ins_l[source_node].payload
6620 runningon_target = instance.name in ins_l[target_node].payload
6622 if runningon_source and runningon_target:
6623 raise errors.OpExecError("Instance seems to be running on two nodes,"
6624 " or the hypervisor is confused. You will have"
6625 " to ensure manually that it runs only on one"
6626 " and restart this operation.")
6628 if not (runningon_source or runningon_target):
6629 raise errors.OpExecError("Instance does not seem to be running at all."
6630 " In this case, it's safer to repair by"
6631 " running 'gnt-instance stop' to ensure disk"
6632 " shutdown, and then restarting it.")
6634 if runningon_target:
6635 # the migration has actually succeeded, we need to update the config
6636 self.feedback_fn("* instance running on secondary node (%s),"
6637 " updating config" % target_node)
6638 instance.primary_node = target_node
6639 self.cfg.Update(instance, self.feedback_fn)
6640 demoted_node = source_node
6642 self.feedback_fn("* instance confirmed to be running on its"
6643 " primary node (%s)" % source_node)
6644 demoted_node = target_node
6646 if instance.disk_template in constants.DTS_INT_MIRROR:
6647 self._EnsureSecondary(demoted_node)
6648 try:
6649 self._WaitUntilSync()
6650 except errors.OpExecError:
6651 # we ignore here errors, since if the device is standalone, it
6652 # won't be able to sync
6653 pass
6654 self._GoStandalone()
6655 self._GoReconnect(False)
6656 self._WaitUntilSync()
6658 self.feedback_fn("* done")
6660 def _RevertDiskStatus(self):
6661 """Try to revert the disk status after a failed migration.
6663 """
6664 target_node = self.target_node
6665 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6666 return
6668 try:
6669 self._EnsureSecondary(target_node)
6670 self._GoStandalone()
6671 self._GoReconnect(False)
6672 self._WaitUntilSync()
6673 except errors.OpExecError, err:
6674 self.lu.LogWarning("Migration failed and I can't reconnect the"
6675 " drives: error '%s'\n"
6676 "Please look and recover the instance status" %
6677 str(err))
6679 def _AbortMigration(self):
6680 """Call the hypervisor code to abort a started migration.
6683 instance = self.instance
6684 target_node = self.target_node
6685 migration_info = self.migration_info
6687 abort_result = self.rpc.call_finalize_migration(target_node,
6688 instance,
6689 migration_info,
6690 False)
6691 abort_msg = abort_result.fail_msg
6692 if abort_msg:
6693 logging.error("Aborting migration failed on target node %s: %s",
6694 target_node, abort_msg)
6695 # Don't raise an exception here, as we stil have to try to revert the
6696 # disk status, even if this step failed.
6698 def _ExecMigration(self):
6699 """Migrate an instance.
6701 The migrate is done by:
6702 - change the disks into dual-master mode
6703 - wait until disks are fully synchronized again
6704 - migrate the instance
6705 - change disks on the new secondary node (the old primary) to secondary
6706 - wait until disks are fully synchronized
6707 - change disks into single-master mode
6710 instance = self.instance
6711 target_node = self.target_node
6712 source_node = self.source_node
6714 self.feedback_fn("* checking disk consistency between source and target")
6715 for dev in instance.disks:
6716 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6717 raise errors.OpExecError("Disk %s is degraded or not fully"
6718 " synchronized on target node,"
6719 " aborting migrate." % dev.iv_name)
6721 # First get the migration information from the remote node
6722 result = self.rpc.call_migration_info(source_node, instance)
6723 msg = result.fail_msg
6724 if msg:
6725 log_err = ("Failed fetching source migration information from %s: %s" %
6726 (source_node, msg))
6727 logging.error(log_err)
6728 raise errors.OpExecError(log_err)
6730 self.migration_info = migration_info = result.payload
6732 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6733 # Then switch the disks to master/master mode
6734 self._EnsureSecondary(target_node)
6735 self._GoStandalone()
6736 self._GoReconnect(True)
6737 self._WaitUntilSync()
6739 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6740 result = self.rpc.call_accept_instance(target_node,
6741 instance,
6742 migration_info,
6743 self.nodes_ip[target_node])
6745 msg = result.fail_msg
6746 if msg:
6747 logging.error("Instance pre-migration failed, trying to revert"
6748 " disk status: %s", msg)
6749 self.feedback_fn("Pre-migration failed, aborting")
6750 self._AbortMigration()
6751 self._RevertDiskStatus()
6752 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6753 (instance.name, msg))
6755 self.feedback_fn("* migrating instance to %s" % target_node)
6757 result = self.rpc.call_instance_migrate(source_node, instance,
6758 self.nodes_ip[target_node],
6759 self.live)
6760 msg = result.fail_msg
6761 if msg:
6762 logging.error("Instance migration failed, trying to revert"
6763 " disk status: %s", msg)
6764 self.feedback_fn("Migration failed, aborting")
6765 self._AbortMigration()
6766 self._RevertDiskStatus()
6767 raise errors.OpExecError("Could not migrate instance %s: %s" %
6768 (instance.name, msg))
6771 instance.primary_node = target_node
6772 # distribute new instance config to the other nodes
6773 self.cfg.Update(instance, self.feedback_fn)
6775 result = self.rpc.call_finalize_migration(target_node,
6776 instance,
6777 migration_info,
6778 True)
6779 msg = result.fail_msg
6780 if msg:
6781 logging.error("Instance migration succeeded, but finalization failed:"
6782 " %s", msg)
6783 raise errors.OpExecError("Could not finalize instance migration: %s" %
6784 msg)
6786 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6787 self._EnsureSecondary(source_node)
6788 self._WaitUntilSync()
6789 self._GoStandalone()
6790 self._GoReconnect(False)
6791 self._WaitUntilSync()
6793 self.feedback_fn("* done")
6795 def _ExecFailover(self):
6796 """Failover an instance.
6798 The failover is done by shutting it down on its present node and
6799 starting it on the secondary.
6802 instance = self.instance
6803 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6805 source_node = instance.primary_node
6806 target_node = self.target_node
6808 if instance.admin_up:
6809 self.feedback_fn("* checking disk consistency between source and target")
6810 for dev in instance.disks:
6811 # for drbd, these are drbd over lvm
6812 if not _CheckDiskConsistency(self, dev, target_node, False):
6813 if not self.ignore_consistency:
6814 raise errors.OpExecError("Disk %s is degraded on target node,"
6815 " aborting failover." % dev.iv_name)
6816 else:
6817 self.feedback_fn("* not checking disk consistency as instance is not"
6818 " running")
6820 self.feedback_fn("* shutting down instance on source node")
6821 logging.info("Shutting down instance %s on node %s",
6822 instance.name, source_node)
6824 result = self.rpc.call_instance_shutdown(source_node, instance,
6825 self.shutdown_timeout)
6826 msg = result.fail_msg
6827 if msg:
6828 if self.ignore_consistency or primary_node.offline:
6829 self.lu.LogWarning("Could not shutdown instance %s on node %s."
6830 " Proceeding anyway. Please make sure node"
6831 " %s is down. Error details: %s",
6832 instance.name, source_node, source_node, msg)
6833 else:
6834 raise errors.OpExecError("Could not shutdown instance %s on"
6835 " node %s: %s" %
6836 (instance.name, source_node, msg))
6838 self.feedback_fn("* deactivating the instance's disks on source node")
6839 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6840 raise errors.OpExecError("Can't shut down the instance's disks.")
6842 instance.primary_node = target_node
6843 # distribute new instance config to the other nodes
6844 self.cfg.Update(instance, self.feedback_fn)
6846 # Only start the instance if it's marked as up
6847 if instance.admin_up:
6848 self.feedback_fn("* activating the instance's disks on target node")
6849 logging.info("Starting instance %s on node %s",
6850 instance.name, target_node)
6852 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6853 ignore_secondaries=True)
6854 if not disks_ok:
6855 _ShutdownInstanceDisks(self, instance)
6856 raise errors.OpExecError("Can't activate the instance's disks")
6858 self.feedback_fn("* starting the instance on the target node")
6859 result = self.rpc.call_instance_start(target_node, instance, None, None)
6860 msg = result.fail_msg
6861 if msg:
6862 _ShutdownInstanceDisks(self, instance)
6863 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6864 (instance.name, target_node, msg))
6866 def Exec(self, feedback_fn):
6867 """Perform the migration.
6869 """
6870 self.feedback_fn = feedback_fn
6871 self.source_node = self.instance.primary_node
6873 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6874 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6875 self.target_node = self.instance.secondary_nodes[0]
6876 # Otherwise self.target_node has been populated either
6877 # directly, or through an iallocator.
6879 self.all_nodes = [self.source_node, self.target_node]
6880 self.nodes_ip = {
6881 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6882 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6883 }
6885 if self.failover:
6886 feedback_fn("Failover instance %s" % self.instance.name)
6887 self._ExecFailover()
6888 else:
6889 feedback_fn("Migrating instance %s" % self.instance.name)
6891 if self.cleanup:
6892 return self._ExecCleanup()
6893 else:
6894 return self._ExecMigration()
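# The live/non-live decision in CheckPrereq above reduces to a simple rule:
# an explicit "live" flag is translated into a migration mode, an explicit
# "mode" is used as-is, and otherwise the hypervisor's HV_MIGRATION_MODE
# default applies. A compact restatement of that rule (hypothetical helper,
# illustration only; not used by the tasklet):
def _ExampleResolveMigrationMode(live, mode, hv_default):
  # "live" and "mode" are mutually exclusive; both None means "use default"
  if live is not None and mode is not None:
    raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                               " parameters are accepted", errors.ECODE_INVAL)
  if live is not None:
    if live:
      return constants.HT_MIGRATION_LIVE
    return constants.HT_MIGRATION_NONLIVE
  if mode is not None:
    return mode
  return hv_default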
6897 def _CreateBlockDev(lu, node, instance, device, force_create,
6898 info, force_open):
6899 """Create a tree of block devices on a given node.
6901 If this device type has to be created on secondaries, create it and
6904 If not, just recurse to children keeping the same 'force' value.
6906 @param lu: the lu on whose behalf we execute
6907 @param node: the node on which to create the device
6908 @type instance: L{objects.Instance}
6909 @param instance: the instance which owns the device
6910 @type device: L{objects.Disk}
6911 @param device: the device to create
6912 @type force_create: boolean
6913 @param force_create: whether to force creation of this device; this
6914 will be change to True whenever we find a device which has
6915 CreateOnSecondary() attribute
6916 @param info: the extra 'metadata' we should attach to the device
6917 (this will be represented as a LVM tag)
6918 @type force_open: boolean
6919 @param force_open: this parameter will be passed to the
6920 L{backend.BlockdevCreate} function where it specifies
6921 whether we run on primary or not, and it affects both
6922 the child assembly and the device own Open() execution
6925 if device.CreateOnSecondary():
6926 force_create = True
6928 if device.children:
6929 for child in device.children:
6930 _CreateBlockDev(lu, node, instance, child, force_create,
6931 info, force_open)
6933 if not force_create:
6934 return
6936 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6939 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6940 """Create a single block device on a given node.
6942 This will not recurse over children of the device, so they must be
6945 @param lu: the lu on whose behalf we execute
6946 @param node: the node on which to create the device
6947 @type instance: L{objects.Instance}
6948 @param instance: the instance which owns the device
6949 @type device: L{objects.Disk}
6950 @param device: the device to create
6951 @param info: the extra 'metadata' we should attach to the device
6952 (this will be represented as a LVM tag)
6953 @type force_open: boolean
6954 @param force_open: this parameter will be passed to the
6955 L{backend.BlockdevCreate} function where it specifies
6956 whether we run on primary or not, and it affects both
6957 the child assembly and the device own Open() execution
6960 lu.cfg.SetDiskID(device, node)
6961 result = lu.rpc.call_blockdev_create(node, device, device.size,
6962 instance.name, force_open, info)
6963 result.Raise("Can't create block device %s on"
6964 " node %s for instance %s" % (device, node, instance.name))
6965 if device.physical_id is None:
6966 device.physical_id = result.payload
6969 def _GenerateUniqueNames(lu, exts):
6970 """Generate a suitable LV name.
6972 This will generate a logical volume name for the given instance.
6974 """
6975 results = []
6976 for val in exts:
6977 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6978 results.append("%s%s" % (new_id, val))
6980 return results
6982 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6983 p_minor, s_minor):
6984 """Generate a drbd8 device complete with its children.
6987 port = lu.cfg.AllocatePort()
6988 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6989 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6990 logical_id=(vgname, names[0]))
6991 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6992 logical_id=(vgname, names[1]))
6993 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6994 logical_id=(primary, secondary, port,
6995 p_minor, s_minor,
6996 shared_secret),
6997 children=[dev_data, dev_meta],
6998 iv_name=iv_name)
6999 return drbd_dev
7002 def _GenerateDiskTemplate(lu, template_name,
7003 instance_name, primary_node,
7004 secondary_nodes, disk_info,
7005 file_storage_dir, file_driver,
7006 base_index, feedback_fn):
7007 """Generate the entire disk layout for a given template type.
7010 #TODO: compute space requirements
7012 vgname = lu.cfg.GetVGName()
7013 disk_count = len(disk_info)
7014 disks = []
7015 if template_name == constants.DT_DISKLESS:
7016 pass
7017 elif template_name == constants.DT_PLAIN:
7017 elif template_name == constants.DT_PLAIN:
7018 if len(secondary_nodes) != 0:
7019 raise errors.ProgrammerError("Wrong template configuration")
7021 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7022 for i in range(disk_count)])
7023 for idx, disk in enumerate(disk_info):
7024 disk_index = idx + base_index
7025 vg = disk.get(constants.IDISK_VG, vgname)
7026 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7027 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7028 size=disk[constants.IDISK_SIZE],
7029 logical_id=(vg, names[idx]),
7030 iv_name="disk/%d" % disk_index,
7031 mode=disk[constants.IDISK_MODE])
7032 disks.append(disk_dev)
7033 elif template_name == constants.DT_DRBD8:
7034 if len(secondary_nodes) != 1:
7035 raise errors.ProgrammerError("Wrong template configuration")
7036 remote_node = secondary_nodes[0]
7037 minors = lu.cfg.AllocateDRBDMinor(
7038 [primary_node, remote_node] * len(disk_info), instance_name)
7040 names = []
7041 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7042 for i in range(disk_count)]):
7043 names.append(lv_prefix + "_data")
7044 names.append(lv_prefix + "_meta")
7045 for idx, disk in enumerate(disk_info):
7046 disk_index = idx + base_index
7047 vg = disk.get(constants.IDISK_VG, vgname)
7048 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7049 disk[constants.IDISK_SIZE], vg,
7050 names[idx * 2:idx * 2 + 2],
7051 "disk/%d" % disk_index,
7052 minors[idx * 2], minors[idx * 2 + 1])
7053 disk_dev.mode = disk[constants.IDISK_MODE]
7054 disks.append(disk_dev)
7055 elif template_name == constants.DT_FILE:
7056 if len(secondary_nodes) != 0:
7057 raise errors.ProgrammerError("Wrong template configuration")
7059 opcodes.RequireFileStorage()
7061 for idx, disk in enumerate(disk_info):
7062 disk_index = idx + base_index
7063 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7064 size=disk[constants.IDISK_SIZE],
7065 iv_name="disk/%d" % disk_index,
7066 logical_id=(file_driver,
7067 "%s/disk%d" % (file_storage_dir,
7068 disk_index)),
7069 mode=disk[constants.IDISK_MODE])
7070 disks.append(disk_dev)
7071 elif template_name == constants.DT_SHARED_FILE:
7072 if len(secondary_nodes) != 0:
7073 raise errors.ProgrammerError("Wrong template configuration")
7075 opcodes.RequireSharedFileStorage()
7077 for idx, disk in enumerate(disk_info):
7078 disk_index = idx + base_index
7079 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7080 size=disk[constants.IDISK_SIZE],
7081 iv_name="disk/%d" % disk_index,
7082 logical_id=(file_driver,
7083 "%s/disk%d" % (file_storage_dir,
7084 disk_index)),
7085 mode=disk[constants.IDISK_MODE])
7086 disks.append(disk_dev)
7087 elif template_name == constants.DT_BLOCK:
7088 if len(secondary_nodes) != 0:
7089 raise errors.ProgrammerError("Wrong template configuration")
7091 for idx, disk in enumerate(disk_info):
7092 disk_index = idx + base_index
7093 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7094 size=disk[constants.IDISK_SIZE],
7095 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7096 disk[constants.IDISK_ADOPT]),
7097 iv_name="disk/%d" % disk_index,
7098 mode=disk[constants.IDISK_MODE])
7099 disks.append(disk_dev)
7100 else:
7102 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7104 return disks
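# Illustrative call of the generator above for a single plain (LVM) disk; the
# instance and node names are invented and the feedback callback is a no-op.
# This sketch assumes constants.DISK_RDWR as the disk mode; it is not used by
# the module itself.
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                                 "node1.example.com", [],
#                                 [{constants.IDISK_SIZE: 10240,
#                                   constants.IDISK_MODE: constants.DISK_RDWR}],
#                                 None, None, 0, lambda *args: None)
#
# which yields a single objects.Disk of type LD_LV with iv_name "disk/0".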
7106 def _GetInstanceInfoText(instance):
7107 """Compute that text that should be added to the disk's metadata.
7110 return "originstname+%s" % instance.name
7113 def _CalcEta(time_taken, written, total_size):
7114 """Calculates the ETA based on size written and total size.
7116 @param time_taken: The time taken so far
7117 @param written: amount written so far
7118 @param total_size: The total size of data to be written
7119 @return: The remaining time in seconds
7122 avg_time = time_taken / float(written)
7123 return (total_size - written) * avg_time
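# Worked example for _CalcEta: if 30720 MiB out of 122880 MiB were written in
# 600 seconds, the average cost is 600 / 30720.0 seconds per MiB, so the
# remaining 92160 MiB are estimated at
#
#   _CalcEta(600, 30720, 122880) == (122880 - 30720) * (600 / 30720.0) == 1800.0
#
# i.e. roughly 30 minutes left.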
7126 def _WipeDisks(lu, instance):
7127 """Wipes instance disks.
7129 @type lu: L{LogicalUnit}
7130 @param lu: the logical unit on whose behalf we execute
7131 @type instance: L{objects.Instance}
7132 @param instance: the instance whose disks we should create
7133 @return: the success of the wipe
7136 node = instance.primary_node
7138 for device in instance.disks:
7139 lu.cfg.SetDiskID(device, node)
7141 logging.info("Pause sync of instance %s disks", instance.name)
7142 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7144 for idx, success in enumerate(result.payload):
7146 logging.warn("pause-sync of instance %s for disks %d failed",
7150 for idx, device in enumerate(instance.disks):
7151 lu.LogInfo("* Wiping disk %d", idx)
7152 logging.info("Wiping disk %d for instance %s, node %s",
7153 idx, instance.name, node)
7155 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7156 # MAX_WIPE_CHUNK at max
7157 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7158 constants.MIN_WIPE_CHUNK_PERCENT)
7163 start_time = time.time()
7165 while offset < size:
7166 wipe_size = min(wipe_chunk_size, size - offset)
7167 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7168 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7169 (idx, offset, wipe_size))
7170 now = time.time()
7171 offset += wipe_size
7172 if now - last_output >= 60:
7173 eta = _CalcEta(now - start_time, offset, size)
7174 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7175 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7176 last_output = now
7178 logging.info("Resume sync of instance %s disks", instance.name)
7180 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7182 for idx, success in enumerate(result.payload):
7184 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7185 " look at the status and troubleshoot the issue.", idx)
7186 logging.warn("resume-sync of instance %s for disks %d failed",
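# Worked example for the chunk size used above, assuming (hypothetically)
# that constants.MIN_WIPE_CHUNK_PERCENT is 10 and constants.MAX_WIPE_CHUNK is
# 2048 MiB: a 10240 MiB disk gives min(2048, 10240 / 100.0 * 10) = 1024 MiB
# per call_blockdev_wipe request, while a 102400 MiB disk would be capped at
# the 2048 MiB maximum. The actual constant values live in constants.py.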
7190 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7191 """Create all disks for an instance.
7193 This abstracts away some work from AddInstance.
7195 @type lu: L{LogicalUnit}
7196 @param lu: the logical unit on whose behalf we execute
7197 @type instance: L{objects.Instance}
7198 @param instance: the instance whose disks we should create
7200 @param to_skip: list of indices to skip
7201 @type target_node: string
7202 @param target_node: if passed, overrides the target node for creation
7204 @return: the success of the creation
7207 info = _GetInstanceInfoText(instance)
7208 if target_node is None:
7209 pnode = instance.primary_node
7210 all_nodes = instance.all_nodes
7211 else:
7212 pnode = target_node
7213 all_nodes = [pnode]
7215 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7216 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7217 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7219 result.Raise("Failed to create directory '%s' on"
7220 " node %s" % (file_storage_dir, pnode))
7222 # Note: this needs to be kept in sync with adding of disks in
7223 # LUInstanceSetParams
7224 for idx, device in enumerate(instance.disks):
7225 if to_skip and idx in to_skip:
7226 continue
7227 logging.info("Creating volume %s for instance %s",
7228 device.iv_name, instance.name)
7230 for node in all_nodes:
7231 f_create = node == pnode
7232 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7235 def _RemoveDisks(lu, instance, target_node=None):
7236 """Remove all disks for an instance.
7238 This abstracts away some work from `AddInstance()` and
7239 `RemoveInstance()`. Note that in case some of the devices couldn't
7240 be removed, the removal will continue with the other ones (compare
7241 with `_CreateDisks()`).
7243 @type lu: L{LogicalUnit}
7244 @param lu: the logical unit on whose behalf we execute
7245 @type instance: L{objects.Instance}
7246 @param instance: the instance whose disks we should remove
7247 @type target_node: string
7248 @param target_node: used to override the node on which to remove the disks
7250 @return: the success of the removal
7253 logging.info("Removing block devices for instance %s", instance.name)
7256 for device in instance.disks:
7257 if target_node:
7258 edata = [(target_node, device)]
7259 else:
7260 edata = device.ComputeNodeTree(instance.primary_node)
7261 for node, disk in edata:
7262 lu.cfg.SetDiskID(disk, node)
7263 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7264 if msg:
7265 lu.LogWarning("Could not remove block device %s on node %s,"
7266 " continuing anyway: %s", device.iv_name, node, msg)
7269 if instance.disk_template == constants.DT_FILE:
7270 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7274 tgt = instance.primary_node
7275 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7277 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7278 file_storage_dir, instance.primary_node, result.fail_msg)
7284 def _ComputeDiskSizePerVG(disk_template, disks):
7285 """Compute disk size requirements in the volume group
7288 def _compute(disks, payload):
7289 """Universal algorithm.
7291 """
7292 vgs = {}
7293 for disk in disks:
7294 # accumulate the requested size per volume group
7295 vgs[disk[constants.IDISK_VG]] = \
7296 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7297 return vgs
7299 # Required free disk space as a function of disk and swap space
7300 req_size_dict = {
7301 constants.DT_DISKLESS: {},
7302 constants.DT_PLAIN: _compute(disks, 0),
7303 # 128 MB are added for drbd metadata for each disk
7304 constants.DT_DRBD8: _compute(disks, 128),
7305 constants.DT_FILE: {},
7306 constants.DT_SHARED_FILE: {},
7307 }
7309 if disk_template not in req_size_dict:
7310 raise errors.ProgrammerError("Disk template '%s' size requirement"
7311 " is unknown" % disk_template)
7313 return req_size_dict[disk_template]
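# Example of the per-VG computation above: two DRBD8 disks, 10240 MiB in
# volume group "xenvg" and 5120 MiB in "ssdvg", would yield (sizes in MiB,
# names invented):
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_SIZE: 10240,
#                           constants.IDISK_VG: "xenvg"},
#                          {constants.IDISK_SIZE: 5120,
#                           constants.IDISK_VG: "ssdvg"}])
#   == {"xenvg": 10368, "ssdvg": 5248}
#
# i.e. each disk is charged an extra 128 MiB for its DRBD metadata volume.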
7316 def _ComputeDiskSize(disk_template, disks):
7317 """Compute disk size requirements in the volume group
7320 # Required free disk space as a function of disk and swap space
7322 constants.DT_DISKLESS: None,
7323 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7324 # 128 MB are added for drbd metadata for each disk
7325 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7326 constants.DT_FILE: None,
7327 constants.DT_SHARED_FILE: 0,
7328 constants.DT_BLOCK: 0,
7331 if disk_template not in req_size_dict:
7332 raise errors.ProgrammerError("Disk template '%s' size requirement"
7333 " is unknown" % disk_template)
7335 return req_size_dict[disk_template]
7338 def _FilterVmNodes(lu, nodenames):
7339 """Filters out non-vm_capable nodes from a list.
7341 @type lu: L{LogicalUnit}
7342 @param lu: the logical unit for which we check
7343 @type nodenames: list
7344 @param nodenames: the list of nodes on which we should check
7346 @return: the list of vm-capable nodes
7349 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7350 return [name for name in nodenames if name not in vm_nodes]
7353 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7354 """Hypervisor parameter validation.
7356 This function abstract the hypervisor parameter validation to be
7357 used in both instance create and instance modify.
7359 @type lu: L{LogicalUnit}
7360 @param lu: the logical unit for which we check
7361 @type nodenames: list
7362 @param nodenames: the list of nodes on which we should check
7363 @type hvname: string
7364 @param hvname: the name of the hypervisor we should use
7365 @type hvparams: dict
7366 @param hvparams: the parameters which we need to check
7367 @raise errors.OpPrereqError: if the parameters are not valid
7370 nodenames = _FilterVmNodes(lu, nodenames)
7371 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7372 hvname,
7373 hvparams)
7374 for node in nodenames:
7375 info = hvinfo[node]
7376 if info.offline:
7377 continue
7378 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7381 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7382 """OS parameters validation.
7384 @type lu: L{LogicalUnit}
7385 @param lu: the logical unit for which we check
7386 @type required: boolean
7387 @param required: whether the validation should fail if the OS is not
7389 @type nodenames: list
7390 @param nodenames: the list of nodes on which we should check
7391 @type osname: string
7392 @param osname: the name of the OS we should use
7393 @type osparams: dict
7394 @param osparams: the parameters which we need to check
7395 @raise errors.OpPrereqError: if the parameters are not valid
7398 nodenames = _FilterVmNodes(lu, nodenames)
7399 result = lu.rpc.call_os_validate(required, nodenames, osname,
7400 [constants.OS_VALIDATE_PARAMETERS],
7401 osparams)
7402 for node, nres in result.items():
7403 # we don't check for offline cases since this should be run only
7404 # against the master node and/or an instance's nodes
7405 nres.Raise("OS Parameters validation failed on node %s" % node)
7406 if not nres.payload:
7407 lu.LogInfo("OS %s not found on node %s, validation skipped",
7408 osname, node)
7411 class LUInstanceCreate(LogicalUnit):
7412 """Create an instance.
7415 HPATH = "instance-add"
7416 HTYPE = constants.HTYPE_INSTANCE
7419 def CheckArguments(self):
7423 # do not require name_check to ease forward/backward compatibility
7425 if self.op.no_install and self.op.start:
7426 self.LogInfo("No-installation mode selected, disabling startup")
7427 self.op.start = False
7428 # validate/normalize the instance name
7429 self.op.instance_name = \
7430 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7432 if self.op.ip_check and not self.op.name_check:
7433 # TODO: make the ip check more flexible and not depend on the name check
7434 raise errors.OpPrereqError("Cannot do ip check without a name check",
7437 # check nics' parameter names
7438 for nic in self.op.nics:
7439 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7441 # check disks. parameter names and consistent adopt/no-adopt strategy
7442 has_adopt = has_no_adopt = False
7443 for disk in self.op.disks:
7444 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7445 if constants.IDISK_ADOPT in disk:
7446 has_adopt = True
7447 else:
7448 has_no_adopt = True
7449 if has_adopt and has_no_adopt:
7450 raise errors.OpPrereqError("Either all disks are adopted or none is",
7453 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7454 raise errors.OpPrereqError("Disk adoption is not supported for the"
7455 " '%s' disk template" %
7456 self.op.disk_template,
7458 if self.op.iallocator is not None:
7459 raise errors.OpPrereqError("Disk adoption not allowed with an"
7460 " iallocator script", errors.ECODE_INVAL)
7461 if self.op.mode == constants.INSTANCE_IMPORT:
7462 raise errors.OpPrereqError("Disk adoption not allowed for"
7463 " instance import", errors.ECODE_INVAL)
7465 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7466 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7467 " but no 'adopt' parameter given" %
7468 self.op.disk_template,
7471 self.adopt_disks = has_adopt
7473 # instance name verification
7474 if self.op.name_check:
7475 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7476 self.op.instance_name = self.hostname1.name
7477 # used in CheckPrereq for ip ping check
7478 self.check_ip = self.hostname1.ip
7479 else:
7480 self.check_ip = None
7482 # file storage checks
7483 if (self.op.file_driver and
7484 not self.op.file_driver in constants.FILE_DRIVER):
7485 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7486 self.op.file_driver, errors.ECODE_INVAL)
7488 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7489 raise errors.OpPrereqError("File storage directory path not absolute",
7492 ### Node/iallocator related checks
7493 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7495 if self.op.pnode is not None:
7496 if self.op.disk_template in constants.DTS_INT_MIRROR:
7497 if self.op.snode is None:
7498 raise errors.OpPrereqError("The networked disk templates need"
7499 " a mirror node", errors.ECODE_INVAL)
7500 elif self.op.snode:
7501 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7502 " template")
7503 self.op.snode = None
7505 self._cds = _GetClusterDomainSecret()
7507 if self.op.mode == constants.INSTANCE_IMPORT:
7508 # On import force_variant must be True, because if we forced it at
7509 # initial install, our only chance when importing it back is that it
7511 self.op.force_variant = True
7513 if self.op.no_install:
7514 self.LogInfo("No-installation mode has no effect during import")
7516 elif self.op.mode == constants.INSTANCE_CREATE:
7517 if self.op.os_type is None:
7518 raise errors.OpPrereqError("No guest OS specified",
7520 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7521 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7522 " installation" % self.op.os_type,
7524 if self.op.disk_template is None:
7525 raise errors.OpPrereqError("No disk template specified",
7528 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7529 # Check handshake to ensure both clusters have the same domain secret
7530 src_handshake = self.op.source_handshake
7531 if not src_handshake:
7532 raise errors.OpPrereqError("Missing source handshake",
7535 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7538 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7541 # Load and check source CA
7542 self.source_x509_ca_pem = self.op.source_x509_ca
7543 if not self.source_x509_ca_pem:
7544 raise errors.OpPrereqError("Missing source X509 CA",
7548 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7550 except OpenSSL.crypto.Error, err:
7551 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7552 (err, ), errors.ECODE_INVAL)
7554 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7555 if errcode is not None:
7556 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7559 self.source_x509_ca = cert
7561 src_instance_name = self.op.source_instance_name
7562 if not src_instance_name:
7563 raise errors.OpPrereqError("Missing source instance name",
7566 self.source_instance_name = \
7567 netutils.GetHostname(name=src_instance_name).name
7570 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7571 self.op.mode, errors.ECODE_INVAL)
7573 def ExpandNames(self):
7574 """ExpandNames for CreateInstance.
7576 Figure out the right locks for instance creation.
7579 self.needed_locks = {}
7581 instance_name = self.op.instance_name
7582 # this is just a preventive check, but someone might still add this
7583 # instance in the meantime, and creation will fail at lock-add time
7584 if instance_name in self.cfg.GetInstanceList():
7585 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7586 instance_name, errors.ECODE_EXISTS)
7588 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7590 if self.op.iallocator:
7591 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7593 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7594 nodelist = [self.op.pnode]
7595 if self.op.snode is not None:
7596 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7597 nodelist.append(self.op.snode)
7598 self.needed_locks[locking.LEVEL_NODE] = nodelist
7600 # in case of import lock the source node too
7601 if self.op.mode == constants.INSTANCE_IMPORT:
7602 src_node = self.op.src_node
7603 src_path = self.op.src_path
7605 if src_path is None:
7606 self.op.src_path = src_path = self.op.instance_name
7608 if src_node is None:
7609 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7610 self.op.src_node = None
7611 if os.path.isabs(src_path):
7612 raise errors.OpPrereqError("Importing an instance from an absolute"
7613 " path requires a source node option.",
7616 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7617 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7618 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7619 if not os.path.isabs(src_path):
7620 self.op.src_path = src_path = \
7621 utils.PathJoin(constants.EXPORT_DIR, src_path)
7623 def _RunAllocator(self):
7624 """Run the allocator based on input opcode.
7627 nics = [n.ToDict() for n in self.nics]
7628 ial = IAllocator(self.cfg, self.rpc,
7629 mode=constants.IALLOCATOR_MODE_ALLOC,
7630 name=self.op.instance_name,
7631 disk_template=self.op.disk_template,
7634 vcpus=self.be_full[constants.BE_VCPUS],
7635 mem_size=self.be_full[constants.BE_MEMORY],
7638 hypervisor=self.op.hypervisor,
7641 ial.Run(self.op.iallocator)
7644 raise errors.OpPrereqError("Can't compute nodes using"
7645 " iallocator '%s': %s" %
7646 (self.op.iallocator, ial.info),
7648 if len(ial.result) != ial.required_nodes:
7649 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7650 " of nodes (%s), required %s" %
7651 (self.op.iallocator, len(ial.result),
7652 ial.required_nodes), errors.ECODE_FAULT)
7653 self.op.pnode = ial.result[0]
7654 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7655 self.op.instance_name, self.op.iallocator,
7656 utils.CommaJoin(ial.result))
7657 if ial.required_nodes == 2:
7658 self.op.snode = ial.result[1]
7660 def BuildHooksEnv(self):
7663 This runs on master, primary and secondary nodes of the instance.
7667 "ADD_MODE": self.op.mode,
7669 if self.op.mode == constants.INSTANCE_IMPORT:
7670 env["SRC_NODE"] = self.op.src_node
7671 env["SRC_PATH"] = self.op.src_path
7672 env["SRC_IMAGES"] = self.src_images
7674 env.update(_BuildInstanceHookEnv(
7675 name=self.op.instance_name,
7676 primary_node=self.op.pnode,
7677 secondary_nodes=self.secondaries,
7678 status=self.op.start,
7679 os_type=self.op.os_type,
7680 memory=self.be_full[constants.BE_MEMORY],
7681 vcpus=self.be_full[constants.BE_VCPUS],
7682 nics=_NICListToTuple(self, self.nics),
7683 disk_template=self.op.disk_template,
7684 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7685 for d in self.disks],
7688 hypervisor_name=self.op.hypervisor,
7693 def BuildHooksNodes(self):
7694 """Build hooks nodes.
7697 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7700 def _ReadExportInfo(self):
7701 """Reads the export information from disk.
7703 It will override the opcode source node and path with the actual
7704 information, if these two were not specified before.
7706 @return: the export information
7709 assert self.op.mode == constants.INSTANCE_IMPORT
7711 src_node = self.op.src_node
7712 src_path = self.op.src_path
7714 if src_node is None:
7715 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7716 exp_list = self.rpc.call_export_list(locked_nodes)
7718 for node in exp_list:
7719 if exp_list[node].fail_msg:
7721 if src_path in exp_list[node].payload:
7723 self.op.src_node = src_node = node
7724 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7728 raise errors.OpPrereqError("No export found for relative path %s" %
7729 src_path, errors.ECODE_INVAL)
7731 _CheckNodeOnline(self, src_node)
7732 result = self.rpc.call_export_info(src_node, src_path)
7733 result.Raise("No export or invalid export found in dir %s" % src_path)
7735 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7736 if not export_info.has_section(constants.INISECT_EXP):
7737 raise errors.ProgrammerError("Corrupted export config",
7738 errors.ECODE_ENVIRON)
7740 ei_version = export_info.get(constants.INISECT_EXP, "version")
7741 if (int(ei_version) != constants.EXPORT_VERSION):
7742 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7743 (ei_version, constants.EXPORT_VERSION),
7744 errors.ECODE_ENVIRON)
7747 def _ReadExportParams(self, einfo):
7748 """Use export parameters as defaults.
7750 In case the opcode doesn't specify (as in override) some instance
7751 parameters, then try to use them from the export information, if
7755 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7757 if self.op.disk_template is None:
7758 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7759 self.op.disk_template = einfo.get(constants.INISECT_INS,
7762 raise errors.OpPrereqError("No disk template specified and the export"
7763 " is missing the disk_template information",
7766 if not self.op.disks:
7767 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7769 # TODO: import the disk iv_name too
7770 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7771 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7772 disks.append({constants.IDISK_SIZE: disk_sz})
7773 self.op.disks = disks
7775 raise errors.OpPrereqError("No disk info specified and the export"
7776 " is missing the disk information",
7779 if (not self.op.nics and
7780 einfo.has_option(constants.INISECT_INS, "nic_count")):
7782 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7784 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7785 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7790 if (self.op.hypervisor is None and
7791 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7792 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7793 if einfo.has_section(constants.INISECT_HYP):
7794 # use the export parameters but do not override the ones
7795 # specified by the user
7796 for name, value in einfo.items(constants.INISECT_HYP):
7797 if name not in self.op.hvparams:
7798 self.op.hvparams[name] = value
7800 if einfo.has_section(constants.INISECT_BEP):
7801 # use the parameters, without overriding
7802 for name, value in einfo.items(constants.INISECT_BEP):
7803 if name not in self.op.beparams:
7804 self.op.beparams[name] = value
7806 # try to read the parameters old style, from the main section
7807 for name in constants.BES_PARAMETERS:
7808 if (name not in self.op.beparams and
7809 einfo.has_option(constants.INISECT_INS, name)):
7810 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7812 if einfo.has_section(constants.INISECT_OSP):
7813 # use the parameters, without overriding
7814 for name, value in einfo.items(constants.INISECT_OSP):
7815 if name not in self.op.osparams:
7816 self.op.osparams[name] = value
7818 def _RevertToDefaults(self, cluster):
7819 """Revert the instance parameters to the default values.
7823 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7824 for name in self.op.hvparams.keys():
7825 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7826 del self.op.hvparams[name]
7828 be_defs = cluster.SimpleFillBE({})
7829 for name in self.op.beparams.keys():
7830 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7831 del self.op.beparams[name]
7833 nic_defs = cluster.SimpleFillNIC({})
7834 for nic in self.op.nics:
7835 for name in constants.NICS_PARAMETERS:
7836 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7839 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7840 for name in self.op.osparams.keys():
7841 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7842 del self.op.osparams[name]
7844 def CheckPrereq(self):
7845 """Check prerequisites.
7848 if self.op.mode == constants.INSTANCE_IMPORT:
7849 export_info = self._ReadExportInfo()
7850 self._ReadExportParams(export_info)
7852 if (not self.cfg.GetVGName() and
7853 self.op.disk_template not in constants.DTS_NOT_LVM):
7854 raise errors.OpPrereqError("Cluster does not support lvm-based"
7855 " instances", errors.ECODE_STATE)
7857 if self.op.hypervisor is None:
7858 self.op.hypervisor = self.cfg.GetHypervisorType()
7860 cluster = self.cfg.GetClusterInfo()
7861 enabled_hvs = cluster.enabled_hypervisors
7862 if self.op.hypervisor not in enabled_hvs:
7863 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7864 " cluster (%s)" % (self.op.hypervisor,
7865 ",".join(enabled_hvs)),
7868 # check hypervisor parameter syntax (locally)
7869 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7870 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7872 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7873 hv_type.CheckParameterSyntax(filled_hvp)
7874 self.hv_full = filled_hvp
7875 # check that we don't specify global parameters on an instance
7876 _CheckGlobalHvParams(self.op.hvparams)
7878 # fill and remember the beparams dict
7879 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7880 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7882 # build os parameters
7883 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7885 # now that hvp/bep are in final format, let's reset to defaults,
7887 if self.op.identify_defaults:
7888 self._RevertToDefaults(cluster)
7892 for idx, nic in enumerate(self.op.nics):
7893 nic_mode_req = nic.get(constants.INIC_MODE, None)
7894 nic_mode = nic_mode_req
7895 if nic_mode is None:
7896 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7898 # in routed mode, for the first nic, the default ip is 'auto'
7899 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7900 default_ip_mode = constants.VALUE_AUTO
7902 default_ip_mode = constants.VALUE_NONE
7904 # ip validity checks
7905 ip = nic.get(constants.INIC_IP, default_ip_mode)
7906 if ip is None or ip.lower() == constants.VALUE_NONE:
7908 elif ip.lower() == constants.VALUE_AUTO:
7909 if not self.op.name_check:
7910 raise errors.OpPrereqError("IP address set to auto but name checks"
7911 " have been skipped",
7913 nic_ip = self.hostname1.ip
7915 if not netutils.IPAddress.IsValid(ip):
7916 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7920 # TODO: check the ip address for uniqueness
7921 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7922 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7925 # MAC address verification
7926 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7927 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7928 mac = utils.NormalizeAndValidateMac(mac)
7931 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7932 except errors.ReservationError:
7933 raise errors.OpPrereqError("MAC address %s already in use"
7934 " in cluster" % mac,
7935 errors.ECODE_NOTUNIQUE)
7937 # Build nic parameters
7938 link = nic.get(constants.INIC_LINK, None)
7941 nicparams[constants.NIC_MODE] = nic_mode_req
7943 nicparams[constants.NIC_LINK] = link
7945 check_params = cluster.SimpleFillNIC(nicparams)
7946 objects.NIC.CheckParameterSyntax(check_params)
7947 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7949 # disk checks/pre-build
7950 default_vg = self.cfg.GetVGName()
7952 for disk in self.op.disks:
7953 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7954 if mode not in constants.DISK_ACCESS_SET:
7955 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7956 mode, errors.ECODE_INVAL)
7957 size = disk.get(constants.IDISK_SIZE, None)
7959 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7962 except (TypeError, ValueError):
7963 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7966 constants.IDISK_SIZE: size,
7967 constants.IDISK_MODE: mode,
7968 constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7970 if constants.IDISK_ADOPT in disk:
7971 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7972 self.disks.append(new_disk)
7974 if self.op.mode == constants.INSTANCE_IMPORT:
7976 # Check that the new instance doesn't have less disks than the export
7977 instance_disks = len(self.disks)
7978 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7979 if instance_disks < export_disks:
7980 raise errors.OpPrereqError("Not enough disks to import."
7981 " (instance: %d, export: %d)" %
7982 (instance_disks, export_disks),
7986 for idx in range(export_disks):
7987 option = 'disk%d_dump' % idx
7988 if export_info.has_option(constants.INISECT_INS, option):
7989 # FIXME: are the old os-es, disk sizes, etc. useful?
7990 export_name = export_info.get(constants.INISECT_INS, option)
7991 image = utils.PathJoin(self.op.src_path, export_name)
7992 disk_images.append(image)
7994 disk_images.append(False)
7996 self.src_images = disk_images
7998 old_name = export_info.get(constants.INISECT_INS, 'name')
8000 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8001 except (TypeError, ValueError), err:
8002 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8003 " an integer: %s" % str(err),
8005 if self.op.instance_name == old_name:
8006 for idx, nic in enumerate(self.nics):
8007 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8008 nic_mac_ini = 'nic%d_mac' % idx
8009 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8011 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8013 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8014 if self.op.ip_check:
8015 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8016 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8017 (self.check_ip, self.op.instance_name),
8018 errors.ECODE_NOTUNIQUE)
8020 #### mac address generation
8021 # By generating here the mac address both the allocator and the hooks get
8022 # the real final mac address rather than the 'auto' or 'generate' value.
8023 # There is a race condition between the generation and the instance object
8024 # creation, which means that we know the mac is valid now, but we're not
8025 # sure it will be when we actually add the instance. If things go bad
8026 # adding the instance will abort because of a duplicate mac, and the
8027 # creation job will fail.
8028 for nic in self.nics:
8029 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8030 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8034 if self.op.iallocator is not None:
8035 self._RunAllocator()
8037 #### node related checks
8039 # check primary node
8040 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8041 assert self.pnode is not None, \
8042 "Cannot retrieve locked node %s" % self.op.pnode
8044 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8045 pnode.name, errors.ECODE_STATE)
8047 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8048 pnode.name, errors.ECODE_STATE)
8049 if not pnode.vm_capable:
8050 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8051 " '%s'" % pnode.name, errors.ECODE_STATE)
8053 self.secondaries = []
8055 # mirror node verification
8056 if self.op.disk_template in constants.DTS_INT_MIRROR:
8057 if self.op.snode == pnode.name:
8058 raise errors.OpPrereqError("The secondary node cannot be the"
8059 " primary node.", errors.ECODE_INVAL)
8060 _CheckNodeOnline(self, self.op.snode)
8061 _CheckNodeNotDrained(self, self.op.snode)
8062 _CheckNodeVmCapable(self, self.op.snode)
8063 self.secondaries.append(self.op.snode)
8065 nodenames = [pnode.name] + self.secondaries
8067 if not self.adopt_disks:
8068 # Check lv size requirements, if not adopting
8069 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8070 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8072 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8073 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8074 disk[constants.IDISK_ADOPT])
8075 for disk in self.disks])
8076 if len(all_lvs) != len(self.disks):
8077 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8079 for lv_name in all_lvs:
8081 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8082 # to ReserveLV uses the same syntax
8083 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8084 except errors.ReservationError:
8085 raise errors.OpPrereqError("LV named %s used by another instance" %
8086 lv_name, errors.ECODE_NOTUNIQUE)
8088 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8089 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8091 node_lvs = self.rpc.call_lv_list([pnode.name],
8092 vg_names.payload.keys())[pnode.name]
8093 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8094 node_lvs = node_lvs.payload
8096 delta = all_lvs.difference(node_lvs.keys())
8098 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8099 utils.CommaJoin(delta),
8101 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8103 raise errors.OpPrereqError("Online logical volumes found, cannot"
8104 " adopt: %s" % utils.CommaJoin(online_lvs),
8106 # update the size of disk based on what is found
8107 for dsk in self.disks:
8108 dsk[constants.IDISK_SIZE] = \
8109 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8110 dsk[constants.IDISK_ADOPT])][0]))
8112 elif self.op.disk_template == constants.DT_BLOCK:
8113 # Normalize and de-duplicate device paths
8114 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8115 for disk in self.disks])
8116 if len(all_disks) != len(self.disks):
8117 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8119 baddisks = [d for d in all_disks
8120 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8122 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8123 " cannot be adopted" %
8124 (", ".join(baddisks),
8125 constants.ADOPTABLE_BLOCKDEV_ROOT),
8128 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8129 list(all_disks))[pnode.name]
8130 node_disks.Raise("Cannot get block device information from node %s" %
8132 node_disks = node_disks.payload
8133 delta = all_disks.difference(node_disks.keys())
8135 raise errors.OpPrereqError("Missing block device(s): %s" %
8136 utils.CommaJoin(delta),
8138 for dsk in self.disks:
8139 dsk[constants.IDISK_SIZE] = \
8140 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8142 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8144 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8145 # check OS parameters (remotely)
8146 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8148 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8150 # memory check on primary node
8152 _CheckNodeFreeMemory(self, self.pnode.name,
8153 "creating instance %s" % self.op.instance_name,
8154 self.be_full[constants.BE_MEMORY],
8157 self.dry_run_result = list(nodenames)
8159 def Exec(self, feedback_fn):
8160 """Create and add the instance to the cluster.
8163 instance = self.op.instance_name
8164 pnode_name = self.pnode.name
8166 ht_kind = self.op.hypervisor
8167 if ht_kind in constants.HTS_REQ_PORT:
8168 network_port = self.cfg.AllocatePort()
8172 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8173 # this is needed because os.path.join does not accept None arguments
8174 if self.op.file_storage_dir is None:
8175 string_file_storage_dir = ""
8177 string_file_storage_dir = self.op.file_storage_dir
8179 # build the full file storage dir path
8180 if self.op.disk_template == constants.DT_SHARED_FILE:
8181 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8183 get_fsd_fn = self.cfg.GetFileStorageDir
8185 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8186 string_file_storage_dir, instance)
8188 file_storage_dir = ""
8190 disks = _GenerateDiskTemplate(self,
8191 self.op.disk_template,
8192 instance, pnode_name,
8196 self.op.file_driver,
8200 iobj = objects.Instance(name=instance, os=self.op.os_type,
8201 primary_node=pnode_name,
8202 nics=self.nics, disks=disks,
8203 disk_template=self.op.disk_template,
8205 network_port=network_port,
8206 beparams=self.op.beparams,
8207 hvparams=self.op.hvparams,
8208 hypervisor=self.op.hypervisor,
8209 osparams=self.op.osparams,
8212 if self.adopt_disks:
8213 if self.op.disk_template == constants.DT_PLAIN:
8214 # rename LVs to the newly-generated names; we need to construct
8215 # 'fake' LV disks with the old data, plus the new unique_id
8216 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8218 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8219 rename_to.append(t_dsk.logical_id)
8220 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8221 self.cfg.SetDiskID(t_dsk, pnode_name)
8222 result = self.rpc.call_blockdev_rename(pnode_name,
8223 zip(tmp_disks, rename_to))
8224 result.Raise("Failed to rename adoped LVs")
8226 feedback_fn("* creating instance disks...")
8228 _CreateDisks(self, iobj)
8229 except errors.OpExecError:
8230 self.LogWarning("Device creation failed, reverting...")
8232 _RemoveDisks(self, iobj)
8234 self.cfg.ReleaseDRBDMinors(instance)
8237 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8238 feedback_fn("* wiping instance disks...")
8240 _WipeDisks(self, iobj)
8241 except errors.OpExecError:
8242 self.LogWarning("Device wiping failed, reverting...")
8244 _RemoveDisks(self, iobj)
8246 self.cfg.ReleaseDRBDMinors(instance)
8249 feedback_fn("adding instance %s to cluster config" % instance)
8251 self.cfg.AddInstance(iobj, self.proc.GetECId())
8253 # Declare that we don't want to remove the instance lock anymore, as we've
8254 # added the instance to the config
8255 del self.remove_locks[locking.LEVEL_INSTANCE]
8256 # Unlock all the nodes
8257 if self.op.mode == constants.INSTANCE_IMPORT:
8258 nodes_keep = [self.op.src_node]
8259 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8260 if node != self.op.src_node]
8261 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8262 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8264 self.context.glm.release(locking.LEVEL_NODE)
8265 del self.acquired_locks[locking.LEVEL_NODE]
8267 if self.op.wait_for_sync:
8268 disk_abort = not _WaitForSync(self, iobj)
8269 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8270 # make sure the disks are not degraded (still sync-ing is ok)
8272 feedback_fn("* checking mirrors status")
8273 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8278 _RemoveDisks(self, iobj)
8279 self.cfg.RemoveInstance(iobj.name)
8280 # Make sure the instance lock gets removed
8281 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8282 raise errors.OpExecError("There are some degraded disks for"
8285 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8286 if self.op.mode == constants.INSTANCE_CREATE:
8287 if not self.op.no_install:
8288 feedback_fn("* running the instance OS create scripts...")
8289 # FIXME: pass debug option from opcode to backend
8290 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8291 self.op.debug_level)
8292 result.Raise("Could not add os for instance %s"
8293 " on node %s" % (instance, pnode_name))
8295 elif self.op.mode == constants.INSTANCE_IMPORT:
8296 feedback_fn("* running the instance OS import scripts...")
8300 for idx, image in enumerate(self.src_images):
8304 # FIXME: pass debug option from opcode to backend
8305 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8306 constants.IEIO_FILE, (image, ),
8307 constants.IEIO_SCRIPT,
8308 (iobj.disks[idx], idx),
8310 transfers.append(dt)
8313 masterd.instance.TransferInstanceData(self, feedback_fn,
8314 self.op.src_node, pnode_name,
8315 self.pnode.secondary_ip,
8317 if not compat.all(import_result):
8318 self.LogWarning("Some disks for instance %s on node %s were not"
8319 " imported successfully" % (instance, pnode_name))
8321 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8322 feedback_fn("* preparing remote import...")
8323 # The source cluster will stop the instance before attempting to make a
8324 # connection. In some cases stopping an instance can take a long time,
8325 # hence the shutdown timeout is added to the connection timeout.
8326 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8327 self.op.source_shutdown_timeout)
8328 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8330 assert iobj.primary_node == self.pnode.name
8332 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8333 self.source_x509_ca,
8334 self._cds, timeouts)
8335 if not compat.all(disk_results):
8336 # TODO: Should the instance still be started, even if some disks
8337 # failed to import (valid for local imports, too)?
8338 self.LogWarning("Some disks for instance %s on node %s were not"
8339 " imported successfully" % (instance, pnode_name))
8341 # Run rename script on newly imported instance
8342 assert iobj.name == instance
8343 feedback_fn("Running rename script for %s" % instance)
8344 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8345 self.source_instance_name,
8346 self.op.debug_level)
8348 self.LogWarning("Failed to run rename script for %s on node"
8349 " %s: %s" % (instance, pnode_name, result.fail_msg))
8352 # also checked in the prereq part
8353 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8357 iobj.admin_up = True
8358 self.cfg.Update(iobj, feedback_fn)
8359 logging.info("Starting instance %s on node %s", instance, pnode_name)
8360 feedback_fn("* starting instance...")
8361 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8362 result.Raise("Could not start instance")
8364 return list(iobj.all_nodes)
8367 class LUInstanceConsole(NoHooksLU):
8368 """Connect to an instance's console.
8370 This is somewhat special in that it returns the command line that
8371 you need to run on the master node in order to connect to the
8377 def ExpandNames(self):
8378 self._ExpandAndLockInstance()
8380 def CheckPrereq(self):
8381 """Check prerequisites.
8383 This checks that the instance is in the cluster.
8386 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8387 assert self.instance is not None, \
8388 "Cannot retrieve locked instance %s" % self.op.instance_name
8389 _CheckNodeOnline(self, self.instance.primary_node)
8391 def Exec(self, feedback_fn):
8392 """Connect to the console of an instance
8395 instance = self.instance
8396 node = instance.primary_node
8398 node_insts = self.rpc.call_instance_list([node],
8399 [instance.hypervisor])[node]
8400 node_insts.Raise("Can't get node information from %s" % node)
8402 if instance.name not in node_insts.payload:
8403 if instance.admin_up:
8404 state = constants.INSTST_ERRORDOWN
8406 state = constants.INSTST_ADMINDOWN
8407 raise errors.OpExecError("Instance %s is not running (state %s)" %
8408 (instance.name, state))
8410 logging.debug("Connecting to console of %s on %s", instance.name, node)
8412 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8415 def _GetInstanceConsole(cluster, instance):
8416 """Returns console information for an instance.
8418 @type cluster: L{objects.Cluster}
8419 @type instance: L{objects.Instance}
8423 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8424 # beparams and hvparams are passed separately, to avoid editing the
8425 # instance and then saving the defaults in the instance itself.
8426 hvparams = cluster.FillHV(instance)
8427 beparams = cluster.FillBE(instance)
8428 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8430 assert console.instance == instance.name
8431 assert console.Validate()
8433 return console.ToDict()
8436 class LUInstanceReplaceDisks(LogicalUnit):
8437 """Replace the disks of an instance.
8440 HPATH = "mirrors-replace"
8441 HTYPE = constants.HTYPE_INSTANCE
8444 def CheckArguments(self):
8445 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8448 def ExpandNames(self):
8449 self._ExpandAndLockInstance()
8451 if self.op.iallocator is not None:
8452 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8454 elif self.op.remote_node is not None:
8455 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8456 self.op.remote_node = remote_node
8458 # Warning: do not remove the locking of the new secondary here
8459 # unless DRBD8.AddChildren is changed to work in parallel;
8460 # currently it doesn't since parallel invocations of
8461 # FindUnusedMinor will conflict
8462 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8463 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8466 self.needed_locks[locking.LEVEL_NODE] = []
8467 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8469 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8470 self.op.iallocator, self.op.remote_node,
8471 self.op.disks, False, self.op.early_release)
8473 self.tasklets = [self.replacer]
8475 def DeclareLocks(self, level):
8476 # If we're not already locking all nodes in the set we have to declare the
8477 # instance's primary/secondary nodes.
8478 if (level == locking.LEVEL_NODE and
8479 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8480 self._LockInstancesNodes()
8482 def BuildHooksEnv(self):
8485 This runs on the master, the primary and all the secondaries.
8488 instance = self.replacer.instance
8490 "MODE": self.op.mode,
8491 "NEW_SECONDARY": self.op.remote_node,
8492 "OLD_SECONDARY": instance.secondary_nodes[0],
8494 env.update(_BuildInstanceHookEnvByObject(self, instance))
8497 def BuildHooksNodes(self):
8498 """Build hooks nodes.
8501 instance = self.replacer.instance
8503 self.cfg.GetMasterNode(),
8504 instance.primary_node,
8506 if self.op.remote_node is not None:
8507 nl.append(self.op.remote_node)
8511 class TLReplaceDisks(Tasklet):
8512 """Replaces disks for an instance.
8514 Note: Locking is not within the scope of this class.
8517 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8518 disks, delay_iallocator, early_release):
8519 """Initializes this class.
8522 Tasklet.__init__(self, lu)
8525 self.instance_name = instance_name
8527 self.iallocator_name = iallocator_name
8528 self.remote_node = remote_node
8530 self.delay_iallocator = delay_iallocator
8531 self.early_release = early_release
8534 self.instance = None
8535 self.new_node = None
8536 self.target_node = None
8537 self.other_node = None
8538 self.remote_node_info = None
8539 self.node_secondary_ip = None
8542 def CheckArguments(mode, remote_node, iallocator):
8543 """Helper function for users of this class.
8546 # check for valid parameter combination
8547 if mode == constants.REPLACE_DISK_CHG:
8548 if remote_node is None and iallocator is None:
8549 raise errors.OpPrereqError("When changing the secondary either an"
8550 " iallocator script must be used or the"
8551 " new node given", errors.ECODE_INVAL)
8553 if remote_node is not None and iallocator is not None:
8554 raise errors.OpPrereqError("Give either the iallocator or the new"
8555 " secondary, not both", errors.ECODE_INVAL)
8557 elif remote_node is not None or iallocator is not None:
8558 # Not replacing the secondary
8559 raise errors.OpPrereqError("The iallocator and new node options can"
8560 " only be used when changing the"
8561 " secondary node", errors.ECODE_INVAL)
8564 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8565 """Compute a new secondary node using an IAllocator.
8568 ial = IAllocator(lu.cfg, lu.rpc,
8569 mode=constants.IALLOCATOR_MODE_RELOC,
8571 relocate_from=relocate_from)
8573 ial.Run(iallocator_name)
8576 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8577 " %s" % (iallocator_name, ial.info),
8580 if len(ial.result) != ial.required_nodes:
8581 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8582 " of nodes (%s), required %s" %
8584 len(ial.result), ial.required_nodes),
8587 remote_node_name = ial.result[0]
8589 lu.LogInfo("Selected new secondary for instance '%s': %s",
8590 instance_name, remote_node_name)
8592 return remote_node_name
8594 def _FindFaultyDisks(self, node_name):
8595 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8598 def _CheckDisksActivated(self, instance):
8599 """Checks if the instance disks are activated.
8601 @param instance: The instance to check disks
8602 @return: True if they are activated, False otherwise
8605 nodes = instance.all_nodes
8607 for idx, dev in enumerate(instance.disks):
8609 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8610 self.cfg.SetDiskID(dev, node)
8612 result = self.rpc.call_blockdev_find(node, dev)
8616 elif result.fail_msg or not result.payload:
8622 def CheckPrereq(self):
8623 """Check prerequisites.
8625 This checks that the instance is in the cluster.
8628 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8629 assert instance is not None, \
8630 "Cannot retrieve locked instance %s" % self.instance_name
8632 if instance.disk_template != constants.DT_DRBD8:
8633 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8634 " instances", errors.ECODE_INVAL)
8636 if len(instance.secondary_nodes) != 1:
8637 raise errors.OpPrereqError("The instance has a strange layout,"
8638 " expected one secondary but found %d" %
8639 len(instance.secondary_nodes),
8642 if not self.delay_iallocator:
8643 self._CheckPrereq2()
8645 def _CheckPrereq2(self):
8646 """Check prerequisites, second part.
8648 This function should always be part of CheckPrereq. It was separated and is
8649 now called from Exec because during node evacuation iallocator was only
8650 called with an unmodified cluster model, not taking planned changes into
8654 instance = self.instance
8655 secondary_node = instance.secondary_nodes[0]
8657 if self.iallocator_name is None:
8658 remote_node = self.remote_node
8660 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8661 instance.name, instance.secondary_nodes)
8663 if remote_node is not None:
8664 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8665 assert self.remote_node_info is not None, \
8666 "Cannot retrieve locked node %s" % remote_node
8668 self.remote_node_info = None
8670 if remote_node == self.instance.primary_node:
8671 raise errors.OpPrereqError("The specified node is the primary node of"
8672 " the instance.", errors.ECODE_INVAL)
8674 if remote_node == secondary_node:
8675 raise errors.OpPrereqError("The specified node is already the"
8676 " secondary node of the instance.",
8679 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8680 constants.REPLACE_DISK_CHG):
8681 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8684 if self.mode == constants.REPLACE_DISK_AUTO:
8685 if not self._CheckDisksActivated(instance):
8686 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8687 " first" % self.instance_name,
8689 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8690 faulty_secondary = self._FindFaultyDisks(secondary_node)
8692 if faulty_primary and faulty_secondary:
8693 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8694 " one node and can not be repaired"
8695 " automatically" % self.instance_name,
8699 self.disks = faulty_primary
8700 self.target_node = instance.primary_node
8701 self.other_node = secondary_node
8702 check_nodes = [self.target_node, self.other_node]
8703 elif faulty_secondary:
8704 self.disks = faulty_secondary
8705 self.target_node = secondary_node
8706 self.other_node = instance.primary_node
8707 check_nodes = [self.target_node, self.other_node]
8713 # Non-automatic modes
8714 if self.mode == constants.REPLACE_DISK_PRI:
8715 self.target_node = instance.primary_node
8716 self.other_node = secondary_node
8717 check_nodes = [self.target_node, self.other_node]
8719 elif self.mode == constants.REPLACE_DISK_SEC:
8720 self.target_node = secondary_node
8721 self.other_node = instance.primary_node
8722 check_nodes = [self.target_node, self.other_node]
8724 elif self.mode == constants.REPLACE_DISK_CHG:
8725 self.new_node = remote_node
8726 self.other_node = instance.primary_node
8727 self.target_node = secondary_node
8728 check_nodes = [self.new_node, self.other_node]
8730 _CheckNodeNotDrained(self.lu, remote_node)
8731 _CheckNodeVmCapable(self.lu, remote_node)
8733 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8734 assert old_node_info is not None
8735 if old_node_info.offline and not self.early_release:
8736 # doesn't make sense to delay the release
8737 self.early_release = True
8738 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8739 " early-release mode", secondary_node)
8742 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8745 # If not specified all disks should be replaced
8747 self.disks = range(len(self.instance.disks))
8749 for node in check_nodes:
8750 _CheckNodeOnline(self.lu, node)
8752 # Check whether disks are valid
8753 for disk_idx in self.disks:
8754 instance.FindDisk(disk_idx)
8756 # Get secondary node IP addresses
8759 for node_name in [self.target_node, self.other_node, self.new_node]:
8760 if node_name is not None:
8761 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8763 self.node_secondary_ip = node_2nd_ip
8765 def Exec(self, feedback_fn):
8766 """Execute disk replacement.
8768 This dispatches the disk replacement to the appropriate handler.
8771 if self.delay_iallocator:
8772 self._CheckPrereq2()
8775 feedback_fn("No disks need replacement")
8778 feedback_fn("Replacing disk(s) %s for %s" %
8779 (utils.CommaJoin(self.disks), self.instance.name))
8781 activate_disks = (not self.instance.admin_up)
8783 # Activate the instance disks if we're replacing them on a down instance
8785 _StartInstanceDisks(self.lu, self.instance, True)
8788 # Should we replace the secondary node?
8789 if self.new_node is not None:
8790 fn = self._ExecDrbd8Secondary
8792 fn = self._ExecDrbd8DiskOnly
8794 return fn(feedback_fn)
8797 # Deactivate the instance disks if we're replacing them on a
8800 _SafeShutdownInstanceDisks(self.lu, self.instance)
8802 def _CheckVolumeGroup(self, nodes):
8803 self.lu.LogInfo("Checking volume groups")
8805 vgname = self.cfg.GetVGName()
8807 # Make sure volume group exists on all involved nodes
8808 results = self.rpc.call_vg_list(nodes)
8810 raise errors.OpExecError("Can't list volume groups on the nodes")
8814 res.Raise("Error checking node %s" % node)
8815 if vgname not in res.payload:
8816 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8819 def _CheckDisksExistence(self, nodes):
8820 # Check disk existence
8821 for idx, dev in enumerate(self.instance.disks):
8822 if idx not in self.disks:
8826 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8827 self.cfg.SetDiskID(dev, node)
8829 result = self.rpc.call_blockdev_find(node, dev)
8831 msg = result.fail_msg
8832 if msg or not result.payload:
8834 msg = "disk not found"
8835 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8838 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8839 for idx, dev in enumerate(self.instance.disks):
8840 if idx not in self.disks:
8843 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8846 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8848 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8849 " replace disks for instance %s" %
8850 (node_name, self.instance.name))
8852 def _CreateNewStorage(self, node_name):
8853 vgname = self.cfg.GetVGName()
8856 for idx, dev in enumerate(self.instance.disks):
8857 if idx not in self.disks:
8860 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8862 self.cfg.SetDiskID(dev, node_name)
8864 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8865 names = _GenerateUniqueNames(self.lu, lv_names)
8867 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8868 logical_id=(vgname, names[0]))
8869 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8870 logical_id=(vgname, names[1]))
8872 new_lvs = [lv_data, lv_meta]
8873 old_lvs = dev.children
8874 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8876 # we pass force_create=True to force the LVM creation
8877 for new_lv in new_lvs:
8878 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8879 _GetInstanceInfoText(self.instance), False)
8883 def _CheckDevices(self, node_name, iv_names):
8884 for name, (dev, _, _) in iv_names.iteritems():
8885 self.cfg.SetDiskID(dev, node_name)
8887 result = self.rpc.call_blockdev_find(node_name, dev)
8889 msg = result.fail_msg
8890 if msg or not result.payload:
8892 msg = "disk not found"
8893 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8896 if result.payload.is_degraded:
8897 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8899 def _RemoveOldStorage(self, node_name, iv_names):
8900 for name, (_, old_lvs, _) in iv_names.iteritems():
8901 self.lu.LogInfo("Remove logical volumes for %s" % name)
8904 self.cfg.SetDiskID(lv, node_name)
8906 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8908 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8909 hint="remove unused LVs manually")
8911 def _ReleaseNodeLock(self, node_name):
8912 """Releases the lock for a given node."""
8913 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8915 def _ExecDrbd8DiskOnly(self, feedback_fn):
8916 """Replace a disk on the primary or secondary for DRBD 8.
8918 The algorithm for replace is quite complicated:
8920 1. for each disk to be replaced:
8922 1. create new LVs on the target node with unique names
8923 1. detach old LVs from the drbd device
8924 1. rename old LVs to name_replaced.<time_t>
8925 1. rename new LVs to old LVs
8926 1. attach the new LVs (with the old names now) to the drbd device
8928 1. wait for sync across all devices
8930 1. for each modified disk:
8932 1. remove old LVs (which have the name name_replaces.<time_t>)
8934 Failures are not very well handled.
8939 # Step: check device activation
8940 self.lu.LogStep(1, steps_total, "Check device existence")
8941 self._CheckDisksExistence([self.other_node, self.target_node])
8942 self._CheckVolumeGroup([self.target_node, self.other_node])
8944 # Step: check other node consistency
8945 self.lu.LogStep(2, steps_total, "Check peer consistency")
8946 self._CheckDisksConsistency(self.other_node,
8947 self.other_node == self.instance.primary_node,
8950 # Step: create new storage
8951 self.lu.LogStep(3, steps_total, "Allocate new storage")
8952 iv_names = self._CreateNewStorage(self.target_node)
8954 # Step: for each lv, detach+rename*2+attach
8955 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8956 for dev, old_lvs, new_lvs in iv_names.itervalues():
8957 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8959 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8961 result.Raise("Can't detach drbd from local storage on node"
8962 " %s for device %s" % (self.target_node, dev.iv_name))
8964 #cfg.Update(instance)
8966 # ok, we created the new LVs, so now we know we have the needed
8967 # storage; as such, we proceed on the target node to rename
8968 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8969 # using the assumption that logical_id == physical_id (which in
8970 # turn is the unique_id on that node)
8972 # FIXME(iustin): use a better name for the replaced LVs
8973 temp_suffix = int(time.time())
8974 ren_fn = lambda d, suff: (d.physical_id[0],
8975 d.physical_id[1] + "_replaced-%s" % suff)
8977 # Build the rename list based on what LVs exist on the node
8978 rename_old_to_new = []
8979 for to_ren in old_lvs:
8980 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8981 if not result.fail_msg and result.payload:
8983 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8985 self.lu.LogInfo("Renaming the old LVs on the target node")
8986 result = self.rpc.call_blockdev_rename(self.target_node,
8988 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8990 # Now we rename the new LVs to the old LVs
8991 self.lu.LogInfo("Renaming the new LVs on the target node")
8992 rename_new_to_old = [(new, old.physical_id)
8993 for old, new in zip(old_lvs, new_lvs)]
8994 result = self.rpc.call_blockdev_rename(self.target_node,
8996 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8998 for old, new in zip(old_lvs, new_lvs):
8999 new.logical_id = old.logical_id
9000 self.cfg.SetDiskID(new, self.target_node)
9002 for disk in old_lvs:
9003 disk.logical_id = ren_fn(disk, temp_suffix)
9004 self.cfg.SetDiskID(disk, self.target_node)
9006 # Now that the new lvs have the old name, we can add them to the device
9007 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9008 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9010 msg = result.fail_msg
9012 for new_lv in new_lvs:
9013 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9016 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9017 hint=("cleanup manually the unused logical"
9019 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9021 dev.children = new_lvs
9023 self.cfg.Update(self.instance, feedback_fn)
9026 if self.early_release:
9027 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9029 self._RemoveOldStorage(self.target_node, iv_names)
9030 # WARNING: we release both node locks here, do not do other RPCs
9031 # than WaitForSync to the primary node
9032 self._ReleaseNodeLock([self.target_node, self.other_node])
9035 # This can fail as the old devices are degraded and _WaitForSync
9036 # does a combined result over all disks, so we don't check its return value
9037 self.lu.LogStep(cstep, steps_total, "Sync devices")
9039 _WaitForSync(self.lu, self.instance)
9041 # Check all devices manually
9042 self._CheckDevices(self.instance.primary_node, iv_names)
9044 # Step: remove old storage
9045 if not self.early_release:
9046 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9048 self._RemoveOldStorage(self.target_node, iv_names)
9050 def _ExecDrbd8Secondary(self, feedback_fn):
9051 """Replace the secondary node for DRBD 8.
9053 The algorithm for replace is quite complicated:
9054 - for all disks of the instance:
9055 - create new LVs on the new node with same names
9056 - shutdown the drbd device on the old secondary
9057 - disconnect the drbd network on the primary
9058 - create the drbd device on the new secondary
9059 - network attach the drbd on the primary, using an artifice:
9060 the drbd code for Attach() will connect to the network if it
9061 finds a device which is connected to the good local disks but
9063 - wait for sync across all devices
9064 - remove all disks from the old secondary
9066 Failures are not very well handled.
9071 # Step: check device activation
9072 self.lu.LogStep(1, steps_total, "Check device existence")
9073 self._CheckDisksExistence([self.instance.primary_node])
9074 self._CheckVolumeGroup([self.instance.primary_node])
9076 # Step: check other node consistency
9077 self.lu.LogStep(2, steps_total, "Check peer consistency")
9078 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9080 # Step: create new storage
9081 self.lu.LogStep(3, steps_total, "Allocate new storage")
9082 for idx, dev in enumerate(self.instance.disks):
9083 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9084 (self.new_node, idx))
9085 # we pass force_create=True to force LVM creation
9086 for new_lv in dev.children:
9087 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9088 _GetInstanceInfoText(self.instance), False)
9090 # Step 4: dbrd minors and drbd setups changes
9091 # after this, we must manually remove the drbd minors on both the
9092 # error and the success paths
9093 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9094 minors = self.cfg.AllocateDRBDMinor([self.new_node
9095 for dev in self.instance.disks],
9097 logging.debug("Allocated minors %r", minors)
9100 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9101 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9102 (self.new_node, idx))
9103 # create new devices on new_node; note that we create two IDs:
9104 # one without port, so the drbd will be activated without
9105 # networking information on the new node at this stage, and one
9106 # with network, for the latter activation in step 4
9107 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9108 if self.instance.primary_node == o_node1:
9111 assert self.instance.primary_node == o_node2, "Three-node instance?"
9114 new_alone_id = (self.instance.primary_node, self.new_node, None,
9115 p_minor, new_minor, o_secret)
9116 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9117 p_minor, new_minor, o_secret)
9119 iv_names[idx] = (dev, dev.children, new_net_id)
9120 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9122 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9123 logical_id=new_alone_id,
9124 children=dev.children,
9127 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9128 _GetInstanceInfoText(self.instance), False)
9129 except errors.GenericError:
9130 self.cfg.ReleaseDRBDMinors(self.instance.name)
9133 # We have new devices, shutdown the drbd on the old secondary
9134 for idx, dev in enumerate(self.instance.disks):
9135 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9136 self.cfg.SetDiskID(dev, self.target_node)
9137 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9139 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9140 "node: %s" % (idx, msg),
9141 hint=("Please cleanup this device manually as"
9142 " soon as possible"))
9144 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9145 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9146 self.node_secondary_ip,
9147 self.instance.disks)\
9148 [self.instance.primary_node]
9150 msg = result.fail_msg
9152 # detaches didn't succeed (unlikely)
9153 self.cfg.ReleaseDRBDMinors(self.instance.name)
9154 raise errors.OpExecError("Can't detach the disks from the network on"
9155 " old node: %s" % (msg,))
9157 # if we managed to detach at least one, we update all the disks of
9158 # the instance to point to the new secondary
9159 self.lu.LogInfo("Updating instance configuration")
9160 for dev, _, new_logical_id in iv_names.itervalues():
9161 dev.logical_id = new_logical_id
9162 self.cfg.SetDiskID(dev, self.instance.primary_node)
9164 self.cfg.Update(self.instance, feedback_fn)
9166 # and now perform the drbd attach
9167 self.lu.LogInfo("Attaching primary drbds to new secondary"
9168 " (standalone => connected)")
9169 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9171 self.node_secondary_ip,
9172 self.instance.disks,
9175 for to_node, to_result in result.items():
9176 msg = to_result.fail_msg
9178 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9180 hint=("please do a gnt-instance info to see the"
9181 " status of disks"))
9183 if self.early_release:
9184 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9186 self._RemoveOldStorage(self.target_node, iv_names)
9187 # WARNING: we release all node locks here, do not do other RPCs
9188 # than WaitForSync to the primary node
9189 self._ReleaseNodeLock([self.instance.primary_node,
9194 # This can fail as the old devices are degraded and _WaitForSync
9195 # does a combined result over all disks, so we don't check its return value
9196 self.lu.LogStep(cstep, steps_total, "Sync devices")
9198 _WaitForSync(self.lu, self.instance)
9200 # Check all devices manually
9201 self._CheckDevices(self.instance.primary_node, iv_names)
9203 # Step: remove old storage
9204 if not self.early_release:
9205 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9206 self._RemoveOldStorage(self.target_node, iv_names)
9209 class LURepairNodeStorage(NoHooksLU):
9210 """Repairs the volume group on a node.
9215 def CheckArguments(self):
9216 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9218 storage_type = self.op.storage_type
9220 if (constants.SO_FIX_CONSISTENCY not in
9221 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9222 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9223 " repaired" % storage_type,
9226 def ExpandNames(self):
9227 self.needed_locks = {
9228 locking.LEVEL_NODE: [self.op.node_name],
9231 def _CheckFaultyDisks(self, instance, node_name):
9232 """Ensure faulty disks abort the opcode or at least warn."""
9234 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9236 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9237 " node '%s'" % (instance.name, node_name),
9239 except errors.OpPrereqError, err:
9240 if self.op.ignore_consistency:
9241 self.proc.LogWarning(str(err.args[0]))
9245 def CheckPrereq(self):
9246 """Check prerequisites.
9249 # Check whether any instance on this node has faulty disks
9250 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9251 if not inst.admin_up:
9253 check_nodes = set(inst.all_nodes)
9254 check_nodes.discard(self.op.node_name)
9255 for inst_node_name in check_nodes:
9256 self._CheckFaultyDisks(inst, inst_node_name)
9258 def Exec(self, feedback_fn):
9259 feedback_fn("Repairing storage unit '%s' on %s ..." %
9260 (self.op.name, self.op.node_name))
9262 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9263 result = self.rpc.call_storage_execute(self.op.node_name,
9264 self.op.storage_type, st_args,
9266 constants.SO_FIX_CONSISTENCY)
9267 result.Raise("Failed to repair storage unit '%s' on %s" %
9268 (self.op.name, self.op.node_name))
9271 class LUNodeEvacStrategy(NoHooksLU):
9272 """Computes the node evacuation strategy.
9277 def CheckArguments(self):
9278 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9280 def ExpandNames(self):
9281 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9282 self.needed_locks = locks = {}
9283 if self.op.remote_node is None:
9284 locks[locking.LEVEL_NODE] = locking.ALL_SET
9286 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9287 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9289 def Exec(self, feedback_fn):
9290 if self.op.remote_node is not None:
9292 for node in self.op.nodes:
9293 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9296 if i.primary_node == self.op.remote_node:
9297 raise errors.OpPrereqError("Node %s is the primary node of"
9298 " instance %s, cannot use it as"
9300 (self.op.remote_node, i.name),
9302 result.append([i.name, self.op.remote_node])
9304 ial = IAllocator(self.cfg, self.rpc,
9305 mode=constants.IALLOCATOR_MODE_MEVAC,
9306 evac_nodes=self.op.nodes)
9307 ial.Run(self.op.iallocator, validate=True)
9309 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9315 class LUInstanceGrowDisk(LogicalUnit):
9316 """Grow a disk of an instance.
9320 HTYPE = constants.HTYPE_INSTANCE
9323 def ExpandNames(self):
9324 self._ExpandAndLockInstance()
9325 self.needed_locks[locking.LEVEL_NODE] = []
9326 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9328 def DeclareLocks(self, level):
9329 if level == locking.LEVEL_NODE:
9330 self._LockInstancesNodes()
9332 def BuildHooksEnv(self):
9335 This runs on the master, the primary and all the secondaries.
9339 "DISK": self.op.disk,
9340 "AMOUNT": self.op.amount,
9342 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9345 def BuildHooksNodes(self):
9346 """Build hooks nodes.
9349 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9352 def CheckPrereq(self):
9353 """Check prerequisites.
9355 This checks that the instance is in the cluster.
9358 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9359 assert instance is not None, \
9360 "Cannot retrieve locked instance %s" % self.op.instance_name
9361 nodenames = list(instance.all_nodes)
9362 for node in nodenames:
9363 _CheckNodeOnline(self, node)
9365 self.instance = instance
9367 if instance.disk_template not in constants.DTS_GROWABLE:
9368 raise errors.OpPrereqError("Instance's disk layout does not support"
9369 " growing.", errors.ECODE_INVAL)
9371 self.disk = instance.FindDisk(self.op.disk)
9373 if instance.disk_template not in (constants.DT_FILE,
9374 constants.DT_SHARED_FILE):
9375 # TODO: check the free disk space for file, when that feature will be
9377 _CheckNodesFreeDiskPerVG(self, nodenames,
9378 self.disk.ComputeGrowth(self.op.amount))
9380 def Exec(self, feedback_fn):
9381 """Execute disk grow.
9384 instance = self.instance
9387 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9389 raise errors.OpExecError("Cannot activate block device to grow")
9391 for node in instance.all_nodes:
9392 self.cfg.SetDiskID(disk, node)
9393 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9394 result.Raise("Grow request failed to node %s" % node)
9396 # TODO: Rewrite code to work properly
9397 # DRBD goes into sync mode for a short amount of time after executing the
9398 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9399 # calling "resize" in sync mode fails. Sleeping for a short amount of
9400 # time is a work-around.
9403 disk.RecordGrow(self.op.amount)
9404 self.cfg.Update(instance, feedback_fn)
9405 if self.op.wait_for_sync:
9406 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9408 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9409 " status.\nPlease check the instance.")
9410 if not instance.admin_up:
9411 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9412 elif not instance.admin_up:
9413 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9414 " not supposed to be running because no wait for"
9415 " sync mode was requested.")
9418 class LUInstanceQueryData(NoHooksLU):
9419 """Query runtime instance data.
9424 def ExpandNames(self):
9425 self.needed_locks = {}
9426 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9428 if self.op.instances:
9429 self.wanted_names = []
9430 for name in self.op.instances:
9431 full_name = _ExpandInstanceName(self.cfg, name)
9432 self.wanted_names.append(full_name)
9433 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9435 self.wanted_names = None
9436 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9438 self.needed_locks[locking.LEVEL_NODE] = []
9439 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9441 def DeclareLocks(self, level):
9442 if level == locking.LEVEL_NODE:
9443 self._LockInstancesNodes()
9445 def CheckPrereq(self):
9446 """Check prerequisites.
9448 This only checks the optional instance list against the existing names.
9451 if self.wanted_names is None:
9452 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9454 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9455 in self.wanted_names]
9457 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9458 """Returns the status of a block device
9461 if self.op.static or not node:
9464 self.cfg.SetDiskID(dev, node)
9466 result = self.rpc.call_blockdev_find(node, dev)
9470 result.Raise("Can't compute disk status for %s" % instance_name)
9472 status = result.payload
9476 return (status.dev_path, status.major, status.minor,
9477 status.sync_percent, status.estimated_time,
9478 status.is_degraded, status.ldisk_status)
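# Illustrative example (values invented): for a healthy, fully synced DRBD
# device the tuple built above would look roughly like
#   ("/dev/drbd0", 147, 0, None, None, False, <ldisk status constant>)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); sync_percent and estimated_time are only meaningful while
# a resync is in progress.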
9480 def _ComputeDiskStatus(self, instance, snode, dev):
9481 """Compute block device status.
9484 if dev.dev_type in constants.LDS_DRBD:
9485 # we change the snode then (otherwise we use the one passed in)
9486 if dev.logical_id[0] == instance.primary_node:
9487 snode = dev.logical_id[1]
9489 snode = dev.logical_id[0]
9491 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9493 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9496 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9497 for child in dev.children]
9502 "iv_name": dev.iv_name,
9503 "dev_type": dev.dev_type,
9504 "logical_id": dev.logical_id,
9505 "physical_id": dev.physical_id,
9506 "pstatus": dev_pstatus,
9507 "sstatus": dev_sstatus,
9508 "children": dev_children,
9515 def Exec(self, feedback_fn):
9516 """Gather and return data"""
9519 cluster = self.cfg.GetClusterInfo()
9521 for instance in self.wanted_instances:
9522 if not self.op.static:
9523 remote_info = self.rpc.call_instance_info(instance.primary_node,
9525 instance.hypervisor)
9526 remote_info.Raise("Error checking node %s" % instance.primary_node)
9527 remote_info = remote_info.payload
9528 if remote_info and "state" in remote_info:
9531 remote_state = "down"
9534 if instance.admin_up:
9537 config_state = "down"
9539 disks = [self._ComputeDiskStatus(instance, None, device)
9540 for device in instance.disks]
9543 "name": instance.name,
9544 "config_state": config_state,
9545 "run_state": remote_state,
9546 "pnode": instance.primary_node,
9547 "snodes": instance.secondary_nodes,
9549 # this happens to be the same format used for hooks
9550 "nics": _NICListToTuple(self, instance.nics),
9551 "disk_template": instance.disk_template,
9553 "hypervisor": instance.hypervisor,
9554 "network_port": instance.network_port,
9555 "hv_instance": instance.hvparams,
9556 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9557 "be_instance": instance.beparams,
9558 "be_actual": cluster.FillBE(instance),
9559 "os_instance": instance.osparams,
9560 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9561 "serial_no": instance.serial_no,
9562 "mtime": instance.mtime,
9563 "ctime": instance.ctime,
9564 "uuid": instance.uuid,
9567 result[instance.name] = idict
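# Illustrative example (values invented): each idict collected above is
# keyed by instance name in the final result and has roughly this shape:
#   {"name": "inst1.example.com",
#    "config_state": "up",
#    "run_state": "up",
#    "pnode": "node1.example.com",
#    "snodes": ["node2.example.com"],
#    "nics": [(ip, mac, mode, link), ...],
#    "disk_template": "drbd",
#    "hypervisor": "kvm",
#    "network_port": 11000,
#    "hv_instance": {...}, "hv_actual": {...},
#    "be_instance": {...}, "be_actual": {...},
#    "os_instance": {...}, "os_actual": {...},
#    "serial_no": 5, "mtime": ..., "ctime": ..., "uuid": "...",
#    ...}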
9572 class LUInstanceSetParams(LogicalUnit):
9573 """Modifies an instances's parameters.
9576 HPATH = "instance-modify"
9577 HTYPE = constants.HTYPE_INSTANCE
9580 def CheckArguments(self):
9581 if not (self.op.nics or self.op.disks or self.op.disk_template or
9582 self.op.hvparams or self.op.beparams or self.op.os_name):
9583 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9585 if self.op.hvparams:
9586 _CheckGlobalHvParams(self.op.hvparams)
9590 for disk_op, disk_dict in self.op.disks:
9591 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9592 if disk_op == constants.DDM_REMOVE:
9595 elif disk_op == constants.DDM_ADD:
9598 if not isinstance(disk_op, int):
9599 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9600 if not isinstance(disk_dict, dict):
9601 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9602 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9604 if disk_op == constants.DDM_ADD:
9605 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9606 if mode not in constants.DISK_ACCESS_SET:
9607 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9609 size = disk_dict.get(constants.IDISK_SIZE, None)
9611 raise errors.OpPrereqError("Required disk parameter size missing",
9615 except (TypeError, ValueError), err:
9616 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9617 str(err), errors.ECODE_INVAL)
9618 disk_dict[constants.IDISK_SIZE] = size
9620 # modification of disk
9621 if constants.IDISK_SIZE in disk_dict:
9622 raise errors.OpPrereqError("Disk size change not possible, use"
9623 " grow-disk", errors.ECODE_INVAL)
9625 if disk_addremove > 1:
9626 raise errors.OpPrereqError("Only one disk add or remove operation"
9627 " supported at a time", errors.ECODE_INVAL)
9629 if self.op.disks and self.op.disk_template is not None:
9630 raise errors.OpPrereqError("Disk template conversion and other disk"
9631 " changes not supported at the same time",
9634 if (self.op.disk_template and
9635 self.op.disk_template in constants.DTS_INT_MIRROR and
9636 self.op.remote_node is None):
9637 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9638 " one requires specifying a secondary node",
9643 for nic_op, nic_dict in self.op.nics:
9644 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9645 if nic_op == constants.DDM_REMOVE:
9648 elif nic_op == constants.DDM_ADD:
9651 if not isinstance(nic_op, int):
9652 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9653 if not isinstance(nic_dict, dict):
9654 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9655 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9657 # nic_dict should be a dict
9658 nic_ip = nic_dict.get(constants.INIC_IP, None)
9659 if nic_ip is not None:
9660 if nic_ip.lower() == constants.VALUE_NONE:
9661 nic_dict[constants.INIC_IP] = None
9663 if not netutils.IPAddress.IsValid(nic_ip):
9664 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9667 nic_bridge = nic_dict.get('bridge', None)
9668 nic_link = nic_dict.get(constants.INIC_LINK, None)
9669 if nic_bridge and nic_link:
9670 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9671 " at the same time", errors.ECODE_INVAL)
9672 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9673 nic_dict['bridge'] = None
9674 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9675 nic_dict[constants.INIC_LINK] = None
9677 if nic_op == constants.DDM_ADD:
9678 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9680 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9682 if constants.INIC_MAC in nic_dict:
9683 nic_mac = nic_dict[constants.INIC_MAC]
9684 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9685 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9687 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9688 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9689 " modifying an existing nic",
9692 if nic_addremove > 1:
9693 raise errors.OpPrereqError("Only one NIC add or remove operation"
9694 " supported at a time", errors.ECODE_INVAL)
9696 def ExpandNames(self):
9697 self._ExpandAndLockInstance()
9698 self.needed_locks[locking.LEVEL_NODE] = []
9699 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9701 def DeclareLocks(self, level):
9702 if level == locking.LEVEL_NODE:
9703 self._LockInstancesNodes()
9704 if self.op.disk_template and self.op.remote_node:
9705 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9706 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9708 def BuildHooksEnv(self):
9711 This runs on the master, primary and secondaries.
9715 if constants.BE_MEMORY in self.be_new:
9716 args['memory'] = self.be_new[constants.BE_MEMORY]
9717 if constants.BE_VCPUS in self.be_new:
9718 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9719 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9720 # information at all.
9723 nic_override = dict(self.op.nics)
9724 for idx, nic in enumerate(self.instance.nics):
9725 if idx in nic_override:
9726 this_nic_override = nic_override[idx]
9728 this_nic_override = {}
9729 if constants.INIC_IP in this_nic_override:
9730 ip = this_nic_override[constants.INIC_IP]
9733 if constants.INIC_MAC in this_nic_override:
9734 mac = this_nic_override[constants.INIC_MAC]
9737 if idx in self.nic_pnew:
9738 nicparams = self.nic_pnew[idx]
9740 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9741 mode = nicparams[constants.NIC_MODE]
9742 link = nicparams[constants.NIC_LINK]
9743 args['nics'].append((ip, mac, mode, link))
9744 if constants.DDM_ADD in nic_override:
9745 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9746 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9747 nicparams = self.nic_pnew[constants.DDM_ADD]
9748 mode = nicparams[constants.NIC_MODE]
9749 link = nicparams[constants.NIC_LINK]
9750 args['nics'].append((ip, mac, mode, link))
9751 elif constants.DDM_REMOVE in nic_override:
9752 del args['nics'][-1]
9754 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9755 if self.op.disk_template:
9756 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9760 def BuildHooksNodes(self):
9761 """Build hooks nodes.
9764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9767 def CheckPrereq(self):
9768 """Check prerequisites.
9770 This only checks the instance list against the existing names.
9773 # checking the new params on the primary/secondary nodes
9775 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9776 cluster = self.cluster = self.cfg.GetClusterInfo()
9777 assert self.instance is not None, \
9778 "Cannot retrieve locked instance %s" % self.op.instance_name
9779 pnode = instance.primary_node
9780 nodelist = list(instance.all_nodes)
9783 if self.op.os_name and not self.op.force:
9784 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9785 self.op.force_variant)
9786 instance_os = self.op.os_name
9788 instance_os = instance.os
9790 if self.op.disk_template:
9791 if instance.disk_template == self.op.disk_template:
9792 raise errors.OpPrereqError("Instance already has disk template %s" %
9793 instance.disk_template, errors.ECODE_INVAL)
9795 if (instance.disk_template,
9796 self.op.disk_template) not in self._DISK_CONVERSIONS:
9797 raise errors.OpPrereqError("Unsupported disk template conversion from"
9798 " %s to %s" % (instance.disk_template,
9799 self.op.disk_template),
9801 _CheckInstanceDown(self, instance, "cannot change disk template")
9802 if self.op.disk_template in constants.DTS_INT_MIRROR:
9803 if self.op.remote_node == pnode:
9804 raise errors.OpPrereqError("Given new secondary node %s is the same"
9805 " as the primary node of the instance" %
9806 self.op.remote_node, errors.ECODE_STATE)
9807 _CheckNodeOnline(self, self.op.remote_node)
9808 _CheckNodeNotDrained(self, self.op.remote_node)
9809 # FIXME: here we assume that the old instance type is DT_PLAIN
9810 assert instance.disk_template == constants.DT_PLAIN
9811 disks = [{constants.IDISK_SIZE: d.size,
9812 constants.IDISK_VG: d.logical_id[0]}
9813 for d in instance.disks]
9814 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9815 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9817 # hvparams processing
9818 if self.op.hvparams:
9819 hv_type = instance.hypervisor
9820 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9821 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9822 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9825 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9826 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9827 self.hv_new = hv_new # the new actual values
9828 self.hv_inst = i_hvdict # the new dict (without defaults)
9830 self.hv_new = self.hv_inst = {}
9832 # beparams processing
9833 if self.op.beparams:
9834 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9836 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9837 be_new = cluster.SimpleFillBE(i_bedict)
9838 self.be_new = be_new # the new actual values
9839 self.be_inst = i_bedict # the new dict (without defaults)
9841 self.be_new = self.be_inst = {}
9843 # osparams processing
9844 if self.op.osparams:
9845 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9846 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9847 self.os_inst = i_osdict # the new dict (without defaults)
9853 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9854 mem_check_list = [pnode]
9855 if be_new[constants.BE_AUTO_BALANCE]:
9856 # either we changed auto_balance to yes or it was from before
9857 mem_check_list.extend(instance.secondary_nodes)
9858 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9859 instance.hypervisor)
9860 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9861 instance.hypervisor)
9862 pninfo = nodeinfo[pnode]
9863 msg = pninfo.fail_msg
9865 # Assume the primary node is unreachable and go ahead
9866 self.warn.append("Can't get info from primary node %s: %s" %
9868 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9869 self.warn.append("Node data from primary node %s doesn't contain"
9870 " free memory information" % pnode)
9871 elif instance_info.fail_msg:
9872 self.warn.append("Can't get instance runtime information: %s" %
9873 instance_info.fail_msg)
9875 if instance_info.payload:
9876 current_mem = int(instance_info.payload['memory'])
9878 # Assume instance not running
9879 # (there is a slight race condition here, but it's not very probable,
9880 # and we have no other way to check)
9882 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9883 pninfo.payload['memory_free'])
9885 raise errors.OpPrereqError("This change will prevent the instance"
9886 " from starting, due to %d MB of memory"
9887 " missing on its primary node" % miss_mem,
9890 if be_new[constants.BE_AUTO_BALANCE]:
9891 for node, nres in nodeinfo.items():
9892 if node not in instance.secondary_nodes:
9896 self.warn.append("Can't get info from secondary node %s: %s" %
9898 elif not isinstance(nres.payload.get('memory_free', None), int):
9899 self.warn.append("Secondary node %s didn't return free"
9900 " memory information" % node)
9901 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9902 self.warn.append("Not enough memory to failover instance to"
9903 " secondary node %s" % node)
9908 for nic_op, nic_dict in self.op.nics:
9909 if nic_op == constants.DDM_REMOVE:
9910 if not instance.nics:
9911 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9914 if nic_op != constants.DDM_ADD:
9916 if not instance.nics:
9917 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9918 " no NICs" % nic_op,
9920 if nic_op < 0 or nic_op >= len(instance.nics):
9921 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9923 (nic_op, len(instance.nics) - 1),
9925 old_nic_params = instance.nics[nic_op].nicparams
9926 old_nic_ip = instance.nics[nic_op].ip
9931 update_params_dict = dict([(key, nic_dict[key])
9932 for key in constants.NICS_PARAMETERS
9933 if key in nic_dict])
9935 if 'bridge' in nic_dict:
9936 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9938 new_nic_params = _GetUpdatedParams(old_nic_params,
9940 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9941 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9942 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9943 self.nic_pinst[nic_op] = new_nic_params
9944 self.nic_pnew[nic_op] = new_filled_nic_params
9945 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9947 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9948 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9949 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9951 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9953 self.warn.append(msg)
9955 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9956 if new_nic_mode == constants.NIC_MODE_ROUTED:
9957 if constants.INIC_IP in nic_dict:
9958 nic_ip = nic_dict[constants.INIC_IP]
9962 raise errors.OpPrereqError('Cannot set the nic ip to None'
9963 ' on a routed nic', errors.ECODE_INVAL)
9964 if constants.INIC_MAC in nic_dict:
9965 nic_mac = nic_dict[constants.INIC_MAC]
9967 raise errors.OpPrereqError('Cannot set the nic mac to None',
9969 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9970 # otherwise generate the mac
9971 nic_dict[constants.INIC_MAC] = \
9972 self.cfg.GenerateMAC(self.proc.GetECId())
9974 # or validate/reserve the current one
9976 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9977 except errors.ReservationError:
9978 raise errors.OpPrereqError("MAC address %s already in use"
9979 " in cluster" % nic_mac,
9980 errors.ECODE_NOTUNIQUE)
9983 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9984 raise errors.OpPrereqError("Disk operations not supported for"
9985 " diskless instances",
9987 for disk_op, _ in self.op.disks:
9988 if disk_op == constants.DDM_REMOVE:
9989 if len(instance.disks) == 1:
9990 raise errors.OpPrereqError("Cannot remove the last disk of"
9991 " an instance", errors.ECODE_INVAL)
9992 _CheckInstanceDown(self, instance, "cannot remove disks")
9994 if (disk_op == constants.DDM_ADD and
9995 len(instance.disks) >= constants.MAX_DISKS):
9996 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9997 " add more" % constants.MAX_DISKS,
9999 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10001 if disk_op < 0 or disk_op >= len(instance.disks):
10002 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10004 (disk_op, len(instance.disks)),
10005 errors.ECODE_INVAL)
10009 def _ConvertPlainToDrbd(self, feedback_fn):
10010 """Converts an instance from plain to drbd.
10013 feedback_fn("Converting template to drbd")
10014 instance = self.instance
10015 pnode = instance.primary_node
10016 snode = self.op.remote_node
10018 # create a fake disk info for _GenerateDiskTemplate
10019 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10020 for d in instance.disks]
10021 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10022 instance.name, pnode, [snode],
10023 disk_info, None, None, 0, feedback_fn)
10024 info = _GetInstanceInfoText(instance)
10025 feedback_fn("Creating additional volumes...")
10026 # first, create the missing data and meta devices
10027 for disk in new_disks:
10028 # unfortunately this is... not too nice
10029 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10031 for child in disk.children:
10032 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10033 # at this stage, all new LVs have been created, we can rename the
10035 feedback_fn("Renaming original volumes...")
10036 rename_list = [(o, n.children[0].logical_id)
10037 for (o, n) in zip(instance.disks, new_disks)]
10038 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10039 result.Raise("Failed to rename original LVs")
10041 feedback_fn("Initializing DRBD devices...")
10042 # all child devices are in place, we can now create the DRBD devices
10043 for disk in new_disks:
10044 for node in [pnode, snode]:
10045 f_create = node == pnode
10046 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10048 # at this point, the instance has been modified
10049 instance.disk_template = constants.DT_DRBD8
10050 instance.disks = new_disks
10051 self.cfg.Update(instance, feedback_fn)
10053 # disks are created, waiting for sync
10054 disk_abort = not _WaitForSync(self, instance)
10056 raise errors.OpExecError("There are some degraded disks for"
10057 " this instance, please cleanup manually")
10059 def _ConvertDrbdToPlain(self, feedback_fn):
10060 """Converts an instance from drbd to plain.
10063 instance = self.instance
10064 assert len(instance.secondary_nodes) == 1
10065 pnode = instance.primary_node
10066 snode = instance.secondary_nodes[0]
10067 feedback_fn("Converting template to plain")
10069 old_disks = instance.disks
10070 new_disks = [d.children[0] for d in old_disks]
10072 # copy over size and mode
10073 for parent, child in zip(old_disks, new_disks):
10074 child.size = parent.size
10075 child.mode = parent.mode
10077 # update instance structure
10078 instance.disks = new_disks
10079 instance.disk_template = constants.DT_PLAIN
10080 self.cfg.Update(instance, feedback_fn)
10082 feedback_fn("Removing volumes on the secondary node...")
10083 for disk in old_disks:
10084 self.cfg.SetDiskID(disk, snode)
10085 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10087 self.LogWarning("Could not remove block device %s on node %s,"
10088 " continuing anyway: %s", disk.iv_name, snode, msg)
10090 feedback_fn("Removing unneeded volumes on the primary node...")
10091 for idx, disk in enumerate(old_disks):
10092 meta = disk.children[1]
10093 self.cfg.SetDiskID(meta, pnode)
10094 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10096 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10097 " continuing anyway: %s", idx, pnode, msg)
10099 def Exec(self, feedback_fn):
10100 """Modifies an instance.
10102 All parameters take effect only at the next restart of the instance.
10105 # Process here the warnings from CheckPrereq, as we don't have a
10106 # feedback_fn there.
10107 for warn in self.warn:
10108 feedback_fn("WARNING: %s" % warn)
10111 instance = self.instance
10113 for disk_op, disk_dict in self.op.disks:
10114 if disk_op == constants.DDM_REMOVE:
10115 # remove the last disk
10116 device = instance.disks.pop()
10117 device_idx = len(instance.disks)
10118 for node, disk in device.ComputeNodeTree(instance.primary_node):
10119 self.cfg.SetDiskID(disk, node)
10120 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10122 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10123 " continuing anyway", device_idx, node, msg)
10124 result.append(("disk/%d" % device_idx, "remove"))
10125 elif disk_op == constants.DDM_ADD:
10127 if instance.disk_template in (constants.DT_FILE,
10128 constants.DT_SHARED_FILE):
10129 file_driver, file_path = instance.disks[0].logical_id
10130 file_path = os.path.dirname(file_path)
10132 file_driver = file_path = None
10133 disk_idx_base = len(instance.disks)
10134 new_disk = _GenerateDiskTemplate(self,
10135 instance.disk_template,
10136 instance.name, instance.primary_node,
10137 instance.secondary_nodes,
10141 disk_idx_base, feedback_fn)[0]
10142 instance.disks.append(new_disk)
10143 info = _GetInstanceInfoText(instance)
10145 logging.info("Creating volume %s for instance %s",
10146 new_disk.iv_name, instance.name)
10147 # Note: this needs to be kept in sync with _CreateDisks
10149 for node in instance.all_nodes:
10150 f_create = node == instance.primary_node
10152 _CreateBlockDev(self, node, instance, new_disk,
10153 f_create, info, f_create)
10154 except errors.OpExecError, err:
10155 self.LogWarning("Failed to create volume %s (%s) on"
10157 new_disk.iv_name, new_disk, node, err)
10158 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10159 (new_disk.size, new_disk.mode)))
10161 # change a given disk
10162 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10163 result.append(("disk.mode/%d" % disk_op,
10164 disk_dict[constants.IDISK_MODE]))
10166 if self.op.disk_template:
10167 r_shut = _ShutdownInstanceDisks(self, instance)
10169 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10170 " proceed with disk template conversion")
10171 mode = (instance.disk_template, self.op.disk_template)
10173 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10175 self.cfg.ReleaseDRBDMinors(instance.name)
10177 result.append(("disk_template", self.op.disk_template))
10180 for nic_op, nic_dict in self.op.nics:
10181 if nic_op == constants.DDM_REMOVE:
10182 # remove the last nic
10183 del instance.nics[-1]
10184 result.append(("nic.%d" % len(instance.nics), "remove"))
10185 elif nic_op == constants.DDM_ADD:
10186 # mac and bridge should be set by now
10187 mac = nic_dict[constants.INIC_MAC]
10188 ip = nic_dict.get(constants.INIC_IP, None)
10189 nicparams = self.nic_pinst[constants.DDM_ADD]
10190 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10191 instance.nics.append(new_nic)
10192 result.append(("nic.%d" % (len(instance.nics) - 1),
10193 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10194 (new_nic.mac, new_nic.ip,
10195 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10196 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10199 for key in (constants.INIC_MAC, constants.INIC_IP):
10200 if key in nic_dict:
10201 setattr(instance.nics[nic_op], key, nic_dict[key])
10202 if nic_op in self.nic_pinst:
10203 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10204 for key, val in nic_dict.iteritems():
10205 result.append(("nic.%s/%d" % (key, nic_op), val))
10208 if self.op.hvparams:
10209 instance.hvparams = self.hv_inst
10210 for key, val in self.op.hvparams.iteritems():
10211 result.append(("hv/%s" % key, val))
10214 if self.op.beparams:
10215 instance.beparams = self.be_inst
10216 for key, val in self.op.beparams.iteritems():
10217 result.append(("be/%s" % key, val))
10220 if self.op.os_name:
10221 instance.os = self.op.os_name
10224 if self.op.osparams:
10225 instance.osparams = self.os_inst
10226 for key, val in self.op.osparams.iteritems():
10227 result.append(("os/%s" % key, val))
10229 self.cfg.Update(instance, feedback_fn)
10233 _DISK_CONVERSIONS = {
10234 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10235 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
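# Illustrative usage (the CLI spelling is an assumption, not taken from this
# file): the two conversion helpers registered above back disk template
# changes requested through OpInstanceSetParams, e.g. plain -> drbd with a
# new secondary node:
#   gnt-instance modify -t drbd -n node2.example.com inst1.example.com
# and drbd -> plain, after which the secondary's volumes are removed:
#   gnt-instance modify -t plain inst1.example.com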
10239 class LUBackupQuery(NoHooksLU):
10240 """Query the exports list
10245 def ExpandNames(self):
10246 self.needed_locks = {}
10247 self.share_locks[locking.LEVEL_NODE] = 1
10248 if not self.op.nodes:
10249 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10251 self.needed_locks[locking.LEVEL_NODE] = \
10252 _GetWantedNodes(self, self.op.nodes)
10254 def Exec(self, feedback_fn):
10255 """Compute the list of all the exported system images.
10258 @return: a dictionary with the structure node->(export-list)
10259 where export-list is a list of the instances exported on
10263 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10264 rpcresult = self.rpc.call_export_list(self.nodes)
10266 for node in rpcresult:
10267 if rpcresult[node].fail_msg:
10268 result[node] = False
10270 result[node] = rpcresult[node].payload
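# Illustrative example (invented names): the mapping built above looks like
#   {"node1.example.com": ["inst1.example.com", "inst3.example.com"],
#    "node2.example.com": False}   # RPC to this node failed
# i.e. a per-node export list, with False marking nodes that could not be
# queried.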
10275 class LUBackupPrepare(NoHooksLU):
10276 """Prepares an instance for an export and returns useful information.
10281 def ExpandNames(self):
10282 self._ExpandAndLockInstance()
10284 def CheckPrereq(self):
10285 """Check prerequisites.
10288 instance_name = self.op.instance_name
10290 self.instance = self.cfg.GetInstanceInfo(instance_name)
10291 assert self.instance is not None, \
10292 "Cannot retrieve locked instance %s" % self.op.instance_name
10293 _CheckNodeOnline(self, self.instance.primary_node)
10295 self._cds = _GetClusterDomainSecret()
10297 def Exec(self, feedback_fn):
10298 """Prepares an instance for an export.
10301 instance = self.instance
10303 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10304 salt = utils.GenerateSecret(8)
10306 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10307 result = self.rpc.call_x509_cert_create(instance.primary_node,
10308 constants.RIE_CERT_VALIDITY)
10309 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10311 (name, cert_pem) = result.payload
10313 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10317 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10318 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10320 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10326 class LUBackupExport(LogicalUnit):
10327 """Export an instance to an image in the cluster.
10330 HPATH = "instance-export"
10331 HTYPE = constants.HTYPE_INSTANCE
10334 def CheckArguments(self):
10335 """Check the arguments.
10338 self.x509_key_name = self.op.x509_key_name
10339 self.dest_x509_ca_pem = self.op.destination_x509_ca
10341 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10342 if not self.x509_key_name:
10343 raise errors.OpPrereqError("Missing X509 key name for encryption",
10344 errors.ECODE_INVAL)
10346 if not self.dest_x509_ca_pem:
10347 raise errors.OpPrereqError("Missing destination X509 CA",
10348 errors.ECODE_INVAL)
10350 def ExpandNames(self):
10351 self._ExpandAndLockInstance()
10353 # Lock all nodes for local exports
10354 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10355 # FIXME: lock only instance primary and destination node
10357 # Sad but true, for now we have to lock all nodes, as we don't know where
10358 # the previous export might be, and in this LU we search for it and
10359 # remove it from its current node. In the future we could fix this by:
10360 # - making a tasklet to search (share-lock all), then create the
10361 # new one, then one to remove, after
10362 # - removing the removal operation altogether
10363 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10365 def DeclareLocks(self, level):
10366 """Last minute lock declaration."""
10367 # All nodes are locked anyway, so nothing to do here.
10369 def BuildHooksEnv(self):
10370 """Build hooks env.
10372 This will run on the master, primary node and target node.
10376 "EXPORT_MODE": self.op.mode,
10377 "EXPORT_NODE": self.op.target_node,
10378 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10379 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10380 # TODO: Generic function for boolean env variables
10381 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10384 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10388 def BuildHooksNodes(self):
10389 """Build hooks nodes.
10392 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10394 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10395 nl.append(self.op.target_node)
10399 def CheckPrereq(self):
10400 """Check prerequisites.
10402 This checks that the instance and node names are valid.
10405 instance_name = self.op.instance_name
10407 self.instance = self.cfg.GetInstanceInfo(instance_name)
10408 assert self.instance is not None, \
10409 "Cannot retrieve locked instance %s" % self.op.instance_name
10410 _CheckNodeOnline(self, self.instance.primary_node)
10412 if (self.op.remove_instance and self.instance.admin_up and
10413 not self.op.shutdown):
10414 raise errors.OpPrereqError("Can not remove instance without shutting it"
10417 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10418 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10419 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10420 assert self.dst_node is not None
10422 _CheckNodeOnline(self, self.dst_node.name)
10423 _CheckNodeNotDrained(self, self.dst_node.name)
10426 self.dest_disk_info = None
10427 self.dest_x509_ca = None
10429 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10430 self.dst_node = None
10432 if len(self.op.target_node) != len(self.instance.disks):
10433 raise errors.OpPrereqError(("Received destination information for %s"
10434 " disks, but instance %s has %s disks") %
10435 (len(self.op.target_node), instance_name,
10436 len(self.instance.disks)),
10437 errors.ECODE_INVAL)
10439 cds = _GetClusterDomainSecret()
10441 # Check X509 key name
10443 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10444 except (TypeError, ValueError), err:
10445 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10447 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10448 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10449 errors.ECODE_INVAL)
10451 # Load and verify CA
10453 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10454 except OpenSSL.crypto.Error, err:
10455 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10456 (err, ), errors.ECODE_INVAL)
10458 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10459 if errcode is not None:
10460 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10461 (msg, ), errors.ECODE_INVAL)
10463 self.dest_x509_ca = cert
10465 # Verify target information
10467 for idx, disk_data in enumerate(self.op.target_node):
10469 (host, port, magic) = \
10470 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10471 except errors.GenericError, err:
10472 raise errors.OpPrereqError("Target info for disk %s: %s" %
10473 (idx, err), errors.ECODE_INVAL)
10475 disk_info.append((host, port, magic))
10477 assert len(disk_info) == len(self.op.target_node)
10478 self.dest_disk_info = disk_info
10481 raise errors.ProgrammerError("Unhandled export mode %r" %
10484 # instance disk type verification
10485 # TODO: Implement export support for file-based disks
10486 for disk in self.instance.disks:
10487 if disk.dev_type == constants.LD_FILE:
10488 raise errors.OpPrereqError("Export not supported for instances with"
10489 " file-based disks", errors.ECODE_INVAL)
10491 def _CleanupExports(self, feedback_fn):
10492 """Removes exports of current instance from all other nodes.
10494 If an instance in a cluster with nodes A..D was exported to node C, its
10495 exports will be removed from the nodes A, B and D.
10498 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10500 nodelist = self.cfg.GetNodeList()
10501 nodelist.remove(self.dst_node.name)
10503 # on one-node clusters nodelist will be empty after the removal;
10504 # if we proceed the backup would be removed because OpBackupQuery
10505 # substitutes an empty list with the full cluster node list.
10506 iname = self.instance.name
10508 feedback_fn("Removing old exports for instance %s" % iname)
10509 exportlist = self.rpc.call_export_list(nodelist)
10510 for node in exportlist:
10511 if exportlist[node].fail_msg:
10513 if iname in exportlist[node].payload:
10514 msg = self.rpc.call_export_remove(node, iname).fail_msg
10516 self.LogWarning("Could not remove older export for instance %s"
10517 " on node %s: %s", iname, node, msg)
10519 def Exec(self, feedback_fn):
10520 """Export an instance to an image in the cluster.
10523 assert self.op.mode in constants.EXPORT_MODES
10525 instance = self.instance
10526 src_node = instance.primary_node
10528 if self.op.shutdown:
10529 # shutdown the instance, but not the disks
10530 feedback_fn("Shutting down instance %s" % instance.name)
10531 result = self.rpc.call_instance_shutdown(src_node, instance,
10532 self.op.shutdown_timeout)
10533 # TODO: Maybe ignore failures if ignore_remove_failures is set
10534 result.Raise("Could not shutdown instance %s on"
10535 " node %s" % (instance.name, src_node))
10537 # set the disks ID correctly since call_instance_start needs the
10538 # correct drbd minor to create the symlinks
10539 for disk in instance.disks:
10540 self.cfg.SetDiskID(disk, src_node)
10542 activate_disks = (not instance.admin_up)
10545 # Activate the instance disks if we're exporting a stopped instance
10546 feedback_fn("Activating disks for %s" % instance.name)
10547 _StartInstanceDisks(self, instance, None)
10550 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10553 helper.CreateSnapshots()
10555 if (self.op.shutdown and instance.admin_up and
10556 not self.op.remove_instance):
10557 assert not activate_disks
10558 feedback_fn("Starting instance %s" % instance.name)
10559 result = self.rpc.call_instance_start(src_node, instance, None, None)
10560 msg = result.fail_msg
10562 feedback_fn("Failed to start instance: %s" % msg)
10563 _ShutdownInstanceDisks(self, instance)
10564 raise errors.OpExecError("Could not start instance: %s" % msg)
10566 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10567 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10568 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10569 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10570 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10572 (key_name, _, _) = self.x509_key_name
10575 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10578 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10579 key_name, dest_ca_pem,
10584 # Check for backwards compatibility
10585 assert len(dresults) == len(instance.disks)
10586 assert compat.all(isinstance(i, bool) for i in dresults), \
10587 "Not all results are boolean: %r" % dresults
10591 feedback_fn("Deactivating disks for %s" % instance.name)
10592 _ShutdownInstanceDisks(self, instance)
10594 if not (compat.all(dresults) and fin_resu):
10597 failures.append("export finalization")
10598 if not compat.all(dresults):
10599 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10601 failures.append("disk export: disk(s) %s" % fdsk)
10603 raise errors.OpExecError("Export failed, errors in %s" %
10604 utils.CommaJoin(failures))
10606 # At this point, the export was successful, we can cleanup/finish
10608 # Remove instance if requested
10609 if self.op.remove_instance:
10610 feedback_fn("Removing instance %s" % instance.name)
10611 _RemoveInstance(self, feedback_fn, instance,
10612 self.op.ignore_remove_failures)
10614 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10615 self._CleanupExports(feedback_fn)
10617 return fin_resu, dresults
10620 class LUBackupRemove(NoHooksLU):
10621 """Remove exports related to the named instance.
10626 def ExpandNames(self):
10627 self.needed_locks = {}
10628 # We need all nodes to be locked in order for RemoveExport to work, but we
10629 # don't need to lock the instance itself, as nothing will happen to it (and
10630 # we can remove exports also for a removed instance)
10631 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10633 def Exec(self, feedback_fn):
10634 """Remove any export.
10637 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10638 # If the instance was not found we'll try with the name that was passed in.
10639 # This will only work if it was an FQDN, though.
10641 if not instance_name:
10643 instance_name = self.op.instance_name
10645 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10646 exportlist = self.rpc.call_export_list(locked_nodes)
10648 for node in exportlist:
10649 msg = exportlist[node].fail_msg
10651 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10653 if instance_name in exportlist[node].payload:
10655 result = self.rpc.call_export_remove(node, instance_name)
10656 msg = result.fail_msg
10658 logging.error("Could not remove export for instance %s"
10659 " on node %s: %s", instance_name, node, msg)
10661 if fqdn_warn and not found:
10662 feedback_fn("Export not found. If trying to remove an export belonging"
10663 " to a deleted instance please use its Fully Qualified"
10667 class LUGroupAdd(LogicalUnit):
10668 """Logical unit for creating node groups.
10671 HPATH = "group-add"
10672 HTYPE = constants.HTYPE_GROUP
10675 def ExpandNames(self):
10676 # We need the new group's UUID here so that we can create and acquire the
10677 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10678 # that it should not check whether the UUID exists in the configuration.
10679 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10680 self.needed_locks = {}
10681 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10683 def CheckPrereq(self):
10684 """Check prerequisites.
10686 This checks that the given group name is not an existing node group
10691 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10692 except errors.OpPrereqError:
10695 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10696 " node group (UUID: %s)" %
10697 (self.op.group_name, existing_uuid),
10698 errors.ECODE_EXISTS)
10700 if self.op.ndparams:
10701 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10703 def BuildHooksEnv(self):
10704 """Build hooks env.
10708 "GROUP_NAME": self.op.group_name,
10711 def BuildHooksNodes(self):
10712 """Build hooks nodes.
10715 mn = self.cfg.GetMasterNode()
10716 return ([mn], [mn])
10718 def Exec(self, feedback_fn):
10719 """Add the node group to the cluster.
10722 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10723 uuid=self.group_uuid,
10724 alloc_policy=self.op.alloc_policy,
10725 ndparams=self.op.ndparams)
10727 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10728 del self.remove_locks[locking.LEVEL_NODEGROUP]
10731 class LUGroupAssignNodes(NoHooksLU):
10732 """Logical unit for assigning nodes to groups.
10737 def ExpandNames(self):
10738 # These raise errors.OpPrereqError on their own:
10739 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10740 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10742 # We want to lock all the affected nodes and groups. We have readily
10743 # available the list of nodes, and the *destination* group. To gather the
10744 # list of "source" groups, we need to fetch node information.
10745 self.node_data = self.cfg.GetAllNodesInfo()
10746 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10747 affected_groups.add(self.group_uuid)
10749 self.needed_locks = {
10750 locking.LEVEL_NODEGROUP: list(affected_groups),
10751 locking.LEVEL_NODE: self.op.nodes,
10754 def CheckPrereq(self):
10755 """Check prerequisites.
10758 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10759 instance_data = self.cfg.GetAllInstancesInfo()
10761 if self.group is None:
10762 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10763 (self.op.group_name, self.group_uuid))
10765 (new_splits, previous_splits) = \
10766 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10767 for node in self.op.nodes],
10768 self.node_data, instance_data)
10771 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10773 if not self.op.force:
10774 raise errors.OpExecError("The following instances get split by this"
10775 " change and --force was not given: %s" %
10778 self.LogWarning("This operation will split the following instances: %s",
10781 if previous_splits:
10782 self.LogWarning("In addition, these already-split instances continue"
10783 " to be spit across groups: %s",
10784 utils.CommaJoin(utils.NiceSort(previous_splits)))
10786 def Exec(self, feedback_fn):
10787 """Assign nodes to a new group.
10790 for node in self.op.nodes:
10791 self.node_data[node].group = self.group_uuid
10793 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10796 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10797 """Check for split instances after a node assignment.
10799 This method considers a series of node assignments as an atomic operation,
10800 and returns information about split instances after applying the set of
10803 In particular, it returns information about newly split instances, and
10804 instances that were already split, and remain so after the change.
10806 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10809 @type changes: list of (node_name, new_group_uuid) pairs.
10810 @param changes: list of node assignments to consider.
10811 @param node_data: a dict with data for all nodes
10812 @param instance_data: a dict with all instances to consider
10813 @rtype: a two-tuple
10814 @return: a list of instances that were previously okay and become split as a
10815 consequence of this change, and a list of instances that were previously
10816 split and that this change does not fix.
10819 changed_nodes = dict((node, group) for node, group in changes
10820 if node_data[node].group != group)
10822 all_split_instances = set()
10823 previously_split_instances = set()
10825 def InstanceNodes(instance):
10826 return [instance.primary_node] + list(instance.secondary_nodes)
10828 for inst in instance_data.values():
10829 if inst.disk_template not in constants.DTS_INT_MIRROR:
10832 instance_nodes = InstanceNodes(inst)
10834 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10835 previously_split_instances.add(inst.name)
10837 if len(set(changed_nodes.get(node, node_data[node].group)
10838 for node in instance_nodes)) > 1:
10839 all_split_instances.add(inst.name)
10841 return (list(all_split_instances - previously_split_instances),
10842 list(previously_split_instances & all_split_instances))
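# Illustrative example (invented names and UUIDs): for a DRBD instance with
# primary "node1" and secondary "node2", both currently in group "g1",
#   changes = [("node2", "uuid-of-g2")]
# makes the instance show up in the first element of the returned tuple
# (newly split); an instance whose nodes already spanned two groups and
# still do after the change ends up in the second element instead.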
10845 class _GroupQuery(_QueryBase):
10846 FIELDS = query.GROUP_FIELDS
10848 def ExpandNames(self, lu):
10849 lu.needed_locks = {}
10851 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10852 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10855 self.wanted = [name_to_uuid[name]
10856 for name in utils.NiceSort(name_to_uuid.keys())]
10858 # Accept names to be either names or UUIDs.
10861 all_uuid = frozenset(self._all_groups.keys())
10863 for name in self.names:
10864 if name in all_uuid:
10865 self.wanted.append(name)
10866 elif name in name_to_uuid:
10867 self.wanted.append(name_to_uuid[name])
10869 missing.append(name)
10872 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10873 errors.ECODE_NOENT)
10875 def DeclareLocks(self, lu, level):
10878 def _GetQueryData(self, lu):
10879 """Computes the list of node groups and their attributes.
10882 do_nodes = query.GQ_NODE in self.requested_data
10883 do_instances = query.GQ_INST in self.requested_data
10885 group_to_nodes = None
10886 group_to_instances = None
10888 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10889 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10890 # latter GetAllInstancesInfo() is not enough, for we have to go through
10891 # instance->node. Hence, we will need to process nodes even if we only need
10892 # instance information.
10893 if do_nodes or do_instances:
10894 all_nodes = lu.cfg.GetAllNodesInfo()
10895 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10898 for node in all_nodes.values():
10899 if node.group in group_to_nodes:
10900 group_to_nodes[node.group].append(node.name)
10901 node_to_group[node.name] = node.group
10904 all_instances = lu.cfg.GetAllInstancesInfo()
10905 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10907 for instance in all_instances.values():
10908 node = instance.primary_node
10909 if node in node_to_group:
10910 group_to_instances[node_to_group[node]].append(instance.name)
10913 # Do not pass on node information if it was not requested.
10914 group_to_nodes = None
10916 return query.GroupQueryData([self._all_groups[uuid]
10917 for uuid in self.wanted],
10918 group_to_nodes, group_to_instances)
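# Illustrative example (invented names): when node and instance data are
# requested, the two mappings passed above are keyed by group UUID, e.g.
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
#   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
# with each instance attributed to the group of its primary node, as
# computed in the loop above.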
10921 class LUGroupQuery(NoHooksLU):
10922 """Logical unit for querying node groups.
10927 def CheckArguments(self):
10928 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10929 self.op.output_fields, False)
10931 def ExpandNames(self):
10932 self.gq.ExpandNames(self)
10934 def Exec(self, feedback_fn):
10935 return self.gq.OldStyleQuery(self)
10938 class LUGroupSetParams(LogicalUnit):
10939 """Modifies the parameters of a node group.
10942 HPATH = "group-modify"
10943 HTYPE = constants.HTYPE_GROUP
10946 def CheckArguments(self):
10949 self.op.alloc_policy,
10952 if all_changes.count(None) == len(all_changes):
10953 raise errors.OpPrereqError("Please pass at least one modification",
10954 errors.ECODE_INVAL)
10956 def ExpandNames(self):
10957 # This raises errors.OpPrereqError on its own:
10958 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10960 self.needed_locks = {
10961 locking.LEVEL_NODEGROUP: [self.group_uuid],
10964 def CheckPrereq(self):
10965 """Check prerequisites.
10968 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10970 if self.group is None:
10971 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10972 (self.op.group_name, self.group_uuid))
10974 if self.op.ndparams:
10975 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10976 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10977 self.new_ndparams = new_ndparams
10979 def BuildHooksEnv(self):
10980 """Build hooks env.
10984 "GROUP_NAME": self.op.group_name,
10985 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10988 def BuildHooksNodes(self):
10989 """Build hooks nodes.
10992 mn = self.cfg.GetMasterNode()
10993 return ([mn], [mn])
10995 def Exec(self, feedback_fn):
10996 """Modifies the node group.
11001 if self.op.ndparams:
11002 self.group.ndparams = self.new_ndparams
11003 result.append(("ndparams", str(self.group.ndparams)))
11005 if self.op.alloc_policy:
11006 self.group.alloc_policy = self.op.alloc_policy
11008 self.cfg.Update(self.group, feedback_fn)
11013 class LUGroupRemove(LogicalUnit):
11014 HPATH = "group-remove"
11015 HTYPE = constants.HTYPE_GROUP
11018 def ExpandNames(self):
11019 # This raises errors.OpPrereqError on its own:
11020 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11021 self.needed_locks = {
11022 locking.LEVEL_NODEGROUP: [self.group_uuid],
11025 def CheckPrereq(self):
11026 """Check prerequisites.
11028 This checks that the given group name exists as a node group, that it is
11029 empty (i.e., contains no nodes), and that it is not the last group of the
11033 # Verify that the group is empty.
11034 group_nodes = [node.name
11035 for node in self.cfg.GetAllNodesInfo().values()
11036 if node.group == self.group_uuid]
11039 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11041 (self.op.group_name,
11042 utils.CommaJoin(utils.NiceSort(group_nodes))),
11043 errors.ECODE_STATE)
11045 # Verify the cluster would not be left group-less.
11046 if len(self.cfg.GetNodeGroupList()) == 1:
11047 raise errors.OpPrereqError("Group '%s' is the only group,"
11048 " cannot be removed" %
11049 self.op.group_name,
11050 errors.ECODE_STATE)
11052 def BuildHooksEnv(self):
11053 """Build hooks env.
11057 "GROUP_NAME": self.op.group_name,
11060 def BuildHooksNodes(self):
11061 """Build hooks nodes.
11064 mn = self.cfg.GetMasterNode()
11065 return ([mn], [mn])
11067 def Exec(self, feedback_fn):
11068 """Remove the node group.
11072 self.cfg.RemoveNodeGroup(self.group_uuid)
11073 except errors.ConfigurationError:
11074 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11075 (self.op.group_name, self.group_uuid))
11077 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11080 class LUGroupRename(LogicalUnit):
11081 HPATH = "group-rename"
11082 HTYPE = constants.HTYPE_GROUP
11085 def ExpandNames(self):
11086 # This raises errors.OpPrereqError on its own:
11087 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11089 self.needed_locks = {
11090 locking.LEVEL_NODEGROUP: [self.group_uuid],
11093 def CheckPrereq(self):
11094 """Check prerequisites.
11096 Ensures the requested new name is not yet used.
11100 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11101 except errors.OpPrereqError:
11104 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11105 " node group (UUID: %s)" %
11106 (self.op.new_name, new_name_uuid),
11107 errors.ECODE_EXISTS)
11109 def BuildHooksEnv(self):
11110 """Build hooks env.
11114 "OLD_NAME": self.op.group_name,
11115 "NEW_NAME": self.op.new_name,
11118 def BuildHooksNodes(self):
11119 """Build hooks nodes.
11122 mn = self.cfg.GetMasterNode()
11124 all_nodes = self.cfg.GetAllNodesInfo()
11125 all_nodes.pop(mn, None)
11128 run_nodes.extend(node.name for node in all_nodes.values()
11129 if node.group == self.group_uuid)
11131 return (run_nodes, run_nodes)
11133 def Exec(self, feedback_fn):
11134 """Rename the node group.
11137 group = self.cfg.GetNodeGroup(self.group_uuid)
11140 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11141 (self.op.group_name, self.group_uuid))
11143 group.name = self.op.new_name
11144 self.cfg.Update(group, feedback_fn)
11146 return self.op.new_name
11149 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11150 """Generic tags LU.
11152 This is an abstract class which is the parent of all the other tags LUs.
11156 def ExpandNames(self):
11157 self.needed_locks = {}
11158 if self.op.kind == constants.TAG_NODE:
11159 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11160 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11161 elif self.op.kind == constants.TAG_INSTANCE:
11162 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11163 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11165 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11166 # not possible to acquire the BGL based on opcode parameters)
11168 def CheckPrereq(self):
11169 """Check prerequisites.
11172 if self.op.kind == constants.TAG_CLUSTER:
11173 self.target = self.cfg.GetClusterInfo()
11174 elif self.op.kind == constants.TAG_NODE:
11175 self.target = self.cfg.GetNodeInfo(self.op.name)
11176 elif self.op.kind == constants.TAG_INSTANCE:
11177 self.target = self.cfg.GetInstanceInfo(self.op.name)
11179 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11180 str(self.op.kind), errors.ECODE_INVAL)
11183 class LUTagsGet(TagsLU):
11184 """Returns the tags of a given object.
11189 def ExpandNames(self):
11190 TagsLU.ExpandNames(self)
11192 # Share locks as this is only a read operation
11193 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11195 def Exec(self, feedback_fn):
11196 """Returns the tag list.
11199 return list(self.target.GetTags())
11202 class LUTagsSearch(NoHooksLU):
11203 """Searches the tags for a given pattern.
11208 def ExpandNames(self):
11209 self.needed_locks = {}
11211 def CheckPrereq(self):
11212 """Check prerequisites.
11214 This checks the pattern passed for validity by compiling it.
11218 self.re = re.compile(self.op.pattern)
11219 except re.error, err:
11220 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11221 (self.op.pattern, err), errors.ECODE_INVAL)
11223 def Exec(self, feedback_fn):
11224 """Returns the tag list.
11228 tgts = [("/cluster", cfg.GetClusterInfo())]
11229 ilist = cfg.GetAllInstancesInfo().values()
11230 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11231 nlist = cfg.GetAllNodesInfo().values()
11232 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11234 for path, target in tgts:
11235 for tag in target.GetTags():
11236 if self.re.search(tag):
11237 results.append((path, tag))
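# Illustrative example of the result format returned by this LU (hypothetical
# values, not used anywhere): a search for the pattern "^db-" over a cluster
# tagged "db-master" and a node tagged "db-backup" yields these (path, tag)
# pairs.
_EXAMPLE_TAG_SEARCH_RESULT = [
  ("/cluster", "db-master"),
  ("/nodes/node1.example.com", "db-backup"),
  ]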
11241 class LUTagsSet(TagsLU):
11242 """Sets a tag on a given object.
11247 def CheckPrereq(self):
11248 """Check prerequisites.
11250 This checks the type and length of the tag name and value.
11253 TagsLU.CheckPrereq(self)
11254 for tag in self.op.tags:
11255 objects.TaggableObject.ValidateTag(tag)
11257 def Exec(self, feedback_fn):
11262 for tag in self.op.tags:
11263 self.target.AddTag(tag)
11264 except errors.TagError, err:
11265 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11266 self.cfg.Update(self.target, feedback_fn)
11269 class LUTagsDel(TagsLU):
11270 """Delete a list of tags from a given object.
11275 def CheckPrereq(self):
11276 """Check prerequisites.
11278 This checks that we have the given tag.
11281 TagsLU.CheckPrereq(self)
11282 for tag in self.op.tags:
11283 objects.TaggableObject.ValidateTag(tag)
11284 del_tags = frozenset(self.op.tags)
11285 cur_tags = self.target.GetTags()
11287 diff_tags = del_tags - cur_tags
11289 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11290 raise errors.OpPrereqError("Tag(s) %s not found" %
11291 (utils.CommaJoin(diff_names), ),
11292 errors.ECODE_NOENT)
11294 def Exec(self, feedback_fn):
11295 """Remove the tag from the object.
11298 for tag in self.op.tags:
11299 self.target.RemoveTag(tag)
11300 self.cfg.Update(self.target, feedback_fn)
11303 class LUTestDelay(NoHooksLU):
11304 """Sleep for a specified amount of time.
11306 This LU sleeps on the master and/or nodes for a specified amount of time.
11312 def ExpandNames(self):
11313 """Expand names and set required locks.
11315 This expands the node list, if any.
11318 self.needed_locks = {}
11319 if self.op.on_nodes:
11320 # _GetWantedNodes can be used here, but it is not always appropriate to
11321 # call it from ExpandNames; see the LogicalUnit.ExpandNames docstring
11322 # for more information.
11323 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11324 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11326 def _TestDelay(self):
11327 """Do the actual sleep.
11330 if self.op.on_master:
11331 if not utils.TestDelay(self.op.duration):
11332 raise errors.OpExecError("Error during master delay test")
11333 if self.op.on_nodes:
11334 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11335 for node, node_result in result.items():
11336 node_result.Raise("Failure during rpc call to node %s" % node)
11338 def Exec(self, feedback_fn):
11339 """Execute the test delay opcode, with the wanted repetitions.
11342 if self.op.repeat == 0:
11345 top_value = self.op.repeat - 1
11346 for i in range(self.op.repeat):
11347 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11351 class LUTestJqueue(NoHooksLU):
11352 """Utility LU to test some aspects of the job queue.
11357 # Must be lower than default timeout for WaitForJobChange to see whether it
11358 # notices changed jobs
11359 _CLIENT_CONNECT_TIMEOUT = 20.0
11360 _CLIENT_CONFIRM_TIMEOUT = 60.0
11363 def _NotifyUsingSocket(cls, cb, errcls):
11364 """Opens a Unix socket and waits for another program to connect.
11367 @param cb: Callback to send socket name to client
11368 @type errcls: class
11369 @param errcls: Exception class to use for errors
11372 # Using a temporary directory as there's no easy way to create temporary
11373 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
11375 tmpdir = tempfile.mkdtemp()
11377 tmpsock = utils.PathJoin(tmpdir, "sock")
11379 logging.debug("Creating temporary socket at %s", tmpsock)
11380 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11385 # Send details to client
11388 # Wait for client to connect before continuing
11389 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11391 (conn, _) = sock.accept()
11392 except socket.error, err:
11393 raise errcls("Client didn't connect in time (%s)" % err)
11397 # Remove as soon as client is connected
11398 shutil.rmtree(tmpdir)
11400 # Wait for client to close
11403 # pylint: disable-msg=E1101
11404 # Instance of '_socketobject' has no ... member
11405 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11407 except socket.error, err:
11408 raise errcls("Client failed to confirm notification (%s)" % err)
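# A minimal sketch of the client side of this handshake (hypothetical helper,
# for illustration only; not used by this module): the test client receives
# the socket path via the ELOG_JQUEUE_TEST log entry, connects before
# _CLIENT_CONNECT_TIMEOUT expires and then closes the connection again, which
# is what the recv() above interprets as the confirmation.
@staticmethod
def _ExampleConfirmNotification(sockname):
  """Connects to the given socket path and disconnects immediately.

  """
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.connect(sockname)
  finally:
    client.close()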
11412 def _SendNotification(self, test, arg, sockname):
11413 """Sends a notification to the client.
11416 @param test: Test name
11417 @param arg: Test argument (depends on test)
11418 @type sockname: string
11419 @param sockname: Socket path
11422 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11424 def _Notify(self, prereq, test, arg):
11425 """Notifies the client of a test.
11428 @param prereq: Whether this is a prereq-phase test
11430 @param test: Test name
11431 @param arg: Test argument (depends on test)
11435 errcls = errors.OpPrereqError
11437 errcls = errors.OpExecError
11439 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11443 def CheckArguments(self):
11444 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11445 self.expandnames_calls = 0
11447 def ExpandNames(self):
11448 checkargs_calls = getattr(self, "checkargs_calls", 0)
11449 if checkargs_calls < 1:
11450 raise errors.ProgrammerError("CheckArguments was not called")
11452 self.expandnames_calls += 1
11454 if self.op.notify_waitlock:
11455 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11457 self.LogInfo("Expanding names")
11459 # Get lock on master node (just to get a lock, not for a particular reason)
11460 self.needed_locks = {
11461 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11464 def Exec(self, feedback_fn):
11465 if self.expandnames_calls < 1:
11466 raise errors.ProgrammerError("ExpandNames was not called")
11468 if self.op.notify_exec:
11469 self._Notify(False, constants.JQT_EXEC, None)
11471 self.LogInfo("Executing")
11473 if self.op.log_messages:
11474 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11475 for idx, msg in enumerate(self.op.log_messages):
11476 self.LogInfo("Sending log message %s", idx + 1)
11477 feedback_fn(constants.JQT_MSGPREFIX + msg)
11478 # Report how many test messages have been sent
11479 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11482 raise errors.OpExecError("Opcode failure was requested")
11487 class IAllocator(object):
11488 """IAllocator framework.
11490 An IAllocator instance has four sets of attributes:
11491 - cfg that is needed to query the cluster
11492 - input data (all members of the _KEYS class attribute are required)
11493 - four buffer attributes (in|out_data|text), which represent the
11494 input (to the external script) in text and data structure format,
11495 and the output from it, again in two formats
11496 - the result variables from the script (success, info, result) for
11500 # pylint: disable-msg=R0902
11501 # lots of instance attributes
11503 "name", "mem_size", "disks", "disk_template",
11504 "os", "tags", "nics", "vcpus", "hypervisor",
11507 "name", "relocate_from",
11513 def __init__(self, cfg, rpc, mode, **kwargs):
11516 # init buffer variables
11517 self.in_text = self.out_text = self.in_data = self.out_data = None
11518 # init all input fields so that pylint is happy
11520 self.mem_size = self.disks = self.disk_template = None
11521 self.os = self.tags = self.nics = self.vcpus = None
11522 self.hypervisor = None
11523 self.relocate_from = None
11525 self.evac_nodes = None
11527 self.required_nodes = None
11528 # init result fields
11529 self.success = self.info = self.result = None
11530 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11531 keyset = self._ALLO_KEYS
11532 fn = self._AddNewInstance
11533 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11534 keyset = self._RELO_KEYS
11535 fn = self._AddRelocateInstance
11536 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11537 keyset = self._EVAC_KEYS
11538 fn = self._AddEvacuateNodes
11540 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11541 " IAllocator" % self.mode)
11543 if key not in keyset:
11544 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11545 " IAllocator" % key)
11546 setattr(self, key, kwargs[key])
11549 if key not in kwargs:
11550 raise errors.ProgrammerError("Missing input parameter '%s' to"
11551 " IAllocator" % key)
11552 self._BuildInputData(fn)
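# Illustrative example (hypothetical values, not used anywhere): the keyword
# arguments a caller would pass for an allocation request.  They must cover
# every name in _ALLO_KEYS and would be used as
#   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC, **kwargs)
_EXAMPLE_ALLOC_KWARGS = {
  "name": "inst1.example.com",
  "mem_size": 1024,
  "disks": [{"size": 10240, "mode": "w"}],
  "disk_template": "drbd",
  "os": "debian-image",
  "tags": [],
  "nics": [],
  "vcpus": 1,
  "hypervisor": "xen-pvm",
  }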
11554 def _ComputeClusterData(self):
11555 """Compute the generic allocator input data.
11557 This is the data that is independent of the actual operation.
11561 cluster_info = cfg.GetClusterInfo()
11564 "version": constants.IALLOCATOR_VERSION,
11565 "cluster_name": cfg.GetClusterName(),
11566 "cluster_tags": list(cluster_info.GetTags()),
11567 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11568 # we don't have job IDs
11570 ninfo = cfg.GetAllNodesInfo()
11571 iinfo = cfg.GetAllInstancesInfo().values()
11572 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11575 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11577 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11578 hypervisor_name = self.hypervisor
11579 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11580 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11581 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11582 hypervisor_name = cluster_info.enabled_hypervisors[0]
11584 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11587 self.rpc.call_all_instances_info(node_list,
11588 cluster_info.enabled_hypervisors)
11590 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11592 config_ndata = self._ComputeBasicNodeData(ninfo)
11593 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11594 i_list, config_ndata)
11595 assert len(data["nodes"]) == len(ninfo), \
11596 "Incomplete node data computed"
11598 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11600 self.in_data = data
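# Abbreviated sketch of the structure assembled above (hypothetical values;
# the real dict also carries the full per-node and per-instance details
# computed by the helpers below, and a "request" key is added later by
# _BuildInputData):
_EXAMPLE_IN_DATA_SKELETON = {
  "version": constants.IALLOCATOR_VERSION,
  "cluster_name": "cluster.example.com",
  "cluster_tags": [],
  "enabled_hypervisors": ["xen-pvm"],
  "nodegroups": {},   # filled by _ComputeNodeGroupData
  "nodes": {},        # filled by _ComputeBasicNodeData/_ComputeDynamicNodeData
  "instances": {},    # filled by _ComputeInstanceData
  }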
11603 def _ComputeNodeGroupData(cfg):
11604 """Compute node groups data.
11608 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11610 "name": gdata.name,
11611 "alloc_policy": gdata.alloc_policy,
11616 def _ComputeBasicNodeData(node_cfg):
11617 """Compute global node data.
11620 @returns: a dict of name: (node dict, node config)
11624 for ninfo in node_cfg.values():
11625 # fill in static (config-based) values
11627 "tags": list(ninfo.GetTags()),
11628 "primary_ip": ninfo.primary_ip,
11629 "secondary_ip": ninfo.secondary_ip,
11630 "offline": ninfo.offline,
11631 "drained": ninfo.drained,
11632 "master_candidate": ninfo.master_candidate,
11633 "group": ninfo.group,
11634 "master_capable": ninfo.master_capable,
11635 "vm_capable": ninfo.vm_capable,
11638 node_results[ninfo.name] = pnr
11640 return node_results
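# Illustrative example of one entry in the returned dict (hypothetical
# values), keyed by node name:
_EXAMPLE_BASIC_NODE_DATA = {
  "tags": [],
  "primary_ip": "192.0.2.10",
  "secondary_ip": "198.51.100.10",
  "offline": False,
  "drained": False,
  "master_candidate": True,
  "group": "uuid-of-default-group",
  "master_capable": True,
  "vm_capable": True,
  }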
11643 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11645 """Compute global node data.
11647 @param node_results: the basic node structures as filled from the config
11650 # make a copy of the current dict
11651 node_results = dict(node_results)
11652 for nname, nresult in node_data.items():
11653 assert nname in node_results, "Missing basic data for node %s" % nname
11654 ninfo = node_cfg[nname]
11656 if not (ninfo.offline or ninfo.drained):
11657 nresult.Raise("Can't get data for node %s" % nname)
11658 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11660 remote_info = nresult.payload
11662 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11663 'vg_size', 'vg_free', 'cpu_total']:
11664 if attr not in remote_info:
11665 raise errors.OpExecError("Node '%s' didn't return attribute"
11666 " '%s'" % (nname, attr))
11667 if not isinstance(remote_info[attr], int):
11668 raise errors.OpExecError("Node '%s' returned invalid value"
11670 (nname, attr, remote_info[attr]))
11671 # compute memory used by primary instances
11672 i_p_mem = i_p_up_mem = 0
11673 for iinfo, beinfo in i_list:
11674 if iinfo.primary_node == nname:
11675 i_p_mem += beinfo[constants.BE_MEMORY]
11676 if iinfo.name not in node_iinfo[nname].payload:
11679 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11680 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11681 remote_info['memory_free'] -= max(0, i_mem_diff)
11684 i_p_up_mem += beinfo[constants.BE_MEMORY]
11686 # compute memory used by instances
11688 "total_memory": remote_info['memory_total'],
11689 "reserved_memory": remote_info['memory_dom0'],
11690 "free_memory": remote_info['memory_free'],
11691 "total_disk": remote_info['vg_size'],
11692 "free_disk": remote_info['vg_free'],
11693 "total_cpus": remote_info['cpu_total'],
11694 "i_pri_memory": i_p_mem,
11695 "i_pri_up_memory": i_p_up_mem,
11697 pnr_dyn.update(node_results[nname])
11698 node_results[nname] = pnr_dyn
11700 return node_results
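# Worked example of the free-memory correction above (hypothetical numbers):
# an instance with BE_MEMORY = 1024 MiB that the hypervisor reports as using
# only 768 MiB leaves 256 MiB of its reservation counted as free by the node,
# so that difference is subtracted from "memory_free" again to reflect the
# committed allocation.
_EXAMPLE_BE_MEMORY = 1024
_EXAMPLE_USED_MEMORY = 768
_EXAMPLE_FREE_CORRECTION = max(0, _EXAMPLE_BE_MEMORY - _EXAMPLE_USED_MEMORY)
# _EXAMPLE_FREE_CORRECTION == 256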
11703 def _ComputeInstanceData(cluster_info, i_list):
11704 """Compute global instance data.
11708 for iinfo, beinfo in i_list:
11710 for nic in iinfo.nics:
11711 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11712 nic_dict = {"mac": nic.mac,
11714 "mode": filled_params[constants.NIC_MODE],
11715 "link": filled_params[constants.NIC_LINK],
11717 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11718 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11719 nic_data.append(nic_dict)
11721 "tags": list(iinfo.GetTags()),
11722 "admin_up": iinfo.admin_up,
11723 "vcpus": beinfo[constants.BE_VCPUS],
11724 "memory": beinfo[constants.BE_MEMORY],
11726 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11728 "disks": [{constants.IDISK_SIZE: dsk.size,
11729 constants.IDISK_MODE: dsk.mode}
11730 for dsk in iinfo.disks],
11731 "disk_template": iinfo.disk_template,
11732 "hypervisor": iinfo.hypervisor,
11734 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11736 instance_data[iinfo.name] = pir
11738 return instance_data
11740 def _AddNewInstance(self):
11741 """Add new instance data to allocator structure.
11743 This in combination with _ComputeClusterData will create the
11744 correct structure needed as input for the allocator.
11746 The checks for the completeness of the opcode must have already been
11750 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11752 if self.disk_template in constants.DTS_INT_MIRROR:
11753 self.required_nodes = 2
11755 self.required_nodes = 1
11758 "disk_template": self.disk_template,
11761 "vcpus": self.vcpus,
11762 "memory": self.mem_size,
11763 "disks": self.disks,
11764 "disk_space_total": disk_space,
11766 "required_nodes": self.required_nodes,
11770 def _AddRelocateInstance(self):
11771 """Add relocate instance data to allocator structure.
11773 This in combination with _ComputeClusterData will create the
11774 correct structure needed as input for the allocator.
11776 The checks for the completeness of the opcode must have already been
11780 instance = self.cfg.GetInstanceInfo(self.name)
11781 if instance is None:
11782 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11783 " IAllocator" % self.name)
11785 if instance.disk_template not in constants.DTS_MIRRORED:
11786 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11787 errors.ECODE_INVAL)
11789 if instance.disk_template in constants.DTS_INT_MIRROR and \
11790 len(instance.secondary_nodes) != 1:
11791 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11792 errors.ECODE_STATE)
11794 self.required_nodes = 1
11795 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11796 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11800 "disk_space_total": disk_space,
11801 "required_nodes": self.required_nodes,
11802 "relocate_from": self.relocate_from,
11806 def _AddEvacuateNodes(self):
11807 """Add evacuate nodes data to allocator structure.
11811 "evac_nodes": self.evac_nodes
11815 def _BuildInputData(self, fn):
11816 """Build input data structures.
11819 self._ComputeClusterData()
11822 request["type"] = self.mode
11823 self.in_data["request"] = request
11825 self.in_text = serializer.Dump(self.in_data)
11827 def Run(self, name, validate=True, call_fn=None):
11828 """Run an instance allocator and return the results.
11831 if call_fn is None:
11832 call_fn = self.rpc.call_iallocator_runner
11834 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11835 result.Raise("Failure while running the iallocator script")
11837 self.out_text = result.payload
11839 self._ValidateResult()
11841 def _ValidateResult(self):
11842 """Process the allocator results.
11844 This will process and, if successful, save the result in
11845 self.out_data and the other output attributes.
11849 rdict = serializer.Load(self.out_text)
11850 except Exception, err:
11851 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11853 if not isinstance(rdict, dict):
11854 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11856 # TODO: remove backwards compatibility in later versions
11857 if "nodes" in rdict and "result" not in rdict:
11858 rdict["result"] = rdict["nodes"]
11861 for key in "success", "info", "result":
11862 if key not in rdict:
11863 raise errors.OpExecError("Can't parse iallocator results:"
11864 " missing key '%s'" % key)
11865 setattr(self, key, rdict[key])
11867 if not isinstance(rdict["result"], list):
11868 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11870 self.out_data = rdict
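# Illustrative example of a well-formed allocator reply (hypothetical
# values): the three keys checked above must all be present and "result"
# must be a list; for an allocation it would name the chosen nodes.  Replies
# from older allocators that only provide "nodes" are converted above.
_EXAMPLE_IALLOCATOR_REPLY = {
  "success": True,
  "info": "allocation successful",
  "result": ["node1.example.com", "node2.example.com"],
  }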
11873 class LUTestAllocator(NoHooksLU):
11874 """Run allocator tests.
11876 This LU runs the allocator tests
11879 def CheckPrereq(self):
11880 """Check prerequisites.
11882 This checks the opcode parameters depending on the requested direction and mode.
11885 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11886 for attr in ["mem_size", "disks", "disk_template",
11887 "os", "tags", "nics", "vcpus"]:
11888 if not hasattr(self.op, attr):
11889 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11890 attr, errors.ECODE_INVAL)
11891 iname = self.cfg.ExpandInstanceName(self.op.name)
11892 if iname is not None:
11893 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11894 iname, errors.ECODE_EXISTS)
11895 if not isinstance(self.op.nics, list):
11896 raise errors.OpPrereqError("Invalid parameter 'nics'",
11897 errors.ECODE_INVAL)
11898 if not isinstance(self.op.disks, list):
11899 raise errors.OpPrereqError("Invalid parameter 'disks'",
11900 errors.ECODE_INVAL)
11901 for row in self.op.disks:
11902 if (not isinstance(row, dict) or
11903 "size" not in row or
11904 not isinstance(row["size"], int) or
11905 "mode" not in row or
11906 row["mode"] not in ['r', 'w']):
11907 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11908 " parameter", errors.ECODE_INVAL)
11909 if self.op.hypervisor is None:
11910 self.op.hypervisor = self.cfg.GetHypervisorType()
11911 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11912 fname = _ExpandInstanceName(self.cfg, self.op.name)
11913 self.op.name = fname
11914 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11915 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11916 if not hasattr(self.op, "evac_nodes"):
11917 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11918 " opcode input", errors.ECODE_INVAL)
11920 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11921 self.op.mode, errors.ECODE_INVAL)
11923 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11924 if self.op.allocator is None:
11925 raise errors.OpPrereqError("Missing allocator name",
11926 errors.ECODE_INVAL)
11927 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11928 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11929 self.op.direction, errors.ECODE_INVAL)
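# Example of opcode values that satisfy the checks above (hypothetical): each
# disk row must be a dict with an integer "size" and a "mode" of "r" or "w",
# and "nics" must be a list.
_EXAMPLE_TEST_ALLOC_DISKS = [
  {"size": 10240, "mode": "w"},
  {"size": 2048, "mode": "r"},
  ]
_EXAMPLE_TEST_ALLOC_NICS = []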
11931 def Exec(self, feedback_fn):
11932 """Run the allocator test.
11935 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11936 ial = IAllocator(self.cfg, self.rpc,
11939 mem_size=self.op.mem_size,
11940 disks=self.op.disks,
11941 disk_template=self.op.disk_template,
11945 vcpus=self.op.vcpus,
11946 hypervisor=self.op.hypervisor,
11948 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11949 ial = IAllocator(self.cfg, self.rpc,
11952 relocate_from=list(self.relocate_from),
11954 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11955 ial = IAllocator(self.cfg, self.rpc,
11957 evac_nodes=self.op.evac_nodes)
11959 raise errors.ProgrammerError("Unhandled mode '%s' in"
11960 " LUTestAllocator.Exec" % self.op.mode)
11962 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11963 result = ial.in_text
11965 ial.Run(self.op.allocator, validate=False)
11966 result = ial.out_text
11970 #: Query type implementations
11972 constants.QR_INSTANCE: _InstanceQuery,
11973 constants.QR_NODE: _NodeQuery,
11974 constants.QR_GROUP: _GroupQuery,
11975 constants.QR_OS: _OsQuery,
11978 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
11981 def _GetQueryImplementation(name):
11982 """Returns the implemtnation for a query type.
11984 @param name: Query type, must be one of L{constants.QR_VIA_OP}
11988 return _QUERY_IMPL[name]
11990 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11991 errors.ECODE_INVAL)
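# Minimal usage sketch (illustration only): resolve the implementation class
# for a requested resource; per the _QUERY_IMPL mapping above this returns
# _NodeQuery for the node resource.
_EXAMPLE_QUERY_IMPL = _GetQueryImplementation(constants.QR_NODE)
assert _EXAMPLE_QUERY_IMPL is _NodeQuery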