# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
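

# Illustrative usage sketch, not part of the original module: an LU's Exec
# can queue follow-up jobs by returning a ResultWithJobs. The processor
# submits each inner list as one job and merges the job IDs into the opcode
# result. The opcode arguments and attribute names below are hypothetical.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name=name)]
#             for name in self.wanted_instances]
#     return ResultWithJobs(jobs, warnings=self.collected_warnings)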


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer have to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been
    done before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Constructor for Tasklet.

    """
    self.lu = lu

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.glm.list_owned(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
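

# Worked example (illustrative values; constants.VALUE_DEFAULT is the
# string "default"): a VALUE_DEFAULT entry removes the key so the cluster
# default applies again, while other keys are overridden or added:
#
#   _GetUpdatedParams({"memory": 128, "vcpus": 2},
#                     {"memory": constants.VALUE_DEFAULT,
#                      "auto_balance": True})
#   -> {"vcpus": 2, "auto_balance": True}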


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.glm.list_owned(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
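

# Usage sketch (illustrative): once work has been narrowed down to a single
# node, an LU can hand back all other node locks early; "names" and "keep"
# are mutually exclusive, and omitting both releases the whole level.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)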


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
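

# Example (illustrative host names): expansion canonicalizes a short name
# and raises errors.OpPrereqError for unknown items:
#
#   _ExpandNodeName(self.cfg, "node1")      -> "node1.example.com"
#   _ExpandInstanceName(self.cfg, "ghost1") -> errors.OpPrereqError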


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
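

# For a single-NIC, single-disk instance the resulting dictionary contains
# entries such as the following (values illustrative); the hooks runner
# later prefixes every key with "GANETI_":
#
#   INSTANCE_NAME=inst1.example.com      INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MAC=aa:00:00:fa:3a:3f  INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=1024             INSTANCE_BE_MEMORY=128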


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
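

# Worked example (illustrative numbers): with candidate_pool_size=10,
# mc_now=5 and mc_should=5, adding this node raises the target to
# min(5 + 1, 10) = 6; since 5 < 6 the new node should promote itself.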


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
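

# Return value sketch (illustrative): the payload indices of faulty disks,
# e.g. [1] if only disk/1 of the instance reports LDS_FAULTY on the given
# node; an empty list means all mirrors there are healthy.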


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerifyConfig.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
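

# Shape of the result (illustrative values): one triple per origin, with
# increasingly specific parameter dicts:
#
#   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-3-xenU"}),
#    ("os debootstrap", "xen-pvm", {"kernel_path": "/boot/vmlinuz-os"}),
#    ("instance inst1.example.com", "xen-pvm",
#     {"kernel_path": "/boot/vmlinuz-custom"})]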


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
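

# Typical call pattern in the verify routines below (mirrors actual usage
# in this module): positional arguments are interpolated into the message,
# and passing code=self.ETYPE_WARNING via ETYPE_FIELD demotes a finding to
# a warning that does not mark the operation as failed.
#
#   self._ErrorIf(test, self.ENODEHV, node,
#                 "hypervisor %s verify failure: '%s'", hv_name, hv_result,
#                 code=self.ETYPE_WARNING)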


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" % (node.name,
                     utils.CommaJoin(dangling_instances.get(node.name,
                                                            ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non-"
                  "existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return (not self.bad, [g.name for g in self.all_group_info.values()])


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    all_node_info = self.cfg.GetAllNodesInfo()
    all_inst_info = self.cfg.GetAllInstancesInfo()

    node_names = set(node.name
                     for node in all_node_info.values()
                     if node.group == self.group_uuid)

    inst_names = [inst.name
                  for inst in all_inst_info.values()
                  if inst.primary_node in node_names]

    # In Exec(), we warn about mirrored instances that have primary and
    # secondary living in separate node groups. To fully verify that
    # volumes for these instances are healthy, we will need to do an
    # extra call to their secondaries. We ensure here those nodes will
    # be locked.
    for inst in inst_names:
      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
        node_names.update(all_inst_info[inst].secondary_nodes)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: list(node_names),
      locking.LEVEL_INSTANCE: inst_names,
      }

    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    group_nodes = set(node.name
                      for node in self.all_node_info.values()
                      if node.group == self.group_uuid)

    group_instances = set(inst.name
                          for inst in self.all_inst_info.values()
                          if inst.primary_node in group_nodes)

    unlocked_nodes = \
      group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
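
  # A healthy node reply for the checks above would look roughly like this
  # (sketch; the payload is produced by the node daemon's verify RPC, and
  # empty dicts mean no per-peer failures were reported):
  #
  #   {constants.NV_NODELIST: {}, constants.NV_NODENETTEST: {},
  #    constants.NV_MASTERIP: True}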

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1913 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1914 """Verify N+1 Memory Resilience.
1916 Check that if one single node dies we can still start all the
1917 instances it was primary for.
1920 cluster_info = self.cfg.GetClusterInfo()
1921 for node, n_img in node_image.items():
1922 # This code checks that every node which is now listed as
1923 # secondary has enough memory to host all instances it is
1924 # supposed to should a single other node in the cluster fail.
1925 # FIXME: not ready for failover to an arbitrary node
1926 # FIXME: does not support file-backed instances
1927 # WARNING: we currently take into account down instances as well
1928 # as up ones, considering that even if they're down someone
1929 # might want to start them even in the event of a node failure.
1930 if n_img.offline:
1931 # we're skipping offline nodes from the N+1 warning, since
1932 # most likely we don't have good memory information from them;
1933 # we already list instances living on such nodes, and that's
1934 # enough
1935 continue
1936 for prinode, instances in n_img.sbp.items():
1937 needed_mem = 0
1938 for instance in instances:
1939 bep = cluster_info.FillBE(instance_cfg[instance])
1940 if bep[constants.BE_AUTO_BALANCE]:
1941 needed_mem += bep[constants.BE_MEMORY]
1942 test = n_img.mfree < needed_mem
1943 self._ErrorIf(test, self.ENODEN1, node,
1944 "not enough memory to accomodate instance failovers"
1945 " should node %s fail (%dMiB needed, %dMiB available)",
1946 prinode, needed_mem, n_img.mfree)
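# Editor's note: a small worked example of the N+1 check above, with
# hypothetical numbers. n_img.sbp groups this node's secondary instances by
# their primary node; each group is what would fail over here at once:
#
#   n_img.sbp = {"nodeA": ["inst1", "inst2"]}     # hypothetical
#   # BE_MEMORY(inst1) = 512, BE_MEMORY(inst2) = 1024, both auto-balanced
#   needed_mem = 512 + 1024                       # = 1536 MiB
#   # error if n_img.mfree < 1536: nodeA's failure could not be absorbed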
1949 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1950 (files_all, files_all_opt, files_mc, files_vm)):
1951 """Verifies file checksums collected from all nodes.
1953 @param errorif: Callback for reporting errors
1954 @param nodeinfo: List of L{objects.Node} objects
1955 @param master_node: Name of master node
1956 @param all_nvinfo: RPC results
1959 node_names = frozenset(node.name for node in nodeinfo)
1961 assert master_node in node_names
1962 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1963 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1964 "Found file listed in more than one file list"
1966 # Define functions determining which nodes to consider for a file
1967 file2nodefn = dict([(filename, fn)
1968 for (files, fn) in [(files_all, None),
1969 (files_all_opt, None),
1970 (files_mc, lambda node: (node.master_candidate or
1971 node.name == master_node)),
1972 (files_vm, lambda node: node.vm_capable)]
1973 for filename in files])
1975 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1977 for node in nodeinfo:
1978 nresult = all_nvinfo[node.name]
1980 if nresult.fail_msg or not nresult.payload:
1981 node_files = None
1982 else:
1983 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1985 test = not (node_files and isinstance(node_files, dict))
1986 errorif(test, cls.ENODEFILECHECK, node.name,
1987 "Node did not return file checksum data")
1988 if test:
1989 continue
1991 for (filename, checksum) in node_files.items():
1992 # Check if the file should be considered for a node
1993 fn = file2nodefn[filename]
1994 if fn is None or fn(node):
1995 fileinfo[filename].setdefault(checksum, set()).add(node.name)
1997 for (filename, checksums) in fileinfo.items():
1998 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2000 # Nodes having the file
2001 with_file = frozenset(node_name
2002 for nodes in fileinfo[filename].values()
2003 for node_name in nodes)
2005 # Nodes missing file
2006 missing_file = node_names - with_file
2008 if filename in files_all_opt:
2009 # All or no nodes
2010 errorif(missing_file and missing_file != node_names,
2011 cls.ECLUSTERFILECHECK, None,
2012 "File %s is optional, but it must exist on all or no nodes (not"
2013 " found on %s)",
2014 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2015 else:
2016 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2017 "File %s is missing from node(s) %s", filename,
2018 utils.CommaJoin(utils.NiceSort(missing_file)))
2020 # See if there are multiple versions of the file
2021 test = len(checksums) > 1
2022 if test:
2023 variants = ["variant %s on %s" %
2024 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2025 for (idx, (checksum, nodes)) in
2026 enumerate(sorted(checksums.items()))]
2027 else:
2028 variants = []
2030 errorif(test, cls.ECLUSTERFILECHECK, None,
2031 "File %s found with %s different checksums (%s)",
2032 filename, len(checksums), "; ".join(variants))
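# Editor's note: sketch of the structure built above (hypothetical values).
# For every file, checksums map to the set of nodes reporting them:
#
#   fileinfo["/etc/hosts"] = {"abc...": set(["node1", "node2"]),
#                             "def...": set(["node3"])}
#
# More than one key means divergent copies ("%s different checksums"), and
# node_names minus the union of the value sets gives the nodes missing the
# file entirely.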
2034 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2035 drbd_map):
2036 """Verifies the node DRBD status.
2038 @type ninfo: L{objects.Node}
2039 @param ninfo: the node to check
2040 @param nresult: the remote results for the node
2041 @param instanceinfo: the dict of instances
2042 @param drbd_helper: the configured DRBD usermode helper
2043 @param drbd_map: the DRBD map as returned by
2044 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2046 """
2047 node = ninfo.name
2048 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2050 if drbd_helper:
2051 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2052 test = (helper_result is None)
2053 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2054 "no drbd usermode helper returned")
2055 if helper_result:
2056 status, payload = helper_result
2057 test = not status
2058 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2059 "drbd usermode helper check unsuccessful: %s", payload)
2060 test = status and (payload != drbd_helper)
2061 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2062 "wrong drbd usermode helper: %s", payload)
2064 # compute the DRBD minors
2065 node_drbd = {}
2066 for minor, instance in drbd_map[node].items():
2067 test = instance not in instanceinfo
2068 _ErrorIf(test, self.ECLUSTERCFG, None,
2069 "ghost instance '%s' in temporary DRBD map", instance)
2070 # ghost instance should not be running, but otherwise we
2071 # don't give double warnings (both ghost instance and
2072 # unallocated minor in use)
2073 if test:
2074 node_drbd[minor] = (instance, False)
2075 else:
2076 instance = instanceinfo[instance]
2077 node_drbd[minor] = (instance.name, instance.admin_up)
2079 # and now check them
2080 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2081 test = not isinstance(used_minors, (tuple, list))
2082 _ErrorIf(test, self.ENODEDRBD, node,
2083 "cannot parse drbd status file: %s", str(used_minors))
2085 # we cannot check drbd status
2088 for minor, (iname, must_exist) in node_drbd.items():
2089 test = minor not in used_minors and must_exist
2090 _ErrorIf(test, self.ENODEDRBD, node,
2091 "drbd minor %d of instance %s is not active", minor, iname)
2092 for minor in used_minors:
2093 test = minor not in node_drbd
2094 _ErrorIf(test, self.ENODEDRBD, node,
2095 "unallocated drbd minor %d is in use", minor)
2097 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2098 """Builds the node OS structures.
2100 @type ninfo: L{objects.Node}
2101 @param ninfo: the node to check
2102 @param nresult: the remote results for the node
2103 @param nimg: the node image object
2105 """
2106 node = ninfo.name
2107 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2109 remote_os = nresult.get(constants.NV_OSLIST, None)
2110 test = (not isinstance(remote_os, list) or
2111 not compat.all(isinstance(v, list) and len(v) == 7
2112 for v in remote_os))
2114 _ErrorIf(test, self.ENODEOS, node,
2115 "node hasn't returned valid OS data")
2117 nimg.os_fail = test
2118 if test:
2119 return
2121 os_dict = {}
2124 for (name, os_path, status, diagnose,
2125 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2127 if name not in os_dict:
2128 os_dict[name] = []
2130 # parameters is a list of lists instead of list of tuples due to
2131 # JSON lacking a real tuple type, fix it:
2132 parameters = [tuple(v) for v in parameters]
2133 os_dict[name].append((os_path, status, diagnose,
2134 set(variants), set(parameters), set(api_ver)))
2136 nimg.oslist = os_dict
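# Editor's note: sketch of the resulting nimg.oslist shape (hypothetical
# entry). Each OS name maps to one tuple per directory it was found in; a
# list longer than one means earlier search-path entries shadow later ones:
#
#   nimg.oslist = {
#     "debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                      set(["default"]), set(), set([20]))],
#     }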
2138 def _VerifyNodeOS(self, ninfo, nimg, base):
2139 """Verifies the node OS list.
2141 @type ninfo: L{objects.Node}
2142 @param ninfo: the node to check
2143 @param nimg: the node image object
2144 @param base: the 'template' node we match against (e.g. from the master)
2146 """
2147 node = ninfo.name
2148 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2150 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2152 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2153 for os_name, os_data in nimg.oslist.items():
2154 assert os_data, "Empty OS status for OS %s?!" % os_name
2155 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2156 _ErrorIf(not f_status, self.ENODEOS, node,
2157 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2158 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2159 "OS '%s' has multiple entries (first one shadows the rest): %s",
2160 os_name, utils.CommaJoin([v[0] for v in os_data]))
2161 # this will be caught in the backend too
2162 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2163 and not f_var, self.ENODEOS, node,
2164 "OS %s with API at least %d does not declare any variant",
2165 os_name, constants.OS_API_V15)
2166 # comparisons with the 'base' image
2167 test = os_name not in base.oslist
2168 _ErrorIf(test, self.ENODEOS, node,
2169 "Extra OS %s not present on reference node (%s)",
2173 assert base.oslist[os_name], "Base node has empty OS status?"
2174 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2175 if not b_status:
2176 # base OS is invalid, skipping
2177 continue
2178 for kind, a, b in [("API version", f_api, b_api),
2179 ("variants list", f_var, b_var),
2180 ("parameters", beautify_params(f_param),
2181 beautify_params(b_param))]:
2182 _ErrorIf(a != b, self.ENODEOS, node,
2183 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2184 kind, os_name, base.name,
2185 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2187 # check any missing OSes
2188 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2189 _ErrorIf(missing, self.ENODEOS, node,
2190 "OSes present on reference node %s but missing on this node: %s",
2191 base.name, utils.CommaJoin(missing))
2193 def _VerifyOob(self, ninfo, nresult):
2194 """Verifies out of band functionality of a node.
2196 @type ninfo: L{objects.Node}
2197 @param ninfo: the node to check
2198 @param nresult: the remote results for the node
2200 """
2201 node = ninfo.name
2202 # We just have to verify the paths on master and/or master candidates
2203 # as the oob helper is invoked on the master
2204 if ((ninfo.master_candidate or ninfo.master_capable) and
2205 constants.NV_OOB_PATHS in nresult):
2206 for path_result in nresult[constants.NV_OOB_PATHS]:
2207 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2209 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2210 """Verifies and updates the node volume data.
2212 This function will update a L{NodeImage}'s internal structures
2213 with data from the remote call.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param nimg: the node image object
2219 @param vg_name: the configured VG name
2221 """
2222 node = ninfo.name
2223 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2225 nimg.lvm_fail = True
2226 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2227 if vg_name is None:
2228 pass
2229 elif isinstance(lvdata, basestring):
2230 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2231 utils.SafeEncode(lvdata))
2232 elif not isinstance(lvdata, dict):
2233 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2234 else:
2235 nimg.volumes = lvdata
2236 nimg.lvm_fail = False
2238 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2239 """Verifies and updates the node instance list.
2241 If the listing was successful, then updates this node's instance
2242 list. Otherwise, it marks the RPC call as failed for the instance
2243 list key.
2245 @type ninfo: L{objects.Node}
2246 @param ninfo: the node to check
2247 @param nresult: the remote results for the node
2248 @param nimg: the node image object
2251 idata = nresult.get(constants.NV_INSTANCELIST, None)
2252 test = not isinstance(idata, list)
2253 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2254 " (instancelist): %s", utils.SafeEncode(str(idata)))
2256 nimg.hyp_fail = True
2258 nimg.instances = idata
2260 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2261 """Verifies and computes a node information map
2263 @type ninfo: L{objects.Node}
2264 @param ninfo: the node to check
2265 @param nresult: the remote results for the node
2266 @param nimg: the node image object
2267 @param vg_name: the configured VG name
2269 """
2270 node = ninfo.name
2271 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2273 # try to read free memory (from the hypervisor)
2274 hv_info = nresult.get(constants.NV_HVINFO, None)
2275 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2276 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2277 if not test:
2278 try:
2279 nimg.mfree = int(hv_info["memory_free"])
2280 except (ValueError, TypeError):
2281 _ErrorIf(True, self.ENODERPC, node,
2282 "node returned invalid nodeinfo, check hypervisor")
2284 # FIXME: devise a free space model for file based instances as well
2285 if vg_name is not None:
2286 test = (constants.NV_VGLIST not in nresult or
2287 vg_name not in nresult[constants.NV_VGLIST])
2288 _ErrorIf(test, self.ENODELVM, node,
2289 "node didn't return data for the volume group '%s'"
2290 " - it is either missing or broken", vg_name)
2293 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2294 except (ValueError, TypeError):
2295 _ErrorIf(True, self.ENODERPC, node,
2296 "node returned invalid LVM info, check LVM status")
2298 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2299 """Gets per-disk status information for all instances.
2301 @type nodelist: list of strings
2302 @param nodelist: Node names
2303 @type node_image: dict of (name, L{objects.Node})
2304 @param node_image: Node objects
2305 @type instanceinfo: dict of (name, L{objects.Instance})
2306 @param instanceinfo: Instance objects
2307 @rtype: {instance: {node: [(success, payload)]}}
2308 @return: a dictionary of per-instance dictionaries with nodes as
2309 keys and disk information as values; the disk information is a
2310 list of tuples (success, payload)
2312 """
2313 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2315 node_disks = {}
2316 node_disks_devonly = {}
2317 diskless_instances = set()
2318 diskless = constants.DT_DISKLESS
2320 for nname in nodelist:
2321 node_instances = list(itertools.chain(node_image[nname].pinst,
2322 node_image[nname].sinst))
2323 diskless_instances.update(inst for inst in node_instances
2324 if instanceinfo[inst].disk_template == diskless)
2325 disks = [(inst, disk)
2326 for inst in node_instances
2327 for disk in instanceinfo[inst].disks]
2329 if not disks:
2330 # No need to collect data
2331 continue
2333 node_disks[nname] = disks
2335 # Creating copies as SetDiskID below will modify the objects and that can
2336 # lead to incorrect data returned from nodes
2337 devonly = [dev.Copy() for (_, dev) in disks]
2339 for dev in devonly:
2340 self.cfg.SetDiskID(dev, nname)
2342 node_disks_devonly[nname] = devonly
2344 assert len(node_disks) == len(node_disks_devonly)
2346 # Collect data from all nodes with disks
2347 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2348 node_disks_devonly)
2350 assert len(result) == len(node_disks)
2352 instdisk = {}
2354 for (nname, nres) in result.items():
2355 disks = node_disks[nname]
2357 if nres.offline:
2358 # No data from this node
2359 data = len(disks) * [(False, "node offline")]
2360 else:
2361 msg = nres.fail_msg
2362 _ErrorIf(msg, self.ENODERPC, nname,
2363 "while getting disk information: %s", msg)
2364 if msg:
2365 # No data from this node
2366 data = len(disks) * [(False, msg)]
2367 else:
2368 data = []
2369 for idx, i in enumerate(nres.payload):
2370 if isinstance(i, (tuple, list)) and len(i) == 2:
2371 data.append(i)
2372 else:
2373 logging.warning("Invalid result from node %s, entry %d: %s",
2374 nname, idx, i)
2375 data.append((False, "Invalid result from the remote node"))
2377 for ((inst, _), status) in zip(disks, data):
2378 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2380 # Add empty entries for diskless instances.
2381 for inst in diskless_instances:
2382 assert inst not in instdisk
2383 instdisk[inst] = {}
2385 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2386 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2387 compat.all(isinstance(s, (tuple, list)) and
2388 len(s) == 2 for s in statuses)
2389 for inst, nnames in instdisk.items()
2390 for nname, statuses in nnames.items())
2391 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2393 return instdisk
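# Editor's note: sketch of the returned mapping (hypothetical values):
#
#   instdisk = {"inst1": {"node1": [(True, status0), (False, "timeout")]}}
#
# i.e. per instance, per node, one (success, payload) pair per disk in index
# order; diskless instances are present with an empty inner dict, which is
# what the asserts above verify.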
2395 def BuildHooksEnv(self):
2396 """Build hooks env.
2398 Cluster-Verify hooks just run in the post phase, and their failure makes
2399 the output be logged in the verify output and the verification fail.
2401 """
2402 env = {
2403 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2404 }
2406 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2407 for node in self.my_node_info.values())
2409 return env
2411 def BuildHooksNodes(self):
2412 """Build hooks nodes.
2415 assert self.my_node_names, ("Node list not gathered,"
2416 " has CheckPrereq been executed?")
2417 return ([], self.my_node_names)
2419 def Exec(self, feedback_fn):
2420 """Verify integrity of the node group, performing various test on nodes.
2423 # This method has too many local variables. pylint: disable-msg=R0914
2425 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2426 verbose = self.op.verbose
2427 self._feedback_fn = feedback_fn
2429 vg_name = self.cfg.GetVGName()
2430 drbd_helper = self.cfg.GetDRBDHelper()
2431 cluster = self.cfg.GetClusterInfo()
2432 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2433 hypervisors = cluster.enabled_hypervisors
2434 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2436 i_non_redundant = [] # Non redundant instances
2437 i_non_a_balanced = [] # Non auto-balanced instances
2438 n_offline = 0 # Count of offline nodes
2439 n_drained = 0 # Count of nodes being drained
2440 node_vol_should = {}
2442 # FIXME: verify OS list
2444 # File verification
2445 filemap = _ComputeAncillaryFiles(cluster, False)
2447 # do local checksums
2448 master_node = self.master_node = self.cfg.GetMasterNode()
2449 master_ip = self.cfg.GetMasterIP()
2451 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2453 # We will make nodes contact all nodes in their group, and one node from
2454 # every other group.
2455 # TODO: should it be a *random* node, different every time?
2456 online_nodes = [node.name for node in node_data_list if not node.offline]
2457 other_group_nodes = {}
2459 for name in sorted(self.all_node_info):
2460 node = self.all_node_info[name]
2461 if (node.group not in other_group_nodes
2462 and node.group != self.group_uuid
2463 and not node.offline):
2464 other_group_nodes[node.group] = node.name
2466 node_verify_param = {
2467 constants.NV_FILELIST:
2468 utils.UniqueSequence(filename
2469 for files in filemap
2470 for filename in files),
2471 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2472 constants.NV_HYPERVISOR: hypervisors,
2473 constants.NV_HVPARAMS:
2474 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2475 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2476 for node in node_data_list
2477 if not node.offline],
2478 constants.NV_INSTANCELIST: hypervisors,
2479 constants.NV_VERSION: None,
2480 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2481 constants.NV_NODESETUP: None,
2482 constants.NV_TIME: None,
2483 constants.NV_MASTERIP: (master_node, master_ip),
2484 constants.NV_OSLIST: None,
2485 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2486 }
2488 if vg_name is not None:
2489 node_verify_param[constants.NV_VGLIST] = None
2490 node_verify_param[constants.NV_LVLIST] = vg_name
2491 node_verify_param[constants.NV_PVLIST] = [vg_name]
2492 node_verify_param[constants.NV_DRBDLIST] = None
2494 if drbd_helper:
2495 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2498 # FIXME: this needs to be changed per node-group, not cluster-wide
2499 bridges = set()
2500 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2501 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2502 bridges.add(default_nicpp[constants.NIC_LINK])
2503 for instance in self.my_inst_info.values():
2504 for nic in instance.nics:
2505 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2506 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2507 bridges.add(full_nic[constants.NIC_LINK])
2509 if bridges:
2510 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2512 # Build our expected cluster state
2513 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2514 name=node.name,
2515 vm_capable=node.vm_capable))
2516 for node in node_data_list)
2518 oob_paths = []
2520 for node in self.all_node_info.values():
2521 path = _SupportsOob(self.cfg, node)
2522 if path and path not in oob_paths:
2523 oob_paths.append(path)
2525 if oob_paths:
2526 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2528 for instance in self.my_inst_names:
2529 inst_config = self.my_inst_info[instance]
2531 for nname in inst_config.all_nodes:
2532 if nname not in node_image:
2533 gnode = self.NodeImage(name=nname)
2534 gnode.ghost = (nname not in self.all_node_info)
2535 node_image[nname] = gnode
2537 inst_config.MapLVsByNode(node_vol_should)
2539 pnode = inst_config.primary_node
2540 node_image[pnode].pinst.append(instance)
2542 for snode in inst_config.secondary_nodes:
2543 nimg = node_image[snode]
2544 nimg.sinst.append(instance)
2545 if pnode not in nimg.sbp:
2546 nimg.sbp[pnode] = []
2547 nimg.sbp[pnode].append(instance)
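# Editor's note: after this loop each node image's sbp attribute maps a
# primary-node name to the instances which have that primary and this node
# as a secondary, e.g. {"nodeA": ["inst1", "inst2"]} (hypothetical values);
# _VerifyNPlusOneMemory consumes exactly this shape.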
2549 # At this point, we have the in-memory data structures complete,
2550 # except for the runtime information, which we'll gather next
2552 # Due to the way our RPC system works, exact response times cannot be
2553 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2554 # time before and after executing the request, we can at least have a time
2556 nvinfo_starttime = time.time()
2557 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2558 node_verify_param,
2559 self.cfg.GetClusterName())
2560 if self.extra_lv_nodes and vg_name is not None:
2561 extra_lv_nvinfo = \
2562 self.rpc.call_node_verify(self.extra_lv_nodes,
2563 {constants.NV_LVLIST: vg_name},
2564 self.cfg.GetClusterName())
2565 else:
2566 extra_lv_nvinfo = {}
2567 nvinfo_endtime = time.time()
2569 all_drbd_map = self.cfg.ComputeDRBDMap()
2571 feedback_fn("* Gathering disk information (%s nodes)" %
2572 len(self.my_node_names))
2573 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2574 self.my_inst_info)
2576 feedback_fn("* Verifying configuration file consistency")
2578 # If not all nodes are being checked, we need to make sure the master node
2579 # and a non-checked vm_capable node are in the list.
2580 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2581 if absent_nodes:
2582 vf_nvinfo = all_nvinfo.copy()
2583 vf_node_info = list(self.my_node_info.values())
2584 additional_nodes = []
2585 if master_node not in self.my_node_info:
2586 additional_nodes.append(master_node)
2587 vf_node_info.append(self.all_node_info[master_node])
2588 # Add the first vm_capable node we find which is not included
2589 for node in absent_nodes:
2590 nodeinfo = self.all_node_info[node]
2591 if nodeinfo.vm_capable and not nodeinfo.offline:
2592 additional_nodes.append(node)
2593 vf_node_info.append(self.all_node_info[node])
2594 break
2595 key = constants.NV_FILELIST
2596 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2597 {key: node_verify_param[key]},
2598 self.cfg.GetClusterName()))
2599 else:
2600 vf_nvinfo = all_nvinfo
2601 vf_node_info = self.my_node_info.values()
2603 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2605 feedback_fn("* Verifying node status")
2607 refos_img = None
2609 for node_i in node_data_list:
2610 node = node_i.name
2611 nimg = node_image[node]
2613 if node_i.offline:
2614 if verbose:
2615 feedback_fn("* Skipping offline node %s" % (node,))
2616 n_offline += 1
2617 continue
2619 if node == master_node:
2620 ntype = "master"
2621 elif node_i.master_candidate:
2622 ntype = "master candidate"
2623 elif node_i.drained:
2624 ntype = "drained"
2625 n_drained += 1
2626 else:
2627 ntype = "regular"
2628 if verbose:
2629 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2631 msg = all_nvinfo[node].fail_msg
2632 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2633 if msg:
2634 nimg.rpc_fail = True
2635 continue
2637 nresult = all_nvinfo[node].payload
2639 nimg.call_ok = self._VerifyNode(node_i, nresult)
2640 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2641 self._VerifyNodeNetwork(node_i, nresult)
2642 self._VerifyOob(node_i, nresult)
2644 if nimg.vm_capable:
2645 self._VerifyNodeLVM(node_i, nresult, vg_name)
2646 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2647 all_drbd_map)
2649 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2650 self._UpdateNodeInstances(node_i, nresult, nimg)
2651 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2652 self._UpdateNodeOS(node_i, nresult, nimg)
2654 if not nimg.os_fail:
2655 if refos_img is None:
2656 refos_img = nimg
2657 self._VerifyNodeOS(node_i, nimg, refos_img)
2658 self._VerifyNodeBridges(node_i, nresult, bridges)
2660 # Check whether all running instances are primary for the node. (This
2661 # can no longer be done from _VerifyInstance below, since some of the
2662 # wrong instances could be from other node groups.)
2663 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2665 for inst in non_primary_inst:
2666 test = inst in self.all_inst_info
2667 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2668 "instance should not run on node %s", node_i.name)
2669 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2670 "node is running unknown instance %s", inst)
2672 for node, result in extra_lv_nvinfo.items():
2673 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2674 node_image[node], vg_name)
2676 feedback_fn("* Verifying instance status")
2677 for instance in self.my_inst_names:
2678 if verbose:
2679 feedback_fn("* Verifying instance %s" % instance)
2680 inst_config = self.my_inst_info[instance]
2681 self._VerifyInstance(instance, inst_config, node_image,
2682 instdisk[instance])
2683 inst_nodes_offline = []
2685 pnode = inst_config.primary_node
2686 pnode_img = node_image[pnode]
2687 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2688 self.ENODERPC, pnode, "instance %s, connection to"
2689 " primary node failed", instance)
2691 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2692 self.EINSTANCEBADNODE, instance,
2693 "instance is marked as running and lives on offline node %s",
2694 inst_config.primary_node)
2696 # If the instance is non-redundant we cannot survive losing its primary
2697 # node, so we are not N+1 compliant. On the other hand we have no disk
2698 # templates with more than one secondary so that situation is not well
2699 # supported either.
2700 # FIXME: does not support file-backed instances
2701 if not inst_config.secondary_nodes:
2702 i_non_redundant.append(instance)
2704 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2705 instance, "instance has multiple secondary nodes: %s",
2706 utils.CommaJoin(inst_config.secondary_nodes),
2707 code=self.ETYPE_WARNING)
2709 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2710 pnode = inst_config.primary_node
2711 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2712 instance_groups = {}
2714 for node in instance_nodes:
2715 instance_groups.setdefault(self.all_node_info[node].group,
2716 []).append(node)
2718 pretty_list = [
2719 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2720 # Sort so that we always list the primary node first.
2721 for group, nodes in sorted(instance_groups.items(),
2722 key=lambda (_, nodes): pnode in nodes,
2723 reverse=True)]
2725 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2726 instance, "instance has primary and secondary nodes in"
2727 " different groups: %s", utils.CommaJoin(pretty_list),
2728 code=self.ETYPE_WARNING)
2730 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2731 i_non_a_balanced.append(instance)
2733 for snode in inst_config.secondary_nodes:
2734 s_img = node_image[snode]
2735 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2736 "instance %s, connection to secondary node failed", instance)
2739 inst_nodes_offline.append(snode)
2741 # warn that the instance lives on offline nodes
2742 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2743 "instance has offline secondary node(s) %s",
2744 utils.CommaJoin(inst_nodes_offline))
2745 # ... or ghost/non-vm_capable nodes
2746 for node in inst_config.all_nodes:
2747 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2748 "instance lives on ghost node %s", node)
2749 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2750 instance, "instance lives on non-vm_capable node %s", node)
2752 feedback_fn("* Verifying orphan volumes")
2753 reserved = utils.FieldSet(*cluster.reserved_lvs)
2755 # We will get spurious "unknown volume" warnings if any node of this group
2756 # is secondary for an instance whose primary is in another group. To avoid
2757 # them, we find these instances and add their volumes to node_vol_should.
2758 for inst in self.all_inst_info.values():
2759 for secondary in inst.secondary_nodes:
2760 if (secondary in self.my_node_info
2761 and inst.name not in self.my_inst_info):
2762 inst.MapLVsByNode(node_vol_should)
2765 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2767 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2768 feedback_fn("* Verifying N+1 Memory redundancy")
2769 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2771 feedback_fn("* Other Notes")
2772 if i_non_redundant:
2773 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2774 % len(i_non_redundant))
2776 if i_non_a_balanced:
2777 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2778 % len(i_non_a_balanced))
2780 if n_offline:
2781 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2783 if n_drained:
2784 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2786 return not self.bad
2788 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2789 """Analyze the post-hooks' result
2791 This method analyses the hook result, handles it, and sends some
2792 nicely-formatted feedback back to the user.
2794 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2795 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2796 @param hooks_results: the results of the multi-node hooks rpc call
2797 @param feedback_fn: function used to send feedback back to the caller
2798 @param lu_result: previous Exec result
2799 @return: the new Exec result, based on the previous result
2801 """
2803 # We only really run POST phase hooks, and are only interested in
2804 # their results
2805 if phase == constants.HOOKS_PHASE_POST:
2806 # Used to change hooks' output to proper indentation
2807 feedback_fn("* Hooks Results")
2808 assert hooks_results, "invalid result from hooks"
2810 for node_name in hooks_results:
2811 res = hooks_results[node_name]
2812 msg = res.fail_msg
2813 test = msg and not res.offline
2814 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2815 "Communication failure in hooks execution: %s", msg)
2816 if res.offline or msg:
2817 # No need to investigate payload if node is offline or gave an error.
2818 # override manually lu_result here as _ErrorIf only
2819 # overrides self.bad
2820 lu_result = 1
2821 continue
2822 for script, hkr, output in res.payload:
2823 test = hkr == constants.HKR_FAIL
2824 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2825 "Script %s failed, output:", script)
2827 output = self._HOOKS_INDENT_RE.sub(' ', output)
2828 feedback_fn("%s" % output)
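# Editor's note: a sketch of the indentation step above, assuming
# _HOOKS_INDENT_RE anchors the start of each line (the definition is outside
# this excerpt, so the pattern shown here is an assumption):
#
#   import re
#   _HOOKS_INDENT_RE = re.compile("^", re.M)   # assumed definition
#   _HOOKS_INDENT_RE.sub(' ', "ok\nfail")      # -> " ok\n fail"
#
# so multi-line hook output is shown indented under the node's heading.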
2834 class LUClusterVerifyDisks(NoHooksLU):
2835 """Verifies the cluster disks status.
2840 def ExpandNames(self):
2841 self.needed_locks = {
2842 locking.LEVEL_NODE: locking.ALL_SET,
2843 locking.LEVEL_INSTANCE: locking.ALL_SET,
2844 }
2845 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2847 def Exec(self, feedback_fn):
2848 """Verify integrity of cluster disks.
2850 @rtype: tuple of three items
2851 @return: a tuple of (dict of node-to-node_error, list of instances
2852 which need activate-disks, dict of instance: (node, volume) for
2853 missing volumes
2855 """
2856 result = res_nodes, res_instances, res_missing = {}, [], {}
2858 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2859 instances = self.cfg.GetAllInstancesInfo().values()
2861 nv_dict = {}
2862 for inst in instances:
2863 inst_lvs = {}
2864 if not inst.admin_up:
2865 continue
2866 inst.MapLVsByNode(inst_lvs)
2867 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2868 for node, vol_list in inst_lvs.iteritems():
2869 for vol in vol_list:
2870 nv_dict[(node, vol)] = inst
2872 if not nv_dict:
2873 return result
2875 node_lvs = self.rpc.call_lv_list(nodes, [])
2876 for node, node_res in node_lvs.items():
2877 if node_res.offline:
2878 continue
2879 msg = node_res.fail_msg
2880 if msg:
2881 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2882 res_nodes[node] = msg
2883 continue
2885 lvs = node_res.payload
2886 for lv_name, (_, _, lv_online) in lvs.items():
2887 inst = nv_dict.pop((node, lv_name), None)
2888 if (not lv_online and inst is not None
2889 and inst.name not in res_instances):
2890 res_instances.append(inst.name)
2892 # any leftover items in nv_dict are missing LVs, let's arrange the
2893 # data better
2894 for key, inst in nv_dict.iteritems():
2895 if inst.name not in res_missing:
2896 res_missing[inst.name] = []
2897 res_missing[inst.name].append(key)
2899 return result
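# Editor's note: sketch of the inversion done above (hypothetical names).
# MapLVsByNode yields {node: [volume, ...]} per instance; nv_dict flips that
# to {(node, volume): instance} so each LV reported by a node can be matched
# (and popped) in O(1):
#
#   inst_lvs = {"node1": ["xenvg/disk0"]}
#   nv_dict = {("node1", "xenvg/disk0"): inst}
#
# whatever survives the node scan is a missing volume, grouped into
# res_missing by instance name.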
2902 class LUClusterRepairDiskSizes(NoHooksLU):
2903 """Verifies the cluster disks sizes.
2908 def ExpandNames(self):
2909 if self.op.instances:
2910 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2911 self.needed_locks = {
2912 locking.LEVEL_NODE: [],
2913 locking.LEVEL_INSTANCE: self.wanted_names,
2914 }
2915 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2916 else:
2917 self.wanted_names = None
2918 self.needed_locks = {
2919 locking.LEVEL_NODE: locking.ALL_SET,
2920 locking.LEVEL_INSTANCE: locking.ALL_SET,
2921 }
2922 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2924 def DeclareLocks(self, level):
2925 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2926 self._LockInstancesNodes(primary_only=True)
2928 def CheckPrereq(self):
2929 """Check prerequisites.
2931 This only checks the optional instance list against the existing names.
2934 if self.wanted_names is None:
2935 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2937 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2938 in self.wanted_names]
2940 def _EnsureChildSizes(self, disk):
2941 """Ensure children of the disk have the needed disk size.
2943 This is valid mainly for DRBD8 and fixes an issue where the
2944 children have smaller disk size.
2946 @param disk: an L{ganeti.objects.Disk} object
2949 if disk.dev_type == constants.LD_DRBD8:
2950 assert disk.children, "Empty children for DRBD8?"
2951 fchild = disk.children[0]
2952 mismatch = fchild.size < disk.size
2953 if mismatch:
2954 self.LogInfo("Child disk has size %d, parent %d, fixing",
2955 fchild.size, disk.size)
2956 fchild.size = disk.size
2958 # and we recurse on this child only, not on the metadev
2959 return self._EnsureChildSizes(fchild) or mismatch
2960 else:
2961 return False
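# Editor's note: a worked example with hypothetical sizes. For a DRBD8 disk
# of 10240 MiB whose data child reports 10048 MiB:
#
#   disk.size = 10240; disk.children[0].size = 10048
#   self._EnsureChildSizes(disk)   # -> True, child size bumped to 10240
#
# The True return tells the caller the configuration changed and must be
# written out.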
2963 def Exec(self, feedback_fn):
2964 """Verify the size of cluster disks.
2967 # TODO: check child disks too
2968 # TODO: check differences in size between primary/secondary nodes
2970 for instance in self.wanted_instances:
2971 pnode = instance.primary_node
2972 if pnode not in per_node_disks:
2973 per_node_disks[pnode] = []
2974 for idx, disk in enumerate(instance.disks):
2975 per_node_disks[pnode].append((instance, idx, disk))
2977 changed = []
2978 for node, dskl in per_node_disks.items():
2979 newl = [v[2].Copy() for v in dskl]
2980 for dsk in newl:
2981 self.cfg.SetDiskID(dsk, node)
2982 result = self.rpc.call_blockdev_getsize(node, newl)
2983 if result.fail_msg:
2984 self.LogWarning("Failure in blockdev_getsize call to node"
2985 " %s, ignoring", node)
2986 continue
2987 if len(result.payload) != len(dskl):
2988 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2989 " result.payload=%s", node, len(dskl), result.payload)
2990 self.LogWarning("Invalid result from node %s, ignoring node results",
2991 node)
2992 continue
2993 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2994 if size is None:
2995 self.LogWarning("Disk %d of instance %s did not return size"
2996 " information, ignoring", idx, instance.name)
2997 continue
2998 if not isinstance(size, (int, long)):
2999 self.LogWarning("Disk %d of instance %s did not return valid"
3000 " size information, ignoring", idx, instance.name)
3003 if size != disk.size:
3004 self.LogInfo("Disk %d of instance %s has mismatched size,"
3005 " correcting: recorded %d, actual %d", idx,
3006 instance.name, disk.size, size)
3007 disk.size = size
3008 self.cfg.Update(instance, feedback_fn)
3009 changed.append((instance.name, idx, size))
3010 if self._EnsureChildSizes(disk):
3011 self.cfg.Update(instance, feedback_fn)
3012 changed.append((instance.name, idx, disk.size))
3014 return changed
3016 class LUClusterRename(LogicalUnit):
3017 """Rename the cluster.
3020 HPATH = "cluster-rename"
3021 HTYPE = constants.HTYPE_CLUSTER
3023 def BuildHooksEnv(self):
3025 """Build hooks env.
3026 """
3027 return {
3028 "OP_TARGET": self.cfg.GetClusterName(),
3029 "NEW_NAME": self.op.name,
3030 }
3032 def BuildHooksNodes(self):
3033 """Build hooks nodes.
3036 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3038 def CheckPrereq(self):
3039 """Verify that the passed name is a valid one.
3042 hostname = netutils.GetHostname(name=self.op.name,
3043 family=self.cfg.GetPrimaryIPFamily())
3045 new_name = hostname.name
3046 self.ip = new_ip = hostname.ip
3047 old_name = self.cfg.GetClusterName()
3048 old_ip = self.cfg.GetMasterIP()
3049 if new_name == old_name and new_ip == old_ip:
3050 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3051 " cluster has changed",
3053 if new_ip != old_ip:
3054 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3055 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3056 " reachable on the network" %
3057 new_ip, errors.ECODE_NOTUNIQUE)
3059 self.op.name = new_name
3061 def Exec(self, feedback_fn):
3062 """Rename the cluster.
3065 clustername = self.op.name
3068 # shutdown the master IP
3069 master = self.cfg.GetMasterNode()
3070 result = self.rpc.call_node_stop_master(master, False)
3071 result.Raise("Could not disable the master role")
3073 try:
3074 cluster = self.cfg.GetClusterInfo()
3075 cluster.cluster_name = clustername
3076 cluster.master_ip = ip
3077 self.cfg.Update(cluster, feedback_fn)
3079 # update the known hosts file
3080 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3081 node_list = self.cfg.GetOnlineNodeList()
3082 try:
3083 node_list.remove(master)
3084 except ValueError:
3085 pass
3086 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3087 finally:
3088 result = self.rpc.call_node_start_master(master, False, False)
3089 msg = result.fail_msg
3090 if msg:
3091 self.LogWarning("Could not re-enable the master role on"
3092 " the master, please restart manually: %s", msg)
3094 return clustername
3097 class LUClusterSetParams(LogicalUnit):
3098 """Change the parameters of the cluster.
3101 HPATH = "cluster-modify"
3102 HTYPE = constants.HTYPE_CLUSTER
3105 def CheckArguments(self):
3109 if self.op.uid_pool:
3110 uidpool.CheckUidPool(self.op.uid_pool)
3112 if self.op.add_uids:
3113 uidpool.CheckUidPool(self.op.add_uids)
3115 if self.op.remove_uids:
3116 uidpool.CheckUidPool(self.op.remove_uids)
3118 def ExpandNames(self):
3119 # FIXME: in the future maybe other cluster params won't require checking on
3120 # all nodes to be modified.
3121 self.needed_locks = {
3122 locking.LEVEL_NODE: locking.ALL_SET,
3124 self.share_locks[locking.LEVEL_NODE] = 1
3126 def BuildHooksEnv(self):
3128 """Build hooks env.
3129 """
3130 return {
3131 "OP_TARGET": self.cfg.GetClusterName(),
3132 "NEW_VG_NAME": self.op.vg_name,
3133 }
3135 def BuildHooksNodes(self):
3136 """Build hooks nodes.
3139 mn = self.cfg.GetMasterNode()
3142 def CheckPrereq(self):
3143 """Check prerequisites.
3145 This checks whether the given parameters don't conflict and
3146 if the given volume group is valid.
3148 """
3149 if self.op.vg_name is not None and not self.op.vg_name:
3150 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3151 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3152 " instances exist", errors.ECODE_INVAL)
3154 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3155 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3156 raise errors.OpPrereqError("Cannot disable drbd helper while"
3157 " drbd-based instances exist",
3160 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3162 # if vg_name not None, checks given volume group on all nodes
3163 if self.op.vg_name:
3164 vglist = self.rpc.call_vg_list(node_list)
3165 for node in node_list:
3166 msg = vglist[node].fail_msg
3167 if msg:
3168 # ignoring down node
3169 self.LogWarning("Error while gathering data on node %s"
3170 " (ignoring node): %s", node, msg)
3171 continue
3172 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3173 self.op.vg_name,
3174 constants.MIN_VG_SIZE)
3175 if vgstatus:
3176 raise errors.OpPrereqError("Error on node '%s': %s" %
3177 (node, vgstatus), errors.ECODE_ENVIRON)
3179 if self.op.drbd_helper:
3180 # checks given drbd helper on all nodes
3181 helpers = self.rpc.call_drbd_helper(node_list)
3182 for node in node_list:
3183 ninfo = self.cfg.GetNodeInfo(node)
3184 if ninfo.offline:
3185 self.LogInfo("Not checking drbd helper on offline node %s", node)
3186 continue
3187 msg = helpers[node].fail_msg
3188 if msg:
3189 raise errors.OpPrereqError("Error checking drbd helper on node"
3190 " '%s': %s" % (node, msg),
3191 errors.ECODE_ENVIRON)
3192 node_helper = helpers[node].payload
3193 if node_helper != self.op.drbd_helper:
3194 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3195 (node, node_helper), errors.ECODE_ENVIRON)
3197 self.cluster = cluster = self.cfg.GetClusterInfo()
3198 # validate params changes
3199 if self.op.beparams:
3200 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3201 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3203 if self.op.ndparams:
3204 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3205 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3207 # TODO: we need a more general way to handle resetting
3208 # cluster-level parameters to default values
3209 if self.new_ndparams["oob_program"] == "":
3210 self.new_ndparams["oob_program"] = \
3211 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3213 if self.op.nicparams:
3214 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3215 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3216 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3217 nic_errors = []
3219 # check all instances for consistency
3220 for instance in self.cfg.GetAllInstancesInfo().values():
3221 for nic_idx, nic in enumerate(instance.nics):
3222 params_copy = copy.deepcopy(nic.nicparams)
3223 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3225 # check parameter syntax
3226 try:
3227 objects.NIC.CheckParameterSyntax(params_filled)
3228 except errors.ConfigurationError, err:
3229 nic_errors.append("Instance %s, nic/%d: %s" %
3230 (instance.name, nic_idx, err))
3232 # if we're moving instances to routed, check that they have an ip
3233 target_mode = params_filled[constants.NIC_MODE]
3234 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3235 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3236 " address" % (instance.name, nic_idx))
3238 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3239 "\n".join(nic_errors))
3241 # hypervisor list/parameters
3242 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3243 if self.op.hvparams:
3244 for hv_name, hv_dict in self.op.hvparams.items():
3245 if hv_name not in self.new_hvparams:
3246 self.new_hvparams[hv_name] = hv_dict
3248 self.new_hvparams[hv_name].update(hv_dict)
3250 # os hypervisor parameters
3251 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3252 if self.op.os_hvp:
3253 for os_name, hvs in self.op.os_hvp.items():
3254 if os_name not in self.new_os_hvp:
3255 self.new_os_hvp[os_name] = hvs
3256 else:
3257 for hv_name, hv_dict in hvs.items():
3258 if hv_name not in self.new_os_hvp[os_name]:
3259 self.new_os_hvp[os_name][hv_name] = hv_dict
3260 else:
3261 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3263 # os parameters
3264 self.new_osp = objects.FillDict(cluster.osparams, {})
3265 if self.op.osparams:
3266 for os_name, osp in self.op.osparams.items():
3267 if os_name not in self.new_osp:
3268 self.new_osp[os_name] = {}
3270 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3271 use_none=True)
3273 if not self.new_osp[os_name]:
3274 # we removed all parameters
3275 del self.new_osp[os_name]
3277 # check the parameter validity (remote check)
3278 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3279 os_name, self.new_osp[os_name])
3281 # changes to the hypervisor list
3282 if self.op.enabled_hypervisors is not None:
3283 self.hv_list = self.op.enabled_hypervisors
3284 for hv in self.hv_list:
3285 # if the hypervisor doesn't already exist in the cluster
3286 # hvparams, we initialize it to empty, and then (in both
3287 # cases) we make sure to fill the defaults, as we might not
3288 # have a complete defaults list if the hypervisor wasn't
3290 if hv not in new_hvp:
3291 new_hvp[hv] = {}
3292 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3293 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3294 else:
3295 self.hv_list = cluster.enabled_hypervisors
3297 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3298 # either the enabled list has changed, or the parameters have, validate
3299 for hv_name, hv_params in self.new_hvparams.items():
3300 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3301 (self.op.enabled_hypervisors and
3302 hv_name in self.op.enabled_hypervisors)):
3303 # either this is a new hypervisor, or its parameters have changed
3304 hv_class = hypervisor.GetHypervisor(hv_name)
3305 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3306 hv_class.CheckParameterSyntax(hv_params)
3307 _CheckHVParams(self, node_list, hv_name, hv_params)
3309 if self.op.os_hvp:
3310 # no need to check any newly-enabled hypervisors, since the
3311 # defaults have already been checked in the above code-block
3312 for os_name, os_hvp in self.new_os_hvp.items():
3313 for hv_name, hv_params in os_hvp.items():
3314 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3315 # we need to fill in the new os_hvp on top of the actual hv_p
3316 cluster_defaults = self.new_hvparams.get(hv_name, {})
3317 new_osp = objects.FillDict(cluster_defaults, hv_params)
3318 hv_class = hypervisor.GetHypervisor(hv_name)
3319 hv_class.CheckParameterSyntax(new_osp)
3320 _CheckHVParams(self, node_list, hv_name, new_osp)
3322 if self.op.default_iallocator:
3323 alloc_script = utils.FindFile(self.op.default_iallocator,
3324 constants.IALLOCATOR_SEARCH_PATH,
3325 os.path.isfile)
3326 if alloc_script is None:
3327 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3328 " specified" % self.op.default_iallocator,
3331 def Exec(self, feedback_fn):
3332 """Change the parameters of the cluster.
3335 if self.op.vg_name is not None:
3336 new_volume = self.op.vg_name
3337 if not new_volume:
3338 new_volume = None
3339 if new_volume != self.cfg.GetVGName():
3340 self.cfg.SetVGName(new_volume)
3341 else:
3342 feedback_fn("Cluster LVM configuration already in desired"
3343 " state, not changing")
3344 if self.op.drbd_helper is not None:
3345 new_helper = self.op.drbd_helper
3346 if not new_helper:
3347 new_helper = None
3348 if new_helper != self.cfg.GetDRBDHelper():
3349 self.cfg.SetDRBDHelper(new_helper)
3350 else:
3351 feedback_fn("Cluster DRBD helper already in desired state,"
3352 " not changing")
3353 if self.op.hvparams:
3354 self.cluster.hvparams = self.new_hvparams
3355 if self.op.os_hvp:
3356 self.cluster.os_hvp = self.new_os_hvp
3357 if self.op.enabled_hypervisors is not None:
3358 self.cluster.hvparams = self.new_hvparams
3359 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3360 if self.op.beparams:
3361 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3362 if self.op.nicparams:
3363 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3364 if self.op.osparams:
3365 self.cluster.osparams = self.new_osp
3366 if self.op.ndparams:
3367 self.cluster.ndparams = self.new_ndparams
3369 if self.op.candidate_pool_size is not None:
3370 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3371 # we need to update the pool size here, otherwise the save will fail
3372 _AdjustCandidatePool(self, [])
3374 if self.op.maintain_node_health is not None:
3375 self.cluster.maintain_node_health = self.op.maintain_node_health
3377 if self.op.prealloc_wipe_disks is not None:
3378 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3380 if self.op.add_uids is not None:
3381 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3383 if self.op.remove_uids is not None:
3384 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3386 if self.op.uid_pool is not None:
3387 self.cluster.uid_pool = self.op.uid_pool
3389 if self.op.default_iallocator is not None:
3390 self.cluster.default_iallocator = self.op.default_iallocator
3392 if self.op.reserved_lvs is not None:
3393 self.cluster.reserved_lvs = self.op.reserved_lvs
3395 def helper_os(aname, mods, desc):
3396 desc += " OS list"
3397 lst = getattr(self.cluster, aname)
3398 for key, val in mods:
3399 if key == constants.DDM_ADD:
3400 if val in lst:
3401 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3402 else:
3403 lst.append(val)
3404 elif key == constants.DDM_REMOVE:
3405 if val in lst:
3406 lst.remove(val)
3407 else:
3408 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3409 else:
3410 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3412 if self.op.hidden_os:
3413 helper_os("hidden_os", self.op.hidden_os, "hidden")
3415 if self.op.blacklisted_os:
3416 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3418 if self.op.master_netdev:
3419 master = self.cfg.GetMasterNode()
3420 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3421 self.cluster.master_netdev)
3422 result = self.rpc.call_node_stop_master(master, False)
3423 result.Raise("Could not disable the master ip")
3424 feedback_fn("Changing master_netdev from %s to %s" %
3425 (self.cluster.master_netdev, self.op.master_netdev))
3426 self.cluster.master_netdev = self.op.master_netdev
3428 self.cfg.Update(self.cluster, feedback_fn)
3430 if self.op.master_netdev:
3431 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3432 self.op.master_netdev)
3433 result = self.rpc.call_node_start_master(master, False, False)
3434 if result.fail_msg:
3435 self.LogWarning("Could not re-enable the master ip on"
3436 " the master, please restart manually: %s",
3437 result.fail_msg)
3440 def _UploadHelper(lu, nodes, fname):
3441 """Helper for uploading a file and showing warnings.
3444 if os.path.exists(fname):
3445 result = lu.rpc.call_upload_file(nodes, fname)
3446 for to_node, to_result in result.items():
3447 msg = to_result.fail_msg
3448 if msg:
3449 msg = ("Copy of file %s to node %s failed: %s" %
3450 (fname, to_node, msg))
3451 lu.proc.LogWarning(msg)
3454 def _ComputeAncillaryFiles(cluster, redist):
3455 """Compute files external to Ganeti which need to be consistent.
3457 @type redist: boolean
3458 @param redist: Whether to include files which need to be redistributed
3460 """
3461 # Compute files for all nodes
3462 files_all = set([
3463 constants.SSH_KNOWN_HOSTS_FILE,
3464 constants.CONFD_HMAC_KEY,
3465 constants.CLUSTER_DOMAIN_SECRET_FILE,
3466 ])
3468 if not redist:
3469 files_all.update(constants.ALL_CERT_FILES)
3470 files_all.update(ssconf.SimpleStore().GetFileList())
3472 if cluster.modify_etc_hosts:
3473 files_all.add(constants.ETC_HOSTS)
3475 # Files which must either exist on all nodes or on none
3476 files_all_opt = set([
3477 constants.RAPI_USERS_FILE,
3478 ])
3480 # Files which should only be on master candidates
3481 files_mc = set()
3482 if not redist:
3483 files_mc.add(constants.CLUSTER_CONF_FILE)
3485 # Files which should only be on VM-capable nodes
3486 files_vm = set(filename
3487 for hv_name in cluster.enabled_hypervisors
3488 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3490 # Filenames must be unique
3491 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3492 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3493 "Found file listed in more than one file list"
3495 return (files_all, files_all_opt, files_mc, files_vm)
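# Editor's note: callers unpack the four disjoint sets, e.g. (sketch):
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#       _ComputeAncillaryFiles(cluster, False)
#
# With redist=False (verification) the certificates, ssconf files and the
# config file are included too; the assertion above guarantees no filename
# lands in more than one set.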
3498 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3499 """Distribute additional files which are part of the cluster configuration.
3501 ConfigWriter takes care of distributing the config and ssconf files, but
3502 there are more files which should be distributed to all nodes. This function
3503 makes sure those are copied.
3505 @param lu: calling logical unit
3506 @param additional_nodes: list of nodes not in the config to distribute to
3507 @type additional_vm: boolean
3508 @param additional_vm: whether the additional nodes are vm-capable or not
3511 # Gather target nodes
3512 cluster = lu.cfg.GetClusterInfo()
3513 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3515 online_nodes = lu.cfg.GetOnlineNodeList()
3516 vm_nodes = lu.cfg.GetVmCapableNodeList()
3518 if additional_nodes is not None:
3519 online_nodes.extend(additional_nodes)
3520 if additional_vm:
3521 vm_nodes.extend(additional_nodes)
3523 # Never distribute to master node
3524 for nodelist in [online_nodes, vm_nodes]:
3525 if master_info.name in nodelist:
3526 nodelist.remove(master_info.name)
3528 # Gather file lists
3529 (files_all, files_all_opt, files_mc, files_vm) = \
3530 _ComputeAncillaryFiles(cluster, True)
3532 # Never re-distribute configuration file from here
3533 assert not (constants.CLUSTER_CONF_FILE in files_all or
3534 constants.CLUSTER_CONF_FILE in files_vm)
3535 assert not files_mc, "Master candidates not handled in this function"
3537 filemap = [
3538 (online_nodes, files_all),
3539 (online_nodes, files_all_opt),
3540 (vm_nodes, files_vm),
3541 ]
3543 # Upload the files
3544 for (node_list, files) in filemap:
3545 for fname in files:
3546 _UploadHelper(lu, node_list, fname)
3549 class LUClusterRedistConf(NoHooksLU):
3550 """Force the redistribution of cluster configuration.
3552 This is a very simple LU.
3554 """
3555 REQ_BGL = False
3557 def ExpandNames(self):
3558 self.needed_locks = {
3559 locking.LEVEL_NODE: locking.ALL_SET,
3560 }
3561 self.share_locks[locking.LEVEL_NODE] = 1
3563 def Exec(self, feedback_fn):
3564 """Redistribute the configuration.
3567 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3568 _RedistributeAncillaryFiles(self)
3571 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3572 """Sleep and poll for an instance's disk to sync.
3575 if not instance.disks or disks is not None and not disks:
3578 disks = _ExpandCheckDisks(instance, disks)
3580 if not oneshot:
3581 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3583 node = instance.primary_node
3585 for dev in disks:
3586 lu.cfg.SetDiskID(dev, node)
3588 # TODO: Convert to utils.Retry
3590 retries = 0
3591 degr_retries = 10 # in seconds, as we sleep 1 second each time
3592 while True:
3593 max_time = 0
3594 done = True
3595 cumul_degraded = False
3596 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3597 msg = rstats.fail_msg
3598 if msg:
3599 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3600 retries += 1
3601 if retries >= 10:
3602 raise errors.RemoteError("Can't contact node %s for mirror data,"
3603 " aborting." % node)
3604 time.sleep(6)
3605 continue
3606 rstats = rstats.payload
3608 for i, mstat in enumerate(rstats):
3609 if mstat is None:
3610 lu.LogWarning("Can't compute data for node %s/%s",
3611 node, disks[i].iv_name)
3612 continue
3614 cumul_degraded = (cumul_degraded or
3615 (mstat.is_degraded and mstat.sync_percent is None))
3616 if mstat.sync_percent is not None:
3617 done = False
3618 if mstat.estimated_time is not None:
3619 rem_time = ("%s remaining (estimated)" %
3620 utils.FormatSeconds(mstat.estimated_time))
3621 max_time = mstat.estimated_time
3622 else:
3623 rem_time = "no time estimate"
3624 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3625 (disks[i].iv_name, mstat.sync_percent, rem_time))
3627 # if we're done but degraded, let's do a few small retries, to
3628 # make sure we see a stable and not transient situation; therefore
3629 # we force restart of the loop
3630 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3631 logging.info("Degraded disks found, %d retries left", degr_retries)
3632 degr_retries -= 1
3633 time.sleep(1)
3634 continue
3636 if done or oneshot:
3637 break
3639 time.sleep(min(60, max_time))
3641 if done:
3642 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3643 return not cumul_degraded
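# Editor's note: the boolean result encodes "not degraded"; a typical caller
# would treat False as a soft failure (hypothetical usage):
#
#   if not _WaitForSync(lu, instance):
#       raise errors.OpExecError("Instance %s: disks degraded after sync" %
#                                instance.name)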
3646 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3647 """Check that mirrors are not degraded.
3649 The ldisk parameter, if True, will change the test from the
3650 is_degraded attribute (which represents overall non-ok status for
3650 the device(s)) to the ldisk (representing the local storage status).
3652 """
3653 lu.cfg.SetDiskID(dev, node)
3655 result = True
3658 if on_primary or dev.AssembleOnSecondary():
3659 rstats = lu.rpc.call_blockdev_find(node, dev)
3660 msg = rstats.fail_msg
3661 if msg:
3662 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3663 result = False
3664 elif not rstats.payload:
3665 lu.LogWarning("Can't find disk on node %s", node)
3666 result = False
3667 else:
3668 if ldisk:
3669 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3670 else:
3671 result = result and not rstats.payload.is_degraded
3673 if dev.children:
3674 for child in dev.children:
3675 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3677 return result
3680 class LUOobCommand(NoHooksLU):
3681 """Logical unit for OOB handling.
3685 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3687 def ExpandNames(self):
3688 """Gather locks we need.
3691 if self.op.node_names:
3692 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3693 lock_names = self.op.node_names
3694 else:
3695 lock_names = locking.ALL_SET
3697 self.needed_locks = {
3698 locking.LEVEL_NODE: lock_names,
3699 }
3701 def CheckPrereq(self):
3702 """Check prerequisites.
3705 - the node exists in the configuration
3708 Any errors are signaled by raising errors.OpPrereqError.
3712 self.master_node = self.cfg.GetMasterNode()
3714 assert self.op.power_delay >= 0.0
3716 if self.op.node_names:
3717 if (self.op.command in self._SKIP_MASTER and
3718 self.master_node in self.op.node_names):
3719 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3720 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3722 if master_oob_handler:
3723 additional_text = ("run '%s %s %s' if you want to operate on the"
3724 " master regardless") % (master_oob_handler,
3728 additional_text = "it does not support out-of-band operations"
3730 raise errors.OpPrereqError(("Operating on the master node %s is not"
3731 " allowed for %s; %s") %
3732 (self.master_node, self.op.command,
3733 additional_text), errors.ECODE_INVAL)
3734 else:
3735 self.op.node_names = self.cfg.GetNodeList()
3736 if self.op.command in self._SKIP_MASTER:
3737 self.op.node_names.remove(self.master_node)
3739 if self.op.command in self._SKIP_MASTER:
3740 assert self.master_node not in self.op.node_names
3742 for node_name in self.op.node_names:
3743 node = self.cfg.GetNodeInfo(node_name)
3746 raise errors.OpPrereqError("Node %s not found" % node_name,
3749 self.nodes.append(node)
3751 if (not self.op.ignore_status and
3752 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3753 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3754 " not marked offline") % node_name,
3757 def Exec(self, feedback_fn):
3758 """Execute OOB and return result if we expect any.
3760 """
3761 master_node = self.master_node
3762 ret = []
3764 for idx, node in enumerate(utils.NiceSort(self.nodes,
3765 key=lambda node: node.name)):
3766 node_entry = [(constants.RS_NORMAL, node.name)]
3767 ret.append(node_entry)
3769 oob_program = _SupportsOob(self.cfg, node)
3771 if not oob_program:
3772 node_entry.append((constants.RS_UNAVAIL, None))
3773 continue
3775 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3776 self.op.command, oob_program, node.name)
3777 result = self.rpc.call_run_oob(master_node, oob_program,
3778 self.op.command, node.name,
3779 self.op.timeout)
3781 if result.fail_msg:
3782 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3783 node.name, result.fail_msg)
3784 node_entry.append((constants.RS_NODATA, None))
3785 else:
3786 try:
3787 self._CheckPayload(result)
3788 except errors.OpExecError, err:
3789 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3790 node.name, err)
3791 node_entry.append((constants.RS_NODATA, None))
3792 else:
3793 if self.op.command == constants.OOB_HEALTH:
3794 # For health we should log important events
3795 for item, status in result.payload:
3796 if status in [constants.OOB_STATUS_WARNING,
3797 constants.OOB_STATUS_CRITICAL]:
3798 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3799 item, node.name, status)
3801 if self.op.command == constants.OOB_POWER_ON:
3802 node.powered = True
3803 elif self.op.command == constants.OOB_POWER_OFF:
3804 node.powered = False
3805 elif self.op.command == constants.OOB_POWER_STATUS:
3806 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3807 if powered != node.powered:
3808 logging.warning(("Recorded power state (%s) of node '%s' does not"
3809 " match actual power state (%s)"), node.powered,
3810 node.name, powered)
3812 # For configuration changing commands we should update the node
3813 if self.op.command in (constants.OOB_POWER_ON,
3814 constants.OOB_POWER_OFF):
3815 self.cfg.Update(node, feedback_fn)
3817 node_entry.append((constants.RS_NORMAL, result.payload))
3819 if (self.op.command == constants.OOB_POWER_ON and
3820 idx < len(self.nodes) - 1):
3821 time.sleep(self.op.power_delay)
3823 return ret
3825 def _CheckPayload(self, result):
3826 """Checks if the payload is valid.
3828 @param result: RPC result
3829 @raises errors.OpExecError: If payload is not valid
3831 """
3832 errs = []
3833 if self.op.command == constants.OOB_HEALTH:
3834 if not isinstance(result.payload, list):
3835 errs.append("command 'health' is expected to return a list but got %s" %
3836 type(result.payload))
3837 else:
3838 for item, status in result.payload:
3839 if status not in constants.OOB_STATUSES:
3840 errs.append("health item '%s' has invalid status '%s'" %
3841 (item, status))
3843 if self.op.command == constants.OOB_POWER_STATUS:
3844 if not isinstance(result.payload, dict):
3845 errs.append("power-status is expected to return a dict but got %s" %
3846 type(result.payload))
3848 if self.op.command in [
3849 constants.OOB_POWER_ON,
3850 constants.OOB_POWER_OFF,
3851 constants.OOB_POWER_CYCLE,
3852 ]:
3853 if result.payload is not None:
3854 errs.append("%s is expected to not return payload but got '%s'" %
3855 (self.op.command, result.payload))
3857 if errs:
3858 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3859 utils.CommaJoin(errs))
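# Editor's sketch of the payload shapes this check enforces (illustrative,
# derived from the validation above; the literal values are assumptions):
#
#   health:       [["disk0", "OK"], ["psu1", "CRITICAL"], ...]
#                 (a list of [item, status] pairs, status in OOB_STATUSES)
#   power-status: {constants.OOB_POWER_STATUS_POWERED: True}
#   power-on/power-off/power-cycle: None (no payload expected)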
3861 class _OsQuery(_QueryBase):
3862 FIELDS = query.OS_FIELDS
3864 def ExpandNames(self, lu):
3865 # Lock all nodes in shared mode
3866 # Temporary removal of locks, should be reverted later
3867 # TODO: reintroduce locks when they are lighter-weight
3868 lu.needed_locks = {}
3869 #self.share_locks[locking.LEVEL_NODE] = 1
3870 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3872 # The following variables interact with _QueryBase._GetNames
3873 if self.names:
3874 self.wanted = self.names
3875 else:
3876 self.wanted = locking.ALL_SET
3878 self.do_locking = self.use_locking
3880 def DeclareLocks(self, lu, level):
3881 pass
3883 @staticmethod
3884 def _DiagnoseByOS(rlist):
3885 """Remaps a per-node return list into a per-os per-node dictionary
3887 @param rlist: a map with node names as keys and OS objects as values
3889 @rtype: dict
3890 @return: a dictionary with osnames as keys and as value another
3891 map, with nodes as keys and tuples of (path, status, diagnose,
3892 variants, parameters, api_versions) as values, eg::
3894 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3895 (/srv/..., False, "invalid api")],
3896 "node2": [(/srv/..., True, "", [], [])]}
3897 }
3899 """
3900 all_os = {}
3901 # we build here the list of nodes that didn't fail the RPC (at RPC
3902 # level), so that nodes with a non-responding node daemon don't
3903 # make all OSes invalid
3904 good_nodes = [node_name for node_name in rlist
3905 if not rlist[node_name].fail_msg]
3906 for node_name, nr in rlist.items():
3907 if nr.fail_msg or not nr.payload:
3908 continue
3909 for (name, path, status, diagnose, variants,
3910 params, api_versions) in nr.payload:
3911 if name not in all_os:
3912 # build a list of nodes for this os containing empty lists
3913 # for each node in node_list
3914 all_os[name] = {}
3915 for nname in good_nodes:
3916 all_os[name][nname] = []
3917 # convert params from [name, help] to (name, help)
3918 params = [tuple(v) for v in params]
3919 all_os[name][node_name].append((path, status, diagnose,
3920 variants, params, api_versions))
3922 return all_os
3923 def _GetQueryData(self, lu):
3924 """Computes the list of nodes and their attributes.
3926 """
3927 # Locking is not used
3928 assert not (compat.any(lu.glm.is_owned(level)
3929 for level in locking.LEVELS
3930 if level != locking.LEVEL_CLUSTER) or
3931 self.do_locking or self.use_locking)
3933 valid_nodes = [node.name
3934 for node in lu.cfg.GetAllNodesInfo().values()
3935 if not node.offline and node.vm_capable]
3936 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3937 cluster = lu.cfg.GetClusterInfo()
3939 data = {}
3941 for (os_name, os_data) in pol.items():
3942 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3943 hidden=(os_name in cluster.hidden_os),
3944 blacklisted=(os_name in cluster.blacklisted_os))
3946 variants = set()
3947 parameters = set()
3948 api_versions = set()
3950 for idx, osl in enumerate(os_data.values()):
3951 info.valid = bool(info.valid and osl and osl[0][1])
3952 if not info.valid:
3953 break
3955 (node_variants, node_params, node_api) = osl[0][3:6]
3956 if idx == 0:
3957 # First entry
3958 variants.update(node_variants)
3959 parameters.update(node_params)
3960 api_versions.update(node_api)
3961 else:
3962 # Filter out inconsistent values
3963 variants.intersection_update(node_variants)
3964 parameters.intersection_update(node_params)
3965 api_versions.intersection_update(node_api)
3967 info.variants = list(variants)
3968 info.parameters = list(parameters)
3969 info.api_versions = list(api_versions)
3971 data[os_name] = info
3973 # Prepare data in requested order
3974 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3975 if name in data]
3978 class LUOsDiagnose(NoHooksLU):
3979 """Logical unit for OS diagnose/query.
3981 """
3982 REQ_BGL = False
3984 @staticmethod
3985 def _BuildFilter(fields, names):
3986 """Builds a filter for querying OSes.
3988 """
3989 name_filter = qlang.MakeSimpleFilter("name", names)
3991 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3992 # respective field is not requested
3993 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3994 for fname in ["hidden", "blacklisted"]
3995 if fname not in fields]
3996 if "valid" not in fields:
3997 status_filter.append([qlang.OP_TRUE, "valid"])
3999 if status_filter:
4000 status_filter.insert(0, qlang.OP_AND)
4001 else:
4002 status_filter = None
4004 if name_filter and status_filter:
4005 return [qlang.OP_AND, name_filter, status_filter]
4006 elif name_filter:
4007 return name_filter
4008 else:
4009 return status_filter
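# Editor's illustration (not part of the original module): for a field list
# that includes neither "hidden", "blacklisted" nor "valid",
# _BuildFilter(["name", "variants"], ["lenny-image"]) combines a name filter
# with the legacy status filter, roughly:
#
#   [qlang.OP_AND,
#    qlang.MakeSimpleFilter("name", ["lenny-image"]),
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]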
4011 def CheckArguments(self):
4012 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4013 self.op.output_fields, False)
4015 def ExpandNames(self):
4016 self.oq.ExpandNames(self)
4018 def Exec(self, feedback_fn):
4019 return self.oq.OldStyleQuery(self)
4022 class LUNodeRemove(LogicalUnit):
4023 """Logical unit for removing a node.
4026 HPATH = "node-remove"
4027 HTYPE = constants.HTYPE_NODE
4029 def BuildHooksEnv(self):
4030 """Build hooks env.
4032 This doesn't run on the target node in the pre phase as a failed
4033 node would then be impossible to remove.
4035 """
4036 return {
4037 "OP_TARGET": self.op.node_name,
4038 "NODE_NAME": self.op.node_name,
4039 }
4041 def BuildHooksNodes(self):
4042 """Build hooks nodes.
4044 """
4045 all_nodes = self.cfg.GetNodeList()
4046 try:
4047 all_nodes.remove(self.op.node_name)
4048 except ValueError:
4049 logging.warning("Node '%s', which is about to be removed, was not found"
4050 " in the list of all nodes", self.op.node_name)
4051 return (all_nodes, all_nodes)
4053 def CheckPrereq(self):
4054 """Check prerequisites.
4056 This checks:
4057 - the node exists in the configuration
4058 - it does not have primary or secondary instances
4059 - it's not the master
4061 Any errors are signaled by raising errors.OpPrereqError.
4063 """
4064 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4065 node = self.cfg.GetNodeInfo(self.op.node_name)
4066 assert node is not None
4068 instance_list = self.cfg.GetInstanceList()
4070 masternode = self.cfg.GetMasterNode()
4071 if node.name == masternode:
4072 raise errors.OpPrereqError("Node is the master node, failover to another"
4073 " node is required", errors.ECODE_INVAL)
4075 for instance_name in instance_list:
4076 instance = self.cfg.GetInstanceInfo(instance_name)
4077 if node.name in instance.all_nodes:
4078 raise errors.OpPrereqError("Instance %s is still running on the node,"
4079 " please remove first" % instance_name,
4080 errors.ECODE_INVAL)
4081 self.op.node_name = node.name
4082 self.node = node
4084 def Exec(self, feedback_fn):
4085 """Removes the node from the cluster.
4087 """
4088 node = self.node
4089 logging.info("Stopping the node daemon and removing configs from node %s",
4090 node.name)
4092 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4094 # Promote nodes to master candidate as needed
4095 _AdjustCandidatePool(self, exceptions=[node.name])
4096 self.context.RemoveNode(node.name)
4098 # Run post hooks on the node before it's removed
4099 _RunPostHook(self, node.name)
4101 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4102 msg = result.fail_msg
4103 if msg:
4104 self.LogWarning("Errors encountered on the remote node while leaving"
4105 " the cluster: %s", msg)
4107 # Remove node from our /etc/hosts
4108 if self.cfg.GetClusterInfo().modify_etc_hosts:
4109 master_node = self.cfg.GetMasterNode()
4110 result = self.rpc.call_etc_hosts_modify(master_node,
4111 constants.ETC_HOSTS_REMOVE,
4112 node.name, None)
4113 result.Raise("Can't update hosts file with new host data")
4114 _RedistributeAncillaryFiles(self)
4117 class _NodeQuery(_QueryBase):
4118 FIELDS = query.NODE_FIELDS
4120 def ExpandNames(self, lu):
4121 lu.needed_locks = {}
4122 lu.share_locks[locking.LEVEL_NODE] = 1
4124 if self.names:
4125 self.wanted = _GetWantedNodes(lu, self.names)
4126 else:
4127 self.wanted = locking.ALL_SET
4129 self.do_locking = (self.use_locking and
4130 query.NQ_LIVE in self.requested_data)
4132 if self.do_locking:
4133 # if we don't request only static fields, we need to lock the nodes
4134 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4136 def DeclareLocks(self, lu, level):
4137 pass
4139 def _GetQueryData(self, lu):
4140 """Computes the list of nodes and their attributes.
4142 """
4143 all_info = lu.cfg.GetAllNodesInfo()
4145 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4147 # Gather data as requested
4148 if query.NQ_LIVE in self.requested_data:
4149 # filter out non-vm_capable nodes
4150 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4152 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4153 lu.cfg.GetHypervisorType())
4154 live_data = dict((name, nresult.payload)
4155 for (name, nresult) in node_data.items()
4156 if not nresult.fail_msg and nresult.payload)
4157 else:
4158 live_data = None
4160 if query.NQ_INST in self.requested_data:
4161 node_to_primary = dict([(name, set()) for name in nodenames])
4162 node_to_secondary = dict([(name, set()) for name in nodenames])
4164 inst_data = lu.cfg.GetAllInstancesInfo()
4166 for inst in inst_data.values():
4167 if inst.primary_node in node_to_primary:
4168 node_to_primary[inst.primary_node].add(inst.name)
4169 for secnode in inst.secondary_nodes:
4170 if secnode in node_to_secondary:
4171 node_to_secondary[secnode].add(inst.name)
4172 else:
4173 node_to_primary = None
4174 node_to_secondary = None
4176 if query.NQ_OOB in self.requested_data:
4177 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4178 for name, node in all_info.iteritems())
4179 else:
4180 oob_support = None
4182 if query.NQ_GROUP in self.requested_data:
4183 groups = lu.cfg.GetAllNodeGroupsInfo()
4184 else:
4185 groups = {}
4187 return query.NodeQueryData([all_info[name] for name in nodenames],
4188 live_data, lu.cfg.GetMasterNode(),
4189 node_to_primary, node_to_secondary, groups,
4190 oob_support, lu.cfg.GetClusterInfo())
4193 class LUNodeQuery(NoHooksLU):
4194 """Logical unit for querying nodes.
4196 """
4197 # pylint: disable-msg=W0142
4198 REQ_BGL = False
4200 def CheckArguments(self):
4201 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4202 self.op.output_fields, self.op.use_locking)
4204 def ExpandNames(self):
4205 self.nq.ExpandNames(self)
4207 def Exec(self, feedback_fn):
4208 return self.nq.OldStyleQuery(self)
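# Editor's sketch (assumption, not original code): a client-side caller
# would drive this LU through an opcode, e.g.
#
#   op = opcodes.OpNodeQuery(names=[], output_fields=["name", "pinst_cnt"],
#                            use_locking=False)
#
# and receive the rows produced by _NodeQuery.OldStyleQuery() above.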
4211 class LUNodeQueryvols(NoHooksLU):
4212 """Logical unit for getting volumes on node(s).
4214 """
4215 REQ_BGL = False
4216 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4217 _FIELDS_STATIC = utils.FieldSet("node")
4219 def CheckArguments(self):
4220 _CheckOutputFields(static=self._FIELDS_STATIC,
4221 dynamic=self._FIELDS_DYNAMIC,
4222 selected=self.op.output_fields)
4224 def ExpandNames(self):
4225 self.needed_locks = {}
4226 self.share_locks[locking.LEVEL_NODE] = 1
4227 if not self.op.nodes:
4228 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4230 self.needed_locks[locking.LEVEL_NODE] = \
4231 _GetWantedNodes(self, self.op.nodes)
4233 def Exec(self, feedback_fn):
4234 """Computes the list of nodes and their attributes.
4236 """
4237 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4238 volumes = self.rpc.call_node_volumes(nodenames)
4240 ilist = [self.cfg.GetInstanceInfo(iname) for iname
4241 in self.cfg.GetInstanceList()]
4243 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
4245 output = []
4246 for node in nodenames:
4247 nresult = volumes[node]
4248 if nresult.offline:
4249 continue
4250 msg = nresult.fail_msg
4251 if msg:
4252 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4253 continue
4255 node_vols = nresult.payload[:]
4256 node_vols.sort(key=lambda vol: vol['dev'])
4258 for vol in node_vols:
4259 node_output = []
4260 for field in self.op.output_fields:
4261 if field == "node":
4262 val = node
4263 elif field == "phys":
4264 val = vol['dev']
4265 elif field == "vg":
4266 val = vol['vg']
4267 elif field == "name":
4268 val = vol['name']
4269 elif field == "size":
4270 val = int(float(vol['size']))
4271 elif field == "instance":
4272 for inst in ilist:
4273 if node not in lv_by_node[inst]:
4274 continue
4275 if vol['name'] in lv_by_node[inst][node]:
4276 break
4277 else:
4278 inst = None
4279 val = inst and inst.name
4280 else:
4281 raise errors.ParameterError(field)
4282 node_output.append(str(val))
4284 output.append(node_output)
4286 return output
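# Editor's illustration (assumed values, not output of a real cluster): with
# output_fields=["node", "phys", "vg", "name", "size", "instance"], a single
# returned row might look like
#
#   ["node1.example.com", "/dev/xenvg/disk0", "xenvg", "disk0", "1024",
#    "instance1.example.com"]
#
# i.e. one stringified value per requested field, in request order.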
4289 class LUNodeQueryStorage(NoHooksLU):
4290 """Logical unit for getting information on storage units on node(s).
4292 """
4293 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4294 REQ_BGL = False
4296 def CheckArguments(self):
4297 _CheckOutputFields(static=self._FIELDS_STATIC,
4298 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4299 selected=self.op.output_fields)
4301 def ExpandNames(self):
4302 self.needed_locks = {}
4303 self.share_locks[locking.LEVEL_NODE] = 1
4305 if self.op.nodes:
4306 self.needed_locks[locking.LEVEL_NODE] = \
4307 _GetWantedNodes(self, self.op.nodes)
4308 else:
4309 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4311 def Exec(self, feedback_fn):
4312 """Computes the list of nodes and their attributes.
4314 """
4315 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4317 # Always get name to sort by
4318 if constants.SF_NAME in self.op.output_fields:
4319 fields = self.op.output_fields[:]
4320 else:
4321 fields = [constants.SF_NAME] + self.op.output_fields
4323 # Never ask for node or type as it's only known to the LU
4324 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4325 while extra in fields:
4326 fields.remove(extra)
4328 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4329 name_idx = field_idx[constants.SF_NAME]
4331 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4332 data = self.rpc.call_storage_list(self.nodes,
4333 self.op.storage_type, st_args,
4334 self.op.name, fields)
4336 result = []
4338 for node in utils.NiceSort(self.nodes):
4339 nresult = data[node]
4340 if nresult.offline:
4341 continue
4343 msg = nresult.fail_msg
4344 if msg:
4345 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4346 continue
4348 rows = dict([(row[name_idx], row) for row in nresult.payload])
4350 for name in utils.NiceSort(rows.keys()):
4351 row = rows[name]
4353 out = []
4355 for field in self.op.output_fields:
4356 if field == constants.SF_NODE:
4357 val = node
4358 elif field == constants.SF_TYPE:
4359 val = self.op.storage_type
4360 elif field in field_idx:
4361 val = row[field_idx[field]]
4362 else:
4363 raise errors.ParameterError(field)
4365 out.append(val)
4367 result.append(out)
4369 return result
4372 class _InstanceQuery(_QueryBase):
4373 FIELDS = query.INSTANCE_FIELDS
4375 def ExpandNames(self, lu):
4376 lu.needed_locks = {}
4377 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4378 lu.share_locks[locking.LEVEL_NODE] = 1
4380 if self.names:
4381 self.wanted = _GetWantedInstances(lu, self.names)
4382 else:
4383 self.wanted = locking.ALL_SET
4385 self.do_locking = (self.use_locking and
4386 query.IQ_LIVE in self.requested_data)
4387 if self.do_locking:
4388 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4389 lu.needed_locks[locking.LEVEL_NODE] = []
4390 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4392 def DeclareLocks(self, lu, level):
4393 if level == locking.LEVEL_NODE and self.do_locking:
4394 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4396 def _GetQueryData(self, lu):
4397 """Computes the list of instances and their attributes.
4399 """
4400 cluster = lu.cfg.GetClusterInfo()
4401 all_info = lu.cfg.GetAllInstancesInfo()
4403 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4405 instance_list = [all_info[name] for name in instance_names]
4406 nodes = frozenset(itertools.chain(*(inst.all_nodes
4407 for inst in instance_list)))
4408 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4409 bad_nodes = []
4410 offline_nodes = []
4411 wrongnode_inst = set()
4413 # Gather data as requested
4414 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4415 live_data = {}
4416 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4417 for name in nodes:
4418 result = node_data[name]
4419 if result.offline:
4420 # offline nodes will be in both lists
4421 assert result.fail_msg
4422 offline_nodes.append(name)
4423 if result.fail_msg:
4424 bad_nodes.append(name)
4425 elif result.payload:
4426 for inst in result.payload:
4427 if inst in all_info:
4428 if all_info[inst].primary_node == name:
4429 live_data.update(result.payload)
4430 else:
4431 wrongnode_inst.add(inst)
4432 else:
4433 # orphan instance; we don't list it here as we don't
4434 # handle this case yet in the output of instance listing
4435 logging.warning("Orphan instance '%s' found on node %s",
4436 inst, name)
4437 # else no instance is alive
4438 else:
4439 live_data = {}
4441 if query.IQ_DISKUSAGE in self.requested_data:
4442 disk_usage = dict((inst.name,
4443 _ComputeDiskSize(inst.disk_template,
4444 [{constants.IDISK_SIZE: disk.size}
4445 for disk in inst.disks]))
4446 for inst in instance_list)
4447 else:
4448 disk_usage = None
4450 if query.IQ_CONSOLE in self.requested_data:
4451 consinfo = {}
4452 for inst in instance_list:
4453 if inst.name in live_data:
4454 # Instance is running
4455 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4456 else:
4457 consinfo[inst.name] = None
4458 assert set(consinfo.keys()) == set(instance_names)
4459 else:
4460 consinfo = None
4462 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4463 disk_usage, offline_nodes, bad_nodes,
4464 live_data, wrongnode_inst, consinfo)
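# Editor's note (illustrative, not original code): which optional payloads
# are computed is driven purely by the requested_data set, e.g.
#
#   requested_data = set([query.IQ_LIVE, query.IQ_DISKUSAGE])
#
# yields live_data and disk_usage, while the offline/bad node lists are
# always returned so callers can tell missing data apart from dead nodes.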
4467 class LUQuery(NoHooksLU):
4468 """Query for resources/items of a certain kind.
4470 """
4471 # pylint: disable-msg=W0142
4472 REQ_BGL = False
4474 def CheckArguments(self):
4475 qcls = _GetQueryImplementation(self.op.what)
4477 self.impl = qcls(self.op.filter, self.op.fields, False)
4479 def ExpandNames(self):
4480 self.impl.ExpandNames(self)
4482 def DeclareLocks(self, level):
4483 self.impl.DeclareLocks(self, level)
4485 def Exec(self, feedback_fn):
4486 return self.impl.NewStyleQuery(self)
4489 class LUQueryFields(NoHooksLU):
4490 """Query for resources/items of a certain kind.
4492 """
4493 # pylint: disable-msg=W0142
4494 REQ_BGL = False
4496 def CheckArguments(self):
4497 self.qcls = _GetQueryImplementation(self.op.what)
4499 def ExpandNames(self):
4500 self.needed_locks = {}
4502 def Exec(self, feedback_fn):
4503 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4506 class LUNodeModifyStorage(NoHooksLU):
4507 """Logical unit for modifying a storage volume on a node.
4509 """
4510 REQ_BGL = False
4512 def CheckArguments(self):
4513 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4515 storage_type = self.op.storage_type
4517 try:
4518 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4519 except KeyError:
4520 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4521 " modified" % storage_type,
4522 errors.ECODE_INVAL)
4524 diff = set(self.op.changes.keys()) - modifiable
4525 if diff:
4526 raise errors.OpPrereqError("The following fields can not be modified for"
4527 " storage units of type '%s': %r" %
4528 (storage_type, list(diff)),
4529 errors.ECODE_INVAL)
4531 def ExpandNames(self):
4532 self.needed_locks = {
4533 locking.LEVEL_NODE: self.op.node_name,
4534 }
4536 def Exec(self, feedback_fn):
4537 """Computes the list of nodes and their attributes.
4539 """
4540 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4541 result = self.rpc.call_storage_modify(self.op.node_name,
4542 self.op.storage_type, st_args,
4543 self.op.name, self.op.changes)
4544 result.Raise("Failed to modify storage unit '%s' on %s" %
4545 (self.op.name, self.op.node_name))
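# Editor's sketch (hypothetical values): the modifiable fields per storage
# type come from constants.MODIFIABLE_STORAGE_FIELDS, so a caller could e.g.
# toggle allocatability of an LVM physical volume with an opcode roughly
# like
#
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sda3",
#                               changes={constants.SF_ALLOCATABLE: False})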
4548 class LUNodeAdd(LogicalUnit):
4549 """Logical unit for adding node to the cluster.
4551 """
4552 HPATH = "node-add"
4553 HTYPE = constants.HTYPE_NODE
4554 _NFLAGS = ["master_capable", "vm_capable"]
4556 def CheckArguments(self):
4557 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4558 # validate/normalize the node name
4559 self.hostname = netutils.GetHostname(name=self.op.node_name,
4560 family=self.primary_ip_family)
4561 self.op.node_name = self.hostname.name
4563 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4564 raise errors.OpPrereqError("Cannot readd the master node",
4567 if self.op.readd and self.op.group:
4568 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4569 " being readded", errors.ECODE_INVAL)
4571 def BuildHooksEnv(self):
4572 """Build hooks env.
4574 This will run on all nodes before, and on all nodes + the new node after.
4576 """
4577 return {
4578 "OP_TARGET": self.op.node_name,
4579 "NODE_NAME": self.op.node_name,
4580 "NODE_PIP": self.op.primary_ip,
4581 "NODE_SIP": self.op.secondary_ip,
4582 "MASTER_CAPABLE": str(self.op.master_capable),
4583 "VM_CAPABLE": str(self.op.vm_capable),
4584 }
4586 def BuildHooksNodes(self):
4587 """Build hooks nodes.
4589 """
4590 # Exclude added node
4591 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4592 post_nodes = pre_nodes + [self.op.node_name, ]
4594 return (pre_nodes, post_nodes)
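# Editor's note (illustrative): for a three-node cluster being extended with
# node4, BuildHooksNodes() above returns roughly
#
#   pre_nodes  = ["node1", "node2", "node3"]          # new node excluded
#   post_nodes = ["node1", "node2", "node3", "node4"] # new node included
#
# so pre-hooks cannot fail just because the new node is not reachable yet.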
4596 def CheckPrereq(self):
4597 """Check prerequisites.
4599 This checks:
4600 - the new node is not already in the config
4601 - it is resolvable
4602 - its parameters (single/dual homed) match the cluster
4604 Any errors are signaled by raising errors.OpPrereqError.
4606 """
4607 cfg = self.cfg
4608 hostname = self.hostname
4609 node = hostname.name
4610 primary_ip = self.op.primary_ip = hostname.ip
4611 if self.op.secondary_ip is None:
4612 if self.primary_ip_family == netutils.IP6Address.family:
4613 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4614 " IPv4 address must be given as secondary",
4615 errors.ECODE_INVAL)
4616 self.op.secondary_ip = primary_ip
4618 secondary_ip = self.op.secondary_ip
4619 if not netutils.IP4Address.IsValid(secondary_ip):
4620 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4621 " address" % secondary_ip, errors.ECODE_INVAL)
4623 node_list = cfg.GetNodeList()
4624 if not self.op.readd and node in node_list:
4625 raise errors.OpPrereqError("Node %s is already in the configuration" %
4626 node, errors.ECODE_EXISTS)
4627 elif self.op.readd and node not in node_list:
4628 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4629 errors.ECODE_NOENT)
4631 self.changed_primary_ip = False
4633 for existing_node_name in node_list:
4634 existing_node = cfg.GetNodeInfo(existing_node_name)
4636 if self.op.readd and node == existing_node_name:
4637 if existing_node.secondary_ip != secondary_ip:
4638 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4639 " address configuration as before",
4640 errors.ECODE_INVAL)
4641 if existing_node.primary_ip != primary_ip:
4642 self.changed_primary_ip = True
4644 continue
4646 if (existing_node.primary_ip == primary_ip or
4647 existing_node.secondary_ip == primary_ip or
4648 existing_node.primary_ip == secondary_ip or
4649 existing_node.secondary_ip == secondary_ip):
4650 raise errors.OpPrereqError("New node ip address(es) conflict with"
4651 " existing node %s" % existing_node.name,
4652 errors.ECODE_NOTUNIQUE)
4654 # After this 'if' block, None is no longer a valid value for the
4655 # _capable op attributes
4656 if self.op.readd:
4657 old_node = self.cfg.GetNodeInfo(node)
4658 assert old_node is not None, "Can't retrieve locked node %s" % node
4659 for attr in self._NFLAGS:
4660 if getattr(self.op, attr) is None:
4661 setattr(self.op, attr, getattr(old_node, attr))
4662 else:
4663 for attr in self._NFLAGS:
4664 if getattr(self.op, attr) is None:
4665 setattr(self.op, attr, True)
4667 if self.op.readd and not self.op.vm_capable:
4668 pri, sec = cfg.GetNodeInstances(node)
4669 if pri or sec:
4670 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4671 " flag set to false, but it already holds"
4672 " instances" % node,
4673 errors.ECODE_STATE)
4675 # check that the type of the node (single versus dual homed) is the
4676 # same as for the master
4677 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4678 master_singlehomed = myself.secondary_ip == myself.primary_ip
4679 newbie_singlehomed = secondary_ip == primary_ip
4680 if master_singlehomed != newbie_singlehomed:
4681 if master_singlehomed:
4682 raise errors.OpPrereqError("The master has no secondary ip but the"
4683 " new node has one",
4684 errors.ECODE_INVAL)
4685 else:
4686 raise errors.OpPrereqError("The master has a secondary ip but the"
4687 " new node doesn't have one",
4688 errors.ECODE_INVAL)
4690 # checks reachability
4691 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4692 raise errors.OpPrereqError("Node not reachable by ping",
4693 errors.ECODE_ENVIRON)
4695 if not newbie_singlehomed:
4696 # check reachability from my secondary ip to newbie's secondary ip
4697 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4698 source=myself.secondary_ip):
4699 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4700 " based ping to node daemon port",
4701 errors.ECODE_ENVIRON)
4703 if self.op.readd:
4704 exceptions = [node]
4705 else:
4706 exceptions = []
4708 if self.op.master_capable:
4709 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4710 else:
4711 self.master_candidate = False
4713 if self.op.readd:
4714 self.new_node = old_node
4715 else:
4716 node_group = cfg.LookupNodeGroup(self.op.group)
4717 self.new_node = objects.Node(name=node,
4718 primary_ip=primary_ip,
4719 secondary_ip=secondary_ip,
4720 master_candidate=self.master_candidate,
4721 offline=False, drained=False,
4722 group=node_group)
4724 if self.op.ndparams:
4725 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4727 def Exec(self, feedback_fn):
4728 """Adds the new node to the cluster.
4730 """
4731 new_node = self.new_node
4732 node = new_node.name
4734 # We are adding a new node, so we assume it is powered
4735 new_node.powered = True
4737 # for re-adds, reset the offline/drained/master-candidate flags;
4738 # we need to reset here, otherwise offline would prevent RPC calls
4739 # later in the procedure; this also means that if the re-add
4740 # fails, we are left with a non-offlined, broken node
4741 if self.op.readd:
4742 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4743 self.LogInfo("Readding a node, the offline/drained flags were reset")
4744 # if we demote the node, we do cleanup later in the procedure
4745 new_node.master_candidate = self.master_candidate
4746 if self.changed_primary_ip:
4747 new_node.primary_ip = self.op.primary_ip
4749 # copy the master/vm_capable flags
4750 for attr in self._NFLAGS:
4751 setattr(new_node, attr, getattr(self.op, attr))
4753 # notify the user about any possible mc promotion
4754 if new_node.master_candidate:
4755 self.LogInfo("Node will be a master candidate")
4757 if self.op.ndparams:
4758 new_node.ndparams = self.op.ndparams
4759 else:
4760 new_node.ndparams = {}
4762 # check connectivity
4763 result = self.rpc.call_version([node])[node]
4764 result.Raise("Can't get version information from node %s" % node)
4765 if constants.PROTOCOL_VERSION == result.payload:
4766 logging.info("Communication to node %s fine, sw version %s match",
4767 node, result.payload)
4768 else:
4769 raise errors.OpExecError("Version mismatch master version %s,"
4770 " node version %s" %
4771 (constants.PROTOCOL_VERSION, result.payload))
4773 # Add node to our /etc/hosts, and add key to known_hosts
4774 if self.cfg.GetClusterInfo().modify_etc_hosts:
4775 master_node = self.cfg.GetMasterNode()
4776 result = self.rpc.call_etc_hosts_modify(master_node,
4777 constants.ETC_HOSTS_ADD,
4778 self.hostname.name,
4779 self.hostname.ip)
4780 result.Raise("Can't update hosts file with new host data")
4782 if new_node.secondary_ip != new_node.primary_ip:
4783 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4784 False)
4786 node_verify_list = [self.cfg.GetMasterNode()]
4787 node_verify_param = {
4788 constants.NV_NODELIST: [node],
4789 # TODO: do a node-net-test as well?
4790 }
4792 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4793 self.cfg.GetClusterName())
4794 for verifier in node_verify_list:
4795 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4796 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4797 if nl_payload:
4798 for failed in nl_payload:
4799 feedback_fn("ssh/hostname verification failed"
4800 " (checking from %s): %s" %
4801 (verifier, nl_payload[failed]))
4802 raise errors.OpExecError("ssh/hostname verification failed")
4804 if self.op.readd:
4805 _RedistributeAncillaryFiles(self)
4806 self.context.ReaddNode(new_node)
4807 # make sure we redistribute the config
4808 self.cfg.Update(new_node, feedback_fn)
4809 # and make sure the new node will not have old files around
4810 if not new_node.master_candidate:
4811 result = self.rpc.call_node_demote_from_mc(new_node.name)
4812 msg = result.fail_msg
4813 if msg:
4814 self.LogWarning("Node failed to demote itself from master"
4815 " candidate status: %s" % msg)
4816 else:
4817 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4818 additional_vm=self.op.vm_capable)
4819 self.context.AddNode(new_node, self.proc.GetECId())
4822 class LUNodeSetParams(LogicalUnit):
4823 """Modifies the parameters of a node.
4825 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4826 to the node role (as _ROLE_*)
4827 @cvar _R2F: a dictionary from node role to tuples of flags
4828 @cvar _FLAGS: a list of attribute names corresponding to the flags
4830 """
4831 HPATH = "node-modify"
4832 HTYPE = constants.HTYPE_NODE
4833 REQ_BGL = False
4834 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4835 _F2R = {
4836 (True, False, False): _ROLE_CANDIDATE,
4837 (False, True, False): _ROLE_DRAINED,
4838 (False, False, True): _ROLE_OFFLINE,
4839 (False, False, False): _ROLE_REGULAR,
4840 }
4841 _R2F = dict((v, k) for k, v in _F2R.items())
4842 _FLAGS = ["master_candidate", "drained", "offline"]
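# Editor's illustration (derived from the tables above, not original code):
# the flag tuples are (master_candidate, drained, offline), so
#
#   _F2R[(False, True, False)] == _ROLE_DRAINED
#   _R2F[_ROLE_DRAINED] == (False, True, False)
#
# i.e. _R2F simply inverts _F2R to map a target role back onto node flags.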
4844 def CheckArguments(self):
4845 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4846 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4847 self.op.master_capable, self.op.vm_capable,
4848 self.op.secondary_ip, self.op.ndparams]
4849 if all_mods.count(None) == len(all_mods):
4850 raise errors.OpPrereqError("Please pass at least one modification",
4851 errors.ECODE_INVAL)
4852 if all_mods.count(True) > 1:
4853 raise errors.OpPrereqError("Can't set the node into more than one"
4854 " state at the same time",
4855 errors.ECODE_INVAL)
4857 # Boolean value that tells us whether we might be demoting from MC
4858 self.might_demote = (self.op.master_candidate == False or
4859 self.op.offline == True or
4860 self.op.drained == True or
4861 self.op.master_capable == False)
4863 if self.op.secondary_ip:
4864 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4865 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4866 " address" % self.op.secondary_ip,
4867 errors.ECODE_INVAL)
4869 self.lock_all = self.op.auto_promote and self.might_demote
4870 self.lock_instances = self.op.secondary_ip is not None
4872 def ExpandNames(self):
4873 if self.lock_all:
4874 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4875 else:
4876 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4878 if self.lock_instances:
4879 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4881 def DeclareLocks(self, level):
4882 # If we have locked all instances, before waiting to lock nodes, release
4883 # all the ones living on nodes unrelated to the current operation.
4884 if level == locking.LEVEL_NODE and self.lock_instances:
4885 self.affected_instances = []
4886 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4887 instances_keep = []
4889 # Build list of instances to release
4890 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4891 instance = self.context.cfg.GetInstanceInfo(instance_name)
4892 if (instance.disk_template in constants.DTS_INT_MIRROR and
4893 self.op.node_name in instance.all_nodes):
4894 instances_keep.append(instance_name)
4895 self.affected_instances.append(instance)
4897 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4899 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4900 set(instances_keep))
4902 def BuildHooksEnv(self):
4903 """Build hooks env.
4905 This runs on the master node.
4907 """
4908 return {
4909 "OP_TARGET": self.op.node_name,
4910 "MASTER_CANDIDATE": str(self.op.master_candidate),
4911 "OFFLINE": str(self.op.offline),
4912 "DRAINED": str(self.op.drained),
4913 "MASTER_CAPABLE": str(self.op.master_capable),
4914 "VM_CAPABLE": str(self.op.vm_capable),
4915 }
4917 def BuildHooksNodes(self):
4918 """Build hooks nodes.
4920 """
4921 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4922 return (nl, nl)
4924 def CheckPrereq(self):
4925 """Check prerequisites.
4927 This only checks the instance list against the existing names.
4929 """
4930 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4932 if (self.op.master_candidate is not None or
4933 self.op.drained is not None or
4934 self.op.offline is not None):
4935 # we can't change the master's node flags
4936 if self.op.node_name == self.cfg.GetMasterNode():
4937 raise errors.OpPrereqError("The master role can be changed"
4938 " only via master-failover",
4939 errors.ECODE_INVAL)
4941 if self.op.master_candidate and not node.master_capable:
4942 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4943 " it a master candidate" % node.name,
4944 errors.ECODE_STATE)
4946 if self.op.vm_capable == False:
4947 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4948 if ipri or isec:
4949 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4950 " the vm_capable flag" % node.name,
4951 errors.ECODE_STATE)
4953 if node.master_candidate and self.might_demote and not self.lock_all:
4954 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4955 # check if after removing the current node, we're missing master
4956 # candidates
4957 (mc_remaining, mc_should, _) = \
4958 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4959 if mc_remaining < mc_should:
4960 raise errors.OpPrereqError("Not enough master candidates, please"
4961 " pass auto promote option to allow"
4962 " promotion", errors.ECODE_STATE)
4964 self.old_flags = old_flags = (node.master_candidate,
4965 node.drained, node.offline)
4966 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4967 self.old_role = old_role = self._F2R[old_flags]
4969 # Check for ineffective changes
4970 for attr in self._FLAGS:
4971 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4972 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4973 setattr(self.op, attr, None)
4975 # Past this point, any flag change to False means a transition
4976 # away from the respective state, as only real changes are kept
4978 # TODO: We might query the real power state if it supports OOB
4979 if _SupportsOob(self.cfg, node):
4980 if self.op.offline is False and not (node.powered or
4981 self.op.powered == True):
4982 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4983 " offline status can be reset") %
4984 self.op.node_name)
4985 elif self.op.powered is not None:
4986 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4987 " as it does not support out-of-band"
4988 " handling") % self.op.node_name)
4990 # If we're being deofflined/drained, we'll MC ourself if needed
4991 if (self.op.drained == False or self.op.offline == False or
4992 (self.op.master_capable and not node.master_capable)):
4993 if _DecideSelfPromotion(self):
4994 self.op.master_candidate = True
4995 self.LogInfo("Auto-promoting node to master candidate")
4997 # If we're no longer master capable, we'll demote ourselves from MC
4998 if self.op.master_capable == False and node.master_candidate:
4999 self.LogInfo("Demoting from master candidate")
5000 self.op.master_candidate = False
5003 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5004 if self.op.master_candidate:
5005 new_role = self._ROLE_CANDIDATE
5006 elif self.op.drained:
5007 new_role = self._ROLE_DRAINED
5008 elif self.op.offline:
5009 new_role = self._ROLE_OFFLINE
5010 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5011 # False is still in new flags, which means we're un-setting (the
5012 # offline state for example)
5013 new_role = self._ROLE_REGULAR
5014 else: # no new flags, nothing, keep old role
5015 new_role = old_role
5017 self.new_role = new_role
5019 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5020 # Trying to transition out of offline status
5021 result = self.rpc.call_version([node.name])[node.name]
5022 if result.fail_msg:
5023 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5024 " to report its version: %s" %
5025 (node.name, result.fail_msg),
5026 errors.ECODE_ENVIRON)
5027 else:
5028 self.LogWarning("Transitioning node from offline to online state"
5029 " without using re-add. Please make sure the node"
5030 " is healthy!")
5032 if self.op.secondary_ip:
5033 # Ok even without locking, because this can't be changed by any LU
5034 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5035 master_singlehomed = master.secondary_ip == master.primary_ip
5036 if master_singlehomed and self.op.secondary_ip:
5037 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5038 " homed cluster", errors.ECODE_INVAL)
5040 if node.offline:
5041 if self.affected_instances:
5042 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5043 " node has instances (%s) configured"
5044 " to use it" % self.affected_instances)
5045 else:
5046 # On online nodes, check that no instances are running, and that
5047 # the node has the new ip and we can reach it.
5048 for instance in self.affected_instances:
5049 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5051 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5052 if master.name != node.name:
5053 # check reachability from master secondary ip to new secondary ip
5054 if not netutils.TcpPing(self.op.secondary_ip,
5055 constants.DEFAULT_NODED_PORT,
5056 source=master.secondary_ip):
5057 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5058 " based ping to node daemon port",
5059 errors.ECODE_ENVIRON)
5061 if self.op.ndparams:
5062 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5063 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5064 self.new_ndparams = new_ndparams
5066 def Exec(self, feedback_fn):
5067 """Modifies a node.
5069 """
5070 node = self.node
5071 old_role = self.old_role
5072 new_role = self.new_role
5074 result = []
5076 if self.op.ndparams:
5077 node.ndparams = self.new_ndparams
5079 if self.op.powered is not None:
5080 node.powered = self.op.powered
5082 for attr in ["master_capable", "vm_capable"]:
5083 val = getattr(self.op, attr)
5084 if val is not None:
5085 setattr(node, attr, val)
5086 result.append((attr, str(val)))
5088 if new_role != old_role:
5089 # Tell the node to demote itself, if no longer MC and not offline
5090 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5091 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5092 if msg:
5093 self.LogWarning("Node failed to demote itself: %s", msg)
5095 new_flags = self._R2F[new_role]
5096 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5097 if of != nf:
5098 result.append((desc, str(nf)))
5099 (node.master_candidate, node.drained, node.offline) = new_flags
5101 # we locked all nodes, we adjust the CP before updating this node
5102 if self.lock_all:
5103 _AdjustCandidatePool(self, [node.name])
5105 if self.op.secondary_ip:
5106 node.secondary_ip = self.op.secondary_ip
5107 result.append(("secondary_ip", self.op.secondary_ip))
5109 # this will trigger configuration file update, if needed
5110 self.cfg.Update(node, feedback_fn)
5112 # this will trigger job queue propagation or cleanup if the mc
5113 # flag changed
5114 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5115 self.context.ReaddNode(node)
5117 return result
5120 class LUNodePowercycle(NoHooksLU):
5121 """Powercycles a node.
5123 """
5124 REQ_BGL = False
5126 def CheckArguments(self):
5127 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5128 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5129 raise errors.OpPrereqError("The node is the master and the force"
5130 " parameter was not set",
5131 errors.ECODE_INVAL)
5133 def ExpandNames(self):
5134 """Locking for PowercycleNode.
5136 This is a last-resort option and shouldn't block on other
5137 jobs. Therefore, we grab no locks.
5139 """
5140 self.needed_locks = {}
5142 def Exec(self, feedback_fn):
5143 """Reboots a node.
5145 """
5146 result = self.rpc.call_node_powercycle(self.op.node_name,
5147 self.cfg.GetHypervisorType())
5148 result.Raise("Failed to schedule the reboot")
5149 return result.payload
5152 class LUClusterQuery(NoHooksLU):
5153 """Query cluster configuration.
5155 """
5156 REQ_BGL = False
5158 def ExpandNames(self):
5159 self.needed_locks = {}
5161 def Exec(self, feedback_fn):
5162 """Return cluster config.
5164 """
5165 cluster = self.cfg.GetClusterInfo()
5166 os_hvp = {}
5168 # Filter just for enabled hypervisors
5169 for os_name, hv_dict in cluster.os_hvp.items():
5170 os_hvp[os_name] = {}
5171 for hv_name, hv_params in hv_dict.items():
5172 if hv_name in cluster.enabled_hypervisors:
5173 os_hvp[os_name][hv_name] = hv_params
5175 # Convert ip_family to ip_version
5176 primary_ip_version = constants.IP4_VERSION
5177 if cluster.primary_ip_family == netutils.IP6Address.family:
5178 primary_ip_version = constants.IP6_VERSION
5180 result = {
5181 "software_version": constants.RELEASE_VERSION,
5182 "protocol_version": constants.PROTOCOL_VERSION,
5183 "config_version": constants.CONFIG_VERSION,
5184 "os_api_version": max(constants.OS_API_VERSIONS),
5185 "export_version": constants.EXPORT_VERSION,
5186 "architecture": (platform.architecture()[0], platform.machine()),
5187 "name": cluster.cluster_name,
5188 "master": cluster.master_node,
5189 "default_hypervisor": cluster.enabled_hypervisors[0],
5190 "enabled_hypervisors": cluster.enabled_hypervisors,
5191 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5192 for hypervisor_name in cluster.enabled_hypervisors]),
5193 "os_hvp": os_hvp,
5194 "beparams": cluster.beparams,
5195 "osparams": cluster.osparams,
5196 "nicparams": cluster.nicparams,
5197 "ndparams": cluster.ndparams,
5198 "candidate_pool_size": cluster.candidate_pool_size,
5199 "master_netdev": cluster.master_netdev,
5200 "volume_group_name": cluster.volume_group_name,
5201 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5202 "file_storage_dir": cluster.file_storage_dir,
5203 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5204 "maintain_node_health": cluster.maintain_node_health,
5205 "ctime": cluster.ctime,
5206 "mtime": cluster.mtime,
5207 "uuid": cluster.uuid,
5208 "tags": list(cluster.GetTags()),
5209 "uid_pool": cluster.uid_pool,
5210 "default_iallocator": cluster.default_iallocator,
5211 "reserved_lvs": cluster.reserved_lvs,
5212 "primary_ip_version": primary_ip_version,
5213 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5214 "hidden_os": cluster.hidden_os,
5215 "blacklisted_os": cluster.blacklisted_os,
5216 }
5218 return result
5221 class LUClusterConfigQuery(NoHooksLU):
5222 """Return configuration values.
5224 """
5225 REQ_BGL = False
5226 _FIELDS_DYNAMIC = utils.FieldSet()
5227 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5228 "watcher_pause", "volume_group_name")
5230 def CheckArguments(self):
5231 _CheckOutputFields(static=self._FIELDS_STATIC,
5232 dynamic=self._FIELDS_DYNAMIC,
5233 selected=self.op.output_fields)
5235 def ExpandNames(self):
5236 self.needed_locks = {}
5238 def Exec(self, feedback_fn):
5239 """Dump a representation of the cluster config to the standard output.
5241 """
5242 values = []
5243 for field in self.op.output_fields:
5244 if field == "cluster_name":
5245 entry = self.cfg.GetClusterName()
5246 elif field == "master_node":
5247 entry = self.cfg.GetMasterNode()
5248 elif field == "drain_flag":
5249 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5250 elif field == "watcher_pause":
5251 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5252 elif field == "volume_group_name":
5253 entry = self.cfg.GetVGName()
5254 else:
5255 raise errors.ParameterError(field)
5256 values.append(entry)
5258 return values
5260 class LUInstanceActivateDisks(NoHooksLU):
5261 """Bring up an instance's disks.
5263 """
5264 REQ_BGL = False
5266 def ExpandNames(self):
5267 self._ExpandAndLockInstance()
5268 self.needed_locks[locking.LEVEL_NODE] = []
5269 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5271 def DeclareLocks(self, level):
5272 if level == locking.LEVEL_NODE:
5273 self._LockInstancesNodes()
5275 def CheckPrereq(self):
5276 """Check prerequisites.
5278 This checks that the instance is in the cluster.
5280 """
5281 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5282 assert self.instance is not None, \
5283 "Cannot retrieve locked instance %s" % self.op.instance_name
5284 _CheckNodeOnline(self, self.instance.primary_node)
5286 def Exec(self, feedback_fn):
5287 """Activate the disks.
5289 """
5290 disks_ok, disks_info = \
5291 _AssembleInstanceDisks(self, self.instance,
5292 ignore_size=self.op.ignore_size)
5293 if not disks_ok:
5294 raise errors.OpExecError("Cannot activate block devices")
5296 return disks_info
5299 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5300 ignore_size=False):
5301 """Prepare the block devices for an instance.
5303 This sets up the block devices on all nodes.
5305 @type lu: L{LogicalUnit}
5306 @param lu: the logical unit on whose behalf we execute
5307 @type instance: L{objects.Instance}
5308 @param instance: the instance for whose disks we assemble
5309 @type disks: list of L{objects.Disk} or None
5310 @param disks: which disks to assemble (or all, if None)
5311 @type ignore_secondaries: boolean
5312 @param ignore_secondaries: if true, errors on secondary nodes
5313 won't result in an error return from the function
5314 @type ignore_size: boolean
5315 @param ignore_size: if true, the current known size of the disk
5316 will not be used during the disk activation, useful for cases
5317 when the size is wrong
5318 @return: False if the operation failed, otherwise a list of
5319 (host, instance_visible_name, node_visible_name)
5320 with the mapping from node devices to instance devices
5322 """
5323 device_info = []
5324 disks_ok = True
5325 iname = instance.name
5326 disks = _ExpandCheckDisks(instance, disks)
5328 # With the two passes mechanism we try to reduce the window of
5329 # opportunity for the race condition of switching DRBD to primary
5330 # before handshaking occured, but we do not eliminate it
5332 # The proper fix would be to wait (with some limits) until the
5333 # connection has been made and drbd transitions from WFConnection
5334 # into any other network-connected state (Connected, SyncTarget,
5337 # 1st pass, assemble on all nodes in secondary mode
5338 for idx, inst_disk in enumerate(disks):
5339 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5340 if ignore_size:
5341 node_disk = node_disk.Copy()
5342 node_disk.UnsetSize()
5343 lu.cfg.SetDiskID(node_disk, node)
5344 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5345 msg = result.fail_msg
5346 if msg:
5347 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5348 " (is_primary=False, pass=1): %s",
5349 inst_disk.iv_name, node, msg)
5350 if not ignore_secondaries:
5351 disks_ok = False
5353 # FIXME: race condition on drbd migration to primary
5355 # 2nd pass, do only the primary node
5356 for idx, inst_disk in enumerate(disks):
5357 dev_path = None
5359 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5360 if node != instance.primary_node:
5361 continue
5362 if ignore_size:
5363 node_disk = node_disk.Copy()
5364 node_disk.UnsetSize()
5365 lu.cfg.SetDiskID(node_disk, node)
5366 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5367 msg = result.fail_msg
5368 if msg:
5369 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5370 " (is_primary=True, pass=2): %s",
5371 inst_disk.iv_name, node, msg)
5372 disks_ok = False
5373 else:
5374 dev_path = result.payload
5376 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5378 # leave the disks configured for the primary node
5379 # this is a workaround that would be fixed better by
5380 # improving the logical/physical id handling
5381 for disk in disks:
5382 lu.cfg.SetDiskID(disk, instance.primary_node)
5384 return disks_ok, device_info
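# Editor's sketch (illustrative, variable names assumed): a caller typically
# aborts if assembly failed and otherwise reports the primary-node paths:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for (node, iv_name, dev_path) in device_info:
#     feedback_fn("%s/%s assembled on %s" % (iv_name, dev_path, node))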
5387 def _StartInstanceDisks(lu, instance, force):
5388 """Start the disks of an instance.
5390 """
5391 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5392 ignore_secondaries=force)
5393 if not disks_ok:
5394 _ShutdownInstanceDisks(lu, instance)
5395 if force is not None and not force:
5396 lu.proc.LogWarning("", hint="If the message above refers to a"
5397 " secondary node,"
5398 " you can retry the operation using '--force'.")
5399 raise errors.OpExecError("Disk consistency error")
5402 class LUInstanceDeactivateDisks(NoHooksLU):
5403 """Shutdown an instance's disks.
5405 """
5406 REQ_BGL = False
5408 def ExpandNames(self):
5409 self._ExpandAndLockInstance()
5410 self.needed_locks[locking.LEVEL_NODE] = []
5411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5413 def DeclareLocks(self, level):
5414 if level == locking.LEVEL_NODE:
5415 self._LockInstancesNodes()
5417 def CheckPrereq(self):
5418 """Check prerequisites.
5420 This checks that the instance is in the cluster.
5422 """
5423 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5424 assert self.instance is not None, \
5425 "Cannot retrieve locked instance %s" % self.op.instance_name
5427 def Exec(self, feedback_fn):
5428 """Deactivate the disks
5430 """
5431 instance = self.instance
5432 if self.op.force:
5433 _ShutdownInstanceDisks(self, instance)
5434 else:
5435 _SafeShutdownInstanceDisks(self, instance)
5438 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5439 """Shutdown block devices of an instance.
5441 This function checks if an instance is running, before calling
5442 _ShutdownInstanceDisks.
5444 """
5445 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5446 _ShutdownInstanceDisks(lu, instance, disks=disks)
5449 def _ExpandCheckDisks(instance, disks):
5450 """Return the instance disks selected by the disks list
5452 @type disks: list of L{objects.Disk} or None
5453 @param disks: selected disks
5454 @rtype: list of L{objects.Disk}
5455 @return: selected instance disks to act on
5457 """
5458 if disks is None:
5459 return instance.disks
5460 else:
5461 if not set(disks).issubset(instance.disks):
5462 raise errors.ProgrammerError("Can only act on disks belonging to the"
5463 " target instance")
5464 return disks
5467 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5468 """Shutdown block devices of an instance.
5470 This does the shutdown on all nodes of the instance.
5472 If ignore_primary is false, errors on the primary node are taken
5473 into account; otherwise they are ignored.
5475 """
5476 all_result = True
5477 disks = _ExpandCheckDisks(instance, disks)
5479 for disk in disks:
5480 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5481 lu.cfg.SetDiskID(top_disk, node)
5482 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5483 msg = result.fail_msg
5484 if msg:
5485 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5486 disk.iv_name, node, msg)
5487 if ((node == instance.primary_node and not ignore_primary) or
5488 (node != instance.primary_node and not result.offline)):
5489 all_result = False
5491 return all_result
5493 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5494 """Checks if a node has enough free memory.
5496 This function checks if a given node has the needed amount of free
5497 memory. In case the node has less memory or we cannot get the
5498 information from the node, this function raises an OpPrereqError
5499 exception.
5501 @type lu: C{LogicalUnit}
5502 @param lu: a logical unit from which we get configuration data
5503 @type node: C{str}
5504 @param node: the node to check
5505 @type reason: C{str}
5506 @param reason: string to use in the error message
5507 @type requested: C{int}
5508 @param requested: the amount of memory in MiB to check for
5509 @type hypervisor_name: C{str}
5510 @param hypervisor_name: the hypervisor to ask for memory stats
5511 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5512 we cannot check the node
5514 """
5515 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5516 nodeinfo[node].Raise("Can't get data from node %s" % node,
5517 prereq=True, ecode=errors.ECODE_ENVIRON)
5518 free_mem = nodeinfo[node].payload.get('memory_free', None)
5519 if not isinstance(free_mem, int):
5520 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5521 " was '%s'" % (node, free_mem),
5522 errors.ECODE_ENVIRON)
5523 if requested > free_mem:
5524 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5525 " needed %s MiB, available %s MiB" %
5526 (node, reason, requested, free_mem),
5527 errors.ECODE_NORES)
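# Editor's sketch (illustrative, variable names assumed): a typical caller
# verifies memory on the target node before starting or failing over an
# instance, mirroring the usage later in this module:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)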
5530 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5531 """Checks if nodes have enough free disk space in the all VGs.
5533 This function checks if all given nodes have the needed amount of
5534 free disk. In case any node has less disk or we cannot get the
5535 information from the node, this function raises an OpPrereqError
5536 exception.
5538 @type lu: C{LogicalUnit}
5539 @param lu: a logical unit from which we get configuration data
5540 @type nodenames: C{list}
5541 @param nodenames: the list of node names to check
5542 @type req_sizes: C{dict}
5543 @param req_sizes: the hash of vg and corresponding amount of disk in
5544 MiB to check for
5545 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5546 or we cannot check the node
5548 """
5549 for vg, req_size in req_sizes.items():
5550 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
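# Editor's illustration (values assumed): req_sizes maps each volume group
# to the space a new disk set would need there, e.g.
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 2048, "xenssd": 512})
#
# which simply fans out to _CheckNodesFreeDiskOnVG() once per VG.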
5553 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5554 """Checks if nodes have enough free disk space in the specified VG.
5556 This function checks if all given nodes have the needed amount of
5557 free disk. In case any node has less disk or we cannot get the
5558 information from the node, this function raises an OpPrereqError
5559 exception.
5561 @type lu: C{LogicalUnit}
5562 @param lu: a logical unit from which we get configuration data
5563 @type nodenames: C{list}
5564 @param nodenames: the list of node names to check
5565 @type vg: C{str}
5566 @param vg: the volume group to check
5567 @type requested: C{int}
5568 @param requested: the amount of disk in MiB to check for
5569 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5570 or we cannot check the node
5572 """
5573 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5574 for node in nodenames:
5575 info = nodeinfo[node]
5576 info.Raise("Cannot get current information from node %s" % node,
5577 prereq=True, ecode=errors.ECODE_ENVIRON)
5578 vg_free = info.payload.get("vg_free", None)
5579 if not isinstance(vg_free, int):
5580 raise errors.OpPrereqError("Can't compute free disk space on node"
5581 " %s for vg %s, result was '%s'" %
5582 (node, vg, vg_free), errors.ECODE_ENVIRON)
5583 if requested > vg_free:
5584 raise errors.OpPrereqError("Not enough disk space on target node %s"
5585 " vg %s: required %d MiB, available %d MiB" %
5586 (node, vg, requested, vg_free),
5587 errors.ECODE_NORES)
5590 class LUInstanceStartup(LogicalUnit):
5591 """Starts an instance.
5593 """
5594 HPATH = "instance-start"
5595 HTYPE = constants.HTYPE_INSTANCE
5596 REQ_BGL = False
5598 def CheckArguments(self):
5599 # extra beparams
5600 if self.op.beparams:
5601 # fill the beparams dict
5602 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5604 def ExpandNames(self):
5605 self._ExpandAndLockInstance()
5607 def BuildHooksEnv(self):
5608 """Build hooks env.
5610 This runs on master, primary and secondary nodes of the instance.
5612 """
5613 env = {
5614 "FORCE": self.op.force,
5615 }
5617 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5619 return env
5621 def BuildHooksNodes(self):
5622 """Build hooks nodes.
5624 """
5625 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5626 return (nl, nl)
5628 def CheckPrereq(self):
5629 """Check prerequisites.
5631 This checks that the instance is in the cluster.
5633 """
5634 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5635 assert self.instance is not None, \
5636 "Cannot retrieve locked instance %s" % self.op.instance_name
5638 # extra hvparams
5639 if self.op.hvparams:
5640 # check hypervisor parameter syntax (locally)
5641 cluster = self.cfg.GetClusterInfo()
5642 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5643 filled_hvp = cluster.FillHV(instance)
5644 filled_hvp.update(self.op.hvparams)
5645 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5646 hv_type.CheckParameterSyntax(filled_hvp)
5647 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
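# Parameter layering above, sketched with hypothetical values:
#   cluster.FillHV(instance) -> {"kernel_path": "/boot/vmlinuz", ...}
#   self.op.hvparams         -> {"kernel_path": "/boot/vmlinuz-test"}
#   filled_hvp               -> the cluster-filled defaults with the
#                               op-level overrides winning, then checked
#                               for syntax and against all nodes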
5649 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5651 if self.primary_offline and self.op.ignore_offline_nodes:
5652 self.proc.LogWarning("Ignoring offline primary node")
5654 if self.op.hvparams or self.op.beparams:
5655 self.proc.LogWarning("Overridden parameters are ignored")
5657 _CheckNodeOnline(self, instance.primary_node)
5659 bep = self.cfg.GetClusterInfo().FillBE(instance)
5661 # check bridges existence
5662 _CheckInstanceBridgesExist(self, instance)
5664 remote_info = self.rpc.call_instance_info(instance.primary_node,
5666 instance.hypervisor)
5667 remote_info.Raise("Error checking node %s" % instance.primary_node,
5668 prereq=True, ecode=errors.ECODE_ENVIRON)
5669 if not remote_info.payload: # not running already
5670 _CheckNodeFreeMemory(self, instance.primary_node,
5671 "starting instance %s" % instance.name,
5672 bep[constants.BE_MEMORY], instance.hypervisor)
5674 def Exec(self, feedback_fn):
5675 """Start the instance.
5678 instance = self.instance
5679 force = self.op.force
5681 if not self.op.no_remember:
5682 self.cfg.MarkInstanceUp(instance.name)
5684 if self.primary_offline:
5685 assert self.op.ignore_offline_nodes
5686 self.proc.LogInfo("Primary node offline, marked instance as started")
5688 node_current = instance.primary_node
5690 _StartInstanceDisks(self, instance, force)
5692 result = self.rpc.call_instance_start(node_current, instance,
5693 self.op.hvparams, self.op.beparams)
5694 msg = result.fail_msg
5696 _ShutdownInstanceDisks(self, instance)
5697 raise errors.OpExecError("Could not start instance: %s" % msg)
5700 class LUInstanceReboot(LogicalUnit):
5701 """Reboot an instance.
5704 HPATH = "instance-reboot"
5705 HTYPE = constants.HTYPE_INSTANCE
5708 def ExpandNames(self):
5709 self._ExpandAndLockInstance()
5711 def BuildHooksEnv(self):
5714 This runs on master, primary and secondary nodes of the instance.
5718 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5719 "REBOOT_TYPE": self.op.reboot_type,
5720 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5723 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5727 def BuildHooksNodes(self):
5728 """Build hooks nodes.
5731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5734 def CheckPrereq(self):
5735 """Check prerequisites.
5737 This checks that the instance is in the cluster.
5740 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5741 assert self.instance is not None, \
5742 "Cannot retrieve locked instance %s" % self.op.instance_name
5744 _CheckNodeOnline(self, instance.primary_node)
5746 # check bridges existence
5747 _CheckInstanceBridgesExist(self, instance)
5749 def Exec(self, feedback_fn):
5750 """Reboot the instance.
5753 instance = self.instance
5754 ignore_secondaries = self.op.ignore_secondaries
5755 reboot_type = self.op.reboot_type
5757 remote_info = self.rpc.call_instance_info(instance.primary_node,
5759 instance.hypervisor)
5760 remote_info.Raise("Error checking node %s" % instance.primary_node)
5761 instance_running = bool(remote_info.payload)
5763 node_current = instance.primary_node
5765 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5766 constants.INSTANCE_REBOOT_HARD]:
5767 for disk in instance.disks:
5768 self.cfg.SetDiskID(disk, node_current)
5769 result = self.rpc.call_instance_reboot(node_current, instance,
5771 self.op.shutdown_timeout)
5772 result.Raise("Could not reboot instance")
5774 if instance_running:
5775 result = self.rpc.call_instance_shutdown(node_current, instance,
5776 self.op.shutdown_timeout)
5777 result.Raise("Could not shutdown instance for full reboot")
5778 _ShutdownInstanceDisks(self, instance)
5780 self.LogInfo("Instance %s was already stopped, starting now",
5782 _StartInstanceDisks(self, instance, ignore_secondaries)
5783 result = self.rpc.call_instance_start(node_current, instance, None, None)
5784 msg = result.fail_msg
5786 _ShutdownInstanceDisks(self, instance)
5787 raise errors.OpExecError("Could not start instance for"
5788 " full reboot: %s" % msg)
5790 self.cfg.MarkInstanceUp(instance.name)
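# Reboot handling above, summarized (a sketch of the branches):
#   running + INSTANCE_REBOOT_SOFT/HARD -> in-hypervisor reboot via
#                                          call_instance_reboot
#   INSTANCE_REBOOT_FULL (or instance not running) -> shut down if needed,
#                                          cycle the disks, start it again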
5793 class LUInstanceShutdown(LogicalUnit):
5794 """Shutdown an instance.
5797 HPATH = "instance-stop"
5798 HTYPE = constants.HTYPE_INSTANCE
5801 def ExpandNames(self):
5802 self._ExpandAndLockInstance()
5804 def BuildHooksEnv(self):
5807 This runs on master, primary and secondary nodes of the instance.
5810 env = _BuildInstanceHookEnvByObject(self, self.instance)
5811 env["TIMEOUT"] = self.op.timeout
5814 def BuildHooksNodes(self):
5815 """Build hooks nodes.
5818 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5821 def CheckPrereq(self):
5822 """Check prerequisites.
5824 This checks that the instance is in the cluster.
5827 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5828 assert self.instance is not None, \
5829 "Cannot retrieve locked instance %s" % self.op.instance_name
5831 self.primary_offline = \
5832 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5834 if self.primary_offline and self.op.ignore_offline_nodes:
5835 self.proc.LogWarning("Ignoring offline primary node")
5837 _CheckNodeOnline(self, self.instance.primary_node)
5839 def Exec(self, feedback_fn):
5840 """Shutdown the instance.
5843 instance = self.instance
5844 node_current = instance.primary_node
5845 timeout = self.op.timeout
5847 if not self.op.no_remember:
5848 self.cfg.MarkInstanceDown(instance.name)
5850 if self.primary_offline:
5851 assert self.op.ignore_offline_nodes
5852 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5854 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5855 msg = result.fail_msg
5857 self.proc.LogWarning("Could not shut down instance: %s" % msg)
5859 _ShutdownInstanceDisks(self, instance)
5862 class LUInstanceReinstall(LogicalUnit):
5863 """Reinstall an instance.
5866 HPATH = "instance-reinstall"
5867 HTYPE = constants.HTYPE_INSTANCE
5870 def ExpandNames(self):
5871 self._ExpandAndLockInstance()
5873 def BuildHooksEnv(self):
5876 This runs on master, primary and secondary nodes of the instance.
5879 return _BuildInstanceHookEnvByObject(self, self.instance)
5881 def BuildHooksNodes(self):
5882 """Build hooks nodes.
5885 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5888 def CheckPrereq(self):
5889 """Check prerequisites.
5891 This checks that the instance is in the cluster and is not running.
5894 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5895 assert instance is not None, \
5896 "Cannot retrieve locked instance %s" % self.op.instance_name
5897 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5898 " offline, cannot reinstall")
5899 for node in instance.secondary_nodes:
5900 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5901 " cannot reinstall")
5903 if instance.disk_template == constants.DT_DISKLESS:
5904 raise errors.OpPrereqError("Instance '%s' has no disks" %
5905 self.op.instance_name,
5907 _CheckInstanceDown(self, instance, "cannot reinstall")
5909 if self.op.os_type is not None:
5911 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5912 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5913 instance_os = self.op.os_type
5915 instance_os = instance.os
5917 nodelist = list(instance.all_nodes)
5919 if self.op.osparams:
5920 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5921 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5922 self.os_inst = i_osdict # the new dict (without defaults)
5926 self.instance = instance
5928 def Exec(self, feedback_fn):
5929 """Reinstall the instance.
5932 inst = self.instance
5934 if self.op.os_type is not None:
5935 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5936 inst.os = self.op.os_type
5937 # Write to configuration
5938 self.cfg.Update(inst, feedback_fn)
5940 _StartInstanceDisks(self, inst, None)
5942 feedback_fn("Running the instance OS create scripts...")
5943 # FIXME: pass debug option from opcode to backend
5944 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5945 self.op.debug_level,
5946 osparams=self.os_inst)
5947 result.Raise("Could not install OS for instance %s on node %s" %
5948 (inst.name, inst.primary_node))
5950 _ShutdownInstanceDisks(self, inst)
5953 class LUInstanceRecreateDisks(LogicalUnit):
5954 """Recreate an instance's missing disks.
5957 HPATH = "instance-recreate-disks"
5958 HTYPE = constants.HTYPE_INSTANCE
5961 def CheckArguments(self):
5962 # normalise the disk list
5963 self.op.disks = sorted(frozenset(self.op.disks))
5965 def ExpandNames(self):
5966 self._ExpandAndLockInstance()
5967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5969 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5970 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5972 self.needed_locks[locking.LEVEL_NODE] = []
5974 def DeclareLocks(self, level):
5975 if level == locking.LEVEL_NODE:
5976 # if we replace the nodes, we only need to lock the old primary,
5977 # otherwise we need to lock all nodes for disk re-creation
5978 primary_only = bool(self.op.nodes)
5979 self._LockInstancesNodes(primary_only=primary_only)
5981 def BuildHooksEnv(self):
5984 This runs on master, primary and secondary nodes of the instance.
5987 return _BuildInstanceHookEnvByObject(self, self.instance)
5989 def BuildHooksNodes(self):
5990 """Build hooks nodes.
5993 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5996 def CheckPrereq(self):
5997 """Check prerequisites.
5999 This checks that the instance is in the cluster and is not running.
6002 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6003 assert instance is not None, \
6004 "Cannot retrieve locked instance %s" % self.op.instance_name
6006 if len(self.op.nodes) != len(instance.all_nodes):
6007 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6008 " %d replacement nodes were specified" %
6009 (instance.name, len(instance.all_nodes),
6010 len(self.op.nodes)),
6012 assert instance.disk_template != constants.DT_DRBD8 or \
6013 len(self.op.nodes) == 2
6014 assert instance.disk_template != constants.DT_PLAIN or \
6015 len(self.op.nodes) == 1
6016 primary_node = self.op.nodes[0]
6018 primary_node = instance.primary_node
6019 _CheckNodeOnline(self, primary_node)
6021 if instance.disk_template == constants.DT_DISKLESS:
6022 raise errors.OpPrereqError("Instance '%s' has no disks" %
6023 self.op.instance_name, errors.ECODE_INVAL)
6024 # if we replace nodes *and* the old primary is offline, we don't
6026 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6027 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6028 if not (self.op.nodes and old_pnode.offline):
6029 _CheckInstanceDown(self, instance, "cannot recreate disks")
6031 if not self.op.disks:
6032 self.op.disks = range(len(instance.disks))
6034 for idx in self.op.disks:
6035 if idx >= len(instance.disks):
6036 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6038 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6039 raise errors.OpPrereqError("Can't recreate disks partially and"
6040 " change the nodes at the same time",
6042 self.instance = instance
6044 def Exec(self, feedback_fn):
6045 """Recreate the disks.
6048 # change primary node, if needed
6050 self.instance.primary_node = self.op.nodes[0]
6051 self.LogWarning("Changing the instance's nodes, you will have to"
6052 " remove any disks left on the older nodes manually")
6055 for idx, disk in enumerate(self.instance.disks):
6056 if idx not in self.op.disks: # disk idx has not been passed in
6059 # update secondaries for disks, if needed
6061 if disk.dev_type == constants.LD_DRBD8:
6062 # need to update the nodes
6063 assert len(self.op.nodes) == 2
6064 logical_id = list(disk.logical_id)
6065 logical_id[0] = self.op.nodes[0]
6066 logical_id[1] = self.op.nodes[1]
6067 disk.logical_id = tuple(logical_id)
6070 self.cfg.Update(self.instance, feedback_fn)
6072 _CreateDisks(self, self.instance, to_skip=to_skip)
6075 class LUInstanceRename(LogicalUnit):
6076 """Rename an instance.
6079 HPATH = "instance-rename"
6080 HTYPE = constants.HTYPE_INSTANCE
6082 def CheckArguments(self):
6086 if self.op.ip_check and not self.op.name_check:
6087 # TODO: make the ip check more flexible and not depend on the name check
6088 raise errors.OpPrereqError("IP address check requires a name check",
6091 def BuildHooksEnv(self):
6094 This runs on master, primary and secondary nodes of the instance.
6097 env = _BuildInstanceHookEnvByObject(self, self.instance)
6098 env["INSTANCE_NEW_NAME"] = self.op.new_name
6101 def BuildHooksNodes(self):
6102 """Build hooks nodes.
6105 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6108 def CheckPrereq(self):
6109 """Check prerequisites.
6111 This checks that the instance is in the cluster and is not running.
6114 self.op.instance_name = _ExpandInstanceName(self.cfg,
6115 self.op.instance_name)
6116 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6117 assert instance is not None
6118 _CheckNodeOnline(self, instance.primary_node)
6119 _CheckInstanceDown(self, instance, "cannot rename")
6120 self.instance = instance
6122 new_name = self.op.new_name
6123 if self.op.name_check:
6124 hostname = netutils.GetHostname(name=new_name)
6125 if hostname != new_name:
6126 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6128 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6129 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6130 " same as given hostname '%s'") %
6131 (hostname.name, self.op.new_name),
6133 new_name = self.op.new_name = hostname.name
6134 if (self.op.ip_check and
6135 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6136 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6137 (hostname.ip, new_name),
6138 errors.ECODE_NOTUNIQUE)
6140 instance_list = self.cfg.GetInstanceList()
6141 if new_name in instance_list and new_name != instance.name:
6142 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6143 new_name, errors.ECODE_EXISTS)
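# Name-check sketch (hypothetical names): a new_name of "web2" may resolve
# to "web2.example.com"; the resolved FQDN replaces the given name, and when
# ip_check is set its IP must not answer a TCP ping on DEFAULT_NODED_PORT,
# otherwise the rename is refused as a duplicate address.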
6145 def Exec(self, feedback_fn):
6146 """Rename the instance.
6149 inst = self.instance
6150 old_name = inst.name
6152 rename_file_storage = False
6153 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
6154 self.op.new_name != inst.name):
6155 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6156 rename_file_storage = True
6158 self.cfg.RenameInstance(inst.name, self.op.new_name)
6159 # Change the instance lock. This is definitely safe while we hold the BGL.
6160 # Otherwise the new lock would have to be added in acquired mode.
6162 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6163 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6165 # re-read the instance from the configuration after rename
6166 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6168 if rename_file_storage:
6169 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6170 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6171 old_file_storage_dir,
6172 new_file_storage_dir)
6173 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6174 " (but the instance has been renamed in Ganeti)" %
6175 (inst.primary_node, old_file_storage_dir,
6176 new_file_storage_dir))
6178 _StartInstanceDisks(self, inst, None)
6180 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6181 old_name, self.op.debug_level)
6182 msg = result.fail_msg
6184 msg = ("Could not run OS rename script for instance %s on node %s"
6185 " (but the instance has been renamed in Ganeti): %s" %
6186 (inst.name, inst.primary_node, msg))
6187 self.proc.LogWarning(msg)
6189 _ShutdownInstanceDisks(self, inst)
6194 class LUInstanceRemove(LogicalUnit):
6195 """Remove an instance.
6198 HPATH = "instance-remove"
6199 HTYPE = constants.HTYPE_INSTANCE
6202 def ExpandNames(self):
6203 self._ExpandAndLockInstance()
6204 self.needed_locks[locking.LEVEL_NODE] = []
6205 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6207 def DeclareLocks(self, level):
6208 if level == locking.LEVEL_NODE:
6209 self._LockInstancesNodes()
6211 def BuildHooksEnv(self):
6214 This runs on master, primary and secondary nodes of the instance.
6217 env = _BuildInstanceHookEnvByObject(self, self.instance)
6218 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6221 def BuildHooksNodes(self):
6222 """Build hooks nodes.
6225 nl = [self.cfg.GetMasterNode()]
6226 nl_post = list(self.instance.all_nodes) + nl
6227 return (nl, nl_post)
6229 def CheckPrereq(self):
6230 """Check prerequisites.
6232 This checks that the instance is in the cluster.
6235 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6236 assert self.instance is not None, \
6237 "Cannot retrieve locked instance %s" % self.op.instance_name
6239 def Exec(self, feedback_fn):
6240 """Remove the instance.
6243 instance = self.instance
6244 logging.info("Shutting down instance %s on node %s",
6245 instance.name, instance.primary_node)
6247 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6248 self.op.shutdown_timeout)
6249 msg = result.fail_msg
6251 if self.op.ignore_failures:
6252 feedback_fn("Warning: can't shut down instance: %s" % msg)
6254 raise errors.OpExecError("Could not shut down instance %s on"
6256 (instance.name, instance.primary_node, msg))
6258 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6261 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6262 """Utility function to remove an instance.
6265 logging.info("Removing block devices for instance %s", instance.name)
6267 if not _RemoveDisks(lu, instance):
6268 if not ignore_failures:
6269 raise errors.OpExecError("Can't remove instance's disks")
6270 feedback_fn("Warning: can't remove instance's disks")
6272 logging.info("Removing instance %s out of cluster config", instance.name)
6274 lu.cfg.RemoveInstance(instance.name)
6276 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6277 "Instance lock removal conflict"
6279 # Remove lock for the instance
6280 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6283 class LUInstanceQuery(NoHooksLU):
6284 """Logical unit for querying instances.
6287 # pylint: disable-msg=W0142
6290 def CheckArguments(self):
6291 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6292 self.op.output_fields, self.op.use_locking)
6294 def ExpandNames(self):
6295 self.iq.ExpandNames(self)
6297 def DeclareLocks(self, level):
6298 self.iq.DeclareLocks(self, level)
6300 def Exec(self, feedback_fn):
6301 return self.iq.OldStyleQuery(self)
6304 class LUInstanceFailover(LogicalUnit):
6305 """Failover an instance.
6308 HPATH = "instance-failover"
6309 HTYPE = constants.HTYPE_INSTANCE
6312 def CheckArguments(self):
6313 """Check the arguments.
6316 self.iallocator = getattr(self.op, "iallocator", None)
6317 self.target_node = getattr(self.op, "target_node", None)
6319 def ExpandNames(self):
6320 self._ExpandAndLockInstance()
6322 if self.op.target_node is not None:
6323 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6325 self.needed_locks[locking.LEVEL_NODE] = []
6326 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6328 ignore_consistency = self.op.ignore_consistency
6329 shutdown_timeout = self.op.shutdown_timeout
6330 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6333 ignore_consistency=ignore_consistency,
6334 shutdown_timeout=shutdown_timeout)
6335 self.tasklets = [self._migrater]
6337 def DeclareLocks(self, level):
6338 if level == locking.LEVEL_NODE:
6339 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6340 if instance.disk_template in constants.DTS_EXT_MIRROR:
6341 if self.op.target_node is None:
6342 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6344 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6345 self.op.target_node]
6346 del self.recalculate_locks[locking.LEVEL_NODE]
6348 self._LockInstancesNodes()
6350 def BuildHooksEnv(self):
6353 This runs on master, primary and secondary nodes of the instance.
6356 instance = self._migrater.instance
6357 source_node = instance.primary_node
6358 target_node = self.op.target_node
6360 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6361 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6362 "OLD_PRIMARY": source_node,
6363 "NEW_PRIMARY": target_node,
6366 if instance.disk_template in constants.DTS_INT_MIRROR:
6367 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6368 env["NEW_SECONDARY"] = source_node
6370 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6372 env.update(_BuildInstanceHookEnvByObject(self, instance))
6376 def BuildHooksNodes(self):
6377 """Build hooks nodes.
6380 instance = self._migrater.instance
6381 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6382 return (nl, nl + [instance.primary_node])
6385 class LUInstanceMigrate(LogicalUnit):
6386 """Migrate an instance.
6388 This is migration without shutting down, as opposed to failover,
6389 which is done with a shutdown.
6392 HPATH = "instance-migrate"
6393 HTYPE = constants.HTYPE_INSTANCE
6396 def ExpandNames(self):
6397 self._ExpandAndLockInstance()
6399 if self.op.target_node is not None:
6400 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6402 self.needed_locks[locking.LEVEL_NODE] = []
6403 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6405 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6406 cleanup=self.op.cleanup,
6408 fallback=self.op.allow_failover)
6409 self.tasklets = [self._migrater]
6411 def DeclareLocks(self, level):
6412 if level == locking.LEVEL_NODE:
6413 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6414 if instance.disk_template in constants.DTS_EXT_MIRROR:
6415 if self.op.target_node is None:
6416 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6418 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6419 self.op.target_node]
6420 del self.recalculate_locks[locking.LEVEL_NODE]
6422 self._LockInstancesNodes()
6424 def BuildHooksEnv(self):
6427 This runs on master, primary and secondary nodes of the instance.
6430 instance = self._migrater.instance
6431 source_node = instance.primary_node
6432 target_node = self.op.target_node
6433 env = _BuildInstanceHookEnvByObject(self, instance)
6435 "MIGRATE_LIVE": self._migrater.live,
6436 "MIGRATE_CLEANUP": self.op.cleanup,
6437 "OLD_PRIMARY": source_node,
6438 "NEW_PRIMARY": target_node,
6441 if instance.disk_template in constants.DTS_INT_MIRROR:
6442 env["OLD_SECONDARY"] = target_node
6443 env["NEW_SECONDARY"] = source_node
6445 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6449 def BuildHooksNodes(self):
6450 """Build hooks nodes.
6453 instance = self._migrater.instance
6454 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6455 return (nl, nl + [instance.primary_node])
6458 class LUInstanceMove(LogicalUnit):
6459 """Move an instance by data-copying.
6462 HPATH = "instance-move"
6463 HTYPE = constants.HTYPE_INSTANCE
6466 def ExpandNames(self):
6467 self._ExpandAndLockInstance()
6468 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6469 self.op.target_node = target_node
6470 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6473 def DeclareLocks(self, level):
6474 if level == locking.LEVEL_NODE:
6475 self._LockInstancesNodes(primary_only=True)
6477 def BuildHooksEnv(self):
6480 This runs on master, primary and secondary nodes of the instance.
6484 "TARGET_NODE": self.op.target_node,
6485 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6487 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6490 def BuildHooksNodes(self):
6491 """Build hooks nodes.
6495 self.cfg.GetMasterNode(),
6496 self.instance.primary_node,
6497 self.op.target_node,
6501 def CheckPrereq(self):
6502 """Check prerequisites.
6504 This checks that the instance is in the cluster.
6507 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6508 assert self.instance is not None, \
6509 "Cannot retrieve locked instance %s" % self.op.instance_name
6511 node = self.cfg.GetNodeInfo(self.op.target_node)
6512 assert node is not None, \
6513 "Cannot retrieve locked node %s" % self.op.target_node
6515 self.target_node = target_node = node.name
6517 if target_node == instance.primary_node:
6518 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6519 (instance.name, target_node),
6522 bep = self.cfg.GetClusterInfo().FillBE(instance)
6524 for idx, dsk in enumerate(instance.disks):
6525 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6526 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6527 " cannot copy" % idx, errors.ECODE_STATE)
6529 _CheckNodeOnline(self, target_node)
6530 _CheckNodeNotDrained(self, target_node)
6531 _CheckNodeVmCapable(self, target_node)
6533 if instance.admin_up:
6534 # check memory requirements on the secondary node
6535 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6536 instance.name, bep[constants.BE_MEMORY],
6537 instance.hypervisor)
6539 self.LogInfo("Not checking memory on the secondary node as"
6540 " instance will not be started")
6542 # check bridge existence
6543 _CheckInstanceBridgesExist(self, instance, node=target_node)
6545 def Exec(self, feedback_fn):
6546 """Move an instance.
6548 The move is done by shutting it down on its present node, copying
6549 the data over (slow) and starting it on the new node.
6552 instance = self.instance
6554 source_node = instance.primary_node
6555 target_node = self.target_node
6557 self.LogInfo("Shutting down instance %s on source node %s",
6558 instance.name, source_node)
6560 result = self.rpc.call_instance_shutdown(source_node, instance,
6561 self.op.shutdown_timeout)
6562 msg = result.fail_msg
6564 if self.op.ignore_consistency:
6565 self.proc.LogWarning("Could not shut down instance %s on node %s."
6566 " Proceeding anyway. Please make sure node"
6567 " %s is down. Error details: %s",
6568 instance.name, source_node, source_node, msg)
6570 raise errors.OpExecError("Could not shut down instance %s on"
6572 (instance.name, source_node, msg))
6574 # create the target disks
6576 _CreateDisks(self, instance, target_node=target_node)
6577 except errors.OpExecError:
6578 self.LogWarning("Device creation failed, reverting...")
6580 _RemoveDisks(self, instance, target_node=target_node)
6582 self.cfg.ReleaseDRBDMinors(instance.name)
6585 cluster_name = self.cfg.GetClusterInfo().cluster_name
6588 # activate, get path, copy the data over
6589 for idx, disk in enumerate(instance.disks):
6590 self.LogInfo("Copying data for disk %d", idx)
6591 result = self.rpc.call_blockdev_assemble(target_node, disk,
6592 instance.name, True, idx)
6594 self.LogWarning("Can't assemble newly created disk %d: %s",
6595 idx, result.fail_msg)
6596 errs.append(result.fail_msg)
6598 dev_path = result.payload
6599 result = self.rpc.call_blockdev_export(source_node, disk,
6600 target_node, dev_path,
6603 self.LogWarning("Can't copy data over for disk %d: %s",
6604 idx, result.fail_msg)
6605 errs.append(result.fail_msg)
6609 self.LogWarning("Some disks failed to copy, aborting")
6611 _RemoveDisks(self, instance, target_node=target_node)
6613 self.cfg.ReleaseDRBDMinors(instance.name)
6614 raise errors.OpExecError("Errors during disk copy: %s" %
6617 instance.primary_node = target_node
6618 self.cfg.Update(instance, feedback_fn)
6620 self.LogInfo("Removing the disks on the original node")
6621 _RemoveDisks(self, instance, target_node=source_node)
6623 # Only start the instance if it's marked as up
6624 if instance.admin_up:
6625 self.LogInfo("Starting instance %s on node %s",
6626 instance.name, target_node)
6628 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6629 ignore_secondaries=True)
6631 _ShutdownInstanceDisks(self, instance)
6632 raise errors.OpExecError("Can't activate the instance's disks")
6634 result = self.rpc.call_instance_start(target_node, instance, None, None)
6635 msg = result.fail_msg
6637 _ShutdownInstanceDisks(self, instance)
6638 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6639 (instance.name, target_node, msg))
6642 class LUNodeMigrate(LogicalUnit):
6643 """Migrate all instances from a node.
6646 HPATH = "node-migrate"
6647 HTYPE = constants.HTYPE_NODE
6650 def CheckArguments(self):
6653 def ExpandNames(self):
6654 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6656 self.needed_locks = {}
6658 # Create tasklets for migrating instances for all instances on this node
6662 self.lock_all_nodes = False
6664 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6665 logging.debug("Migrating instance %s", inst.name)
6666 names.append(inst.name)
6668 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
6670 if inst.disk_template in constants.DTS_EXT_MIRROR:
6671 # We need to lock all nodes, as the iallocator will choose the
6672 # destination nodes afterwards
6673 self.lock_all_nodes = True
6675 self.tasklets = tasklets
6677 # Declare node locks
6678 if self.lock_all_nodes:
6679 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6681 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6682 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6684 # Declare instance locks
6685 self.needed_locks[locking.LEVEL_INSTANCE] = names
6687 def DeclareLocks(self, level):
6688 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6689 self._LockInstancesNodes()
6691 def BuildHooksEnv(self):
6694 This runs on the master, the primary and all the secondaries.
6698 "NODE_NAME": self.op.node_name,
6701 def BuildHooksNodes(self):
6702 """Build hooks nodes.
6705 nl = [self.cfg.GetMasterNode()]
6709 class TLMigrateInstance(Tasklet):
6710 """Tasklet class for instance migration.
6713 @ivar live: whether the migration will be done live or non-live;
6714 this variable is initialized only after CheckPrereq has run
6715 @type cleanup: boolean
6716 @ivar cleanup: Whether we clean up after a failed migration
6717 @type iallocator: string
6718 @ivar iallocator: The iallocator used to determine target_node
6719 @type target_node: string
6720 @ivar target_node: If given, the target_node to reallocate the instance to
6721 @type failover: boolean
6722 @ivar failover: Whether operation results in failover or migration
6723 @type fallback: boolean
6724 @ivar fallback: Whether fallback to failover is allowed if migration not
6726 @type ignore_consistency: boolean
6727 @ivar ignore_consistency: Whether we should ignore consistency between source
6729 @type shutdown_timeout: int
6730 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
6733 def __init__(self, lu, instance_name, cleanup=False,
6734 failover=False, fallback=False,
6735 ignore_consistency=False,
6736 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6737 """Initializes this class.
6740 Tasklet.__init__(self, lu)
6743 self.instance_name = instance_name
6744 self.cleanup = cleanup
6745 self.live = False # will be overridden later
6746 self.failover = failover
6747 self.fallback = fallback
6748 self.ignore_consistency = ignore_consistency
6749 self.shutdown_timeout = shutdown_timeout
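# Instantiation sketch (hypothetical instance name): a failover-style use,
# as in LUInstanceFailover above, would look like
#   TLMigrateInstance(self, "inst1.example.com", cleanup=False,
#                     failover=True, ignore_consistency=False,
#                     shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT)
# while a plain migration passes cleanup/fallback instead of failover.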
6751 def CheckPrereq(self):
6752 """Check prerequisites.
6754 This checks that the instance is in the cluster.
6757 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6758 instance = self.cfg.GetInstanceInfo(instance_name)
6759 assert instance is not None
6760 self.instance = instance
6762 if (not self.cleanup and not instance.admin_up and not self.failover and
6764 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6766 self.failover = True
6768 if instance.disk_template not in constants.DTS_MIRRORED:
6773 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6774 " %s" % (instance.disk_template, text),
6777 if instance.disk_template in constants.DTS_EXT_MIRROR:
6778 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6780 if self.lu.op.iallocator:
6781 self._RunAllocator()
6783 # We set self.target_node as it is required by
6785 self.target_node = self.lu.op.target_node
6787 # self.target_node is already populated, either directly or by the
6789 target_node = self.target_node
6790 if self.target_node == instance.primary_node:
6791 raise errors.OpPrereqError("Cannot migrate instance %s"
6792 " to its primary (%s)" %
6793 (instance.name, instance.primary_node))
6795 if len(self.lu.tasklets) == 1:
6796 # It is safe to release locks only when we're the only tasklet
6798 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6799 keep=[instance.primary_node, self.target_node])
6802 secondary_nodes = instance.secondary_nodes
6803 if not secondary_nodes:
6804 raise errors.ConfigurationError("No secondary node but using"
6805 " %s disk template" %
6806 instance.disk_template)
6807 target_node = secondary_nodes[0]
6808 if self.lu.op.iallocator or (self.lu.op.target_node and
6809 self.lu.op.target_node != target_node):
6811 text = "failed over"
6814 raise errors.OpPrereqError("Instances with disk template %s cannot"
6815 " be %s to arbitrary nodes"
6816 " (neither an iallocator nor a target"
6817 " node can be passed)" %
6818 (instance.disk_template, text),
6821 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6823 # check memory requirements on the secondary node
6824 if not self.failover or instance.admin_up:
6825 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6826 instance.name, i_be[constants.BE_MEMORY],
6827 instance.hypervisor)
6829 self.lu.LogInfo("Not checking memory on the secondary node as"
6830 " instance will not be started")
6832 # check bridge existence
6833 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6835 if not self.cleanup:
6836 _CheckNodeNotDrained(self.lu, target_node)
6837 if not self.failover:
6838 result = self.rpc.call_instance_migratable(instance.primary_node,
6840 if result.fail_msg and self.fallback:
6841 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6843 self.failover = True
6845 result.Raise("Can't migrate, please use failover",
6846 prereq=True, ecode=errors.ECODE_STATE)
6848 assert not (self.failover and self.cleanup)
6850 if not self.failover:
6851 if self.lu.op.live is not None and self.lu.op.mode is not None:
6852 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6853 " parameters are accepted",
6855 if self.lu.op.live is not None:
6857 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6859 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6860 # reset the 'live' parameter to None so that repeated
6861 # invocations of CheckPrereq do not raise an exception
6862 self.lu.op.live = None
6863 elif self.lu.op.mode is None:
6864 # read the default value from the hypervisor
6865 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6867 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6869 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6871 # Failover is never live
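# Mode selection above, summarized (sketch):
#   op.live set   -> mapped to HT_MIGRATION_LIVE/NONLIVE, then reset to None
#   op.mode set   -> used as given
#   neither       -> hypervisor default read from HV_MIGRATION_MODE
#   failover      -> never live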
6874 def _RunAllocator(self):
6875 """Run the allocator based on input opcode.
6878 ial = IAllocator(self.cfg, self.rpc,
6879 mode=constants.IALLOCATOR_MODE_RELOC,
6880 name=self.instance_name,
6881 # TODO See why hail breaks with a single node below
6882 relocate_from=[self.instance.primary_node,
6883 self.instance.primary_node],
6886 ial.Run(self.lu.op.iallocator)
6889 raise errors.OpPrereqError("Can't compute nodes using"
6890 " iallocator '%s': %s" %
6891 (self.lu.op.iallocator, ial.info),
6893 if len(ial.result) != ial.required_nodes:
6894 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6895 " of nodes (%s), required %s" %
6896 (self.lu.op.iallocator, len(ial.result),
6897 ial.required_nodes), errors.ECODE_FAULT)
6898 self.target_node = ial.result[0]
6899 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6900 self.instance_name, self.lu.op.iallocator,
6901 utils.CommaJoin(ial.result))
6903 def _WaitUntilSync(self):
6904 """Poll with custom rpc for disk sync.
6906 This uses our own step-based rpc call.
6909 self.feedback_fn("* wait until resync is done")
6913 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6915 self.instance.disks)
6917 for node, nres in result.items():
6918 nres.Raise("Cannot resync disks on node %s" % node)
6919 node_done, node_percent = nres.payload
6920 all_done = all_done and node_done
6921 if node_percent is not None:
6922 min_percent = min(min_percent, node_percent)
6924 if min_percent < 100:
6925 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6928 def _EnsureSecondary(self, node):
6929 """Demote a node to secondary.
6932 self.feedback_fn("* switching node %s to secondary mode" % node)
6934 for dev in self.instance.disks:
6935 self.cfg.SetDiskID(dev, node)
6937 result = self.rpc.call_blockdev_close(node, self.instance.name,
6938 self.instance.disks)
6939 result.Raise("Cannot change disk to secondary on node %s" % node)
6941 def _GoStandalone(self):
6942 """Disconnect from the network.
6945 self.feedback_fn("* changing into standalone mode")
6946 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6947 self.instance.disks)
6948 for node, nres in result.items():
6949 nres.Raise("Cannot disconnect disks on node %s" % node)
6951 def _GoReconnect(self, multimaster):
6952 """Reconnect to the network.
6958 msg = "single-master"
6959 self.feedback_fn("* changing disks into %s mode" % msg)
6960 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6961 self.instance.disks,
6962 self.instance.name, multimaster)
6963 for node, nres in result.items():
6964 nres.Raise("Cannot change disks config on node %s" % node)
6966 def _ExecCleanup(self):
6967 """Try to cleanup after a failed migration.
6969 The cleanup is done by:
6970 - check that the instance is running only on one node
6971 (and update the config if needed)
6972 - change disks on its secondary node to secondary
6973 - wait until disks are fully synchronized
6974 - disconnect from the network
6975 - change disks into single-master mode
6976 - wait again until disks are fully synchronized
6979 instance = self.instance
6980 target_node = self.target_node
6981 source_node = self.source_node
6983 # check running on only one node
6984 self.feedback_fn("* checking where the instance actually runs"
6985 " (if this hangs, the hypervisor might be in"
6987 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6988 for node, result in ins_l.items():
6989 result.Raise("Can't contact node %s" % node)
6991 runningon_source = instance.name in ins_l[source_node].payload
6992 runningon_target = instance.name in ins_l[target_node].payload
6994 if runningon_source and runningon_target:
6995 raise errors.OpExecError("Instance seems to be running on two nodes,"
6996 " or the hypervisor is confused; you will have"
6997 " to ensure manually that it runs only on one"
6998 " and restart this operation")
7000 if not (runningon_source or runningon_target):
7001 raise errors.OpExecError("Instance does not seem to be running at all;"
7002 " in this case it's safer to repair by"
7003 " running 'gnt-instance stop' to ensure disk"
7004 " shutdown, and then restarting it")
7006 if runningon_target:
7007 # the migration has actually succeeded, we need to update the config
7008 self.feedback_fn("* instance running on secondary node (%s),"
7009 " updating config" % target_node)
7010 instance.primary_node = target_node
7011 self.cfg.Update(instance, self.feedback_fn)
7012 demoted_node = source_node
7014 self.feedback_fn("* instance confirmed to be running on its"
7015 " primary node (%s)" % source_node)
7016 demoted_node = target_node
7018 if instance.disk_template in constants.DTS_INT_MIRROR:
7019 self._EnsureSecondary(demoted_node)
7021 self._WaitUntilSync()
7022 except errors.OpExecError:
7023 # we ignore errors here, since if the device is standalone, it
7024 # won't be able to sync
7026 self._GoStandalone()
7027 self._GoReconnect(False)
7028 self._WaitUntilSync()
7030 self.feedback_fn("* done")
7032 def _RevertDiskStatus(self):
7033 """Try to revert the disk status after a failed migration.
7036 target_node = self.target_node
7037 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7041 self._EnsureSecondary(target_node)
7042 self._GoStandalone()
7043 self._GoReconnect(False)
7044 self._WaitUntilSync()
7045 except errors.OpExecError, err:
7046 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7047 " please try to recover the instance manually;"
7048 " error '%s'" % str(err))
7050 def _AbortMigration(self):
7051 """Call the hypervisor code to abort a started migration.
7054 instance = self.instance
7055 target_node = self.target_node
7056 migration_info = self.migration_info
7058 abort_result = self.rpc.call_finalize_migration(target_node,
7062 abort_msg = abort_result.fail_msg
7064 logging.error("Aborting migration failed on target node %s: %s",
7065 target_node, abort_msg)
7066 # Don't raise an exception here, as we still have to try to revert the
7067 # disk status, even if this step failed.
7069 def _ExecMigration(self):
7070 """Migrate an instance.
7072 The migrate is done by:
7073 - change the disks into dual-master mode
7074 - wait until disks are fully synchronized again
7075 - migrate the instance
7076 - change disks on the new secondary node (the old primary) to secondary
7077 - wait until disks are fully synchronized
7078 - change disks into single-master mode
7081 instance = self.instance
7082 target_node = self.target_node
7083 source_node = self.source_node
7085 self.feedback_fn("* checking disk consistency between source and target")
7086 for dev in instance.disks:
7087 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7088 raise errors.OpExecError("Disk %s is degraded or not fully"
7089 " synchronized on target node,"
7090 " aborting migration" % dev.iv_name)
7092 # First get the migration information from the remote node
7093 result = self.rpc.call_migration_info(source_node, instance)
7094 msg = result.fail_msg
7096 log_err = ("Failed fetching source migration information from %s: %s" %
7098 logging.error(log_err)
7099 raise errors.OpExecError(log_err)
7101 self.migration_info = migration_info = result.payload
7103 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7104 # Then switch the disks to master/master mode
7105 self._EnsureSecondary(target_node)
7106 self._GoStandalone()
7107 self._GoReconnect(True)
7108 self._WaitUntilSync()
7110 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7111 result = self.rpc.call_accept_instance(target_node,
7114 self.nodes_ip[target_node])
7116 msg = result.fail_msg
7118 logging.error("Instance pre-migration failed, trying to revert"
7119 " disk status: %s", msg)
7120 self.feedback_fn("Pre-migration failed, aborting")
7121 self._AbortMigration()
7122 self._RevertDiskStatus()
7123 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7124 (instance.name, msg))
7126 self.feedback_fn("* migrating instance to %s" % target_node)
7127 result = self.rpc.call_instance_migrate(source_node, instance,
7128 self.nodes_ip[target_node],
7130 msg = result.fail_msg
7132 logging.error("Instance migration failed, trying to revert"
7133 " disk status: %s", msg)
7134 self.feedback_fn("Migration failed, aborting")
7135 self._AbortMigration()
7136 self._RevertDiskStatus()
7137 raise errors.OpExecError("Could not migrate instance %s: %s" %
7138 (instance.name, msg))
7140 instance.primary_node = target_node
7141 # distribute new instance config to the other nodes
7142 self.cfg.Update(instance, self.feedback_fn)
7144 result = self.rpc.call_finalize_migration(target_node,
7148 msg = result.fail_msg
7150 logging.error("Instance migration succeeded, but finalization failed:"
7152 raise errors.OpExecError("Could not finalize instance migration: %s" %
7155 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7156 self._EnsureSecondary(source_node)
7157 self._WaitUntilSync()
7158 self._GoStandalone()
7159 self._GoReconnect(False)
7160 self._WaitUntilSync()
7162 self.feedback_fn("* done")
7164 def _ExecFailover(self):
7165 """Failover an instance.
7167 The failover is done by shutting it down on its present node and
7168 starting it on the secondary.
7171 instance = self.instance
7172 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7174 source_node = instance.primary_node
7175 target_node = self.target_node
7177 if instance.admin_up:
7178 self.feedback_fn("* checking disk consistency between source and target")
7179 for dev in instance.disks:
7180 # for drbd, these are drbd over lvm
7181 if not _CheckDiskConsistency(self, dev, target_node, False):
7182 if not self.ignore_consistency:
7183 raise errors.OpExecError("Disk %s is degraded on target node,"
7184 " aborting failover" % dev.iv_name)
7186 self.feedback_fn("* not checking disk consistency as instance is not"
7189 self.feedback_fn("* shutting down instance on source node")
7190 logging.info("Shutting down instance %s on node %s",
7191 instance.name, source_node)
7193 result = self.rpc.call_instance_shutdown(source_node, instance,
7194 self.shutdown_timeout)
7195 msg = result.fail_msg
7197 if self.ignore_consistency or primary_node.offline:
7198 self.lu.LogWarning("Could not shut down instance %s on node %s,"
7199 " proceeding anyway; please make sure node"
7200 " %s is down; error details: %s",
7201 instance.name, source_node, source_node, msg)
7203 raise errors.OpExecError("Could not shut down instance %s on"
7205 (instance.name, source_node, msg))
7207 self.feedback_fn("* deactivating the instance's disks on source node")
7208 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7209 raise errors.OpExecError("Can't shut down the instance's disks.")
7211 instance.primary_node = target_node
7212 # distribute new instance config to the other nodes
7213 self.cfg.Update(instance, self.feedback_fn)
7215 # Only start the instance if it's marked as up
7216 if instance.admin_up:
7217 self.feedback_fn("* activating the instance's disks on target node")
7218 logging.info("Starting instance %s on node %s",
7219 instance.name, target_node)
7221 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7222 ignore_secondaries=True)
7224 _ShutdownInstanceDisks(self, instance)
7225 raise errors.OpExecError("Can't activate the instance's disks")
7227 self.feedback_fn("* starting the instance on the target node")
7228 result = self.rpc.call_instance_start(target_node, instance, None, None)
7229 msg = result.fail_msg
7231 _ShutdownInstanceDisks(self, instance)
7232 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7233 (instance.name, target_node, msg))
7235 def Exec(self, feedback_fn):
7236 """Perform the migration.
7239 self.feedback_fn = feedback_fn
7240 self.source_node = self.instance.primary_node
7242 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7243 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7244 self.target_node = self.instance.secondary_nodes[0]
7245 # Otherwise self.target_node has been populated either
7246 # directly, or through an iallocator.
7248 self.all_nodes = [self.source_node, self.target_node]
7250 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7251 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7255 feedback_fn("Failover instance %s" % self.instance.name)
7256 self._ExecFailover()
7258 feedback_fn("Migrating instance %s" % self.instance.name)
7261 return self._ExecCleanup()
7263 return self._ExecMigration()
7266 def _CreateBlockDev(lu, node, instance, device, force_create,
7268 """Create a tree of block devices on a given node.
7270 If this device type has to be created on secondaries, create it and
7273 If not, just recurse to children keeping the same 'force' value.
7275 @param lu: the lu on whose behalf we execute
7276 @param node: the node on which to create the device
7277 @type instance: L{objects.Instance}
7278 @param instance: the instance which owns the device
7279 @type device: L{objects.Disk}
7280 @param device: the device to create
7281 @type force_create: boolean
7282 @param force_create: whether to force creation of this device; this
7283 will be changed to True whenever we find a device which has the
7284 CreateOnSecondary() attribute
7285 @param info: the extra 'metadata' we should attach to the device
7286 (this will be represented as a LVM tag)
7287 @type force_open: boolean
7288 @param force_open: this parameter will be passed to the
7289 L{backend.BlockdevCreate} function where it specifies
7290 whether we run on primary or not, and it affects both
7291 the child assembly and the device's own Open() execution
7294 if device.CreateOnSecondary():
7298 for child in device.children:
7299 _CreateBlockDev(lu, node, instance, child, force_create,
7302 if not force_create:
7305 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
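# Recursion sketch: a DRBD8 device reports CreateOnSecondary(), so
# force_create flips to True, its two LV children are created first on the
# visited node, and the DRBD device itself is then created on top of them.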
7308 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7309 """Create a single block device on a given node.
7311 This will not recurse over children of the device, so they must be
7314 @param lu: the lu on whose behalf we execute
7315 @param node: the node on which to create the device
7316 @type instance: L{objects.Instance}
7317 @param instance: the instance which owns the device
7318 @type device: L{objects.Disk}
7319 @param device: the device to create
7320 @param info: the extra 'metadata' we should attach to the device
7321 (this will be represented as a LVM tag)
7322 @type force_open: boolean
7323 @param force_open: this parameter will be passed to the
7324 L{backend.BlockdevCreate} function where it specifies
7325 whether we run on primary or not, and it affects both
7326 the child assembly and the device's own Open() execution
7329 lu.cfg.SetDiskID(device, node)
7330 result = lu.rpc.call_blockdev_create(node, device, device.size,
7331 instance.name, force_open, info)
7332 result.Raise("Can't create block device %s on"
7333 " node %s for instance %s" % (device, node, instance.name))
7334 if device.physical_id is None:
7335 device.physical_id = result.payload
7338 def _GenerateUniqueNames(lu, exts):
7339 """Generate a suitable LV name.
7341 This will generate a logical volume name for the given instance.
7346 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7347 results.append("%s%s" % (new_id, val))
7351 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7352 iv_name, p_minor, s_minor):
7353 """Generate a drbd8 device complete with its children.
7356 assert len(vgnames) == len(names) == 2
7357 port = lu.cfg.AllocatePort()
7358 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7359 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7360 logical_id=(vgnames[0], names[0]))
7361 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7362 logical_id=(vgnames[1], names[1]))
7363 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7364 logical_id=(primary, secondary, port,
7367 children=[dev_data, dev_meta],
7372 def _GenerateDiskTemplate(lu, template_name,
7373 instance_name, primary_node,
7374 secondary_nodes, disk_info,
7375 file_storage_dir, file_driver,
7376 base_index, feedback_fn):
7377 """Generate the entire disk layout for a given template type.
7380 #TODO: compute space requirements
7382 vgname = lu.cfg.GetVGName()
7383 disk_count = len(disk_info)
7385 if template_name == constants.DT_DISKLESS:
7387 elif template_name == constants.DT_PLAIN:
7388 if len(secondary_nodes) != 0:
7389 raise errors.ProgrammerError("Wrong template configuration")
7391 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7392 for i in range(disk_count)])
7393 for idx, disk in enumerate(disk_info):
7394 disk_index = idx + base_index
7395 vg = disk.get(constants.IDISK_VG, vgname)
7396 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7397 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7398 size=disk[constants.IDISK_SIZE],
7399 logical_id=(vg, names[idx]),
7400 iv_name="disk/%d" % disk_index,
7401 mode=disk[constants.IDISK_MODE])
7402 disks.append(disk_dev)
7403 elif template_name == constants.DT_DRBD8:
7404 if len(secondary_nodes) != 1:
7405 raise errors.ProgrammerError("Wrong template configuration")
7406 remote_node = secondary_nodes[0]
7407 minors = lu.cfg.AllocateDRBDMinor(
7408 [primary_node, remote_node] * len(disk_info), instance_name)
7411 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7412 for i in range(disk_count)]):
7413 names.append(lv_prefix + "_data")
7414 names.append(lv_prefix + "_meta")
7415 for idx, disk in enumerate(disk_info):
7416 disk_index = idx + base_index
7417 data_vg = disk.get(constants.IDISK_VG, vgname)
7418 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7419 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7420 disk[constants.IDISK_SIZE],
7422 names[idx * 2:idx * 2 + 2],
7423 "disk/%d" % disk_index,
7424 minors[idx * 2], minors[idx * 2 + 1])
7425 disk_dev.mode = disk[constants.IDISK_MODE]
7426 disks.append(disk_dev)
7427 elif template_name == constants.DT_FILE:
7428 if len(secondary_nodes) != 0:
7429 raise errors.ProgrammerError("Wrong template configuration")
7431 opcodes.RequireFileStorage()
7433 for idx, disk in enumerate(disk_info):
7434 disk_index = idx + base_index
7435 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7436 size=disk[constants.IDISK_SIZE],
7437 iv_name="disk/%d" % disk_index,
7438 logical_id=(file_driver,
7439 "%s/disk%d" % (file_storage_dir,
7441 mode=disk[constants.IDISK_MODE])
7442 disks.append(disk_dev)
7443 elif template_name == constants.DT_SHARED_FILE:
7444 if len(secondary_nodes) != 0:
7445 raise errors.ProgrammerError("Wrong template configuration")
7447 opcodes.RequireSharedFileStorage()
7449 for idx, disk in enumerate(disk_info):
7450 disk_index = idx + base_index
7451 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7452 size=disk[constants.IDISK_SIZE],
7453 iv_name="disk/%d" % disk_index,
7454 logical_id=(file_driver,
7455 "%s/disk%d" % (file_storage_dir,
7457 mode=disk[constants.IDISK_MODE])
7458 disks.append(disk_dev)
7459 elif template_name == constants.DT_BLOCK:
7460 if len(secondary_nodes) != 0:
7461 raise errors.ProgrammerError("Wrong template configuration")
7463 for idx, disk in enumerate(disk_info):
7464 disk_index = idx + base_index
7465 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7466 size=disk[constants.IDISK_SIZE],
7467 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7468 disk[constants.IDISK_ADOPT]),
7469 iv_name="disk/%d" % disk_index,
7470 mode=disk[constants.IDISK_MODE])
7471 disks.append(disk_dev)
7474 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
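# Template rules enforced above, summarized: DT_PLAIN, DT_FILE,
# DT_SHARED_FILE and DT_BLOCK expect no secondary nodes, DT_DRBD8 expects
# exactly one, and DT_DISKLESS leaves the disk list empty.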
7478 def _GetInstanceInfoText(instance):
7479 """Compute that text that should be added to the disk's metadata.
7482 return "originstname+%s" % instance.name
7485 def _CalcEta(time_taken, written, total_size):
7486 """Calculates the ETA based on size written and total size.
7488 @param time_taken: The time taken so far
7489 @param written: amount written so far
7490 @param total_size: The total size of data to be written
7491 @return: The remaining time in seconds
7494 avg_time = time_taken / float(written)
7495 return (total_size - written) * avg_time
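# Worked example: with time_taken=30s and written=512 (MiB) out of
# total_size=2048, avg_time = 30 / 512.0 s/MiB, so the returned ETA is
# (2048 - 512) * avg_time = 90 seconds.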
7498 def _WipeDisks(lu, instance):
7499 """Wipes instance disks.
7501 @type lu: L{LogicalUnit}
7502 @param lu: the logical unit on whose behalf we execute
7503 @type instance: L{objects.Instance}
7504 @param instance: the instance whose disks we should wipe
7505 @return: the success of the wipe
7508 node = instance.primary_node
7510 for device in instance.disks:
7511 lu.cfg.SetDiskID(device, node)
7513 logging.info("Pause sync of instance %s disks", instance.name)
7514 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7516 for idx, success in enumerate(result.payload):
7518 logging.warn("pause-sync of instance %s for disk %d failed",
7522 for idx, device in enumerate(instance.disks):
7523 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7524 # MAX_WIPE_CHUNK at max
7525 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7526 constants.MIN_WIPE_CHUNK_PERCENT)
7527 # we _must_ make this an int, otherwise rounding errors will
7529 wipe_chunk_size = int(wipe_chunk_size)
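# Worked example, assuming the usual constants (MIN_WIPE_CHUNK_PERCENT=10,
# MAX_WIPE_CHUNK=1024 MiB): a 5120 MiB disk is wiped in 512 MiB chunks,
# while a 20480 MiB disk would want 2048 MiB and is capped at 1024 MiB.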
7531 lu.LogInfo("* Wiping disk %d", idx)
7532 logging.info("Wiping disk %d for instance %s, node %s using"
7533 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7535 offset = 0
7536 size = device.size
7537 last_output = 0
7538 start_time = time.time()
7540 while offset < size:
7541 wipe_size = min(wipe_chunk_size, size - offset)
7542 logging.debug("Wiping disk %d, offset %s, chunk %s",
7543 idx, offset, wipe_size)
7544 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7545 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7546 (idx, offset, wipe_size))
7547 now = time.time()
7548 offset += wipe_size
7549 if now - last_output >= 60:
7550 eta = _CalcEta(now - start_time, offset, size)
7551 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7552 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7553 last_output = now
7555 logging.info("Resume sync of instance %s disks", instance.name)
7557 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7559 for idx, success in enumerate(result.payload):
7560 if not success:
7561 lu.LogWarning("Resume sync of disk %d failed, please have a"
7562 " look at the status and troubleshoot the issue", idx)
7563 logging.warn("resume-sync of instance %s for disk %d failed",
7564 instance.name, idx)
7567 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7568 """Create all disks for an instance.
7570 This abstracts away some work from AddInstance.
7572 @type lu: L{LogicalUnit}
7573 @param lu: the logical unit on whose behalf we execute
7574 @type instance: L{objects.Instance}
7575 @param instance: the instance whose disks we should create
7577 @param to_skip: list of indices to skip
7578 @type target_node: string
7579 @param target_node: if passed, overrides the target node for creation
7581 @return: the success of the creation
7584 info = _GetInstanceInfoText(instance)
7585 if target_node is None:
7586 pnode = instance.primary_node
7587 all_nodes = instance.all_nodes
7588 else:
7589 pnode = target_node
7590 all_nodes = [pnode]
7592 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7593 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7594 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7596 result.Raise("Failed to create directory '%s' on"
7597 " node %s" % (file_storage_dir, pnode))
7599 # Note: this needs to be kept in sync with adding of disks in
7600 # LUInstanceSetParams
7601 for idx, device in enumerate(instance.disks):
7602 if to_skip and idx in to_skip:
7603 continue
7604 logging.info("Creating volume %s for instance %s",
7605 device.iv_name, instance.name)
7607 for node in all_nodes:
7608 f_create = node == pnode
7609 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
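# Editorial note (not in the original module): f_create doubles as both the
# force_create and the force_open argument of _CreateBlockDev, so devices are
# only forcibly created and opened on the primary node; on secondary nodes of
# mirrored templates creation follows each device type's CreateOnSecondary
# policy.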
7612 def _RemoveDisks(lu, instance, target_node=None):
7613 """Remove all disks for an instance.
7615 This abstracts away some work from `AddInstance()` and
7616 `RemoveInstance()`. Note that in case some of the devices couldn't
7617 be removed, the removal will continue with the other ones (compare
7618 with `_CreateDisks()`).
7620 @type lu: L{LogicalUnit}
7621 @param lu: the logical unit on whose behalf we execute
7622 @type instance: L{objects.Instance}
7623 @param instance: the instance whose disks we should remove
7624 @type target_node: string
7625 @param target_node: used to override the node on which to remove the disks
7627 @return: the success of the removal
7630 logging.info("Removing block devices for instance %s", instance.name)
7633 for device in instance.disks:
7634 if target_node:
7635 edata = [(target_node, device)]
7636 else:
7637 edata = device.ComputeNodeTree(instance.primary_node)
7638 for node, disk in edata:
7639 lu.cfg.SetDiskID(disk, node)
7640 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7642 lu.LogWarning("Could not remove block device %s on node %s,"
7643 " continuing anyway: %s", device.iv_name, node, msg)
7646 if instance.disk_template == constants.DT_FILE:
7647 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7648 if target_node:
7649 tgt = target_node
7650 else:
7651 tgt = instance.primary_node
7652 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7653 if result.fail_msg:
7654 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7655 file_storage_dir, instance.primary_node, result.fail_msg)
7661 def _ComputeDiskSizePerVG(disk_template, disks):
7662 """Compute disk size requirements in the volume group
7665 def _compute(disks, payload):
7666 """Universal algorithm.
7668 """
7669 vgs = {}
7670 for disk in disks:
7671 vgs[disk[constants.IDISK_VG]] = \
7672 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7674 return vgs
7676 # Required free disk space as a function of disk and swap space
7677 req_size_dict = {
7678 constants.DT_DISKLESS: {},
7679 constants.DT_PLAIN: _compute(disks, 0),
7680 # 128 MB are added for drbd metadata for each disk
7681 constants.DT_DRBD8: _compute(disks, 128),
7682 constants.DT_FILE: {},
7683 constants.DT_SHARED_FILE: {},
7684 }
7686 if disk_template not in req_size_dict:
7687 raise errors.ProgrammerError("Disk template '%s' size requirement"
7688 " is unknown" % disk_template)
7690 return req_size_dict[disk_template]
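# Editor's example (not in the original module): for disks of 1024 MiB in
# VG "xenvg" and 512 MiB in VG "other", _ComputeDiskSizePerVG returns
# {"xenvg": 1152, "other": 640} for constants.DT_DRBD8 (one 128 MiB metadata
# allowance per disk) and {"xenvg": 1024, "other": 512} for
# constants.DT_PLAIN; file-based templates need no volume group space.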
7693 def _ComputeDiskSize(disk_template, disks):
7694 """Compute disk size requirements in the volume group
7697 # Required free disk space as a function of disk and swap space
7698 req_size_dict = {
7699 constants.DT_DISKLESS: None,
7700 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7701 # 128 MB are added for drbd metadata for each disk
7702 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7703 constants.DT_FILE: None,
7704 constants.DT_SHARED_FILE: 0,
7705 constants.DT_BLOCK: 0,
7706 }
7708 if disk_template not in req_size_dict:
7709 raise errors.ProgrammerError("Disk template '%s' size requirement"
7710 " is unknown" % disk_template)
7712 return req_size_dict[disk_template]
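# Editor's example (not in the original module): for two disks of 1024 and
# 512 MiB, _ComputeDiskSize returns 1536 for constants.DT_PLAIN and 1792 for
# constants.DT_DRBD8 (two 128 MiB metadata allowances); None and 0 both mean
# "no space needed in the volume group" for the remaining templates.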
7715 def _FilterVmNodes(lu, nodenames):
7716 """Filters out non-vm_capable nodes from a list.
7718 @type lu: L{LogicalUnit}
7719 @param lu: the logical unit for which we check
7720 @type nodenames: list
7721 @param nodenames: the list of nodes on which we should check
7723 @return: the list of vm-capable nodes
7726 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7727 return [name for name in nodenames if name not in vm_nodes]
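# Editor's example (not in the original module): if the configuration marks
# "node2" as vm_capable=False, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node3"]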
7730 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7731 """Hypervisor parameter validation.
7733 This function abstracts the hypervisor parameter validation to be
7734 used in both instance create and instance modify.
7736 @type lu: L{LogicalUnit}
7737 @param lu: the logical unit for which we check
7738 @type nodenames: list
7739 @param nodenames: the list of nodes on which we should check
7740 @type hvname: string
7741 @param hvname: the name of the hypervisor we should use
7742 @type hvparams: dict
7743 @param hvparams: the parameters which we need to check
7744 @raise errors.OpPrereqError: if the parameters are not valid
7747 nodenames = _FilterVmNodes(lu, nodenames)
7748 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7749 hvname,
7750 hvparams)
7751 for node in nodenames:
7752 info = hvinfo[node]
7753 if info.offline:
7754 continue
7755 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7758 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7759 """OS parameters validation.
7761 @type lu: L{LogicalUnit}
7762 @param lu: the logical unit for which we check
7763 @type required: boolean
7764 @param required: whether the validation should fail if the OS is not
7765 found
7766 @type nodenames: list
7767 @param nodenames: the list of nodes on which we should check
7768 @type osname: string
7769 @param osname: the name of the OS we should use
7770 @type osparams: dict
7771 @param osparams: the parameters which we need to check
7772 @raise errors.OpPrereqError: if the parameters are not valid
7775 nodenames = _FilterVmNodes(lu, nodenames)
7776 result = lu.rpc.call_os_validate(required, nodenames, osname,
7777 [constants.OS_VALIDATE_PARAMETERS],
7778 osparams)
7779 for node, nres in result.items():
7780 # we don't check for offline cases since this should be run only
7781 # against the master node and/or an instance's nodes
7782 nres.Raise("OS Parameters validation failed on node %s" % node)
7783 if not nres.payload:
7784 lu.LogInfo("OS %s not found on node %s, validation skipped",
7788 class LUInstanceCreate(LogicalUnit):
7789 """Create an instance.
7792 HPATH = "instance-add"
7793 HTYPE = constants.HTYPE_INSTANCE
7796 def CheckArguments(self):
7800 # do not require name_check to ease forward/backward compatibility
7802 if self.op.no_install and self.op.start:
7803 self.LogInfo("No-installation mode selected, disabling startup")
7804 self.op.start = False
7805 # validate/normalize the instance name
7806 self.op.instance_name = \
7807 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7809 if self.op.ip_check and not self.op.name_check:
7810 # TODO: make the ip check more flexible and not depend on the name check
7811 raise errors.OpPrereqError("Cannot do IP address check without a name"
7812 " check", errors.ECODE_INVAL)
7814 # check nics' parameter names
7815 for nic in self.op.nics:
7816 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7818 # check disks. parameter names and consistent adopt/no-adopt strategy
7819 has_adopt = has_no_adopt = False
7820 for disk in self.op.disks:
7821 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7822 if constants.IDISK_ADOPT in disk:
7823 has_adopt = True
7824 else:
7825 has_no_adopt = True
7826 if has_adopt and has_no_adopt:
7827 raise errors.OpPrereqError("Either all disks are adopted or none is",
7828 errors.ECODE_INVAL)
7829 if has_adopt:
7830 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7831 raise errors.OpPrereqError("Disk adoption is not supported for the"
7832 " '%s' disk template" %
7833 self.op.disk_template,
7834 errors.ECODE_INVAL)
7835 if self.op.iallocator is not None:
7836 raise errors.OpPrereqError("Disk adoption not allowed with an"
7837 " iallocator script", errors.ECODE_INVAL)
7838 if self.op.mode == constants.INSTANCE_IMPORT:
7839 raise errors.OpPrereqError("Disk adoption not allowed for"
7840 " instance import", errors.ECODE_INVAL)
7842 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7843 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7844 " but no 'adopt' parameter given" %
7845 self.op.disk_template,
7846 errors.ECODE_INVAL)
7848 self.adopt_disks = has_adopt
7850 # instance name verification
7851 if self.op.name_check:
7852 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7853 self.op.instance_name = self.hostname1.name
7854 # used in CheckPrereq for ip ping check
7855 self.check_ip = self.hostname1.ip
7856 else:
7857 self.check_ip = None
7859 # file storage checks
7860 if (self.op.file_driver and
7861 not self.op.file_driver in constants.FILE_DRIVER):
7862 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7863 self.op.file_driver, errors.ECODE_INVAL)
7865 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7866 raise errors.OpPrereqError("File storage directory path not absolute",
7869 ### Node/iallocator related checks
7870 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7872 if self.op.pnode is not None:
7873 if self.op.disk_template in constants.DTS_INT_MIRROR:
7874 if self.op.snode is None:
7875 raise errors.OpPrereqError("The networked disk templates need"
7876 " a mirror node", errors.ECODE_INVAL)
7878 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7880 self.op.snode = None
7882 self._cds = _GetClusterDomainSecret()
7884 if self.op.mode == constants.INSTANCE_IMPORT:
7885 # On import force_variant must be True, because if we forced it at
7886 # initial install, our only chance when importing it back is that it
7887 # works again!
7888 self.op.force_variant = True
7890 if self.op.no_install:
7891 self.LogInfo("No-installation mode has no effect during import")
7893 elif self.op.mode == constants.INSTANCE_CREATE:
7894 if self.op.os_type is None:
7895 raise errors.OpPrereqError("No guest OS specified",
7897 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7898 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7899 " installation" % self.op.os_type,
7901 if self.op.disk_template is None:
7902 raise errors.OpPrereqError("No disk template specified",
7905 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7906 # Check handshake to ensure both clusters have the same domain secret
7907 src_handshake = self.op.source_handshake
7908 if not src_handshake:
7909 raise errors.OpPrereqError("Missing source handshake",
7912 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7913 src_handshake)
7914 if errmsg:
7915 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7916 errors.ECODE_INVAL)
7918 # Load and check source CA
7919 self.source_x509_ca_pem = self.op.source_x509_ca
7920 if not self.source_x509_ca_pem:
7921 raise errors.OpPrereqError("Missing source X509 CA",
7925 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7927 except OpenSSL.crypto.Error, err:
7928 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7929 (err, ), errors.ECODE_INVAL)
7931 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7932 if errcode is not None:
7933 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7936 self.source_x509_ca = cert
7938 src_instance_name = self.op.source_instance_name
7939 if not src_instance_name:
7940 raise errors.OpPrereqError("Missing source instance name",
7943 self.source_instance_name = \
7944 netutils.GetHostname(name=src_instance_name).name
7947 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7948 self.op.mode, errors.ECODE_INVAL)
7950 def ExpandNames(self):
7951 """ExpandNames for CreateInstance.
7953 Figure out the right locks for instance creation.
7956 self.needed_locks = {}
7958 instance_name = self.op.instance_name
7959 # this is just a preventive check, but someone might still add this
7960 # instance in the meantime, and creation will fail at lock-add time
7961 if instance_name in self.cfg.GetInstanceList():
7962 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7963 instance_name, errors.ECODE_EXISTS)
7965 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7967 if self.op.iallocator:
7968 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7970 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7971 nodelist = [self.op.pnode]
7972 if self.op.snode is not None:
7973 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7974 nodelist.append(self.op.snode)
7975 self.needed_locks[locking.LEVEL_NODE] = nodelist
7977 # in case of import lock the source node too
7978 if self.op.mode == constants.INSTANCE_IMPORT:
7979 src_node = self.op.src_node
7980 src_path = self.op.src_path
7982 if src_path is None:
7983 self.op.src_path = src_path = self.op.instance_name
7985 if src_node is None:
7986 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7987 self.op.src_node = None
7988 if os.path.isabs(src_path):
7989 raise errors.OpPrereqError("Importing an instance from an absolute"
7990 " path requires a source node option",
7993 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7994 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7995 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7996 if not os.path.isabs(src_path):
7997 self.op.src_path = src_path = \
7998 utils.PathJoin(constants.EXPORT_DIR, src_path)
8000 def _RunAllocator(self):
8001 """Run the allocator based on input opcode.
8004 nics = [n.ToDict() for n in self.nics]
8005 ial = IAllocator(self.cfg, self.rpc,
8006 mode=constants.IALLOCATOR_MODE_ALLOC,
8007 name=self.op.instance_name,
8008 disk_template=self.op.disk_template,
8009 tags=self.op.tags,
8010 os=self.op.os_type,
8011 vcpus=self.be_full[constants.BE_VCPUS],
8012 mem_size=self.be_full[constants.BE_MEMORY],
8013 disks=self.disks,
8014 nics=nics,
8015 hypervisor=self.op.hypervisor,
8016 )
8018 ial.Run(self.op.iallocator)
8020 if not ial.success:
8021 raise errors.OpPrereqError("Can't compute nodes using"
8022 " iallocator '%s': %s" %
8023 (self.op.iallocator, ial.info),
8024 errors.ECODE_NORES)
8025 if len(ial.result) != ial.required_nodes:
8026 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8027 " of nodes (%s), required %s" %
8028 (self.op.iallocator, len(ial.result),
8029 ial.required_nodes), errors.ECODE_FAULT)
8030 self.op.pnode = ial.result[0]
8031 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8032 self.op.instance_name, self.op.iallocator,
8033 utils.CommaJoin(ial.result))
8034 if ial.required_nodes == 2:
8035 self.op.snode = ial.result[1]
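# Editorial note (not in the original module): for mirrored disk templates
# the allocator must return two node names, e.g. ["node3", "node7"], which
# become self.op.pnode and self.op.snode; non-mirrored templates only
# consume ial.result[0].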
8037 def BuildHooksEnv(self):
8038 """Build hooks env.
8040 This runs on master, primary and secondary nodes of the instance.
8042 """
8043 env = {
8044 "ADD_MODE": self.op.mode,
8045 }
8046 if self.op.mode == constants.INSTANCE_IMPORT:
8047 env["SRC_NODE"] = self.op.src_node
8048 env["SRC_PATH"] = self.op.src_path
8049 env["SRC_IMAGES"] = self.src_images
8051 env.update(_BuildInstanceHookEnv(
8052 name=self.op.instance_name,
8053 primary_node=self.op.pnode,
8054 secondary_nodes=self.secondaries,
8055 status=self.op.start,
8056 os_type=self.op.os_type,
8057 memory=self.be_full[constants.BE_MEMORY],
8058 vcpus=self.be_full[constants.BE_VCPUS],
8059 nics=_NICListToTuple(self, self.nics),
8060 disk_template=self.op.disk_template,
8061 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8062 for d in self.disks],
8063 bep=self.be_full,
8064 hvp=self.hv_full,
8065 hypervisor_name=self.op.hypervisor,
8066 ))
8068 return env
8070 def BuildHooksNodes(self):
8071 """Build hooks nodes.
8074 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8075 return nl, nl
8077 def _ReadExportInfo(self):
8078 """Reads the export information from disk.
8080 It will override the opcode source node and path with the actual
8081 information, if these two were not specified before.
8083 @return: the export information
8086 assert self.op.mode == constants.INSTANCE_IMPORT
8088 src_node = self.op.src_node
8089 src_path = self.op.src_path
8091 if src_node is None:
8092 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8093 exp_list = self.rpc.call_export_list(locked_nodes)
8094 found = False
8095 for node in exp_list:
8096 if exp_list[node].fail_msg:
8097 continue
8098 if src_path in exp_list[node].payload:
8099 found = True
8100 self.op.src_node = src_node = node
8101 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8102 src_path)
8103 break
8104 if not found:
8105 raise errors.OpPrereqError("No export found for relative path %s" %
8106 src_path, errors.ECODE_INVAL)
8108 _CheckNodeOnline(self, src_node)
8109 result = self.rpc.call_export_info(src_node, src_path)
8110 result.Raise("No export or invalid export found in dir %s" % src_path)
8112 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8113 if not export_info.has_section(constants.INISECT_EXP):
8114 raise errors.ProgrammerError("Corrupted export config",
8115 errors.ECODE_ENVIRON)
8117 ei_version = export_info.get(constants.INISECT_EXP, "version")
8118 if (int(ei_version) != constants.EXPORT_VERSION):
8119 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8120 (ei_version, constants.EXPORT_VERSION),
8121 errors.ECODE_ENVIRON)
8123 return export_info
8124 def _ReadExportParams(self, einfo):
8125 """Use export parameters as defaults.
8127 In case the opcode doesn't specify (as in override) some instance
8128 parameters, then try to use them from the export information, if
8132 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8134 if self.op.disk_template is None:
8135 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8136 self.op.disk_template = einfo.get(constants.INISECT_INS,
8137 "disk_template")
8138 else:
8139 raise errors.OpPrereqError("No disk template specified and the export"
8140 " is missing the disk_template information",
8141 errors.ECODE_INVAL)
8143 if not self.op.disks:
8144 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8146 # TODO: import the disk iv_name too
8147 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8148 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8149 disks.append({constants.IDISK_SIZE: disk_sz})
8150 self.op.disks = disks
8152 raise errors.OpPrereqError("No disk info specified and the export"
8153 " is missing the disk information",
8156 if (not self.op.nics and
8157 einfo.has_option(constants.INISECT_INS, "nic_count")):
8159 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8161 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8162 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8167 if (self.op.hypervisor is None and
8168 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8169 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8170 if einfo.has_section(constants.INISECT_HYP):
8171 # use the export parameters but do not override the ones
8172 # specified by the user
8173 for name, value in einfo.items(constants.INISECT_HYP):
8174 if name not in self.op.hvparams:
8175 self.op.hvparams[name] = value
8177 if einfo.has_section(constants.INISECT_BEP):
8178 # use the parameters, without overriding
8179 for name, value in einfo.items(constants.INISECT_BEP):
8180 if name not in self.op.beparams:
8181 self.op.beparams[name] = value
8183 # try to read the parameters old style, from the main section
8184 for name in constants.BES_PARAMETERS:
8185 if (name not in self.op.beparams and
8186 einfo.has_option(constants.INISECT_INS, name)):
8187 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8189 if einfo.has_section(constants.INISECT_OSP):
8190 # use the parameters, without overriding
8191 for name, value in einfo.items(constants.INISECT_OSP):
8192 if name not in self.op.osparams:
8193 self.op.osparams[name] = value
8195 def _RevertToDefaults(self, cluster):
8196 """Revert the instance parameters to the default values.
8200 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8201 for name in self.op.hvparams.keys():
8202 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8203 del self.op.hvparams[name]
8205 be_defs = cluster.SimpleFillBE({})
8206 for name in self.op.beparams.keys():
8207 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8208 del self.op.beparams[name]
8210 nic_defs = cluster.SimpleFillNIC({})
8211 for nic in self.op.nics:
8212 for name in constants.NICS_PARAMETERS:
8213 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8214 del nic[name]
8216 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8217 for name in self.op.osparams.keys():
8218 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8219 del self.op.osparams[name]
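# Editor's example (not in the original module): with identify_defaults set,
# an opcode that passes e.g. beparams={"vcpus": <current cluster default>}
# has that key dropped here, so the new instance keeps following the cluster
# default instead of pinning today's value.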
8221 def CheckPrereq(self):
8222 """Check prerequisites.
8225 if self.op.mode == constants.INSTANCE_IMPORT:
8226 export_info = self._ReadExportInfo()
8227 self._ReadExportParams(export_info)
8229 if (not self.cfg.GetVGName() and
8230 self.op.disk_template not in constants.DTS_NOT_LVM):
8231 raise errors.OpPrereqError("Cluster does not support lvm-based"
8232 " instances", errors.ECODE_STATE)
8234 if self.op.hypervisor is None:
8235 self.op.hypervisor = self.cfg.GetHypervisorType()
8237 cluster = self.cfg.GetClusterInfo()
8238 enabled_hvs = cluster.enabled_hypervisors
8239 if self.op.hypervisor not in enabled_hvs:
8240 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8241 " cluster (%s)" % (self.op.hypervisor,
8242 ",".join(enabled_hvs)),
8245 # check hypervisor parameter syntax (locally)
8246 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8247 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8248 self.op.hvparams)
8249 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8250 hv_type.CheckParameterSyntax(filled_hvp)
8251 self.hv_full = filled_hvp
8252 # check that we don't specify global parameters on an instance
8253 _CheckGlobalHvParams(self.op.hvparams)
8255 # fill and remember the beparams dict
8256 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8257 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8259 # build os parameters
8260 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8262 # now that hvp/bep are in final format, let's reset to defaults,
8263 # if told to do so
8264 if self.op.identify_defaults:
8265 self._RevertToDefaults(cluster)
8267 # NIC buildup
8268 self.nics = []
8269 for idx, nic in enumerate(self.op.nics):
8270 nic_mode_req = nic.get(constants.INIC_MODE, None)
8271 nic_mode = nic_mode_req
8272 if nic_mode is None:
8273 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8275 # in routed mode, for the first nic, the default ip is 'auto'
8276 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8277 default_ip_mode = constants.VALUE_AUTO
8279 default_ip_mode = constants.VALUE_NONE
8281 # ip validity checks
8282 ip = nic.get(constants.INIC_IP, default_ip_mode)
8283 if ip is None or ip.lower() == constants.VALUE_NONE:
8284 nic_ip = None
8285 elif ip.lower() == constants.VALUE_AUTO:
8286 if not self.op.name_check:
8287 raise errors.OpPrereqError("IP address set to auto but name checks"
8288 " have been skipped",
8289 errors.ECODE_INVAL)
8290 nic_ip = self.hostname1.ip
8291 else:
8292 if not netutils.IPAddress.IsValid(ip):
8293 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8294 errors.ECODE_INVAL)
8295 nic_ip = ip
8297 # TODO: check the ip address for uniqueness
8298 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8299 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8302 # MAC address verification
8303 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8304 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8305 mac = utils.NormalizeAndValidateMac(mac)
8307 try:
8308 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8309 except errors.ReservationError:
8310 raise errors.OpPrereqError("MAC address %s already in use"
8311 " in cluster" % mac,
8312 errors.ECODE_NOTUNIQUE)
8314 # Build nic parameters
8315 link = nic.get(constants.INIC_LINK, None)
8316 nicparams = {}
8317 if nic_mode_req:
8318 nicparams[constants.NIC_MODE] = nic_mode_req
8319 if link:
8320 nicparams[constants.NIC_LINK] = link
8322 check_params = cluster.SimpleFillNIC(nicparams)
8323 objects.NIC.CheckParameterSyntax(check_params)
8324 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8326 # disk checks/pre-build
8327 default_vg = self.cfg.GetVGName()
8328 self.disks = []
8329 for disk in self.op.disks:
8330 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8331 if mode not in constants.DISK_ACCESS_SET:
8332 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8333 mode, errors.ECODE_INVAL)
8334 size = disk.get(constants.IDISK_SIZE, None)
8335 if size is None:
8336 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8337 try:
8338 size = int(size)
8339 except (TypeError, ValueError):
8340 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8341 errors.ECODE_INVAL)
8343 data_vg = disk.get(constants.IDISK_VG, default_vg)
8344 new_disk = {
8345 constants.IDISK_SIZE: size,
8346 constants.IDISK_MODE: mode,
8347 constants.IDISK_VG: data_vg,
8348 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8349 }
8350 if constants.IDISK_ADOPT in disk:
8351 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8352 self.disks.append(new_disk)
8354 if self.op.mode == constants.INSTANCE_IMPORT:
8356 # Check that the new instance doesn't have less disks than the export
8357 instance_disks = len(self.disks)
8358 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8359 if instance_disks < export_disks:
8360 raise errors.OpPrereqError("Not enough disks to import."
8361 " (instance: %d, export: %d)" %
8362 (instance_disks, export_disks),
8363 errors.ECODE_INVAL)
8365 disk_images = []
8366 for idx in range(export_disks):
8367 option = 'disk%d_dump' % idx
8368 if export_info.has_option(constants.INISECT_INS, option):
8369 # FIXME: are the old os-es, disk sizes, etc. useful?
8370 export_name = export_info.get(constants.INISECT_INS, option)
8371 image = utils.PathJoin(self.op.src_path, export_name)
8372 disk_images.append(image)
8373 else:
8374 disk_images.append(False)
8376 self.src_images = disk_images
8378 old_name = export_info.get(constants.INISECT_INS, 'name')
8379 try:
8380 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8381 except (TypeError, ValueError), err:
8382 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8383 " an integer: %s" % str(err),
8385 if self.op.instance_name == old_name:
8386 for idx, nic in enumerate(self.nics):
8387 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8388 nic_mac_ini = 'nic%d_mac' % idx
8389 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8391 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8393 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8394 if self.op.ip_check:
8395 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8396 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8397 (self.check_ip, self.op.instance_name),
8398 errors.ECODE_NOTUNIQUE)
8400 #### mac address generation
8401 # By generating here the mac address both the allocator and the hooks get
8402 # the real final mac address rather than the 'auto' or 'generate' value.
8403 # There is a race condition between the generation and the instance object
8404 # creation, which means that we know the mac is valid now, but we're not
8405 # sure it will be when we actually add the instance. If things go bad
8406 # adding the instance will abort because of a duplicate mac, and the
8407 # creation job will fail.
8408 for nic in self.nics:
8409 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8410 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8414 if self.op.iallocator is not None:
8415 self._RunAllocator()
8417 #### node related checks
8419 # check primary node
8420 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8421 assert self.pnode is not None, \
8422 "Cannot retrieve locked node %s" % self.op.pnode
8424 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8425 pnode.name, errors.ECODE_STATE)
8427 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8428 pnode.name, errors.ECODE_STATE)
8429 if not pnode.vm_capable:
8430 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8431 " '%s'" % pnode.name, errors.ECODE_STATE)
8433 self.secondaries = []
8435 # mirror node verification
8436 if self.op.disk_template in constants.DTS_INT_MIRROR:
8437 if self.op.snode == pnode.name:
8438 raise errors.OpPrereqError("The secondary node cannot be the"
8439 " primary node", errors.ECODE_INVAL)
8440 _CheckNodeOnline(self, self.op.snode)
8441 _CheckNodeNotDrained(self, self.op.snode)
8442 _CheckNodeVmCapable(self, self.op.snode)
8443 self.secondaries.append(self.op.snode)
8445 nodenames = [pnode.name] + self.secondaries
8447 if not self.adopt_disks:
8448 # Check lv size requirements, if not adopting
8449 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8450 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8452 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8453 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8454 disk[constants.IDISK_ADOPT])
8455 for disk in self.disks])
8456 if len(all_lvs) != len(self.disks):
8457 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8459 for lv_name in all_lvs:
8461 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8462 # to ReserveLV uses the same syntax
8463 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8464 except errors.ReservationError:
8465 raise errors.OpPrereqError("LV named %s used by another instance" %
8466 lv_name, errors.ECODE_NOTUNIQUE)
8468 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8469 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8471 node_lvs = self.rpc.call_lv_list([pnode.name],
8472 vg_names.payload.keys())[pnode.name]
8473 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8474 node_lvs = node_lvs.payload
8476 delta = all_lvs.difference(node_lvs.keys())
8478 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8479 utils.CommaJoin(delta),
8481 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8483 raise errors.OpPrereqError("Online logical volumes found, cannot"
8484 " adopt: %s" % utils.CommaJoin(online_lvs),
8486 # update the size of disk based on what is found
8487 for dsk in self.disks:
8488 dsk[constants.IDISK_SIZE] = \
8489 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8490 dsk[constants.IDISK_ADOPT])][0]))
8492 elif self.op.disk_template == constants.DT_BLOCK:
8493 # Normalize and de-duplicate device paths
8494 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8495 for disk in self.disks])
8496 if len(all_disks) != len(self.disks):
8497 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8499 baddisks = [d for d in all_disks
8500 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8501 if baddisks:
8502 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8503 " cannot be adopted" %
8504 (", ".join(baddisks),
8505 constants.ADOPTABLE_BLOCKDEV_ROOT),
8506 errors.ECODE_INVAL)
8508 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8509 list(all_disks))[pnode.name]
8510 node_disks.Raise("Cannot get block device information from node %s" %
8512 node_disks = node_disks.payload
8513 delta = all_disks.difference(node_disks.keys())
8515 raise errors.OpPrereqError("Missing block device(s): %s" %
8516 utils.CommaJoin(delta),
8518 for dsk in self.disks:
8519 dsk[constants.IDISK_SIZE] = \
8520 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8522 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8524 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8525 # check OS parameters (remotely)
8526 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8528 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8530 # memory check on primary node
8531 if self.op.start:
8532 _CheckNodeFreeMemory(self, self.pnode.name,
8533 "creating instance %s" % self.op.instance_name,
8534 self.be_full[constants.BE_MEMORY],
8535 self.op.hypervisor)
8537 self.dry_run_result = list(nodenames)
8539 def Exec(self, feedback_fn):
8540 """Create and add the instance to the cluster.
8543 instance = self.op.instance_name
8544 pnode_name = self.pnode.name
8546 ht_kind = self.op.hypervisor
8547 if ht_kind in constants.HTS_REQ_PORT:
8548 network_port = self.cfg.AllocatePort()
8549 else:
8550 network_port = None
8552 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8553 # this is needed because os.path.join does not accept None arguments
8554 if self.op.file_storage_dir is None:
8555 string_file_storage_dir = ""
8557 string_file_storage_dir = self.op.file_storage_dir
8559 # build the full file storage dir path
8560 if self.op.disk_template == constants.DT_SHARED_FILE:
8561 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8563 get_fsd_fn = self.cfg.GetFileStorageDir
8565 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8566 string_file_storage_dir, instance)
8568 file_storage_dir = ""
8570 disks = _GenerateDiskTemplate(self,
8571 self.op.disk_template,
8572 instance, pnode_name,
8573 self.secondaries,
8574 self.disks,
8575 file_storage_dir,
8576 self.op.file_driver,
8577 0,
8578 feedback_fn)
8580 iobj = objects.Instance(name=instance, os=self.op.os_type,
8581 primary_node=pnode_name,
8582 nics=self.nics, disks=disks,
8583 disk_template=self.op.disk_template,
8584 admin_up=False,
8585 network_port=network_port,
8586 beparams=self.op.beparams,
8587 hvparams=self.op.hvparams,
8588 hypervisor=self.op.hypervisor,
8589 osparams=self.op.osparams,
8590 )
8592 if self.adopt_disks:
8593 if self.op.disk_template == constants.DT_PLAIN:
8594 # rename LVs to the newly-generated names; we need to construct
8595 # 'fake' LV disks with the old data, plus the new unique_id
8596 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8597 rename_to = []
8598 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8599 rename_to.append(t_dsk.logical_id)
8600 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8601 self.cfg.SetDiskID(t_dsk, pnode_name)
8602 result = self.rpc.call_blockdev_rename(pnode_name,
8603 zip(tmp_disks, rename_to))
8604 result.Raise("Failed to rename adoped LVs")
8606 feedback_fn("* creating instance disks...")
8608 _CreateDisks(self, iobj)
8609 except errors.OpExecError:
8610 self.LogWarning("Device creation failed, reverting...")
8612 _RemoveDisks(self, iobj)
8614 self.cfg.ReleaseDRBDMinors(instance)
8617 feedback_fn("adding instance %s to cluster config" % instance)
8619 self.cfg.AddInstance(iobj, self.proc.GetECId())
8621 # Declare that we don't want to remove the instance lock anymore, as we've
8622 # added the instance to the config
8623 del self.remove_locks[locking.LEVEL_INSTANCE]
8625 if self.op.mode == constants.INSTANCE_IMPORT:
8626 # Release unused nodes
8627 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8629 else:
8630 _ReleaseLocks(self, locking.LEVEL_NODE)
8632 disk_abort = False
8633 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8634 feedback_fn("* wiping instance disks...")
8635 try:
8636 _WipeDisks(self, iobj)
8637 except errors.OpExecError, err:
8638 logging.exception("Wiping disks failed")
8639 self.LogWarning("Wiping instance disks failed (%s)", err)
8640 disk_abort = True
8642 if disk_abort:
8643 # Something is already wrong with the disks, don't do anything else
8644 pass
8645 elif self.op.wait_for_sync:
8646 disk_abort = not _WaitForSync(self, iobj)
8647 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8648 # make sure the disks are not degraded (still sync-ing is ok)
8650 feedback_fn("* checking mirrors status")
8651 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8652 else:
8653 disk_abort = False
8655 if disk_abort:
8656 _RemoveDisks(self, iobj)
8657 self.cfg.RemoveInstance(iobj.name)
8658 # Make sure the instance lock gets removed
8659 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8660 raise errors.OpExecError("There are some degraded disks for"
8663 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8664 if self.op.mode == constants.INSTANCE_CREATE:
8665 if not self.op.no_install:
8666 feedback_fn("* running the instance OS create scripts...")
8667 # FIXME: pass debug option from opcode to backend
8668 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8669 self.op.debug_level)
8670 result.Raise("Could not add os for instance %s"
8671 " on node %s" % (instance, pnode_name))
8673 elif self.op.mode == constants.INSTANCE_IMPORT:
8674 feedback_fn("* running the instance OS import scripts...")
8676 transfers = []
8678 for idx, image in enumerate(self.src_images):
8679 if not image:
8680 continue
8682 # FIXME: pass debug option from opcode to backend
8683 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8684 constants.IEIO_FILE, (image, ),
8685 constants.IEIO_SCRIPT,
8686 (iobj.disks[idx], idx),
8687 None)
8688 transfers.append(dt)
8690 import_result = \
8691 masterd.instance.TransferInstanceData(self, feedback_fn,
8692 self.op.src_node, pnode_name,
8693 self.pnode.secondary_ip,
8694 iobj, transfers)
8695 if not compat.all(import_result):
8696 self.LogWarning("Some disks for instance %s on node %s were not"
8697 " imported successfully" % (instance, pnode_name))
8699 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8700 feedback_fn("* preparing remote import...")
8701 # The source cluster will stop the instance before attempting to make a
8702 # connection. In some cases stopping an instance can take a long time,
8703 # hence the shutdown timeout is added to the connection timeout.
8704 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8705 self.op.source_shutdown_timeout)
8706 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8708 assert iobj.primary_node == self.pnode.name
8709 disk_results = \
8710 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8711 self.source_x509_ca,
8712 self._cds, timeouts)
8713 if not compat.all(disk_results):
8714 # TODO: Should the instance still be started, even if some disks
8715 # failed to import (valid for local imports, too)?
8716 self.LogWarning("Some disks for instance %s on node %s were not"
8717 " imported successfully" % (instance, pnode_name))
8719 # Run rename script on newly imported instance
8720 assert iobj.name == instance
8721 feedback_fn("Running rename script for %s" % instance)
8722 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8723 self.source_instance_name,
8724 self.op.debug_level)
8726 self.LogWarning("Failed to run rename script for %s on node"
8727 " %s: %s" % (instance, pnode_name, result.fail_msg))
8729 else:
8730 # also checked in the prereq part
8731 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8732 % self.op.mode)
8734 if self.op.start:
8735 iobj.admin_up = True
8736 self.cfg.Update(iobj, feedback_fn)
8737 logging.info("Starting instance %s on node %s", instance, pnode_name)
8738 feedback_fn("* starting instance...")
8739 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8740 result.Raise("Could not start instance")
8742 return list(iobj.all_nodes)
8745 class LUInstanceConsole(NoHooksLU):
8746 """Connect to an instance's console.
8748 This is somewhat special in that it returns the command line that
8749 you need to run on the master node in order to connect to the
8750 console.
8752 """
8753 REQ_BGL = False
8755 def ExpandNames(self):
8756 self._ExpandAndLockInstance()
8758 def CheckPrereq(self):
8759 """Check prerequisites.
8761 This checks that the instance is in the cluster.
8764 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8765 assert self.instance is not None, \
8766 "Cannot retrieve locked instance %s" % self.op.instance_name
8767 _CheckNodeOnline(self, self.instance.primary_node)
8769 def Exec(self, feedback_fn):
8770 """Connect to the console of an instance
8773 instance = self.instance
8774 node = instance.primary_node
8776 node_insts = self.rpc.call_instance_list([node],
8777 [instance.hypervisor])[node]
8778 node_insts.Raise("Can't get node information from %s" % node)
8780 if instance.name not in node_insts.payload:
8781 if instance.admin_up:
8782 state = constants.INSTST_ERRORDOWN
8784 state = constants.INSTST_ADMINDOWN
8785 raise errors.OpExecError("Instance %s is not running (state %s)" %
8786 (instance.name, state))
8788 logging.debug("Connecting to console of %s on %s", instance.name, node)
8790 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8793 def _GetInstanceConsole(cluster, instance):
8794 """Returns console information for an instance.
8796 @type cluster: L{objects.Cluster}
8797 @type instance: L{objects.Instance}
8801 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8802 # beparams and hvparams are passed separately, to avoid editing the
8803 # instance and then saving the defaults in the instance itself.
8804 hvparams = cluster.FillHV(instance)
8805 beparams = cluster.FillBE(instance)
8806 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8808 assert console.instance == instance.name
8809 assert console.Validate()
8811 return console.ToDict()
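# Editor's example (not in the original module): a hypothetical return value
# for an SSH-based console could look like
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root", "command": [...]}
# i.e. the dict form of an objects.InstanceConsole, which clients such as
# "gnt-instance console" turn back into a connect command.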
8814 class LUInstanceReplaceDisks(LogicalUnit):
8815 """Replace the disks of an instance.
8818 HPATH = "mirrors-replace"
8819 HTYPE = constants.HTYPE_INSTANCE
8822 def CheckArguments(self):
8823 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8824 self.op.iallocator)
8826 def ExpandNames(self):
8827 self._ExpandAndLockInstance()
8829 assert locking.LEVEL_NODE not in self.needed_locks
8830 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8832 assert self.op.iallocator is None or self.op.remote_node is None, \
8833 "Conflicting options"
8835 if self.op.remote_node is not None:
8836 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8838 # Warning: do not remove the locking of the new secondary here
8839 # unless DRBD8.AddChildren is changed to work in parallel;
8840 # currently it doesn't since parallel invocations of
8841 # FindUnusedMinor will conflict
8842 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8843 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8845 self.needed_locks[locking.LEVEL_NODE] = []
8846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8848 if self.op.iallocator is not None:
8849 # iallocator will select a new node in the same group
8850 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8852 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8853 self.op.iallocator, self.op.remote_node,
8854 self.op.disks, False, self.op.early_release)
8856 self.tasklets = [self.replacer]
8858 def DeclareLocks(self, level):
8859 if level == locking.LEVEL_NODEGROUP:
8860 assert self.op.remote_node is None
8861 assert self.op.iallocator is not None
8862 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8864 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8865 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8866 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8868 elif level == locking.LEVEL_NODE:
8869 if self.op.iallocator is not None:
8870 assert self.op.remote_node is None
8871 assert not self.needed_locks[locking.LEVEL_NODE]
8873 # Lock member nodes of all locked groups
8874 self.needed_locks[locking.LEVEL_NODE] = [node_name
8875 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8876 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8877 else:
8878 self._LockInstancesNodes()
8880 def BuildHooksEnv(self):
8883 This runs on the master, the primary and all the secondaries.
8886 instance = self.replacer.instance
8888 "MODE": self.op.mode,
8889 "NEW_SECONDARY": self.op.remote_node,
8890 "OLD_SECONDARY": instance.secondary_nodes[0],
8892 env.update(_BuildInstanceHookEnvByObject(self, instance))
8894 return env
8895 def BuildHooksNodes(self):
8896 """Build hooks nodes.
8899 instance = self.replacer.instance
8900 nl = [
8901 self.cfg.GetMasterNode(),
8902 instance.primary_node,
8903 ]
8904 if self.op.remote_node is not None:
8905 nl.append(self.op.remote_node)
8907 return nl, nl
8908 def CheckPrereq(self):
8909 """Check prerequisites.
8912 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8913 self.op.iallocator is None)
8915 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8916 if owned_groups:
8917 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8918 if owned_groups != groups:
8919 raise errors.OpExecError("Node groups used by instance '%s' changed"
8920 " since lock was acquired, current list is %r,"
8921 " used to be '%s'" %
8922 (self.op.instance_name,
8923 utils.CommaJoin(groups),
8924 utils.CommaJoin(owned_groups)))
8926 return LogicalUnit.CheckPrereq(self)
8929 class TLReplaceDisks(Tasklet):
8930 """Replaces disks for an instance.
8932 Note: Locking is not within the scope of this class.
8935 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8936 disks, delay_iallocator, early_release):
8937 """Initializes this class.
8940 Tasklet.__init__(self, lu)
8942 # Parameters
8943 self.instance_name = instance_name
8944 self.mode = mode
8945 self.iallocator_name = iallocator_name
8946 self.remote_node = remote_node
8947 self.disks = disks
8948 self.delay_iallocator = delay_iallocator
8949 self.early_release = early_release
8952 self.instance = None
8953 self.new_node = None
8954 self.target_node = None
8955 self.other_node = None
8956 self.remote_node_info = None
8957 self.node_secondary_ip = None
8960 def CheckArguments(mode, remote_node, iallocator):
8961 """Helper function for users of this class.
8964 # check for valid parameter combination
8965 if mode == constants.REPLACE_DISK_CHG:
8966 if remote_node is None and iallocator is None:
8967 raise errors.OpPrereqError("When changing the secondary either an"
8968 " iallocator script must be used or the"
8969 " new node given", errors.ECODE_INVAL)
8971 if remote_node is not None and iallocator is not None:
8972 raise errors.OpPrereqError("Give either the iallocator or the new"
8973 " secondary, not both", errors.ECODE_INVAL)
8975 elif remote_node is not None or iallocator is not None:
8976 # Not replacing the secondary
8977 raise errors.OpPrereqError("The iallocator and new node options can"
8978 " only be used when changing the"
8979 " secondary node", errors.ECODE_INVAL)
8982 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8983 """Compute a new secondary node using an IAllocator.
8986 ial = IAllocator(lu.cfg, lu.rpc,
8987 mode=constants.IALLOCATOR_MODE_RELOC,
8988 name=instance_name,
8989 relocate_from=relocate_from)
8991 ial.Run(iallocator_name)
8994 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8995 " %s" % (iallocator_name, ial.info),
8998 if len(ial.result) != ial.required_nodes:
8999 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9000 " of nodes (%s), required %s" %
9001 (iallocator_name,
9002 len(ial.result), ial.required_nodes),
9003 errors.ECODE_FAULT)
9005 remote_node_name = ial.result[0]
9007 lu.LogInfo("Selected new secondary for instance '%s': %s",
9008 instance_name, remote_node_name)
9010 return remote_node_name
9012 def _FindFaultyDisks(self, node_name):
9013 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9014 node_name, True)
9016 def _CheckDisksActivated(self, instance):
9017 """Checks if the instance disks are activated.
9019 @param instance: The instance to check disks
9020 @return: True if they are activated, False otherwise
9023 nodes = instance.all_nodes
9025 for idx, dev in enumerate(instance.disks):
9026 for node in nodes:
9027 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9028 self.cfg.SetDiskID(dev, node)
9030 result = self.rpc.call_blockdev_find(node, dev)
9032 if result.offline:
9033 continue
9034 elif result.fail_msg or not result.payload:
9035 return False
9037 return True
9039 def CheckPrereq(self):
9040 """Check prerequisites.
9042 This checks that the instance is in the cluster.
9045 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9046 assert instance is not None, \
9047 "Cannot retrieve locked instance %s" % self.instance_name
9049 if instance.disk_template != constants.DT_DRBD8:
9050 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9051 " instances", errors.ECODE_INVAL)
9053 if len(instance.secondary_nodes) != 1:
9054 raise errors.OpPrereqError("The instance has a strange layout,"
9055 " expected one secondary but found %d" %
9056 len(instance.secondary_nodes),
9057 errors.ECODE_FAULT)
9059 if not self.delay_iallocator:
9060 self._CheckPrereq2()
9062 def _CheckPrereq2(self):
9063 """Check prerequisites, second part.
9065 This function should always be part of CheckPrereq. It was separated and is
9066 now called from Exec because during node evacuation iallocator was only
9067 called with an unmodified cluster model, not taking planned changes into
9071 instance = self.instance
9072 secondary_node = instance.secondary_nodes[0]
9074 if self.iallocator_name is None:
9075 remote_node = self.remote_node
9077 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9078 instance.name, instance.secondary_nodes)
9080 if remote_node is None:
9081 self.remote_node_info = None
9083 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9084 "Remote node '%s' is not locked" % remote_node
9086 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9087 assert self.remote_node_info is not None, \
9088 "Cannot retrieve locked node %s" % remote_node
9090 if remote_node == self.instance.primary_node:
9091 raise errors.OpPrereqError("The specified node is the primary node of"
9092 " the instance", errors.ECODE_INVAL)
9094 if remote_node == secondary_node:
9095 raise errors.OpPrereqError("The specified node is already the"
9096 " secondary node of the instance",
9099 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9100 constants.REPLACE_DISK_CHG):
9101 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9104 if self.mode == constants.REPLACE_DISK_AUTO:
9105 if not self._CheckDisksActivated(instance):
9106 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9107 " first" % self.instance_name,
9109 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9110 faulty_secondary = self._FindFaultyDisks(secondary_node)
9112 if faulty_primary and faulty_secondary:
9113 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9114 " one node and can not be repaired"
9115 " automatically" % self.instance_name,
9119 self.disks = faulty_primary
9120 self.target_node = instance.primary_node
9121 self.other_node = secondary_node
9122 check_nodes = [self.target_node, self.other_node]
9123 elif faulty_secondary:
9124 self.disks = faulty_secondary
9125 self.target_node = secondary_node
9126 self.other_node = instance.primary_node
9127 check_nodes = [self.target_node, self.other_node]
9128 else:
9129 self.disks = []
9130 check_nodes = []
9132 else:
9133 # Non-automatic modes
9134 if self.mode == constants.REPLACE_DISK_PRI:
9135 self.target_node = instance.primary_node
9136 self.other_node = secondary_node
9137 check_nodes = [self.target_node, self.other_node]
9139 elif self.mode == constants.REPLACE_DISK_SEC:
9140 self.target_node = secondary_node
9141 self.other_node = instance.primary_node
9142 check_nodes = [self.target_node, self.other_node]
9144 elif self.mode == constants.REPLACE_DISK_CHG:
9145 self.new_node = remote_node
9146 self.other_node = instance.primary_node
9147 self.target_node = secondary_node
9148 check_nodes = [self.new_node, self.other_node]
9150 _CheckNodeNotDrained(self.lu, remote_node)
9151 _CheckNodeVmCapable(self.lu, remote_node)
9153 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9154 assert old_node_info is not None
9155 if old_node_info.offline and not self.early_release:
9156 # doesn't make sense to delay the release
9157 self.early_release = True
9158 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9159 " early-release mode", secondary_node)
9162 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9165 # If not specified all disks should be replaced
9167 self.disks = range(len(self.instance.disks))
9169 for node in check_nodes:
9170 _CheckNodeOnline(self.lu, node)
9172 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9173 self.other_node,
9174 self.target_node]
9175 if node_name is not None)
9177 # Release unneeded node locks
9178 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9180 # Release any owned node group
9181 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9182 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9184 # Check whether disks are valid
9185 for disk_idx in self.disks:
9186 instance.FindDisk(disk_idx)
9188 # Get secondary node IP addresses
9189 self.node_secondary_ip = \
9190 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9191 for node_name in touched_nodes)
9193 def Exec(self, feedback_fn):
9194 """Execute disk replacement.
9196 This dispatches the disk replacement to the appropriate handler.
9199 if self.delay_iallocator:
9200 self._CheckPrereq2()
9203 # Verify owned locks before starting operation
9204 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9205 assert set(owned_locks) == set(self.node_secondary_ip), \
9206 ("Incorrect node locks, owning %s, expected %s" %
9207 (owned_locks, self.node_secondary_ip.keys()))
9209 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9210 assert list(owned_locks) == [self.instance_name], \
9211 "Instance '%s' not locked" % self.instance_name
9213 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9214 "Should not own any node group lock at this point"
9217 feedback_fn("No disks need replacement")
9220 feedback_fn("Replacing disk(s) %s for %s" %
9221 (utils.CommaJoin(self.disks), self.instance.name))
9223 activate_disks = (not self.instance.admin_up)
9225 # Activate the instance disks if we're replacing them on a down instance
9226 if activate_disks:
9227 _StartInstanceDisks(self.lu, self.instance, True)
9229 try:
9230 # Should we replace the secondary node?
9231 if self.new_node is not None:
9232 fn = self._ExecDrbd8Secondary
9233 else:
9234 fn = self._ExecDrbd8DiskOnly
9236 result = fn(feedback_fn)
9237 finally:
9238 # Deactivate the instance disks if we're replacing them on a
9239 # down instance
9240 if activate_disks:
9241 _SafeShutdownInstanceDisks(self.lu, self.instance)
9244 # Verify owned locks
9245 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9246 nodes = frozenset(self.node_secondary_ip)
9247 assert ((self.early_release and not owned_locks) or
9248 (not self.early_release and not (set(owned_locks) - nodes))), \
9249 ("Not owning the correct locks, early_release=%s, owned=%r,"
9250 " nodes=%r" % (self.early_release, owned_locks, nodes))
9254 def _CheckVolumeGroup(self, nodes):
9255 self.lu.LogInfo("Checking volume groups")
9257 vgname = self.cfg.GetVGName()
9259 # Make sure volume group exists on all involved nodes
9260 results = self.rpc.call_vg_list(nodes)
9262 raise errors.OpExecError("Can't list volume groups on the nodes")
9266 res.Raise("Error checking node %s" % node)
9267 if vgname not in res.payload:
9268 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9271 def _CheckDisksExistence(self, nodes):
9272 # Check disk existence
9273 for idx, dev in enumerate(self.instance.disks):
9274 if idx not in self.disks:
9278 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9279 self.cfg.SetDiskID(dev, node)
9281 result = self.rpc.call_blockdev_find(node, dev)
9283 msg = result.fail_msg
9284 if msg or not result.payload:
9286 msg = "disk not found"
9287 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9290 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9291 for idx, dev in enumerate(self.instance.disks):
9292 if idx not in self.disks:
9295 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9298 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9299 ldisk=ldisk):
9300 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9301 " replace disks for instance %s" %
9302 (node_name, self.instance.name))
9304 def _CreateNewStorage(self, node_name):
9305 iv_names = {}
9307 for idx, dev in enumerate(self.instance.disks):
9308 if idx not in self.disks:
9309 continue
9311 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9313 self.cfg.SetDiskID(dev, node_name)
9315 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9316 names = _GenerateUniqueNames(self.lu, lv_names)
9318 vg_data = dev.children[0].logical_id[0]
9319 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9320 logical_id=(vg_data, names[0]))
9321 vg_meta = dev.children[1].logical_id[0]
9322 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9323 logical_id=(vg_meta, names[1]))
9325 new_lvs = [lv_data, lv_meta]
9326 old_lvs = dev.children
9327 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9329 # we pass force_create=True to force the LVM creation
9330 for new_lv in new_lvs:
9331 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9332 _GetInstanceInfoText(self.instance), False)
9336 def _CheckDevices(self, node_name, iv_names):
9337 for name, (dev, _, _) in iv_names.iteritems():
9338 self.cfg.SetDiskID(dev, node_name)
9340 result = self.rpc.call_blockdev_find(node_name, dev)
9342 msg = result.fail_msg
9343 if msg or not result.payload:
9345 msg = "disk not found"
9346 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9349 if result.payload.is_degraded:
9350 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9352 def _RemoveOldStorage(self, node_name, iv_names):
9353 for name, (_, old_lvs, _) in iv_names.iteritems():
9354 self.lu.LogInfo("Remove logical volumes for %s" % name)
9357 self.cfg.SetDiskID(lv, node_name)
9359 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9361 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9362 hint="remove unused LVs manually")
9364 def _ExecDrbd8DiskOnly(self, feedback_fn):
9365 """Replace a disk on the primary or secondary for DRBD 8.
9367 The algorithm for replace is quite complicated:
9369 1. for each disk to be replaced:
9371 1. create new LVs on the target node with unique names
9372 1. detach old LVs from the drbd device
9373 1. rename old LVs to name_replaced.<time_t>
9374 1. rename new LVs to old LVs
9375 1. attach the new LVs (with the old names now) to the drbd device
9377 1. wait for sync across all devices
9379 1. for each modified disk:
9381 1. remove old LVs (which have the name name_replaced.<time_t>)
9383 Failures are not very well handled.
9388 # Step: check device activation
9389 self.lu.LogStep(1, steps_total, "Check device existence")
9390 self._CheckDisksExistence([self.other_node, self.target_node])
9391 self._CheckVolumeGroup([self.target_node, self.other_node])
9393 # Step: check other node consistency
9394 self.lu.LogStep(2, steps_total, "Check peer consistency")
9395 self._CheckDisksConsistency(self.other_node,
9396 self.other_node == self.instance.primary_node,
9399 # Step: create new storage
9400 self.lu.LogStep(3, steps_total, "Allocate new storage")
9401 iv_names = self._CreateNewStorage(self.target_node)
9403 # Step: for each lv, detach+rename*2+attach
9404 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9405 for dev, old_lvs, new_lvs in iv_names.itervalues():
9406 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9408 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9410 result.Raise("Can't detach drbd from local storage on node"
9411 " %s for device %s" % (self.target_node, dev.iv_name))
9413 #cfg.Update(instance)
9415 # ok, we created the new LVs, so now we know we have the needed
9416 # storage; as such, we proceed on the target node to rename
9417 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9418 # using the assumption that logical_id == physical_id (which in
9419 # turn is the unique_id on that node)
9421 # FIXME(iustin): use a better name for the replaced LVs
9422 temp_suffix = int(time.time())
9423 ren_fn = lambda d, suff: (d.physical_id[0],
9424 d.physical_id[1] + "_replaced-%s" % suff)
9426 # Build the rename list based on what LVs exist on the node
9427 rename_old_to_new = []
9428 for to_ren in old_lvs:
9429 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9430 if not result.fail_msg and result.payload:
9432 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9434 self.lu.LogInfo("Renaming the old LVs on the target node")
9435 result = self.rpc.call_blockdev_rename(self.target_node,
9437 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9439 # Now we rename the new LVs to the old LVs
9440 self.lu.LogInfo("Renaming the new LVs on the target node")
9441 rename_new_to_old = [(new, old.physical_id)
9442 for old, new in zip(old_lvs, new_lvs)]
9443 result = self.rpc.call_blockdev_rename(self.target_node,
9445 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9447 for old, new in zip(old_lvs, new_lvs):
9448 new.logical_id = old.logical_id
9449 self.cfg.SetDiskID(new, self.target_node)
9451 for disk in old_lvs:
9452 disk.logical_id = ren_fn(disk, temp_suffix)
9453 self.cfg.SetDiskID(disk, self.target_node)
9455 # Now that the new lvs have the old name, we can add them to the device
9456 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9457 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9459 msg = result.fail_msg
9461 for new_lv in new_lvs:
9462 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9465 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9466 hint=("cleanup manually the unused logical"
9468 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9470 dev.children = new_lvs
9472 self.cfg.Update(self.instance, feedback_fn)
9475 if self.early_release:
9476 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9478 self._RemoveOldStorage(self.target_node, iv_names)
9479 # WARNING: we release both node locks here, do not do other RPCs
9480 # than WaitForSync to the primary node
9481 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9482 names=[self.target_node, self.other_node])
9485 # This can fail as the old devices are degraded and _WaitForSync
9486 # does a combined result over all disks, so we don't check its return value
9487 self.lu.LogStep(cstep, steps_total, "Sync devices")
9489 _WaitForSync(self.lu, self.instance)
9491 # Check all devices manually
9492 self._CheckDevices(self.instance.primary_node, iv_names)
9494 # Step: remove old storage
9495 if not self.early_release:
9496 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9498 self._RemoveOldStorage(self.target_node, iv_names)
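
# The detach/rename/rename/attach sequence above swaps the backing LVs of
# a DRBD device without ever having two volumes under the same name.  A
# self-contained sketch of the double rename, with a plain dict standing
# in for the LVM namespace (hypothetical helper, not a Ganeti API):
import time

def _swap_volumes(namespace, old_name, new_name):
  # 1. move the old LV aside under a unique temporary name
  temp_name = "%s_replaced-%d" % (old_name, int(time.time()))
  namespace[temp_name] = namespace.pop(old_name)
  # 2. give the freshly created LV the old (stable) name
  namespace[old_name] = namespace.pop(new_name)
  return temp_name  # removed later, once the mirror is in sync

volumes = {"disk0_data": "old LV", ".disk0_data_new": "new LV"}
_swap_volumes(volumes, "disk0_data", ".disk0_data_new")
assert volumes["disk0_data"] == "new LV"
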
9500 def _ExecDrbd8Secondary(self, feedback_fn):
9501 """Replace the secondary node for DRBD 8.
9503 The algorithm for replace is quite complicated:
9504 - for all disks of the instance:
9505 - create new LVs on the new node with same names
9506 - shutdown the drbd device on the old secondary
9507 - disconnect the drbd network on the primary
9508 - create the drbd device on the new secondary
9509 - network attach the drbd on the primary, using an artifice:
9510 the drbd code for Attach() will connect to the network if it
9511 finds a device which is connected to the good local disks but
9512 not network enabled
9513 - wait for sync across all devices
9514 - remove all disks from the old secondary
9516 Failures are not very well handled.
9521 # Step: check device activation
9522 self.lu.LogStep(1, steps_total, "Check device existence")
9523 self._CheckDisksExistence([self.instance.primary_node])
9524 self._CheckVolumeGroup([self.instance.primary_node])
9526 # Step: check other node consistency
9527 self.lu.LogStep(2, steps_total, "Check peer consistency")
9528 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9530 # Step: create new storage
9531 self.lu.LogStep(3, steps_total, "Allocate new storage")
9532 for idx, dev in enumerate(self.instance.disks):
9533 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9534 (self.new_node, idx))
9535 # we pass force_create=True to force LVM creation
9536 for new_lv in dev.children:
9537 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9538 _GetInstanceInfoText(self.instance), False)
9540 # Step 4: drbd minors and drbd setup changes
9541 # after this, we must manually remove the drbd minors on both the
9542 # error and the success paths
9543 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9544 minors = self.cfg.AllocateDRBDMinor([self.new_node
9545 for dev in self.instance.disks],
9547 logging.debug("Allocated minors %r", minors)
9550 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9551 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9552 (self.new_node, idx))
9553 # create new devices on new_node; note that we create two IDs:
9554 # one without port, so the drbd will be activated without
9555 # networking information on the new node at this stage, and one
9556 # with network, for the latter activation in step 4
9557 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9558 if self.instance.primary_node == o_node1:
9561 assert self.instance.primary_node == o_node2, "Three-node instance?"
9564 new_alone_id = (self.instance.primary_node, self.new_node, None,
9565 p_minor, new_minor, o_secret)
9566 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9567 p_minor, new_minor, o_secret)
9569 iv_names[idx] = (dev, dev.children, new_net_id)
9570 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9572 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9573 logical_id=new_alone_id,
9574 children=dev.children,
9577 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9578 _GetInstanceInfoText(self.instance), False)
9579 except errors.GenericError:
9580 self.cfg.ReleaseDRBDMinors(self.instance.name)
9583 # We have new devices, shutdown the drbd on the old secondary
9584 for idx, dev in enumerate(self.instance.disks):
9585 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9586 self.cfg.SetDiskID(dev, self.target_node)
9587 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9589 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9590 "node: %s" % (idx, msg),
9591 hint=("Please cleanup this device manually as"
9592 " soon as possible"))
9594 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9595 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9596 self.node_secondary_ip,
9597 self.instance.disks)\
9598 [self.instance.primary_node]
9600 msg = result.fail_msg
9602 # detaches didn't succeed (unlikely)
9603 self.cfg.ReleaseDRBDMinors(self.instance.name)
9604 raise errors.OpExecError("Can't detach the disks from the network on"
9605 " old node: %s" % (msg,))
9607 # if we managed to detach at least one, we update all the disks of
9608 # the instance to point to the new secondary
9609 self.lu.LogInfo("Updating instance configuration")
9610 for dev, _, new_logical_id in iv_names.itervalues():
9611 dev.logical_id = new_logical_id
9612 self.cfg.SetDiskID(dev, self.instance.primary_node)
9614 self.cfg.Update(self.instance, feedback_fn)
9616 # and now perform the drbd attach
9617 self.lu.LogInfo("Attaching primary drbds to new secondary"
9618 " (standalone => connected)")
9619 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9621 self.node_secondary_ip,
9622 self.instance.disks,
9625 for to_node, to_result in result.items():
9626 msg = to_result.fail_msg
9628 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9630 hint=("please do a gnt-instance info to see the"
9631 " status of disks"))
9633 if self.early_release:
9634 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9636 self._RemoveOldStorage(self.target_node, iv_names)
9637 # WARNING: we release all node locks here, do not do other RPCs
9638 # than WaitForSync to the primary node
9639 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9640 names=[self.instance.primary_node,
9645 # This can fail as the old devices are degraded and _WaitForSync
9646 # does a combined result over all disks, so we don't check its return value
9647 self.lu.LogStep(cstep, steps_total, "Sync devices")
9649 _WaitForSync(self.lu, self.instance)
9651 # Check all devices manually
9652 self._CheckDevices(self.instance.primary_node, iv_names)
9654 # Step: remove old storage
9655 if not self.early_release:
9656 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9657 self._RemoveOldStorage(self.target_node, iv_names)
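
# _ExecDrbd8Secondary is essentially a three-step state machine on the
# primary's DRBD network state: connected -> standalone (disconnect_net),
# configuration update to the new peer, then standalone -> connected
# (attach_net).  A sketch of the required ordering, with hypothetical
# callables standing in for the RPCs:
def _replace_secondary(disconnect_net, set_new_peer, attach_net):
  disconnect_net()   # primary: connected -> standalone
  set_new_peer()     # config: logical_id now names the new secondary
  attach_net()       # primary: standalone -> connected (new peer)

steps = []
_replace_secondary(lambda: steps.append("disconnect"),
                   lambda: steps.append("update-config"),
                   lambda: steps.append("attach"))
assert steps == ["disconnect", "update-config", "attach"]
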
9660 class LURepairNodeStorage(NoHooksLU):
9661 """Repairs the volume group on a node.
9666 def CheckArguments(self):
9667 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9669 storage_type = self.op.storage_type
9671 if (constants.SO_FIX_CONSISTENCY not in
9672 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9673 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9674 " repaired" % storage_type,
9677 def ExpandNames(self):
9678 self.needed_locks = {
9679 locking.LEVEL_NODE: [self.op.node_name],
9682 def _CheckFaultyDisks(self, instance, node_name):
9683 """Ensure faulty disks abort the opcode or at least warn."""
9685 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9687 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9688 " node '%s'" % (instance.name, node_name),
9690 except errors.OpPrereqError, err:
9691 if self.op.ignore_consistency:
9692 self.proc.LogWarning(str(err.args[0]))
9696 def CheckPrereq(self):
9697 """Check prerequisites.
9700 # Check whether any instance on this node has faulty disks
9701 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9702 if not inst.admin_up:
9704 check_nodes = set(inst.all_nodes)
9705 check_nodes.discard(self.op.node_name)
9706 for inst_node_name in check_nodes:
9707 self._CheckFaultyDisks(inst, inst_node_name)
9709 def Exec(self, feedback_fn):
9710 feedback_fn("Repairing storage unit '%s' on %s ..." %
9711 (self.op.name, self.op.node_name))
9713 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9714 result = self.rpc.call_storage_execute(self.op.node_name,
9715 self.op.storage_type, st_args,
9717 constants.SO_FIX_CONSISTENCY)
9718 result.Raise("Failed to repair storage unit '%s' on %s" %
9719 (self.op.name, self.op.node_name))
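
# CheckArguments above admits only storage types whose valid-operations
# set contains the fix-consistency action.  The lookup pattern in
# miniature (table contents here are illustrative; the real table is
# constants.VALID_STORAGE_OPERATIONS):
_REPAIRABLE = {"lvm-vg": frozenset(["fix-consistency"])}

def _can_repair(storage_type):
  return "fix-consistency" in _REPAIRABLE.get(storage_type, frozenset())

assert _can_repair("lvm-vg")
assert not _can_repair("file")
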
9722 class LUNodeEvacStrategy(NoHooksLU):
9723 """Computes the node evacuation strategy.
9728 def CheckArguments(self):
9729 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9731 def ExpandNames(self):
9732 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9733 self.needed_locks = locks = {}
9734 if self.op.remote_node is None:
9735 locks[locking.LEVEL_NODE] = locking.ALL_SET
9737 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9738 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9740 def Exec(self, feedback_fn):
9742 for node in self.op.nodes:
9743 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9747 if self.op.remote_node is not None:
9750 if i.primary_node == self.op.remote_node:
9751 raise errors.OpPrereqError("Node %s is the primary node of"
9752 " instance %s, cannot use it as"
9754 (self.op.remote_node, i.name),
9756 result.append([i.name, self.op.remote_node])
9758 ial = IAllocator(self.cfg, self.rpc,
9759 mode=constants.IALLOCATOR_MODE_MEVAC,
9760 evac_nodes=self.op.nodes)
9761 ial.Run(self.op.iallocator, validate=True)
9763 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
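
# With an explicit remote node the evacuation strategy reduces to a
# straight "secondary instance -> remote node" mapping, refused when the
# remote node already is the instance's primary (the two DRBD peers must
# differ).  A self-contained sketch with (name, primary) tuples instead
# of instance objects:
def _evac_pairs(instances, remote_node):
  pairs = []
  for name, primary in instances:
    if primary == remote_node:
      raise ValueError("%s is the primary node of %s" % (remote_node, name))
    pairs.append([name, remote_node])
  return pairs

assert _evac_pairs([("inst1", "nodeA")], "nodeB") == [["inst1", "nodeB"]]
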
9769 class LUInstanceGrowDisk(LogicalUnit):
9770 """Grow a disk of an instance.
9774 HTYPE = constants.HTYPE_INSTANCE
9777 def ExpandNames(self):
9778 self._ExpandAndLockInstance()
9779 self.needed_locks[locking.LEVEL_NODE] = []
9780 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9782 def DeclareLocks(self, level):
9783 if level == locking.LEVEL_NODE:
9784 self._LockInstancesNodes()
9786 def BuildHooksEnv(self):
9789 This runs on the master, the primary and all the secondaries.
9793 "DISK": self.op.disk,
9794 "AMOUNT": self.op.amount,
9796 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9799 def BuildHooksNodes(self):
9800 """Build hooks nodes.
9803 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9806 def CheckPrereq(self):
9807 """Check prerequisites.
9809 This checks that the instance is in the cluster.
9812 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9813 assert instance is not None, \
9814 "Cannot retrieve locked instance %s" % self.op.instance_name
9815 nodenames = list(instance.all_nodes)
9816 for node in nodenames:
9817 _CheckNodeOnline(self, node)
9819 self.instance = instance
9821 if instance.disk_template not in constants.DTS_GROWABLE:
9822 raise errors.OpPrereqError("Instance's disk layout does not support"
9823 " growing", errors.ECODE_INVAL)
9825 self.disk = instance.FindDisk(self.op.disk)
9827 if instance.disk_template not in (constants.DT_FILE,
9828 constants.DT_SHARED_FILE):
9829 # TODO: check the free disk space for file, when that feature will be
9831 _CheckNodesFreeDiskPerVG(self, nodenames,
9832 self.disk.ComputeGrowth(self.op.amount))
9834 def Exec(self, feedback_fn):
9835 """Execute disk grow.
9838 instance = self.instance
9841 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9843 raise errors.OpExecError("Cannot activate block device to grow")
9845 # First run all grow ops in dry-run mode
9846 for node in instance.all_nodes:
9847 self.cfg.SetDiskID(disk, node)
9848 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9849 result.Raise("Grow request failed to node %s" % node)
9851 # We know that (as far as we can test) operations across different
9852 # nodes will succeed, time to run it for real
9853 for node in instance.all_nodes:
9854 self.cfg.SetDiskID(disk, node)
9855 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9856 result.Raise("Grow request failed to node %s" % node)
9858 # TODO: Rewrite code to work properly
9859 # DRBD goes into sync mode for a short amount of time after executing the
9860 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9861 # calling "resize" in sync mode fails. Sleeping for a short amount of
9862 # time is a work-around.
9865 disk.RecordGrow(self.op.amount)
9866 self.cfg.Update(instance, feedback_fn)
9867 if self.op.wait_for_sync:
9868 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9870 self.proc.LogWarning("Disk sync-ing has not returned a good"
9871 " status; please check the instance")
9872 if not instance.admin_up:
9873 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9874 elif not instance.admin_up:
9875 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9876 " not supposed to be running because no wait for"
9877 " sync mode was requested")
9880 class LUInstanceQueryData(NoHooksLU):
9881 """Query runtime instance data.
9886 def ExpandNames(self):
9887 self.needed_locks = {}
9889 # Use locking if requested or when non-static information is wanted
9890 if not (self.op.static or self.op.use_locking):
9891 self.LogWarning("Non-static data requested, locks need to be acquired")
9892 self.op.use_locking = True
9894 if self.op.instances or not self.op.use_locking:
9895 # Expand instance names right here
9896 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9898 # Will use acquired locks
9899 self.wanted_names = None
9901 if self.op.use_locking:
9902 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9904 if self.wanted_names is None:
9905 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9907 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9909 self.needed_locks[locking.LEVEL_NODE] = []
9910 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9911 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9913 def DeclareLocks(self, level):
9914 if self.op.use_locking and level == locking.LEVEL_NODE:
9915 self._LockInstancesNodes()
9917 def CheckPrereq(self):
9918 """Check prerequisites.
9920 This only checks the optional instance list against the existing names.
9923 if self.wanted_names is None:
9924 assert self.op.use_locking, "Locking was not used"
9925 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9927 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9928 for name in self.wanted_names]
9930 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9931 """Returns the status of a block device
9934 if self.op.static or not node:
9937 self.cfg.SetDiskID(dev, node)
9939 result = self.rpc.call_blockdev_find(node, dev)
9943 result.Raise("Can't compute disk status for %s" % instance_name)
9945 status = result.payload
9949 return (status.dev_path, status.major, status.minor,
9950 status.sync_percent, status.estimated_time,
9951 status.is_degraded, status.ldisk_status)
9953 def _ComputeDiskStatus(self, instance, snode, dev):
9954 """Compute block device status.
9957 if dev.dev_type in constants.LDS_DRBD:
9958 # we change the snode then (otherwise we use the one passed in)
9959 if dev.logical_id[0] == instance.primary_node:
9960 snode = dev.logical_id[1]
9962 snode = dev.logical_id[0]
9964 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9966 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9969 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9970 for child in dev.children]
9975 "iv_name": dev.iv_name,
9976 "dev_type": dev.dev_type,
9977 "logical_id": dev.logical_id,
9978 "physical_id": dev.physical_id,
9979 "pstatus": dev_pstatus,
9980 "sstatus": dev_sstatus,
9981 "children": dev_children,
9986 def Exec(self, feedback_fn):
9987 """Gather and return data"""
9990 cluster = self.cfg.GetClusterInfo()
9992 for instance in self.wanted_instances:
9993 if not self.op.static:
9994 remote_info = self.rpc.call_instance_info(instance.primary_node,
9996 instance.hypervisor)
9997 remote_info.Raise("Error checking node %s" % instance.primary_node)
9998 remote_info = remote_info.payload
9999 if remote_info and "state" in remote_info:
10000 remote_state = "up"
10002 remote_state = "down"
10004 remote_state = None
10005 if instance.admin_up:
10006 config_state = "up"
10008 config_state = "down"
10010 disks = [self._ComputeDiskStatus(instance, None, device)
10011 for device in instance.disks]
10013 result[instance.name] = {
10014 "name": instance.name,
10015 "config_state": config_state,
10016 "run_state": remote_state,
10017 "pnode": instance.primary_node,
10018 "snodes": instance.secondary_nodes,
10020 # this happens to be the same format used for hooks
10021 "nics": _NICListToTuple(self, instance.nics),
10022 "disk_template": instance.disk_template,
10024 "hypervisor": instance.hypervisor,
10025 "network_port": instance.network_port,
10026 "hv_instance": instance.hvparams,
10027 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10028 "be_instance": instance.beparams,
10029 "be_actual": cluster.FillBE(instance),
10030 "os_instance": instance.osparams,
10031 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10032 "serial_no": instance.serial_no,
10033 "mtime": instance.mtime,
10034 "ctime": instance.ctime,
10035 "uuid": instance.uuid,
10041 class LUInstanceSetParams(LogicalUnit):
10042 """Modifies an instances's parameters.
10045 HPATH = "instance-modify"
10046 HTYPE = constants.HTYPE_INSTANCE
10049 def CheckArguments(self):
10050 if not (self.op.nics or self.op.disks or self.op.disk_template or
10051 self.op.hvparams or self.op.beparams or self.op.os_name):
10052 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10054 if self.op.hvparams:
10055 _CheckGlobalHvParams(self.op.hvparams)
10059 for disk_op, disk_dict in self.op.disks:
10060 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10061 if disk_op == constants.DDM_REMOVE:
10062 disk_addremove += 1
10064 elif disk_op == constants.DDM_ADD:
10065 disk_addremove += 1
10067 if not isinstance(disk_op, int):
10068 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10069 if not isinstance(disk_dict, dict):
10070 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10071 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10073 if disk_op == constants.DDM_ADD:
10074 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10075 if mode not in constants.DISK_ACCESS_SET:
10076 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10077 errors.ECODE_INVAL)
10078 size = disk_dict.get(constants.IDISK_SIZE, None)
10080 raise errors.OpPrereqError("Required disk parameter size missing",
10081 errors.ECODE_INVAL)
10084 except (TypeError, ValueError), err:
10085 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10086 str(err), errors.ECODE_INVAL)
10087 disk_dict[constants.IDISK_SIZE] = size
10089 # modification of disk
10090 if constants.IDISK_SIZE in disk_dict:
10091 raise errors.OpPrereqError("Disk size change not possible, use"
10092 " grow-disk", errors.ECODE_INVAL)
10094 if disk_addremove > 1:
10095 raise errors.OpPrereqError("Only one disk add or remove operation"
10096 " supported at a time", errors.ECODE_INVAL)
10098 if self.op.disks and self.op.disk_template is not None:
10099 raise errors.OpPrereqError("Disk template conversion and other disk"
10100 " changes not supported at the same time",
10101 errors.ECODE_INVAL)
10103 if (self.op.disk_template and
10104 self.op.disk_template in constants.DTS_INT_MIRROR and
10105 self.op.remote_node is None):
10106 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10107 " one requires specifying a secondary node",
10108 errors.ECODE_INVAL)
10112 for nic_op, nic_dict in self.op.nics:
10113 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10114 if nic_op == constants.DDM_REMOVE:
10117 elif nic_op == constants.DDM_ADD:
10120 if not isinstance(nic_op, int):
10121 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10122 if not isinstance(nic_dict, dict):
10123 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10124 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10126 # nic_dict should be a dict
10127 nic_ip = nic_dict.get(constants.INIC_IP, None)
10128 if nic_ip is not None:
10129 if nic_ip.lower() == constants.VALUE_NONE:
10130 nic_dict[constants.INIC_IP] = None
10132 if not netutils.IPAddress.IsValid(nic_ip):
10133 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10134 errors.ECODE_INVAL)
10136 nic_bridge = nic_dict.get('bridge', None)
10137 nic_link = nic_dict.get(constants.INIC_LINK, None)
10138 if nic_bridge and nic_link:
10139 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10140 " at the same time", errors.ECODE_INVAL)
10141 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10142 nic_dict['bridge'] = None
10143 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10144 nic_dict[constants.INIC_LINK] = None
10146 if nic_op == constants.DDM_ADD:
10147 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10148 if nic_mac is None:
10149 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10151 if constants.INIC_MAC in nic_dict:
10152 nic_mac = nic_dict[constants.INIC_MAC]
10153 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10154 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10156 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10157 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10158 " modifying an existing nic",
10159 errors.ECODE_INVAL)
10161 if nic_addremove > 1:
10162 raise errors.OpPrereqError("Only one NIC add or remove operation"
10163 " supported at a time", errors.ECODE_INVAL)
10165 def ExpandNames(self):
10166 self._ExpandAndLockInstance()
10167 self.needed_locks[locking.LEVEL_NODE] = []
10168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10170 def DeclareLocks(self, level):
10171 if level == locking.LEVEL_NODE:
10172 self._LockInstancesNodes()
10173 if self.op.disk_template and self.op.remote_node:
10174 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10175 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10177 def BuildHooksEnv(self):
10178 """Build hooks env.
10180 This runs on the master, primary and secondaries.
10184 if constants.BE_MEMORY in self.be_new:
10185 args['memory'] = self.be_new[constants.BE_MEMORY]
10186 if constants.BE_VCPUS in self.be_new:
10187 args['vcpus'] = self.be_new[constants.BE_VCPUS]
10188 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10189 # information at all.
10192 nic_override = dict(self.op.nics)
10193 for idx, nic in enumerate(self.instance.nics):
10194 if idx in nic_override:
10195 this_nic_override = nic_override[idx]
10197 this_nic_override = {}
10198 if constants.INIC_IP in this_nic_override:
10199 ip = this_nic_override[constants.INIC_IP]
10202 if constants.INIC_MAC in this_nic_override:
10203 mac = this_nic_override[constants.INIC_MAC]
10206 if idx in self.nic_pnew:
10207 nicparams = self.nic_pnew[idx]
10209 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10210 mode = nicparams[constants.NIC_MODE]
10211 link = nicparams[constants.NIC_LINK]
10212 args['nics'].append((ip, mac, mode, link))
10213 if constants.DDM_ADD in nic_override:
10214 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10215 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10216 nicparams = self.nic_pnew[constants.DDM_ADD]
10217 mode = nicparams[constants.NIC_MODE]
10218 link = nicparams[constants.NIC_LINK]
10219 args['nics'].append((ip, mac, mode, link))
10220 elif constants.DDM_REMOVE in nic_override:
10221 del args['nics'][-1]
10223 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10224 if self.op.disk_template:
10225 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10229 def BuildHooksNodes(self):
10230 """Build hooks nodes.
10233 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10236 def CheckPrereq(self):
10237 """Check prerequisites.
10239 This only checks the instance list against the existing names.
10242 # checking the new params on the primary/secondary nodes
10244 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10245 cluster = self.cluster = self.cfg.GetClusterInfo()
10246 assert self.instance is not None, \
10247 "Cannot retrieve locked instance %s" % self.op.instance_name
10248 pnode = instance.primary_node
10249 nodelist = list(instance.all_nodes)
10252 if self.op.os_name and not self.op.force:
10253 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10254 self.op.force_variant)
10255 instance_os = self.op.os_name
10257 instance_os = instance.os
10259 if self.op.disk_template:
10260 if instance.disk_template == self.op.disk_template:
10261 raise errors.OpPrereqError("Instance already has disk template %s" %
10262 instance.disk_template, errors.ECODE_INVAL)
10264 if (instance.disk_template,
10265 self.op.disk_template) not in self._DISK_CONVERSIONS:
10266 raise errors.OpPrereqError("Unsupported disk template conversion from"
10267 " %s to %s" % (instance.disk_template,
10268 self.op.disk_template),
10269 errors.ECODE_INVAL)
10270 _CheckInstanceDown(self, instance, "cannot change disk template")
10271 if self.op.disk_template in constants.DTS_INT_MIRROR:
10272 if self.op.remote_node == pnode:
10273 raise errors.OpPrereqError("Given new secondary node %s is the same"
10274 " as the primary node of the instance" %
10275 self.op.remote_node, errors.ECODE_STATE)
10276 _CheckNodeOnline(self, self.op.remote_node)
10277 _CheckNodeNotDrained(self, self.op.remote_node)
10278 # FIXME: here we assume that the old instance type is DT_PLAIN
10279 assert instance.disk_template == constants.DT_PLAIN
10280 disks = [{constants.IDISK_SIZE: d.size,
10281 constants.IDISK_VG: d.logical_id[0]}
10282 for d in instance.disks]
10283 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10284 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10286 # hvparams processing
10287 if self.op.hvparams:
10288 hv_type = instance.hypervisor
10289 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10290 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10291 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10294 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10295 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10296 self.hv_new = hv_new # the new actual values
10297 self.hv_inst = i_hvdict # the new dict (without defaults)
10299 self.hv_new = self.hv_inst = {}
10301 # beparams processing
10302 if self.op.beparams:
10303 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10305 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10306 be_new = cluster.SimpleFillBE(i_bedict)
10307 self.be_new = be_new # the new actual values
10308 self.be_inst = i_bedict # the new dict (without defaults)
10310 self.be_new = self.be_inst = {}
10311 be_old = cluster.FillBE(instance)
10313 # osparams processing
10314 if self.op.osparams:
10315 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10316 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10317 self.os_inst = i_osdict # the new dict (without defaults)
10323 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10324 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10325 mem_check_list = [pnode]
10326 if be_new[constants.BE_AUTO_BALANCE]:
10327 # either we changed auto_balance to yes or it was from before
10328 mem_check_list.extend(instance.secondary_nodes)
10329 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10330 instance.hypervisor)
10331 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10332 instance.hypervisor)
10333 pninfo = nodeinfo[pnode]
10334 msg = pninfo.fail_msg
10336 # Assume the primary node is unreachable and go ahead
10337 self.warn.append("Can't get info from primary node %s: %s" %
10339 elif not isinstance(pninfo.payload.get('memory_free', None), int):
10340 self.warn.append("Node data from primary node %s doesn't contain"
10341 " free memory information" % pnode)
10342 elif instance_info.fail_msg:
10343 self.warn.append("Can't get instance runtime information: %s" %
10344 instance_info.fail_msg)
10346 if instance_info.payload:
10347 current_mem = int(instance_info.payload['memory'])
10349 # Assume instance not running
10350 # (there is a slight race condition here, but it's not very probable,
10351 # and we have no other way to check)
10353 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10354 pninfo.payload['memory_free'])
10356 raise errors.OpPrereqError("This change will prevent the instance"
10357 " from starting, due to %d MB of memory"
10358 " missing on its primary node" % miss_mem,
10359 errors.ECODE_NORES)
10361 if be_new[constants.BE_AUTO_BALANCE]:
10362 for node, nres in nodeinfo.items():
10363 if node not in instance.secondary_nodes:
10365 nres.Raise("Can't get info from secondary node %s" % node,
10366 prereq=True, ecode=errors.ECODE_STATE)
10367 if not isinstance(nres.payload.get('memory_free', None), int):
10368 raise errors.OpPrereqError("Secondary node %s didn't return free"
10369 " memory information" % node,
10370 errors.ECODE_STATE)
10371 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10372 raise errors.OpPrereqError("This change will prevent the instance"
10373 " from failover to its secondary node"
10374 " %s, due to not enough memory" % node,
10375 errors.ECODE_STATE)
10379 self.nic_pinst = {}
10380 for nic_op, nic_dict in self.op.nics:
10381 if nic_op == constants.DDM_REMOVE:
10382 if not instance.nics:
10383 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10384 errors.ECODE_INVAL)
10386 if nic_op != constants.DDM_ADD:
10388 if not instance.nics:
10389 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10390 " no NICs" % nic_op,
10391 errors.ECODE_INVAL)
10392 if nic_op < 0 or nic_op >= len(instance.nics):
10393 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10395 (nic_op, len(instance.nics) - 1),
10396 errors.ECODE_INVAL)
10397 old_nic_params = instance.nics[nic_op].nicparams
10398 old_nic_ip = instance.nics[nic_op].ip
10400 old_nic_params = {}
10403 update_params_dict = dict([(key, nic_dict[key])
10404 for key in constants.NICS_PARAMETERS
10405 if key in nic_dict])
10407 if 'bridge' in nic_dict:
10408 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10410 new_nic_params = _GetUpdatedParams(old_nic_params,
10411 update_params_dict)
10412 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10413 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10414 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10415 self.nic_pinst[nic_op] = new_nic_params
10416 self.nic_pnew[nic_op] = new_filled_nic_params
10417 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10419 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10420 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10421 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10423 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10425 self.warn.append(msg)
10427 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10428 if new_nic_mode == constants.NIC_MODE_ROUTED:
10429 if constants.INIC_IP in nic_dict:
10430 nic_ip = nic_dict[constants.INIC_IP]
10432 nic_ip = old_nic_ip
10434 raise errors.OpPrereqError('Cannot set the nic ip to None'
10435 ' on a routed nic', errors.ECODE_INVAL)
10436 if constants.INIC_MAC in nic_dict:
10437 nic_mac = nic_dict[constants.INIC_MAC]
10438 if nic_mac is None:
10439 raise errors.OpPrereqError('Cannot set the nic mac to None',
10440 errors.ECODE_INVAL)
10441 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10442 # otherwise generate the mac
10443 nic_dict[constants.INIC_MAC] = \
10444 self.cfg.GenerateMAC(self.proc.GetECId())
10446 # or validate/reserve the current one
10448 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10449 except errors.ReservationError:
10450 raise errors.OpPrereqError("MAC address %s already in use"
10451 " in cluster" % nic_mac,
10452 errors.ECODE_NOTUNIQUE)
10455 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10456 raise errors.OpPrereqError("Disk operations not supported for"
10457 " diskless instances",
10458 errors.ECODE_INVAL)
10459 for disk_op, _ in self.op.disks:
10460 if disk_op == constants.DDM_REMOVE:
10461 if len(instance.disks) == 1:
10462 raise errors.OpPrereqError("Cannot remove the last disk of"
10463 " an instance", errors.ECODE_INVAL)
10464 _CheckInstanceDown(self, instance, "cannot remove disks")
10466 if (disk_op == constants.DDM_ADD and
10467 len(instance.disks) >= constants.MAX_DISKS):
10468 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10469 " add more" % constants.MAX_DISKS,
10470 errors.ECODE_STATE)
10471 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10473 if disk_op < 0 or disk_op >= len(instance.disks):
10474 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10476 (disk_op, len(instance.disks)),
10477 errors.ECODE_INVAL)
10481 def _ConvertPlainToDrbd(self, feedback_fn):
10482 """Converts an instance from plain to drbd.
10485 feedback_fn("Converting template to drbd")
10486 instance = self.instance
10487 pnode = instance.primary_node
10488 snode = self.op.remote_node
10490 # create a fake disk info for _GenerateDiskTemplate
10491 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10492 constants.IDISK_VG: d.logical_id[0]}
10493 for d in instance.disks]
10494 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10495 instance.name, pnode, [snode],
10496 disk_info, None, None, 0, feedback_fn)
10497 info = _GetInstanceInfoText(instance)
10498 feedback_fn("Creating aditional volumes...")
10499 # first, create the missing data and meta devices
10500 for disk in new_disks:
10501 # unfortunately this is... not too nice
10502 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10504 for child in disk.children:
10505 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10506 # at this stage, all new LVs have been created, we can rename the
10508 feedback_fn("Renaming original volumes...")
10509 rename_list = [(o, n.children[0].logical_id)
10510 for (o, n) in zip(instance.disks, new_disks)]
10511 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10512 result.Raise("Failed to rename original LVs")
10514 feedback_fn("Initializing DRBD devices...")
10515 # all child devices are in place, we can now create the DRBD devices
10516 for disk in new_disks:
10517 for node in [pnode, snode]:
10518 f_create = node == pnode
10519 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10521 # at this point, the instance has been modified
10522 instance.disk_template = constants.DT_DRBD8
10523 instance.disks = new_disks
10524 self.cfg.Update(instance, feedback_fn)
10526 # disks are created, waiting for sync
10527 disk_abort = not _WaitForSync(self, instance,
10528 oneshot=not self.op.wait_for_sync)
10530 raise errors.OpExecError("There are some degraded disks for"
10531 " this instance, please cleanup manually")
10533 def _ConvertDrbdToPlain(self, feedback_fn):
10534 """Converts an instance from drbd to plain.
10537 instance = self.instance
10538 assert len(instance.secondary_nodes) == 1
10539 pnode = instance.primary_node
10540 snode = instance.secondary_nodes[0]
10541 feedback_fn("Converting template to plain")
10543 old_disks = instance.disks
10544 new_disks = [d.children[0] for d in old_disks]
10546 # copy over size and mode
10547 for parent, child in zip(old_disks, new_disks):
10548 child.size = parent.size
10549 child.mode = parent.mode
10551 # update instance structure
10552 instance.disks = new_disks
10553 instance.disk_template = constants.DT_PLAIN
10554 self.cfg.Update(instance, feedback_fn)
10556 feedback_fn("Removing volumes on the secondary node...")
10557 for disk in old_disks:
10558 self.cfg.SetDiskID(disk, snode)
10559 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10561 self.LogWarning("Could not remove block device %s on node %s,"
10562 " continuing anyway: %s", disk.iv_name, snode, msg)
10564 feedback_fn("Removing unneeded volumes on the primary node...")
10565 for idx, disk in enumerate(old_disks):
10566 meta = disk.children[1]
10567 self.cfg.SetDiskID(meta, pnode)
10568 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10570 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10571 " continuing anyway: %s", idx, pnode, msg)
10573 def Exec(self, feedback_fn):
10574 """Modifies an instance.
10576 All parameters take effect only at the next restart of the instance.
10579 # Process here the warnings from CheckPrereq, as we don't have a
10580 # feedback_fn there.
10581 for warn in self.warn:
10582 feedback_fn("WARNING: %s" % warn)
10585 instance = self.instance
10587 for disk_op, disk_dict in self.op.disks:
10588 if disk_op == constants.DDM_REMOVE:
10589 # remove the last disk
10590 device = instance.disks.pop()
10591 device_idx = len(instance.disks)
10592 for node, disk in device.ComputeNodeTree(instance.primary_node):
10593 self.cfg.SetDiskID(disk, node)
10594 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10596 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10597 " continuing anyway", device_idx, node, msg)
10598 result.append(("disk/%d" % device_idx, "remove"))
10599 elif disk_op == constants.DDM_ADD:
10601 if instance.disk_template in (constants.DT_FILE,
10602 constants.DT_SHARED_FILE):
10603 file_driver, file_path = instance.disks[0].logical_id
10604 file_path = os.path.dirname(file_path)
10606 file_driver = file_path = None
10607 disk_idx_base = len(instance.disks)
10608 new_disk = _GenerateDiskTemplate(self,
10609 instance.disk_template,
10610 instance.name, instance.primary_node,
10611 instance.secondary_nodes,
10615 disk_idx_base, feedback_fn)[0]
10616 instance.disks.append(new_disk)
10617 info = _GetInstanceInfoText(instance)
10619 logging.info("Creating volume %s for instance %s",
10620 new_disk.iv_name, instance.name)
10621 # Note: this needs to be kept in sync with _CreateDisks
10623 for node in instance.all_nodes:
10624 f_create = node == instance.primary_node
10626 _CreateBlockDev(self, node, instance, new_disk,
10627 f_create, info, f_create)
10628 except errors.OpExecError, err:
10629 self.LogWarning("Failed to create volume %s (%s) on"
10631 new_disk.iv_name, new_disk, node, err)
10632 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10633 (new_disk.size, new_disk.mode)))
10635 # change a given disk
10636 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10637 result.append(("disk.mode/%d" % disk_op,
10638 disk_dict[constants.IDISK_MODE]))
10640 if self.op.disk_template:
10641 r_shut = _ShutdownInstanceDisks(self, instance)
10643 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10644 " proceed with disk template conversion")
10645 mode = (instance.disk_template, self.op.disk_template)
10647 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10649 self.cfg.ReleaseDRBDMinors(instance.name)
10651 result.append(("disk_template", self.op.disk_template))
10654 for nic_op, nic_dict in self.op.nics:
10655 if nic_op == constants.DDM_REMOVE:
10656 # remove the last nic
10657 del instance.nics[-1]
10658 result.append(("nic.%d" % len(instance.nics), "remove"))
10659 elif nic_op == constants.DDM_ADD:
10660 # mac and bridge should be set by now
10661 mac = nic_dict[constants.INIC_MAC]
10662 ip = nic_dict.get(constants.INIC_IP, None)
10663 nicparams = self.nic_pinst[constants.DDM_ADD]
10664 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10665 instance.nics.append(new_nic)
10666 result.append(("nic.%d" % (len(instance.nics) - 1),
10667 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10668 (new_nic.mac, new_nic.ip,
10669 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10670 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10673 for key in (constants.INIC_MAC, constants.INIC_IP):
10674 if key in nic_dict:
10675 setattr(instance.nics[nic_op], key, nic_dict[key])
10676 if nic_op in self.nic_pinst:
10677 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10678 for key, val in nic_dict.iteritems():
10679 result.append(("nic.%s/%d" % (key, nic_op), val))
10682 if self.op.hvparams:
10683 instance.hvparams = self.hv_inst
10684 for key, val in self.op.hvparams.iteritems():
10685 result.append(("hv/%s" % key, val))
10688 if self.op.beparams:
10689 instance.beparams = self.be_inst
10690 for key, val in self.op.beparams.iteritems():
10691 result.append(("be/%s" % key, val))
10694 if self.op.os_name:
10695 instance.os = self.op.os_name
10698 if self.op.osparams:
10699 instance.osparams = self.os_inst
10700 for key, val in self.op.osparams.iteritems():
10701 result.append(("os/%s" % key, val))
10703 self.cfg.Update(instance, feedback_fn)
10707 _DISK_CONVERSIONS = {
10708 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10709 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
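
# _DISK_CONVERSIONS is a plain dispatch table keyed on the (old, new)
# template pair; unsupported conversions are rejected in CheckPrereq
# simply because the key is absent.  The same pattern in miniature:
_CONVERSIONS = {("plain", "drbd"): lambda: "plain->drbd",
                ("drbd", "plain"): lambda: "drbd->plain"}

def _convert(old, new):
  try:
    fn = _CONVERSIONS[(old, new)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" % (old, new))
  return fn()

assert _convert("plain", "drbd") == "plain->drbd"
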
10713 class LUBackupQuery(NoHooksLU):
10714 """Query the exports list
10719 def ExpandNames(self):
10720 self.needed_locks = {}
10721 self.share_locks[locking.LEVEL_NODE] = 1
10722 if not self.op.nodes:
10723 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10725 self.needed_locks[locking.LEVEL_NODE] = \
10726 _GetWantedNodes(self, self.op.nodes)
10728 def Exec(self, feedback_fn):
10729 """Compute the list of all the exported system images.
10732 @return: a dictionary with the structure node->(export-list)
10733 where export-list is a list of the instances exported on
10734 that node
10737 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10738 rpcresult = self.rpc.call_export_list(self.nodes)
10740 for node in rpcresult:
10741 if rpcresult[node].fail_msg:
10742 result[node] = False
10744 result[node] = rpcresult[node].payload
10749 class LUBackupPrepare(NoHooksLU):
10750 """Prepares an instance for an export and returns useful information.
10755 def ExpandNames(self):
10756 self._ExpandAndLockInstance()
10758 def CheckPrereq(self):
10759 """Check prerequisites.
10762 instance_name = self.op.instance_name
10764 self.instance = self.cfg.GetInstanceInfo(instance_name)
10765 assert self.instance is not None, \
10766 "Cannot retrieve locked instance %s" % self.op.instance_name
10767 _CheckNodeOnline(self, self.instance.primary_node)
10769 self._cds = _GetClusterDomainSecret()
10771 def Exec(self, feedback_fn):
10772 """Prepares an instance for an export.
10775 instance = self.instance
10777 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10778 salt = utils.GenerateSecret(8)
10780 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10781 result = self.rpc.call_x509_cert_create(instance.primary_node,
10782 constants.RIE_CERT_VALIDITY)
10783 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10785 (name, cert_pem) = result.payload
10787 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10791 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10792 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10794 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10800 class LUBackupExport(LogicalUnit):
10801 """Export an instance to an image in the cluster.
10804 HPATH = "instance-export"
10805 HTYPE = constants.HTYPE_INSTANCE
10808 def CheckArguments(self):
10809 """Check the arguments.
10812 self.x509_key_name = self.op.x509_key_name
10813 self.dest_x509_ca_pem = self.op.destination_x509_ca
10815 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10816 if not self.x509_key_name:
10817 raise errors.OpPrereqError("Missing X509 key name for encryption",
10818 errors.ECODE_INVAL)
10820 if not self.dest_x509_ca_pem:
10821 raise errors.OpPrereqError("Missing destination X509 CA",
10822 errors.ECODE_INVAL)
10824 def ExpandNames(self):
10825 self._ExpandAndLockInstance()
10827 # Lock all nodes for local exports
10828 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10829 # FIXME: lock only instance primary and destination node
10831 # Sad but true, for now we have to lock all nodes, as we don't know where
10832 # the previous export might be, and in this LU we search for it and
10833 # remove it from its current node. In the future we could fix this by:
10834 # - making a tasklet to search (share-lock all), then create the
10835 # new one, then another tasklet to remove the old one afterwards
10836 # - removing the removal operation altogether
10837 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10839 def DeclareLocks(self, level):
10840 """Last minute lock declaration."""
10841 # All nodes are locked anyway, so nothing to do here.
10843 def BuildHooksEnv(self):
10844 """Build hooks env.
10846 This will run on the master, primary node and target node.
10850 "EXPORT_MODE": self.op.mode,
10851 "EXPORT_NODE": self.op.target_node,
10852 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10853 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10854 # TODO: Generic function for boolean env variables
10855 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10858 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10862 def BuildHooksNodes(self):
10863 """Build hooks nodes.
10866 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10868 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10869 nl.append(self.op.target_node)
10873 def CheckPrereq(self):
10874 """Check prerequisites.
10876 This checks that the instance and node names are valid.
10879 instance_name = self.op.instance_name
10881 self.instance = self.cfg.GetInstanceInfo(instance_name)
10882 assert self.instance is not None, \
10883 "Cannot retrieve locked instance %s" % self.op.instance_name
10884 _CheckNodeOnline(self, self.instance.primary_node)
10886 if (self.op.remove_instance and self.instance.admin_up and
10887 not self.op.shutdown):
10888 raise errors.OpPrereqError("Can not remove instance without shutting it"
10891 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10892 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10893 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10894 assert self.dst_node is not None
10896 _CheckNodeOnline(self, self.dst_node.name)
10897 _CheckNodeNotDrained(self, self.dst_node.name)
10900 self.dest_disk_info = None
10901 self.dest_x509_ca = None
10903 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10904 self.dst_node = None
10906 if len(self.op.target_node) != len(self.instance.disks):
10907 raise errors.OpPrereqError(("Received destination information for %s"
10908 " disks, but instance %s has %s disks") %
10909 (len(self.op.target_node), instance_name,
10910 len(self.instance.disks)),
10911 errors.ECODE_INVAL)
10913 cds = _GetClusterDomainSecret()
10915 # Check X509 key name
10917 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10918 except (TypeError, ValueError), err:
10919 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10921 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10922 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10923 errors.ECODE_INVAL)
10925 # Load and verify CA
10927 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10928 except OpenSSL.crypto.Error, err:
10929 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10930 (err, ), errors.ECODE_INVAL)
10932 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10933 if errcode is not None:
10934 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10935 (msg, ), errors.ECODE_INVAL)
10937 self.dest_x509_ca = cert
10939 # Verify target information
10941 for idx, disk_data in enumerate(self.op.target_node):
10943 (host, port, magic) = \
10944 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10945 except errors.GenericError, err:
10946 raise errors.OpPrereqError("Target info for disk %s: %s" %
10947 (idx, err), errors.ECODE_INVAL)
10949 disk_info.append((host, port, magic))
10951 assert len(disk_info) == len(self.op.target_node)
10952 self.dest_disk_info = disk_info
10955 raise errors.ProgrammerError("Unhandled export mode %r" %
10958 # instance disk type verification
10959 # TODO: Implement export support for file-based disks
10960 for disk in self.instance.disks:
10961 if disk.dev_type == constants.LD_FILE:
10962 raise errors.OpPrereqError("Export not supported for instances with"
10963 " file-based disks", errors.ECODE_INVAL)
10965 def _CleanupExports(self, feedback_fn):
10966 """Removes exports of current instance from all other nodes.
10968 If an instance in a cluster with nodes A..D was exported to node C, its
10969 exports will be removed from the nodes A, B and D.
10972 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10974 nodelist = self.cfg.GetNodeList()
10975 nodelist.remove(self.dst_node.name)
10977 # on one-node clusters nodelist will be empty after the removal
10978 # if we proceed the backup would be removed because OpBackupQuery
10979 # substitutes an empty list with the full cluster node list.
10980 iname = self.instance.name
10982 feedback_fn("Removing old exports for instance %s" % iname)
10983 exportlist = self.rpc.call_export_list(nodelist)
10984 for node in exportlist:
10985 if exportlist[node].fail_msg:
10987 if iname in exportlist[node].payload:
10988 msg = self.rpc.call_export_remove(node, iname).fail_msg
10990 self.LogWarning("Could not remove older export for instance %s"
10991 " on node %s: %s", iname, node, msg)
10993 def Exec(self, feedback_fn):
10994 """Export an instance to an image in the cluster.
10997 assert self.op.mode in constants.EXPORT_MODES
10999 instance = self.instance
11000 src_node = instance.primary_node
11002 if self.op.shutdown:
11003 # shutdown the instance, but not the disks
11004 feedback_fn("Shutting down instance %s" % instance.name)
11005 result = self.rpc.call_instance_shutdown(src_node, instance,
11006 self.op.shutdown_timeout)
11007 # TODO: Maybe ignore failures if ignore_remove_failures is set
11008 result.Raise("Could not shutdown instance %s on"
11009 " node %s" % (instance.name, src_node))
11011 # set the disks ID correctly since call_instance_start needs the
11012 # correct drbd minor to create the symlinks
11013 for disk in instance.disks:
11014 self.cfg.SetDiskID(disk, src_node)
11016 activate_disks = (not instance.admin_up)
11019 # Activate the instance disks if we're exporting a stopped instance
11020 feedback_fn("Activating disks for %s" % instance.name)
11021 _StartInstanceDisks(self, instance, None)
11024 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11027 helper.CreateSnapshots()
11029 if (self.op.shutdown and instance.admin_up and
11030 not self.op.remove_instance):
11031 assert not activate_disks
11032 feedback_fn("Starting instance %s" % instance.name)
11033 result = self.rpc.call_instance_start(src_node, instance, None, None)
11034 msg = result.fail_msg
11036 feedback_fn("Failed to start instance: %s" % msg)
11037 _ShutdownInstanceDisks(self, instance)
11038 raise errors.OpExecError("Could not start instance: %s" % msg)
11040 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11041 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11042 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11043 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11044 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11046 (key_name, _, _) = self.x509_key_name
11049 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11052 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11053 key_name, dest_ca_pem,
11058 # Check for backwards compatibility
11059 assert len(dresults) == len(instance.disks)
11060 assert compat.all(isinstance(i, bool) for i in dresults), \
11061 "Not all results are boolean: %r" % dresults
11065 feedback_fn("Deactivating disks for %s" % instance.name)
11066 _ShutdownInstanceDisks(self, instance)
11068 if not (compat.all(dresults) and fin_resu):
11071 failures.append("export finalization")
11072 if not compat.all(dresults):
11073 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11075 failures.append("disk export: disk(s) %s" % fdsk)
11077 raise errors.OpExecError("Export failed, errors in %s" %
11078 utils.CommaJoin(failures))
11080 # At this point, the export was successful, we can cleanup/finish
11082 # Remove instance if requested
11083 if self.op.remove_instance:
11084 feedback_fn("Removing instance %s" % instance.name)
11085 _RemoveInstance(self, feedback_fn, instance,
11086 self.op.ignore_remove_failures)
11088 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11089 self._CleanupExports(feedback_fn)
11091 return fin_resu, dresults
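
# Illustrative sketch (not part of the LU): Exec above returns a two-element
# tuple of (export finalization status, per-disk results). For an instance
# with two disks where only the first disk was exported cleanly:
#
#   >>> (fin_resu, dresults) = (True, [True, False])
#   >>> compat.all(dresults) and fin_resu
#   False
#
# in which case the failure branch above raises OpExecError naming disk 1.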


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
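
  # Note on the lock bookkeeping above: judging from the add_locks handling
  # in mcpu (an inference from the surrounding code, not a documented
  # contract), any lock still listed in remove_locks when the LU finishes is
  # removed again. Deleting the entry after a successful AddNodeGroup
  # therefore keeps the newly created group lock; if Exec fails before the
  # del, the lock is cleaned up automatically.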


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
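
# Illustrative sketch for CheckAssignmentForSplitInstances (hypothetical stub
# objects, not real config data): with one DRBD instance mirrored across two
# nodes of the same group, moving a single node into another group splits the
# instance, while moving both nodes together does not:
#
#   >>> import collections
#   >>> Node = collections.namedtuple("Node", "group")
#   >>> nodes = {"n1": Node("g1"), "n2": Node("g1")}
#   >>> Inst = collections.namedtuple(
#   ...   "Inst", "name disk_template primary_node secondary_nodes")
#   >>> insts = {"i1": Inst("i1", constants.DT_DRBD8, "n1", ["n2"])}
#   >>> LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#   ...   [("n1", "g2")], nodes, insts)
#   (['i1'], [])
#   >>> LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#   ...   [("n1", "g2"), ("n2", "g2")], nodes, insts)
#   ([], [])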


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for
    # the latter GetAllInstancesInfo() is not enough, for we have to go
    # through instance->node. Hence, we will need to process nodes even if we
    # only need instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
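
# Illustrative sketch (hypothetical data, not a real query): for a cluster
# with two groups where only "uuid-a" was requested with both GQ_NODE and
# GQ_INST, _GetQueryData builds mappings shaped like:
#
#   group_to_nodes = {"uuid-a": ["node1", "node2"]}
#   group_to_instances = {"uuid-a": ["inst1"]}
#
# Nodes in unrequested groups are skipped entirely, which is why instances
# whose primary node lies outside the requested groups never show up either.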


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
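
# Illustrative sketch (hypothetical tags): a pattern of "^env:" against a
# cluster where instance "web1" carries the tag "env:prod" would yield
# results such as:
#
#   [("/instances/web1", "env:prod")]
#
# Paths follow the fixed prefixes built above: /cluster, /instances/<name>,
# /nodes/<name> and /nodegroup/<name>.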


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.reloc_mode = None
    self.target_groups = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keyset, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self))

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
                       constants.IALLOCATOR_MODE_MRELOC):
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
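
  # Illustrative sketch (hypothetical UUID): _ComputeNodeGroupData returns a
  # mapping keyed by group UUID, e.g.:
  #
  #   {"2f2fadf7-...": {"name": "default", "alloc_policy": "preferred"}}
  #
  # which is serialized verbatim into the "nodegroups" section of the
  # allocator input.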

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
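
  # Illustrative sketch (hypothetical values, not real cluster data): a
  # single-disk DRBD instance would contribute an entry along these lines to
  # the "instances" section of the allocator input:
  #
  #   "inst1": {"tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
  #             "os": "debootstrap", "nodes": ["node1", "node2"],
  #             "nics": [{"mac": "aa:00:00:11:22:33", "ip": None,
  #                       "mode": "bridged", "link": "xen-br0",
  #                       "bridge": "xen-br0"}],
  #             "disks": [{"size": 1024, "mode": "rw"}],
  #             "disk_template": "drbd", "hypervisor": "xen-pvm",
  #             "disk_space_total": 1152}
  #
  # where disk_space_total comes from _ComputeDiskSize and, for DRBD, also
  # accounts for metadata overhead (hence more than the raw 1024 MiB).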

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddMultiRelocate(self):
    """Get data for multi-relocate requests.

    """
    return {
      "instances": self.instances,
      "reloc_mode": self.reloc_mode,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
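
  # Illustrative sketch (hypothetical request, abridged): after
  # _BuildInputData, self.in_data carries the cluster sections computed above
  # plus the mode-specific request, e.g. for a relocation:
  #
  #   {"version": 2,
  #    "cluster_name": "cluster.example.com",
  #    "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #    "request": {"type": "relocate", "name": "inst1",
  #                "disk_space_total": 1152, "required_nodes": 1,
  #                "relocate_from": ["node2"]}}
  #
  # self.in_text is the same structure serialized via serializer.Dump.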

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       ["name", "mem_size", "disks", "disk_template", "os", "tags", "nics",
        "vcpus", "hypervisor"], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance, ["name", "relocate_from"], ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, ["evac_nodes"],
       ht.TListOf(ht.TAnd(ht.TIsLength(2),
                          ht.TListOf(ht.TString)))),
    constants.IALLOCATOR_MODE_MRELOC:
      (_AddMultiRelocate, ["instances", "reloc_mode", "target_groups"],
       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
         # pylint: disable-msg=E1101
         # Class '...' has no 'OP_ID' member
         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                              opcodes.OpInstanceMigrate.OP_ID,
                              opcodes.OpInstanceReplaceDisks.OP_ID])
         })))),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
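
# Illustrative sketch for IAllocator._NodesToGroups (hypothetical data): the
# helper tolerates unknown nodes and falls back to the UUID when the group
# itself is missing from the group map:
#
#   >>> node2group = {"n1": "uuid-a", "n2": "uuid-b"}
#   >>> groups = {"uuid-a": {"name": "default"}}
#   >>> IAllocator._NodesToGroups(node2group, groups, ["n1", "n2", "ghost"])
#   ['default', 'uuid-b']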


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      if self.op.instances:
        self.op.instances = _GetWantedInstances(self, self.op.instances)
      else:
        raise errors.OpPrereqError("Missing instances to relocate",
                                   errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       reloc_mode=self.op.reloc_mode,
                       target_groups=self.op.target_groups)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
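
# Illustrative sketch (hypothetical caller): query LUs resolve the
# implementation for an opcode-supplied resource name like so:
#
#   >>> _GetQueryImplementation(constants.QR_GROUP)
#   <class 'ganeti.cmdlib._GroupQuery'>
#
# Unknown resource names raise OpPrereqError with ECODE_INVAL instead of a
# bare KeyError, keeping the failure reportable to the client.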