4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have way too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
62 import ganeti.masterd.instance # pylint: disable-msg=W0611
65 def _SupportsOob(cfg, node):
66 """Tells if node supports OOB.
68 @type cfg: L{config.ConfigWriter}
69 @param cfg: The cluster configuration
70 @type node: L{objects.Node}
72 @return: The OOB script if supported or an empty string otherwise
75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
134 self.context = context
136 # Dicts used to declare locking needs to mcpu
137 self.needed_locks = None
138 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
140 self.remove_locks = {}
141 # Used to force good behavior when calling helper functions
142 self.recalculate_locks = {}
144 self.Log = processor.Log # pylint: disable-msg=C0103
145 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148 # support for dry-run
149 self.dry_run_result = None
150 # support for generic debug attribute
151 if (not hasattr(self.op, "debug_level") or
152 not isinstance(self.op.debug_level, int)):
153 self.op.debug_level = 0
158 # Validate opcode parameters and set defaults
159 self.op.Validate(True)
161 self.CheckArguments()
163 def CheckArguments(self):
164 """Check syntactic validity for the opcode arguments.
166 This method is for doing a simple syntactic check and ensuring the
167 validity of opcode parameters, without any cluster-related
168 checks. While the same can be accomplished in ExpandNames and/or
169 CheckPrereq, doing these separately is better because:
171 - ExpandNames is left as purely a lock-related function
172 - CheckPrereq is run after we have acquired locks (and possible
175 The function is allowed to change the self.op attribute so that
176 later methods no longer need to worry about missing parameters.
181 def ExpandNames(self):
182 """Expand names for this LU.
184 This method is called before starting to execute the opcode, and it should
185 update all the parameters of the opcode to their canonical form (e.g. a
186 short node name must be fully expanded after this method has successfully
187 completed). This way locking, hooks, logging, etc. can work correctly.
189 LUs which implement this method must also populate the self.needed_locks
190 member, as a dict with lock levels as keys, and a list of needed lock names
193 - use an empty dict if you don't need any lock
194 - if you don't need any lock at a particular level omit that level
195 - don't put anything for the BGL level
196 - if you want all locks at a level use locking.ALL_SET as a value
198 If you need to share locks (rather than acquire them exclusively) at one
199 level you can modify self.share_locks, setting a true value (usually 1) for
200 that level. By default locks are not shared.
202 This function can also define a list of tasklets, which then will be
203 executed in order instead of the usual LU-level CheckPrereq and Exec
204 functions, if those are not defined by the LU.
208 # Acquire all nodes and one instance
209 self.needed_locks = {
210 locking.LEVEL_NODE: locking.ALL_SET,
211 locking.LEVEL_INSTANCE: ['instance1.example.com'],
213 # Acquire just two nodes
214 self.needed_locks = {
215 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218 self.needed_locks = {} # No, you can't leave it to the default value None
221 # The implementation of this method is mandatory only if the new LU is
222 # concurrent, so that old LUs don't need to be changed all at the same
225 self.needed_locks = {} # Exclusive LUs don't need locks.
227 raise NotImplementedError
229 def DeclareLocks(self, level):
230 """Declare LU locking needs for a level
232 While most LUs can just declare their locking needs at ExpandNames time,
233 sometimes there's the need to calculate some locks after having acquired
234 the ones before. This function is called just before acquiring locks at a
235 particular level, but after acquiring the ones at lower levels, and permits
236 such calculations. It can be used to modify self.needed_locks, and by
237 default it does nothing.
239 This function is only called if you have something already set in
240 self.needed_locks for the level.
242 @param level: Locking level which is going to be locked
243 @type level: member of ganeti.locking.LEVELS
247 def CheckPrereq(self):
248 """Check prerequisites for this LU.
250 This method should check that the prerequisites for the execution
251 of this LU are fulfilled. It can do internode communication, but
252 it should be idempotent - no cluster or system changes are
255 The method should raise errors.OpPrereqError in case something is
256 not fulfilled. Its return value is ignored.
258 This method should also update all the parameters of the opcode to
259 their canonical form if it hasn't been done by ExpandNames before.
262 if self.tasklets is not None:
263 for (idx, tl) in enumerate(self.tasklets):
264 logging.debug("Checking prerequisites for tasklet %s/%s",
265 idx + 1, len(self.tasklets))
270 def Exec(self, feedback_fn):
273 This method should implement the actual work. It should raise
274 errors.OpExecError for failures that are somewhat dealt with in
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283 raise NotImplementedError
285 def BuildHooksEnv(self):
286 """Build hooks environment for this LU.
289 @return: Dictionary containing the environment that will be used for
290 running the hooks for this LU. The keys of the dict must not be prefixed
291 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292 will extend the environment with additional variables. If no environment
293 should be defined, an empty dictionary should be returned (not C{None}).
294 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
298 raise NotImplementedError
300 def BuildHooksNodes(self):
301 """Build list of nodes to run LU's hooks.
303 @rtype: tuple; (list, list)
304 @return: Tuple containing a list of node names on which the hook
305 should run before the execution and a list of node names on which the
306 hook should run after the execution. No nodes should be returned as an
307 empty list (and not None).
308 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
312 raise NotImplementedError
314 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315 """Notify the LU about the results of its hooks.
317 This method is called every time a hooks phase is executed, and notifies
318 the Logical Unit about the hooks' result. The LU can then use it to alter
319 its result based on the hooks. By default the method does nothing and the
320 previous result is passed back unchanged but any LU can define it if it
321 wants to use the local cluster hook-scripts somehow.
323 @param phase: one of L{constants.HOOKS_PHASE_POST} or
324 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325 @param hook_results: the results of the multi-node hooks rpc call
326 @param feedback_fn: function used to send feedback back to the caller
327 @param lu_result: the previous Exec result this LU had, or None
329 @return: the new Exec result, based on the previous result
333 # API must be kept, thus we ignore the 'unused argument' and 'could
334 # be a function' warnings
335 # pylint: disable-msg=W0613,R0201
338 def _ExpandAndLockInstance(self):
339 """Helper function to expand and lock an instance.
341 Many LUs that work on an instance take its name in self.op.instance_name
342 and need to expand it and then declare the expanded name for locking. This
343 function does it, and then updates self.op.instance_name to the expanded
344 name. It also initializes needed_locks as a dict, if this hasn't been done
348 if self.needed_locks is None:
349 self.needed_locks = {}
351 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352 "_ExpandAndLockInstance called with instance-level locks set"
353 self.op.instance_name = _ExpandInstanceName(self.cfg,
354 self.op.instance_name)
355 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
357 def _LockInstancesNodes(self, primary_only=False):
358 """Helper function to declare instances' nodes for locking.
360 This function should be called after locking one or more instances to lock
361 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362 with all primary or secondary nodes for instances already locked and
363 present in self.needed_locks[locking.LEVEL_INSTANCE].
365 It should be called from DeclareLocks, and for safety only works if
366 self.recalculate_locks[locking.LEVEL_NODE] is set.
368 In the future it may grow parameters to just lock some instance's nodes, or
369 to just lock primaries or secondary nodes, if needed.
371 It should be called in DeclareLocks in a way similar to::
373 if level == locking.LEVEL_NODE:
374 self._LockInstancesNodes()
376 @type primary_only: boolean
377 @param primary_only: only lock primary nodes of locked instances
380 assert locking.LEVEL_NODE in self.recalculate_locks, \
381 "_LockInstancesNodes helper function called with no nodes to recalculate"
383 # TODO: check if we've really been called with the instance locks held
385 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386 # future we might want to have different behaviors depending on the value
387 # of self.recalculate_locks[locking.LEVEL_NODE]
389 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390 instance = self.context.cfg.GetInstanceInfo(instance_name)
391 wanted_nodes.append(instance.primary_node)
393 wanted_nodes.extend(instance.secondary_nodes)
395 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
400 del self.recalculate_locks[locking.LEVEL_NODE]
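# Illustrative sketch (not part of the original module; the lock constants are
# the real ones, the LU methods shown are hypothetical): a typical instance LU
# combines the two helpers above roughly like this:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()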
403 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404 """Simple LU which runs no hooks.
406 This LU is intended as a parent for other LogicalUnits which will
407 run no hooks, in order to reduce duplicate code.
413 def BuildHooksEnv(self):
414 """Empty BuildHooksEnv for NoHooksLu.
416 This just raises an error.
419 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
421 def BuildHooksNodes(self):
422 """Empty BuildHooksNodes for NoHooksLU.
425 raise AssertionError("BuildHooksNodes called for NoHooksLU")
429 """Tasklet base class.
431 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432 they can mix legacy code with tasklets. Locking needs to be done in the LU;
433 tasklets know nothing about locks.
435 Subclasses must follow these rules:
436 - Implement CheckPrereq
440 def __init__(self, lu):
447 def CheckPrereq(self):
448 """Check prerequisites for this tasklets.
450 This method should check whether the prerequisites for the execution of
451 this tasklet are fulfilled. It can do internode communication, but it
452 should be idempotent - no cluster or system changes are allowed.
454 The method should raise errors.OpPrereqError in case something is not
455 fulfilled. Its return value is ignored.
457 This method should also update all parameters to their canonical form if it
458 hasn't been done before.
463 def Exec(self, feedback_fn):
464 """Execute the tasklet.
466 This method should implement the actual work. It should raise
467 errors.OpExecError for failures that are somewhat dealt with in code, or
471 raise NotImplementedError
475 """Base for query utility classes.
478 #: Attribute holding field definitions
481 def __init__(self, filter_, fields, use_locking):
482 """Initializes this class.
485 self.use_locking = use_locking
487 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
489 self.requested_data = self.query.RequestedData()
490 self.names = self.query.RequestedNames()
492 # Sort only if no names were requested
493 self.sort_by_name = not self.names
495 self.do_locking = None
498 def _GetNames(self, lu, all_names, lock_level):
499 """Helper function to determine names asked for in the query.
503 names = lu.glm.list_owned(lock_level)
507 if self.wanted == locking.ALL_SET:
508 assert not self.names
509 # caller didn't specify names, so ordering is not important
510 return utils.NiceSort(names)
512 # caller specified names and we must keep the same order
514 assert not self.do_locking or lu.glm.is_owned(lock_level)
516 missing = set(self.wanted).difference(names)
518 raise errors.OpExecError("Some items were removed before retrieving"
519 " their data: %s" % missing)
521 # Return expanded names
524 def ExpandNames(self, lu):
525 """Expand names for this query.
527 See L{LogicalUnit.ExpandNames}.
530 raise NotImplementedError()
532 def DeclareLocks(self, lu, level):
533 """Declare locks for this query.
535 See L{LogicalUnit.DeclareLocks}.
538 raise NotImplementedError()
540 def _GetQueryData(self, lu):
541 """Collects all data for this query.
543 @return: Query data object
546 raise NotImplementedError()
548 def NewStyleQuery(self, lu):
549 """Collect data and execute query.
552 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553 sort_by_name=self.sort_by_name)
555 def OldStyleQuery(self, lu):
556 """Collect data and execute query.
559 return self.query.OldStyleQuery(self._GetQueryData(lu),
560 sort_by_name=self.sort_by_name)
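# Usage sketch (assumption, not from the original module): a query-style LU
# typically creates a concrete _QueryBase subclass in ExpandNames and then
# delegates locking and execution to it; _ExampleQuery is a made-up name.
#
#   def ExpandNames(self):
#     self.eq = _ExampleQuery(qlang.MakeSimpleFilter("name", self.op.names),
#                             self.op.output_fields, self.op.use_locking)
#     self.eq.ExpandNames(self)
#
#   def Exec(self, feedback_fn):
#     return self.eq.OldStyleQuery(self)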
563 def _GetWantedNodes(lu, nodes):
564 """Returns list of checked and expanded node names.
566 @type lu: L{LogicalUnit}
567 @param lu: the logical unit on whose behalf we execute
569 @param nodes: list of node names or None for all nodes
571 @return: the list of nodes, sorted
572 @raise errors.ProgrammerError: if the nodes parameter is wrong type
576 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
578 return utils.NiceSort(lu.cfg.GetNodeList())
581 def _GetWantedInstances(lu, instances):
582 """Returns list of checked and expanded instance names.
584 @type lu: L{LogicalUnit}
585 @param lu: the logical unit on whose behalf we execute
586 @type instances: list
587 @param instances: list of instance names or None for all instances
589 @return: the list of instances, sorted
590 @raise errors.OpPrereqError: if the instances parameter is wrong type
591 @raise errors.OpPrereqError: if any of the passed instances is not found
595 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
597 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
601 def _GetUpdatedParams(old_params, update_dict,
602 use_default=True, use_none=False):
603 """Return the new version of a parameter dictionary.
605 @type old_params: dict
606 @param old_params: old parameters
607 @type update_dict: dict
608 @param update_dict: dict containing new parameter values, or
609 constants.VALUE_DEFAULT to reset the parameter to its default
611 @type use_default: boolean
612 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
613 values as 'to be deleted' values
614 @type use_none: boolean
615 @param use_none: whether to recognise C{None} values as 'to be
618 @return: the new parameter dictionary
621 params_copy = copy.deepcopy(old_params)
622 for key, val in update_dict.iteritems():
623 if ((use_default and val == constants.VALUE_DEFAULT) or
624 (use_none and val is None)):
630 params_copy[key] = val
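# Example of the intended behaviour (illustrative keys and values): entries set
# to constants.VALUE_DEFAULT are removed from the copy so the cluster default
# applies again, while the remaining entries simply overwrite the old ones.
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   # -> {"b": 2, "c": 3}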
634 def _ReleaseLocks(lu, level, names=None, keep=None):
635 """Releases locks owned by an LU.
637 @type lu: L{LogicalUnit}
638 @param level: Lock level
639 @type names: list or None
640 @param names: Names of locks to release
641 @type keep: list or None
642 @param keep: Names of locks to retain
645 assert not (keep is not None and names is not None), \
646 "Only one of the 'names' and the 'keep' parameters can be given"
648 if names is not None:
649 should_release = names.__contains__
651 should_release = lambda name: name not in keep
653 should_release = None
659 # Determine which locks to release
660 for name in lu.glm.list_owned(level):
661 if should_release(name):
666 assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
668 # Release just some locks
669 lu.glm.release(level, names=release)
671 assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
674 lu.glm.release(level)
676 assert not lu.glm.is_owned(level), "No locks should be owned"
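# Usage sketch (the opcode field is hypothetical): after narrowing down the
# set of nodes an LU really needs, the surplus node locks can be dropped with
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])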
679 def _RunPostHook(lu, node_name):
680 """Runs the post-hook for an opcode on a single node.
683 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
685 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
687 # pylint: disable-msg=W0702
688 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
691 def _CheckOutputFields(static, dynamic, selected):
692 """Checks whether all selected fields are valid.
694 @type static: L{utils.FieldSet}
695 @param static: static fields set
696 @type dynamic: L{utils.FieldSet}
697 @param dynamic: dynamic fields set
704 delta = f.NonMatching(selected)
706 raise errors.OpPrereqError("Unknown output fields selected: %s"
707 % ",".join(delta), errors.ECODE_INVAL)
710 def _CheckGlobalHvParams(params):
711 """Validates that given hypervisor params are not global ones.
713 This will ensure that instances don't get customised versions of
717 used_globals = constants.HVC_GLOBALS.intersection(params)
719 msg = ("The following hypervisor parameters are global and cannot"
720 " be customized at instance level, please modify them at"
721 " cluster level: %s" % utils.CommaJoin(used_globals))
722 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
725 def _CheckNodeOnline(lu, node, msg=None):
726 """Ensure that a given node is online.
728 @param lu: the LU on behalf of which we make the check
729 @param node: the node to check
730 @param msg: if passed, should be a message to replace the default one
731 @raise errors.OpPrereqError: if the node is offline
735 msg = "Can't use offline node"
736 if lu.cfg.GetNodeInfo(node).offline:
737 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
740 def _CheckNodeNotDrained(lu, node):
741 """Ensure that a given node is not drained.
743 @param lu: the LU on behalf of which we make the check
744 @param node: the node to check
745 @raise errors.OpPrereqError: if the node is drained
748 if lu.cfg.GetNodeInfo(node).drained:
749 raise errors.OpPrereqError("Can't use drained node %s" % node,
753 def _CheckNodeVmCapable(lu, node):
754 """Ensure that a given node is vm capable.
756 @param lu: the LU on behalf of which we make the check
757 @param node: the node to check
758 @raise errors.OpPrereqError: if the node is not vm capable
761 if not lu.cfg.GetNodeInfo(node).vm_capable:
762 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
766 def _CheckNodeHasOS(lu, node, os_name, force_variant):
767 """Ensure that a node supports a given OS.
769 @param lu: the LU on behalf of which we make the check
770 @param node: the node to check
771 @param os_name: the OS to query about
772 @param force_variant: whether to ignore variant errors
773 @raise errors.OpPrereqError: if the node is not supporting the OS
776 result = lu.rpc.call_os_get(node, os_name)
777 result.Raise("OS '%s' not in supported OS list for node %s" %
779 prereq=True, ecode=errors.ECODE_INVAL)
780 if not force_variant:
781 _CheckOSVariant(result.payload, os_name)
784 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785 """Ensure that a node has the given secondary ip.
787 @type lu: L{LogicalUnit}
788 @param lu: the LU on behalf of which we make the check
790 @param node: the node to check
791 @type secondary_ip: string
792 @param secondary_ip: the ip to check
793 @type prereq: boolean
794 @param prereq: whether to throw a prerequisite or an execute error
795 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
799 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800 result.Raise("Failure checking secondary ip on node %s" % node,
801 prereq=prereq, ecode=errors.ECODE_ENVIRON)
802 if not result.payload:
803 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804 " please fix and re-run this command" % secondary_ip)
806 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
808 raise errors.OpExecError(msg)
811 def _GetClusterDomainSecret():
812 """Reads the cluster domain secret.
815 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
819 def _CheckInstanceDown(lu, instance, reason):
820 """Ensure that an instance is not running."""
821 if instance.admin_up:
822 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823 (instance.name, reason), errors.ECODE_STATE)
825 pnode = instance.primary_node
826 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827 ins_l.Raise("Can't contact node %s for instance information" % pnode,
828 prereq=True, ecode=errors.ECODE_ENVIRON)
830 if instance.name in ins_l.payload:
831 raise errors.OpPrereqError("Instance %s is running, %s" %
832 (instance.name, reason), errors.ECODE_STATE)
835 def _ExpandItemName(fn, name, kind):
836 """Expand an item name.
838 @param fn: the function to use for expansion
839 @param name: requested item name
840 @param kind: text description ('Node' or 'Instance')
841 @return: the resolved (full) name
842 @raise errors.OpPrereqError: if the item is not found
846 if full_name is None:
847 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
852 def _ExpandNodeName(cfg, name):
853 """Wrapper over L{_ExpandItemName} for nodes."""
854 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
857 def _ExpandInstanceName(cfg, name):
858 """Wrapper over L{_ExpandItemName} for instance."""
859 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
862 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863 memory, vcpus, nics, disk_template, disks,
864 bep, hvp, hypervisor_name, tags):
865 """Builds instance related env variables for hooks
867 This builds the hook environment from individual variables.
870 @param name: the name of the instance
871 @type primary_node: string
872 @param primary_node: the name of the instance's primary node
873 @type secondary_nodes: list
874 @param secondary_nodes: list of secondary nodes as strings
875 @type os_type: string
876 @param os_type: the name of the instance's OS
877 @type status: boolean
878 @param status: the should_run status of the instance
880 @param memory: the memory size of the instance
882 @param vcpus: the count of VCPUs the instance has
884 @param nics: list of tuples (ip, mac, mode, link) representing
885 the NICs the instance has
886 @type disk_template: string
887 @param disk_template: the disk template of the instance
889 @param disks: the list of (size, mode) pairs
891 @param bep: the backend parameters for the instance
893 @param hvp: the hypervisor parameters for the instance
894 @type hypervisor_name: string
895 @param hypervisor_name: the hypervisor for the instance
897 @param tags: list of instance tags as strings
899 @return: the hook environment for this instance
908 "INSTANCE_NAME": name,
909 "INSTANCE_PRIMARY": primary_node,
910 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
911 "INSTANCE_OS_TYPE": os_type,
912 "INSTANCE_STATUS": str_status,
913 "INSTANCE_MEMORY": memory,
914 "INSTANCE_VCPUS": vcpus,
915 "INSTANCE_DISK_TEMPLATE": disk_template,
916 "INSTANCE_HYPERVISOR": hypervisor_name,
920 nic_count = len(nics)
921 for idx, (ip, mac, mode, link) in enumerate(nics):
924 env["INSTANCE_NIC%d_IP" % idx] = ip
925 env["INSTANCE_NIC%d_MAC" % idx] = mac
926 env["INSTANCE_NIC%d_MODE" % idx] = mode
927 env["INSTANCE_NIC%d_LINK" % idx] = link
928 if mode == constants.NIC_MODE_BRIDGED:
929 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
933 env["INSTANCE_NIC_COUNT"] = nic_count
936 disk_count = len(disks)
937 for idx, (size, mode) in enumerate(disks):
938 env["INSTANCE_DISK%d_SIZE" % idx] = size
939 env["INSTANCE_DISK%d_MODE" % idx] = mode
943 env["INSTANCE_DISK_COUNT"] = disk_count
948 env["INSTANCE_TAGS"] = " ".join(tags)
950 for source, kind in [(bep, "BE"), (hvp, "HV")]:
951 for key, value in source.items():
952 env["INSTANCE_%s_%s" % (kind, key)] = value
957 def _NICListToTuple(lu, nics):
958 """Build a list of nic information tuples.
960 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
961 value in LUInstanceQueryData.
963 @type lu: L{LogicalUnit}
964 @param lu: the logical unit on whose behalf we execute
965 @type nics: list of L{objects.NIC}
966 @param nics: list of nics to convert to hooks tuples
970 cluster = lu.cfg.GetClusterInfo()
974 filled_params = cluster.SimpleFillNIC(nic.nicparams)
975 mode = filled_params[constants.NIC_MODE]
976 link = filled_params[constants.NIC_LINK]
977 hooks_nics.append((ip, mac, mode, link))
981 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
982 """Builds instance related env variables for hooks from an object.
984 @type lu: L{LogicalUnit}
985 @param lu: the logical unit on whose behalf we execute
986 @type instance: L{objects.Instance}
987 @param instance: the instance for which we should build the
990 @param override: dictionary with key/values that will override
993 @return: the hook environment dictionary
996 cluster = lu.cfg.GetClusterInfo()
997 bep = cluster.FillBE(instance)
998 hvp = cluster.FillHV(instance)
1000 'name': instance.name,
1001 'primary_node': instance.primary_node,
1002 'secondary_nodes': instance.secondary_nodes,
1003 'os_type': instance.os,
1004 'status': instance.admin_up,
1005 'memory': bep[constants.BE_MEMORY],
1006 'vcpus': bep[constants.BE_VCPUS],
1007 'nics': _NICListToTuple(lu, instance.nics),
1008 'disk_template': instance.disk_template,
1009 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1012 'hypervisor_name': instance.hypervisor,
1013 'tags': instance.tags,
1016 args.update(override)
1017 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1020 def _AdjustCandidatePool(lu, exceptions):
1021 """Adjust the candidate pool after node operations.
1024 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1026 lu.LogInfo("Promoted nodes to master candidate role: %s",
1027 utils.CommaJoin(node.name for node in mod_list))
1028 for name in mod_list:
1029 lu.context.ReaddNode(name)
1030 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1032 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1036 def _DecideSelfPromotion(lu, exceptions=None):
1037 """Decide whether I should promote myself as a master candidate.
1040 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042 # the new node will increase mc_max with one, so:
1043 mc_should = min(mc_should + 1, cp_size)
1044 return mc_now < mc_should
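# Worked example (numbers made up): with candidate_pool_size=10, 4 current
# master candidates and 5 desired, the new node raises the target to
# min(5 + 1, 10) = 6, so 4 < 6 holds and the node promotes itself.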
1047 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1048 """Check that the brigdes needed by a list of nics exist.
1051 cluster = lu.cfg.GetClusterInfo()
1052 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1053 brlist = [params[constants.NIC_LINK] for params in paramslist
1054 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1056 result = lu.rpc.call_bridges_exist(target_node, brlist)
1057 result.Raise("Error checking bridges on destination node '%s'" %
1058 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1061 def _CheckInstanceBridgesExist(lu, instance, node=None):
1062 """Check that the brigdes needed by an instance exist.
1066 node = instance.primary_node
1067 _CheckNicsBridgesExist(lu, instance.nics, node)
1070 def _CheckOSVariant(os_obj, name):
1071 """Check whether an OS name conforms to the os variants specification.
1073 @type os_obj: L{objects.OS}
1074 @param os_obj: OS object to check
1076 @param name: OS name passed by the user, to check for validity
1079 if not os_obj.supported_variants:
1081 variant = objects.OS.GetVariant(name)
1083 raise errors.OpPrereqError("OS name must include a variant",
1086 if variant not in os_obj.supported_variants:
1087 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1090 def _GetNodeInstancesInner(cfg, fn):
1091 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1094 def _GetNodeInstances(cfg, node_name):
1095 """Returns a list of all primary and secondary instances on a node.
1099 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1102 def _GetNodePrimaryInstances(cfg, node_name):
1103 """Returns primary instances on a node.
1106 return _GetNodeInstancesInner(cfg,
1107 lambda inst: node_name == inst.primary_node)
1110 def _GetNodeSecondaryInstances(cfg, node_name):
1111 """Returns secondary instances on a node.
1114 return _GetNodeInstancesInner(cfg,
1115 lambda inst: node_name in inst.secondary_nodes)
1118 def _GetStorageTypeArgs(cfg, storage_type):
1119 """Returns the arguments for a storage type.
1122 # Special case for file storage
1123 if storage_type == constants.ST_FILE:
1124 # storage.FileStorage wants a list of storage directories
1125 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1130 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1133 for dev in instance.disks:
1134 cfg.SetDiskID(dev, node_name)
1136 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1137 result.Raise("Failed to get disk status from node %s" % node_name,
1138 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1140 for idx, bdev_status in enumerate(result.payload):
1141 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1147 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1148 """Check the sanity of iallocator and node arguments and use the
1149 cluster-wide iallocator if appropriate.
1151 Check that at most one of (iallocator, node) is specified. If none is
1152 specified, then the LU's opcode's iallocator slot is filled with the
1153 cluster-wide default iallocator.
1155 @type iallocator_slot: string
1156 @param iallocator_slot: the name of the opcode iallocator slot
1157 @type node_slot: string
1158 @param node_slot: the name of the opcode target node slot
1161 node = getattr(lu.op, node_slot, None)
1162 iallocator = getattr(lu.op, iallocator_slot, None)
1164 if node is not None and iallocator is not None:
1165 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1167 elif node is None and iallocator is None:
1168 default_iallocator = lu.cfg.GetDefaultIAllocator()
1169 if default_iallocator:
1170 setattr(lu.op, iallocator_slot, default_iallocator)
1172 raise errors.OpPrereqError("No iallocator or node given and no"
1173 " cluster-wide default iallocator found;"
1174 " please specify either an iallocator or a"
1175 " node, or set a cluster-wide default"
1179 class LUClusterPostInit(LogicalUnit):
1180 """Logical unit for running hooks after cluster initialization.
1183 HPATH = "cluster-init"
1184 HTYPE = constants.HTYPE_CLUSTER
1186 def BuildHooksEnv(self):
1191 "OP_TARGET": self.cfg.GetClusterName(),
1194 def BuildHooksNodes(self):
1195 """Build hooks nodes.
1198 return ([], [self.cfg.GetMasterNode()])
1200 def Exec(self, feedback_fn):
1207 class LUClusterDestroy(LogicalUnit):
1208 """Logical unit for destroying the cluster.
1211 HPATH = "cluster-destroy"
1212 HTYPE = constants.HTYPE_CLUSTER
1214 def BuildHooksEnv(self):
1219 "OP_TARGET": self.cfg.GetClusterName(),
1222 def BuildHooksNodes(self):
1223 """Build hooks nodes.
1228 def CheckPrereq(self):
1229 """Check prerequisites.
1231 This checks whether the cluster is empty.
1233 Any errors are signaled by raising errors.OpPrereqError.
1236 master = self.cfg.GetMasterNode()
1238 nodelist = self.cfg.GetNodeList()
1239 if len(nodelist) != 1 or nodelist[0] != master:
1240 raise errors.OpPrereqError("There are still %d node(s) in"
1241 " this cluster." % (len(nodelist) - 1),
1243 instancelist = self.cfg.GetInstanceList()
1245 raise errors.OpPrereqError("There are still %d instance(s) in"
1246 " this cluster." % len(instancelist),
1249 def Exec(self, feedback_fn):
1250 """Destroys the cluster.
1253 master = self.cfg.GetMasterNode()
1255 # Run post hooks on master node before it's removed
1256 _RunPostHook(self, master)
1258 result = self.rpc.call_node_stop_master(master, False)
1259 result.Raise("Could not disable the master role")
1264 def _VerifyCertificate(filename):
1265 """Verifies a certificate for L{LUClusterVerifyConfig}.
1267 @type filename: string
1268 @param filename: Path to PEM file
1272 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1273 utils.ReadFile(filename))
1274 except Exception, err: # pylint: disable-msg=W0703
1275 return (LUClusterVerifyConfig.ETYPE_ERROR,
1276 "Failed to load X509 certificate %s: %s" % (filename, err))
1279 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1280 constants.SSL_CERT_EXPIRATION_ERROR)
1283 fnamemsg = "While verifying %s: %s" % (filename, msg)
1288 return (None, fnamemsg)
1289 elif errcode == utils.CERT_WARNING:
1290 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1291 elif errcode == utils.CERT_ERROR:
1292 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1294 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1297 def _GetAllHypervisorParameters(cluster, instances):
1298 """Compute the set of all hypervisor parameters.
1300 @type cluster: L{objects.Cluster}
1301 @param cluster: the cluster object
1302 @type instances: list of L{objects.Instance}
1303 @param instances: additional instances from which to obtain parameters
1304 @rtype: list of (origin, hypervisor, parameters)
1305 @return: a list with all parameters found, indicating the hypervisor they
1306 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1311 for hv_name in cluster.enabled_hypervisors:
1312 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1314 for os_name, os_hvp in cluster.os_hvp.items():
1315 for hv_name, hv_params in os_hvp.items():
1317 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1318 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1320 # TODO: collapse identical parameter values in a single one
1321 for instance in instances:
1322 if instance.hvparams:
1323 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1324 cluster.FillHV(instance)))
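# Illustrative shape of the accumulated list (hypervisor names and parameter
# dicts are made up):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-image", "xen-pvm", {...}),
#    ("instance web1.example.com", "kvm", {...})]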
1329 class _VerifyErrors(object):
1330 """Mix-in for cluster/group verify LUs.
1332 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1333 self.op and self._feedback_fn to be available.)
1336 TCLUSTER = "cluster"
1338 TINSTANCE = "instance"
1340 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1341 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1342 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1343 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1344 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1345 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1346 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1347 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1348 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1349 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1350 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1351 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1352 ENODEDRBD = (TNODE, "ENODEDRBD")
1353 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1354 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1355 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1356 ENODEHV = (TNODE, "ENODEHV")
1357 ENODELVM = (TNODE, "ENODELVM")
1358 ENODEN1 = (TNODE, "ENODEN1")
1359 ENODENET = (TNODE, "ENODENET")
1360 ENODEOS = (TNODE, "ENODEOS")
1361 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1362 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1363 ENODERPC = (TNODE, "ENODERPC")
1364 ENODESSH = (TNODE, "ENODESSH")
1365 ENODEVERSION = (TNODE, "ENODEVERSION")
1366 ENODESETUP = (TNODE, "ENODESETUP")
1367 ENODETIME = (TNODE, "ENODETIME")
1368 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1370 ETYPE_FIELD = "code"
1371 ETYPE_ERROR = "ERROR"
1372 ETYPE_WARNING = "WARNING"
1374 def _Error(self, ecode, item, msg, *args, **kwargs):
1375 """Format an error message.
1377 Based on the opcode's error_codes parameter, either format a
1378 parseable error code, or a simpler error string.
1380 This must be called only from Exec and functions called from Exec.
1383 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1385 # first complete the msg
1388 # then format the whole message
1389 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1390 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1396 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397 # and finally report it via the feedback_fn
1398 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1400 def _ErrorIf(self, cond, *args, **kwargs):
1401 """Log an error message if the passed condition is True.
1405 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1407 self._Error(*args, **kwargs)
1408 # do not mark the operation as failed for WARN cases only
1409 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410 self.bad = self.bad or cond
1413 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414 """Verifies the cluster config.
1419 def _VerifyHVP(self, hvp_data):
1420 """Verifies locally the syntax of the hypervisor parameters.
1423 for item, hv_name, hv_params in hvp_data:
1424 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1427 hv_class = hypervisor.GetHypervisor(hv_name)
1428 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429 hv_class.CheckParameterSyntax(hv_params)
1430 except errors.GenericError, err:
1431 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1433 def ExpandNames(self):
1434 # Information can be safely retrieved as the BGL is acquired in exclusive
1436 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1437 self.all_node_info = self.cfg.GetAllNodesInfo()
1438 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1439 self.needed_locks = {}
1441 def Exec(self, feedback_fn):
1442 """Verify integrity of cluster, performing various test on nodes.
1446 self._feedback_fn = feedback_fn
1448 feedback_fn("* Verifying cluster config")
1450 for msg in self.cfg.VerifyConfig():
1451 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1453 feedback_fn("* Verifying cluster certificate files")
1455 for cert_filename in constants.ALL_CERT_FILES:
1456 (errcode, msg) = _VerifyCertificate(cert_filename)
1457 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1459 feedback_fn("* Verifying hypervisor parameters")
1461 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1462 self.all_inst_info.values()))
1464 feedback_fn("* Verifying all nodes belong to an existing group")
1466 # We do this verification here because, should this bogus circumstance
1467 # occur, it would never be caught by VerifyGroup, which only acts on
1468 # nodes/instances reachable from existing node groups.
1470 dangling_nodes = set(node.name for node in self.all_node_info.values()
1471 if node.group not in self.all_group_info)
1473 dangling_instances = {}
1474 no_node_instances = []
1476 for inst in self.all_inst_info.values():
1477 if inst.primary_node in dangling_nodes:
1478 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1479 elif inst.primary_node not in self.all_node_info:
1480 no_node_instances.append(inst.name)
1485 utils.CommaJoin(dangling_instances.get(node.name,
1487 for node in dangling_nodes]
1489 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1490 "the following nodes (and their instances) belong to a non"
1491 " existing group: %s", utils.CommaJoin(pretty_dangling))
1493 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1494 "the following instances have a non-existing primary-node:"
1495 " %s", utils.CommaJoin(no_node_instances))
1497 return (not self.bad, [g.name for g in self.all_group_info.values()])
1500 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1501 """Verifies the status of a node group.
1504 HPATH = "cluster-verify"
1505 HTYPE = constants.HTYPE_CLUSTER
1508 _HOOKS_INDENT_RE = re.compile("^", re.M)
1510 class NodeImage(object):
1511 """A class representing the logical and physical status of a node.
1514 @ivar name: the node name to which this object refers
1515 @ivar volumes: a structure as returned from
1516 L{ganeti.backend.GetVolumeList} (runtime)
1517 @ivar instances: a list of running instances (runtime)
1518 @ivar pinst: list of configured primary instances (config)
1519 @ivar sinst: list of configured secondary instances (config)
1520 @ivar sbp: dictionary of {primary-node: list of instances} for all
1521 instances for which this node is secondary (config)
1522 @ivar mfree: free memory, as reported by hypervisor (runtime)
1523 @ivar dfree: free disk, as reported by the node (runtime)
1524 @ivar offline: the offline status (config)
1525 @type rpc_fail: boolean
1526 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1527 not whether the individual keys were correct) (runtime)
1528 @type lvm_fail: boolean
1529 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1530 @type hyp_fail: boolean
1531 @ivar hyp_fail: whether the RPC call didn't return the instance list
1532 @type ghost: boolean
1533 @ivar ghost: whether this is a known node or not (config)
1534 @type os_fail: boolean
1535 @ivar os_fail: whether the RPC call didn't return valid OS data
1537 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1538 @type vm_capable: boolean
1539 @ivar vm_capable: whether the node can host instances
1542 def __init__(self, offline=False, name=None, vm_capable=True):
1551 self.offline = offline
1552 self.vm_capable = vm_capable
1553 self.rpc_fail = False
1554 self.lvm_fail = False
1555 self.hyp_fail = False
1557 self.os_fail = False
1560 def ExpandNames(self):
1561 # This raises errors.OpPrereqError on its own:
1562 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1564 # Get instances in node group; this is unsafe and needs verification later
1565 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1567 self.needed_locks = {
1568 locking.LEVEL_INSTANCE: inst_names,
1569 locking.LEVEL_NODEGROUP: [self.group_uuid],
1570 locking.LEVEL_NODE: [],
1573 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1575 def DeclareLocks(self, level):
1576 if level == locking.LEVEL_NODE:
1577 # Get members of node group; this is unsafe and needs verification later
1578 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1580 all_inst_info = self.cfg.GetAllInstancesInfo()
1582 # In Exec(), we warn about mirrored instances that have primary and
1583 # secondary living in separate node groups. To fully verify that
1584 # volumes for these instances are healthy, we will need to do an
1585 # extra call to their secondaries. We ensure here those nodes will
1587 for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1588 # Important: access only the instances whose lock is owned
1589 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1590 nodes.update(all_inst_info[inst].secondary_nodes)
1592 self.needed_locks[locking.LEVEL_NODE] = nodes
1594 def CheckPrereq(self):
1595 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1596 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1599 group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1601 unlocked_instances = \
1602 group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1605 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1606 utils.CommaJoin(unlocked_nodes))
1608 if unlocked_instances:
1609 raise errors.OpPrereqError("Missing lock for instances: %s" %
1610 utils.CommaJoin(unlocked_instances))
1612 self.all_node_info = self.cfg.GetAllNodesInfo()
1613 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1615 self.my_node_names = utils.NiceSort(group_nodes)
1616 self.my_inst_names = utils.NiceSort(group_instances)
1618 self.my_node_info = dict((name, self.all_node_info[name])
1619 for name in self.my_node_names)
1621 self.my_inst_info = dict((name, self.all_inst_info[name])
1622 for name in self.my_inst_names)
1624 # We detect here the nodes that will need the extra RPC calls for verifying
1625 # split LV volumes; they should be locked.
1626 extra_lv_nodes = set()
1628 for inst in self.my_inst_info.values():
1629 if inst.disk_template in constants.DTS_INT_MIRROR:
1630 group = self.my_node_info[inst.primary_node].group
1631 for nname in inst.secondary_nodes:
1632 if self.all_node_info[nname].group != group:
1633 extra_lv_nodes.add(nname)
1635 unlocked_lv_nodes = \
1636 extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1638 if unlocked_lv_nodes:
1639 raise errors.OpPrereqError("these nodes could be locked: %s" %
1640 utils.CommaJoin(unlocked_lv_nodes))
1641 self.extra_lv_nodes = list(extra_lv_nodes)
1643 def _VerifyNode(self, ninfo, nresult):
1644 """Perform some basic validation on data returned from a node.
1646 - check the result data structure is well formed and has all the
1648 - check ganeti version
1650 @type ninfo: L{objects.Node}
1651 @param ninfo: the node to check
1652 @param nresult: the results from the node
1654 @return: whether overall this call was successful (and we can expect
1655 reasonable values in the response)
1659 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661 # main result, nresult should be a non-empty dict
1662 test = not nresult or not isinstance(nresult, dict)
1663 _ErrorIf(test, self.ENODERPC, node,
1664 "unable to verify node: no data returned")
1668 # compares ganeti version
1669 local_version = constants.PROTOCOL_VERSION
1670 remote_version = nresult.get("version", None)
1671 test = not (remote_version and
1672 isinstance(remote_version, (list, tuple)) and
1673 len(remote_version) == 2)
1674 _ErrorIf(test, self.ENODERPC, node,
1675 "connection to node returned invalid data")
1679 test = local_version != remote_version[0]
1680 _ErrorIf(test, self.ENODEVERSION, node,
1681 "incompatible protocol versions: master %s,"
1682 " node %s", local_version, remote_version[0])
1686 # node seems compatible, we can actually try to look into its results
1688 # full package version
1689 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1690 self.ENODEVERSION, node,
1691 "software version mismatch: master %s, node %s",
1692 constants.RELEASE_VERSION, remote_version[1],
1693 code=self.ETYPE_WARNING)
1695 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1696 if ninfo.vm_capable and isinstance(hyp_result, dict):
1697 for hv_name, hv_result in hyp_result.iteritems():
1698 test = hv_result is not None
1699 _ErrorIf(test, self.ENODEHV, node,
1700 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1702 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1703 if ninfo.vm_capable and isinstance(hvp_result, list):
1704 for item, hv_name, hv_result in hvp_result:
1705 _ErrorIf(True, self.ENODEHV, node,
1706 "hypervisor %s parameter verify failure (source %s): %s",
1707 hv_name, item, hv_result)
1709 test = nresult.get(constants.NV_NODESETUP,
1710 ["Missing NODESETUP results"])
1711 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1716 def _VerifyNodeTime(self, ninfo, nresult,
1717 nvinfo_starttime, nvinfo_endtime):
1718 """Check the node time.
1720 @type ninfo: L{objects.Node}
1721 @param ninfo: the node to check
1722 @param nresult: the remote results for the node
1723 @param nvinfo_starttime: the start time of the RPC call
1724 @param nvinfo_endtime: the end time of the RPC call
1728 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1730 ntime = nresult.get(constants.NV_TIME, None)
1732 ntime_merged = utils.MergeTime(ntime)
1733 except (ValueError, TypeError):
1734 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1737 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1738 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1739 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1740 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1744 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1745 "Node time diverges by at least %s from master node time",
1748 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1749 """Check the node LVM results.
1751 @type ninfo: L{objects.Node}
1752 @param ninfo: the node to check
1753 @param nresult: the remote results for the node
1754 @param vg_name: the configured VG name
1761 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1763 # checks vg existence and size > 20G
1764 vglist = nresult.get(constants.NV_VGLIST, None)
1766 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1768 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1769 constants.MIN_VG_SIZE)
1770 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1773 pvlist = nresult.get(constants.NV_PVLIST, None)
1774 test = pvlist is None
1775 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1777 # check that ':' is not present in PV names, since it's a
1778 # special character for lvcreate (denotes the range of PEs to
1780 for _, pvname, owner_vg in pvlist:
1781 test = ":" in pvname
1782 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1783 " '%s' of VG '%s'", pvname, owner_vg)
1785 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1786 """Check the node bridges.
1788 @type ninfo: L{objects.Node}
1789 @param ninfo: the node to check
1790 @param nresult: the remote results for the node
1791 @param bridges: the expected list of bridges
1798 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1800 missing = nresult.get(constants.NV_BRIDGES, None)
1801 test = not isinstance(missing, list)
1802 _ErrorIf(test, self.ENODENET, node,
1803 "did not return valid bridge information")
1805 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1806 utils.CommaJoin(sorted(missing)))
1808 def _VerifyNodeNetwork(self, ninfo, nresult):
1809 """Check the node network connectivity results.
1811 @type ninfo: L{objects.Node}
1812 @param ninfo: the node to check
1813 @param nresult: the remote results for the node
1817 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1819 test = constants.NV_NODELIST not in nresult
1820 _ErrorIf(test, self.ENODESSH, node,
1821 "node hasn't returned node ssh connectivity data")
1823 if nresult[constants.NV_NODELIST]:
1824 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1825 _ErrorIf(True, self.ENODESSH, node,
1826 "ssh communication with node '%s': %s", a_node, a_msg)
1828 test = constants.NV_NODENETTEST not in nresult
1829 _ErrorIf(test, self.ENODENET, node,
1830 "node hasn't returned node tcp connectivity data")
1832 if nresult[constants.NV_NODENETTEST]:
1833 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1835 _ErrorIf(True, self.ENODENET, node,
1836 "tcp communication with node '%s': %s",
1837 anode, nresult[constants.NV_NODENETTEST][anode])
1839 test = constants.NV_MASTERIP not in nresult
1840 _ErrorIf(test, self.ENODENET, node,
1841 "node hasn't returned node master IP reachability data")
1843 if not nresult[constants.NV_MASTERIP]:
1844 if node == self.master_node:
1845 msg = "the master node cannot reach the master IP (not configured?)"
1847 msg = "cannot reach the master IP"
1848 _ErrorIf(True, self.ENODENET, node, msg)
1850 def _VerifyInstance(self, instance, instanceconfig, node_image,
1852 """Verify an instance.
1854 This function checks to see if the required block devices are
1855 available on the instance's node.
1858 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1859 node_current = instanceconfig.primary_node
1861 node_vol_should = {}
1862 instanceconfig.MapLVsByNode(node_vol_should)
1864 for node in node_vol_should:
1865 n_img = node_image[node]
1866 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1867 # ignore missing volumes on offline or broken nodes
1869 for volume in node_vol_should[node]:
1870 test = volume not in n_img.volumes
1871 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1872 "volume %s missing on node %s", volume, node)
1874 if instanceconfig.admin_up:
1875 pri_img = node_image[node_current]
1876 test = instance not in pri_img.instances and not pri_img.offline
1877 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1878 "instance not running on its primary node %s",
1881 diskdata = [(nname, success, status, idx)
1882 for (nname, disks) in diskstatus.items()
1883 for idx, (success, status) in enumerate(disks)]
1885 for nname, success, bdev_status, idx in diskdata:
1886 # the 'ghost node' construction in Exec() ensures that we have a
1888 snode = node_image[nname]
1889 bad_snode = snode.ghost or snode.offline
1890 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1891 self.EINSTANCEFAULTYDISK, instance,
1892 "couldn't retrieve status for disk/%s on %s: %s",
1893 idx, nname, bdev_status)
1894 _ErrorIf((instanceconfig.admin_up and success and
1895 bdev_status.ldisk_status == constants.LDS_FAULTY),
1896 self.EINSTANCEFAULTYDISK, instance,
1897 "disk/%s on %s is faulty", idx, nname)
1899 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1900 """Verify if there are any unknown volumes in the cluster.
1902 The .os, .swap and backup volumes are ignored. All other volumes are
1903 reported as unknown.
1905 @type reserved: L{ganeti.utils.FieldSet}
1906 @param reserved: a FieldSet of reserved volume names
1909 for node, n_img in node_image.items():
1910 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1911 # skip non-healthy nodes
1913 for volume in n_img.volumes:
1914 test = ((node not in node_vol_should or
1915 volume not in node_vol_should[node]) and
1916 not reserved.Matches(volume))
1917 self._ErrorIf(test, self.ENODEORPHANLV, node,
1918 "volume %s is unknown", volume)
1920 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1921 """Verify N+1 Memory Resilience.
1923 Check that if one single node dies we can still start all the
1924 instances it was primary for.
1927 cluster_info = self.cfg.GetClusterInfo()
1928 for node, n_img in node_image.items():
1929 # This code checks that every node which is now listed as
1930 # secondary has enough memory to host all instances it is
1931 # supposed to, should a single other node in the cluster fail.
1932 # FIXME: not ready for failover to an arbitrary node
1933 # FIXME: does not support file-backed instances
1934 # WARNING: we currently take into account down instances as well
1935 # as up ones, considering that even if they're down someone
1936 # might want to start them even in the event of a node failure.
1938 # we're skipping offline nodes from the N+1 warning, since
1939 # most likely we don't have good memory information from them;
1940 # we already list instances living on such nodes, and that's enough
1943 for prinode, instances in n_img.sbp.items():
1945 for instance in instances:
1946 bep = cluster_info.FillBE(instance_cfg[instance])
1947 if bep[constants.BE_AUTO_BALANCE]:
1948 needed_mem += bep[constants.BE_MEMORY]
1949 test = n_img.mfree < needed_mem
1950 self._ErrorIf(test, self.ENODEN1, node,
1951 "not enough memory to accomodate instance failovers"
1952 " should node %s fail (%dMiB needed, %dMiB available)",
1953 prinode, needed_mem, n_img.mfree)
1956 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1957 (files_all, files_all_opt, files_mc, files_vm)):
1958 """Verifies file checksums collected from all nodes.
1960 @param errorif: Callback for reporting errors
1961 @param nodeinfo: List of L{objects.Node} objects
1962 @param master_node: Name of master node
1963 @param all_nvinfo: RPC results
1966 node_names = frozenset(node.name for node in nodeinfo)
1968 assert master_node in node_names
1969 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1970 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1971 "Found file listed in more than one file list"
1973 # Define functions determining which nodes to consider for a file
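# (a value of None means the file is checked on every node)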
1974 file2nodefn = dict([(filename, fn)
1975 for (files, fn) in [(files_all, None),
1976 (files_all_opt, None),
1977 (files_mc, lambda node: (node.master_candidate or
1978 node.name == master_node)),
1979 (files_vm, lambda node: node.vm_capable)]
1980 for filename in files])
1982 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1984 for node in nodeinfo:
1985 nresult = all_nvinfo[node.name]
1987 if nresult.fail_msg or not nresult.payload:
1990 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1992 test = not (node_files and isinstance(node_files, dict))
1993 errorif(test, cls.ENODEFILECHECK, node.name,
1994 "Node did not return file checksum data")
1998 for (filename, checksum) in node_files.items():
1999 # Check if the file should be considered for a node
2000 fn = file2nodefn[filename]
2001 if fn is None or fn(node):
2002 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2004 for (filename, checksums) in fileinfo.items():
2005 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2007 # Nodes having the file
2008 with_file = frozenset(node_name
2009 for nodes in fileinfo[filename].values()
2010 for node_name in nodes)
2012 # Nodes missing file
2013 missing_file = node_names - with_file
2015 if filename in files_all_opt:
2017 errorif(missing_file and missing_file != node_names,
2018 cls.ECLUSTERFILECHECK, None,
2019 "File %s is optional, but it must exist on all or no"
2020 " nodes (not found on %s)",
2021 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2023 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2024 "File %s is missing from node(s) %s", filename,
2025 utils.CommaJoin(utils.NiceSort(missing_file)))
2027 # See if there are multiple versions of the file
2028 test = len(checksums) > 1
2030 variants = ["variant %s on %s" %
2031 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2032 for (idx, (checksum, nodes)) in
2033 enumerate(sorted(checksums.items()))]
2037 errorif(test, cls.ECLUSTERFILECHECK, None,
2038 "File %s found with %s different checksums (%s)",
2039 filename, len(checksums), "; ".join(variants))
2041 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2043 """Verifies and the node DRBD status.
2045 @type ninfo: L{objects.Node}
2046 @param ninfo: the node to check
2047 @param nresult: the remote results for the node
2048 @param instanceinfo: the dict of instances
2049 @param drbd_helper: the configured DRBD usermode helper
2050 @param drbd_map: the DRBD map as returned by
2051 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2055 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2058 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2059 test = (helper_result is None)
2060 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2061 "no drbd usermode helper returned")
2063 status, payload = helper_result
2065 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2066 "drbd usermode helper check unsuccessful: %s", payload)
2067 test = status and (payload != drbd_helper)
2068 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2069 "wrong drbd usermode helper: %s", payload)
2071 # compute the DRBD minors
2073 for minor, instance in drbd_map[node].items():
2074 test = instance not in instanceinfo
2075 _ErrorIf(test, self.ECLUSTERCFG, None,
2076 "ghost instance '%s' in temporary DRBD map", instance)
2077 # ghost instance should not be running, but otherwise we
2078 # don't give double warnings (both ghost instance and
2079 # unallocated minor in use)
2081 node_drbd[minor] = (instance, False)
2083 instance = instanceinfo[instance]
2084 node_drbd[minor] = (instance.name, instance.admin_up)
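# node_drbd now maps each expected minor to (instance name, whether it must be active)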
2086 # and now check them
2087 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2088 test = not isinstance(used_minors, (tuple, list))
2089 _ErrorIf(test, self.ENODEDRBD, node,
2090 "cannot parse drbd status file: %s", str(used_minors))
2092 # we cannot check drbd status
2095 for minor, (iname, must_exist) in node_drbd.items():
2096 test = minor not in used_minors and must_exist
2097 _ErrorIf(test, self.ENODEDRBD, node,
2098 "drbd minor %d of instance %s is not active", minor, iname)
2099 for minor in used_minors:
2100 test = minor not in node_drbd
2101 _ErrorIf(test, self.ENODEDRBD, node,
2102 "unallocated drbd minor %d is in use", minor)
2104 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2105 """Builds the node OS structures.
2107 @type ninfo: L{objects.Node}
2108 @param ninfo: the node to check
2109 @param nresult: the remote results for the node
2110 @param nimg: the node image object
2114 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2116 remote_os = nresult.get(constants.NV_OSLIST, None)
2117 test = (not isinstance(remote_os, list) or
2118 not compat.all(isinstance(v, list) and len(v) == 7
2119 for v in remote_os))
2121 _ErrorIf(test, self.ENODEOS, node,
2122 "node hasn't returned valid OS data")
2131 for (name, os_path, status, diagnose,
2132 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2134 if name not in os_dict:
2137 # parameters is a list of lists instead of list of tuples due to
2138 # JSON lacking a real tuple type, fix it:
2139 parameters = [tuple(v) for v in parameters]
2140 os_dict[name].append((os_path, status, diagnose,
2141 set(variants), set(parameters), set(api_ver)))
2143 nimg.oslist = os_dict
2145 def _VerifyNodeOS(self, ninfo, nimg, base):
2146 """Verifies the node OS list.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nimg: the node image object
2151 @param base: the 'template' node we match against (e.g. from the master)
2155 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2157 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
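# render (key, value) parameter pairs as "key: value" strings for the comparisons below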
2159 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2160 for os_name, os_data in nimg.oslist.items():
2161 assert os_data, "Empty OS status for OS %s?!" % os_name
2162 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2163 _ErrorIf(not f_status, self.ENODEOS, node,
2164 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2165 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2166 "OS '%s' has multiple entries (first one shadows the rest): %s",
2167 os_name, utils.CommaJoin([v[0] for v in os_data]))
2168 # this will be caught in the backend too
2169 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2170 and not f_var, self.ENODEOS, node,
2171 "OS %s with API at least %d does not declare any variant",
2172 os_name, constants.OS_API_V15)
2173 # comparisons with the 'base' image
2174 test = os_name not in base.oslist
2175 _ErrorIf(test, self.ENODEOS, node,
2176 "Extra OS %s not present on reference node (%s)",
2180 assert base.oslist[os_name], "Base node has empty OS status?"
2181 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2183 # base OS is invalid, skipping
2185 for kind, a, b in [("API version", f_api, b_api),
2186 ("variants list", f_var, b_var),
2187 ("parameters", beautify_params(f_param),
2188 beautify_params(b_param))]:
2189 _ErrorIf(a != b, self.ENODEOS, node,
2190 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2191 kind, os_name, base.name,
2192 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2194 # check any missing OSes
2195 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2196 _ErrorIf(missing, self.ENODEOS, node,
2197 "OSes present on reference node %s but missing on this node: %s",
2198 base.name, utils.CommaJoin(missing))
2200 def _VerifyOob(self, ninfo, nresult):
2201 """Verifies out of band functionality of a node.
2203 @type ninfo: L{objects.Node}
2204 @param ninfo: the node to check
2205 @param nresult: the remote results for the node
2209 # We just have to verify the paths on master and/or master candidates
2210 # as the oob helper is invoked on the master
2211 if ((ninfo.master_candidate or ninfo.master_capable) and
2212 constants.NV_OOB_PATHS in nresult):
2213 for path_result in nresult[constants.NV_OOB_PATHS]:
2214 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2216 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2217 """Verifies and updates the node volume data.
2219 This function will update a L{NodeImage}'s internal structures
2220 with data from the remote call.
2222 @type ninfo: L{objects.Node}
2223 @param ninfo: the node to check
2224 @param nresult: the remote results for the node
2225 @param nimg: the node image object
2226 @param vg_name: the configured VG name
2230 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2232 nimg.lvm_fail = True
2233 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2236 elif isinstance(lvdata, basestring):
2237 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2238 utils.SafeEncode(lvdata))
2239 elif not isinstance(lvdata, dict):
2240 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2242 nimg.volumes = lvdata
2243 nimg.lvm_fail = False
2245 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2246 """Verifies and updates the node instance list.
2248 If the listing was successful, then updates this node's instance
2249 list. Otherwise, it marks the RPC call as failed for the instance list.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param nimg: the node image object
2258 idata = nresult.get(constants.NV_INSTANCELIST, None)
2259 test = not isinstance(idata, list)
2260 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2261 " (instancelist): %s", utils.SafeEncode(str(idata)))
2263 nimg.hyp_fail = True
2265 nimg.instances = idata
2267 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2268 """Verifies and computes a node information map
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nimg: the node image object
2274 @param vg_name: the configured VG name
2278 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2280 # try to read free memory (from the hypervisor)
2281 hv_info = nresult.get(constants.NV_HVINFO, None)
2282 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2283 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2286 nimg.mfree = int(hv_info["memory_free"])
2287 except (ValueError, TypeError):
2288 _ErrorIf(True, self.ENODERPC, node,
2289 "node returned invalid nodeinfo, check hypervisor")
2291 # FIXME: devise a free space model for file based instances as well
2292 if vg_name is not None:
2293 test = (constants.NV_VGLIST not in nresult or
2294 vg_name not in nresult[constants.NV_VGLIST])
2295 _ErrorIf(test, self.ENODELVM, node,
2296 "node didn't return data for the volume group '%s'"
2297 " - it is either missing or broken", vg_name)
2300 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2301 except (ValueError, TypeError):
2302 _ErrorIf(True, self.ENODERPC, node,
2303 "node returned invalid LVM info, check LVM status")
2305 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2306 """Gets per-disk status information for all instances.
2308 @type nodelist: list of strings
2309 @param nodelist: Node names
2310 @type node_image: dict of (name, L{objects.Node})
2311 @param node_image: Node objects
2312 @type instanceinfo: dict of (name, L{objects.Instance})
2313 @param instanceinfo: Instance objects
2314 @rtype: {instance: {node: [(success, payload)]}}
2315 @return: a dictionary of per-instance dictionaries with nodes as
2316 keys and disk information as values; the disk information is a
2317 list of tuples (success, payload)
2320 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2323 node_disks_devonly = {}
2324 diskless_instances = set()
2325 diskless = constants.DT_DISKLESS
2327 for nname in nodelist:
2328 node_instances = list(itertools.chain(node_image[nname].pinst,
2329 node_image[nname].sinst))
2330 diskless_instances.update(inst for inst in node_instances
2331 if instanceinfo[inst].disk_template == diskless)
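# pair each disk on this node with its owning instance so per-disk results can be matched back later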
2332 disks = [(inst, disk)
2333 for inst in node_instances
2334 for disk in instanceinfo[inst].disks]
2337 # No need to collect data
2340 node_disks[nname] = disks
2342 # Creating copies as SetDiskID below will modify the objects and that can
2343 # lead to incorrect data returned from nodes
2344 devonly = [dev.Copy() for (_, dev) in disks]
2347 self.cfg.SetDiskID(dev, nname)
2349 node_disks_devonly[nname] = devonly
2351 assert len(node_disks) == len(node_disks_devonly)
2353 # Collect data from all nodes with disks
2354 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2357 assert len(result) == len(node_disks)
2361 for (nname, nres) in result.items():
2362 disks = node_disks[nname]
2365 # No data from this node
2366 data = len(disks) * [(False, "node offline")]
2369 _ErrorIf(msg, self.ENODERPC, nname,
2370 "while getting disk information: %s", msg)
2372 # No data from this node
2373 data = len(disks) * [(False, msg)]
2376 for idx, i in enumerate(nres.payload):
2377 if isinstance(i, (tuple, list)) and len(i) == 2:
2380 logging.warning("Invalid result from node %s, entry %d: %s",
2382 data.append((False, "Invalid result from the remote node"))
2384 for ((inst, _), status) in zip(disks, data):
2385 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2387 # Add empty entries for diskless instances.
2388 for inst in diskless_instances:
2389 assert inst not in instdisk
2392 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2393 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2394 compat.all(isinstance(s, (tuple, list)) and
2395 len(s) == 2 for s in statuses)
2396 for inst, nnames in instdisk.items()
2397 for nname, statuses in nnames.items())
2398 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2402 def BuildHooksEnv(self):
2405 Cluster-Verify hooks run only in the post phase; if they fail, their
2406 output is logged in the verify output and the verification fails.
2410 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2413 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2414 for node in self.my_node_info.values())
2418 def BuildHooksNodes(self):
2419 """Build hooks nodes.
2422 return ([], self.my_node_names)
2424 def Exec(self, feedback_fn):
2425 """Verify integrity of the node group, performing various test on nodes.
2428 # This method has too many local variables. pylint: disable-msg=R0914
2430 if not self.my_node_names:
2432 feedback_fn("* Empty node group, skipping verification")
2436 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2437 verbose = self.op.verbose
2438 self._feedback_fn = feedback_fn
2440 vg_name = self.cfg.GetVGName()
2441 drbd_helper = self.cfg.GetDRBDHelper()
2442 cluster = self.cfg.GetClusterInfo()
2443 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2444 hypervisors = cluster.enabled_hypervisors
2445 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2447 i_non_redundant = [] # Non redundant instances
2448 i_non_a_balanced = [] # Non auto-balanced instances
2449 n_offline = 0 # Count of offline nodes
2450 n_drained = 0 # Count of nodes being drained
2451 node_vol_should = {}
2453 # FIXME: verify OS list
2456 filemap = _ComputeAncillaryFiles(cluster, False)
2458 # do local checksums
2459 master_node = self.master_node = self.cfg.GetMasterNode()
2460 master_ip = self.cfg.GetMasterIP()
2462 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2464 # We will make nodes contact all nodes in their group, and one node from
2465 # every other group.
2466 # TODO: should it be a *random* node, different every time?
2467 online_nodes = [node.name for node in node_data_list if not node.offline]
2468 other_group_nodes = {}
2470 for name in sorted(self.all_node_info):
2471 node = self.all_node_info[name]
2472 if (node.group not in other_group_nodes
2473 and node.group != self.group_uuid
2474 and not node.offline):
2475 other_group_nodes[node.group] = node.name
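# node_verify_param describes the set of checks each node runs for the node_verify RPC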
2477 node_verify_param = {
2478 constants.NV_FILELIST:
2479 utils.UniqueSequence(filename
2480 for files in filemap
2481 for filename in files),
2482 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2483 constants.NV_HYPERVISOR: hypervisors,
2484 constants.NV_HVPARAMS:
2485 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2486 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2487 for node in node_data_list
2488 if not node.offline],
2489 constants.NV_INSTANCELIST: hypervisors,
2490 constants.NV_VERSION: None,
2491 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2492 constants.NV_NODESETUP: None,
2493 constants.NV_TIME: None,
2494 constants.NV_MASTERIP: (master_node, master_ip),
2495 constants.NV_OSLIST: None,
2496 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2499 if vg_name is not None:
2500 node_verify_param[constants.NV_VGLIST] = None
2501 node_verify_param[constants.NV_LVLIST] = vg_name
2502 node_verify_param[constants.NV_PVLIST] = [vg_name]
2503 node_verify_param[constants.NV_DRBDLIST] = None
2506 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2509 # FIXME: this needs to be changed per node-group, not cluster-wide
2511 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2512 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2513 bridges.add(default_nicpp[constants.NIC_LINK])
2514 for instance in self.my_inst_info.values():
2515 for nic in instance.nics:
2516 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2517 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2518 bridges.add(full_nic[constants.NIC_LINK])
2521 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2523 # Build our expected cluster state
2524 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2526 vm_capable=node.vm_capable))
2527 for node in node_data_list)
2531 for node in self.all_node_info.values():
2532 path = _SupportsOob(self.cfg, node)
2533 if path and path not in oob_paths:
2534 oob_paths.append(path)
2537 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2539 for instance in self.my_inst_names:
2540 inst_config = self.my_inst_info[instance]
2542 for nname in inst_config.all_nodes:
2543 if nname not in node_image:
2544 gnode = self.NodeImage(name=nname)
2545 gnode.ghost = (nname not in self.all_node_info)
2546 node_image[nname] = gnode
2548 inst_config.MapLVsByNode(node_vol_should)
2550 pnode = inst_config.primary_node
2551 node_image[pnode].pinst.append(instance)
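# on each secondary node, record the instance and group it by its primary node (sbp)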
2553 for snode in inst_config.secondary_nodes:
2554 nimg = node_image[snode]
2555 nimg.sinst.append(instance)
2556 if pnode not in nimg.sbp:
2557 nimg.sbp[pnode] = []
2558 nimg.sbp[pnode].append(instance)
2560 # At this point, we have the in-memory data structures complete,
2561 # except for the runtime information, which we'll gather next
2563 # Due to the way our RPC system works, exact response times cannot be
2564 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2565 # time before and after executing the request, we can at least have a time window.
2567 nvinfo_starttime = time.time()
2568 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2570 self.cfg.GetClusterName())
2571 nvinfo_endtime = time.time()
2573 if self.extra_lv_nodes and vg_name is not None:
2575 self.rpc.call_node_verify(self.extra_lv_nodes,
2576 {constants.NV_LVLIST: vg_name},
2577 self.cfg.GetClusterName())
2579 extra_lv_nvinfo = {}
2581 all_drbd_map = self.cfg.ComputeDRBDMap()
2583 feedback_fn("* Gathering disk information (%s nodes)" %
2584 len(self.my_node_names))
2585 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2588 feedback_fn("* Verifying configuration file consistency")
2590 # If not all nodes are being checked, we need to make sure the master node
2591 # and a non-checked vm_capable node are in the list.
2592 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2594 vf_nvinfo = all_nvinfo.copy()
2595 vf_node_info = list(self.my_node_info.values())
2596 additional_nodes = []
2597 if master_node not in self.my_node_info:
2598 additional_nodes.append(master_node)
2599 vf_node_info.append(self.all_node_info[master_node])
2600 # Add the first vm_capable node we find which is not included
2601 for node in absent_nodes:
2602 nodeinfo = self.all_node_info[node]
2603 if nodeinfo.vm_capable and not nodeinfo.offline:
2604 additional_nodes.append(node)
2605 vf_node_info.append(self.all_node_info[node])
2607 key = constants.NV_FILELIST
2608 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2609 {key: node_verify_param[key]},
2610 self.cfg.GetClusterName()))
2612 vf_nvinfo = all_nvinfo
2613 vf_node_info = self.my_node_info.values()
2615 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2617 feedback_fn("* Verifying node status")
2621 for node_i in node_data_list:
2623 nimg = node_image[node]
2627 feedback_fn("* Skipping offline node %s" % (node,))
2631 if node == master_node:
2633 elif node_i.master_candidate:
2634 ntype = "master candidate"
2635 elif node_i.drained:
2641 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2643 msg = all_nvinfo[node].fail_msg
2644 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2646 nimg.rpc_fail = True
2649 nresult = all_nvinfo[node].payload
2651 nimg.call_ok = self._VerifyNode(node_i, nresult)
2652 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2653 self._VerifyNodeNetwork(node_i, nresult)
2654 self._VerifyOob(node_i, nresult)
2657 self._VerifyNodeLVM(node_i, nresult, vg_name)
2658 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2661 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2662 self._UpdateNodeInstances(node_i, nresult, nimg)
2663 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2664 self._UpdateNodeOS(node_i, nresult, nimg)
2666 if not nimg.os_fail:
2667 if refos_img is None:
2669 self._VerifyNodeOS(node_i, nimg, refos_img)
2670 self._VerifyNodeBridges(node_i, nresult, bridges)
2672 # Check whether all running instances are primary for the node. (This
2673 # can no longer be done from _VerifyInstance below, since some of the
2674 # wrong instances could be from other node groups.)
2675 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2677 for inst in non_primary_inst:
2678 test = inst in self.all_inst_info
2679 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2680 "instance should not run on node %s", node_i.name)
2681 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2682 "node is running unknown instance %s", inst)
2684 for node, result in extra_lv_nvinfo.items():
2685 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2686 node_image[node], vg_name)
2688 feedback_fn("* Verifying instance status")
2689 for instance in self.my_inst_names:
2691 feedback_fn("* Verifying instance %s" % instance)
2692 inst_config = self.my_inst_info[instance]
2693 self._VerifyInstance(instance, inst_config, node_image,
2695 inst_nodes_offline = []
2697 pnode = inst_config.primary_node
2698 pnode_img = node_image[pnode]
2699 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2700 self.ENODERPC, pnode, "instance %s, connection to"
2701 " primary node failed", instance)
2703 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2704 self.EINSTANCEBADNODE, instance,
2705 "instance is marked as running and lives on offline node %s",
2706 inst_config.primary_node)
2708 # If the instance is non-redundant we cannot survive losing its primary
2709 # node, so we are not N+1 compliant. On the other hand we have no disk
2710 # templates with more than one secondary, so that situation is not well handled.
2712 # FIXME: does not support file-backed instances
2713 if not inst_config.secondary_nodes:
2714 i_non_redundant.append(instance)
2716 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2717 instance, "instance has multiple secondary nodes: %s",
2718 utils.CommaJoin(inst_config.secondary_nodes),
2719 code=self.ETYPE_WARNING)
2721 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2722 pnode = inst_config.primary_node
2723 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2724 instance_groups = {}
2726 for node in instance_nodes:
2727 instance_groups.setdefault(self.all_node_info[node].group,
2731 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2732 # Sort so that we always list the primary node first.
2733 for group, nodes in sorted(instance_groups.items(),
2734 key=lambda (_, nodes): pnode in nodes,
2737 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2738 instance, "instance has primary and secondary nodes in"
2739 " different groups: %s", utils.CommaJoin(pretty_list),
2740 code=self.ETYPE_WARNING)
2742 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2743 i_non_a_balanced.append(instance)
2745 for snode in inst_config.secondary_nodes:
2746 s_img = node_image[snode]
2747 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2748 "instance %s, connection to secondary node failed", instance)
2751 inst_nodes_offline.append(snode)
2753 # warn that the instance lives on offline nodes
2754 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2755 "instance has offline secondary node(s) %s",
2756 utils.CommaJoin(inst_nodes_offline))
2757 # ... or ghost/non-vm_capable nodes
2758 for node in inst_config.all_nodes:
2759 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2760 "instance lives on ghost node %s", node)
2761 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2762 instance, "instance lives on non-vm_capable node %s", node)
2764 feedback_fn("* Verifying orphan volumes")
2765 reserved = utils.FieldSet(*cluster.reserved_lvs)
2767 # We will get spurious "unknown volume" warnings if any node of this group
2768 # is secondary for an instance whose primary is in another group. To avoid
2769 # them, we find these instances and add their volumes to node_vol_should.
2770 for inst in self.all_inst_info.values():
2771 for secondary in inst.secondary_nodes:
2772 if (secondary in self.my_node_info
2773 and inst.name not in self.my_inst_info):
2774 inst.MapLVsByNode(node_vol_should)
2777 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2779 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2780 feedback_fn("* Verifying N+1 Memory redundancy")
2781 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2783 feedback_fn("* Other Notes")
2785 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2786 % len(i_non_redundant))
2788 if i_non_a_balanced:
2789 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2790 % len(i_non_a_balanced))
2793 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2796 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2800 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2801 """Analyze the post-hooks' result
2803 This method analyses the hook result, handles it, and sends some
2804 nicely-formatted feedback back to the user.
2806 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2807 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2808 @param hooks_results: the results of the multi-node hooks rpc call
2809 @param feedback_fn: function used to send feedback back to the caller
2810 @param lu_result: previous Exec result
2811 @return: the new Exec result, based on the previous result
2815 # We only really run POST phase hooks, only for non-empty groups,
2816 # and are only interested in their results
2817 if not self.my_node_names:
2820 elif phase == constants.HOOKS_PHASE_POST:
2821 # Used to change hooks' output to proper indentation
2822 feedback_fn("* Hooks Results")
2823 assert hooks_results, "invalid result from hooks"
2825 for node_name in hooks_results:
2826 res = hooks_results[node_name]
2828 test = msg and not res.offline
2829 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2830 "Communication failure in hooks execution: %s", msg)
2831 if res.offline or msg:
2832 # No need to investigate payload if node is offline or gave an error.
2833 # manually override lu_result here, as _ErrorIf only
2834 # overrides self.bad
2837 for script, hkr, output in res.payload:
2838 test = hkr == constants.HKR_FAIL
2839 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2840 "Script %s failed, output:", script)
2842 output = self._HOOKS_INDENT_RE.sub(' ', output)
2843 feedback_fn("%s" % output)
2849 class LUClusterVerifyDisks(NoHooksLU):
2850 """Verifies the cluster disks status.
2855 def ExpandNames(self):
2856 self.needed_locks = {
2857 locking.LEVEL_NODE: locking.ALL_SET,
2858 locking.LEVEL_INSTANCE: locking.ALL_SET,
2860 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2862 def Exec(self, feedback_fn):
2863 """Verify integrity of cluster disks.
2865 @rtype: tuple of three items
2866 @return: a tuple of (dict of node-to-node_error, list of instances
2867 which need activate-disks, dict of instance: (node, volume) for
2871 result = res_nodes, res_instances, res_missing = {}, [], {}
2873 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2874 instances = self.cfg.GetAllInstancesInfo().values()
2877 for inst in instances:
2879 if not inst.admin_up:
2881 inst.MapLVsByNode(inst_lvs)
2882 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2883 for node, vol_list in inst_lvs.iteritems():
2884 for vol in vol_list:
2885 nv_dict[(node, vol)] = inst
2890 node_lvs = self.rpc.call_lv_list(nodes, [])
2891 for node, node_res in node_lvs.items():
2892 if node_res.offline:
2894 msg = node_res.fail_msg
2896 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2897 res_nodes[node] = msg
2900 lvs = node_res.payload
2901 for lv_name, (_, _, lv_online) in lvs.items():
2902 inst = nv_dict.pop((node, lv_name), None)
2903 if (not lv_online and inst is not None
2904 and inst.name not in res_instances):
2905 res_instances.append(inst.name)
2907 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2909 for key, inst in nv_dict.iteritems():
2910 if inst.name not in res_missing:
2911 res_missing[inst.name] = []
2912 res_missing[inst.name].append(key)
2917 class LUClusterRepairDiskSizes(NoHooksLU):
2918 """Verifies the cluster disks sizes.
2923 def ExpandNames(self):
2924 if self.op.instances:
2925 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2926 self.needed_locks = {
2927 locking.LEVEL_NODE: [],
2928 locking.LEVEL_INSTANCE: self.wanted_names,
2930 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2932 self.wanted_names = None
2933 self.needed_locks = {
2934 locking.LEVEL_NODE: locking.ALL_SET,
2935 locking.LEVEL_INSTANCE: locking.ALL_SET,
2937 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2939 def DeclareLocks(self, level):
2940 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2941 self._LockInstancesNodes(primary_only=True)
2943 def CheckPrereq(self):
2944 """Check prerequisites.
2946 This only checks the optional instance list against the existing names.
2949 if self.wanted_names is None:
2950 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2952 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2953 in self.wanted_names]
2955 def _EnsureChildSizes(self, disk):
2956 """Ensure children of the disk have the needed disk size.
2958 This is valid mainly for DRBD8 and fixes an issue where the
2959 children have a smaller disk size.
2961 @param disk: an L{ganeti.objects.Disk} object
2964 if disk.dev_type == constants.LD_DRBD8:
2965 assert disk.children, "Empty children for DRBD8?"
2966 fchild = disk.children[0]
2967 mismatch = fchild.size < disk.size
2969 self.LogInfo("Child disk has size %d, parent %d, fixing",
2970 fchild.size, disk.size)
2971 fchild.size = disk.size
2973 # and we recurse on this child only, not on the metadev
2974 return self._EnsureChildSizes(fchild) or mismatch
2978 def Exec(self, feedback_fn):
2979 """Verify the size of cluster disks.
2982 # TODO: check child disks too
2983 # TODO: check differences in size between primary/secondary nodes
2985 for instance in self.wanted_instances:
2986 pnode = instance.primary_node
2987 if pnode not in per_node_disks:
2988 per_node_disks[pnode] = []
2989 for idx, disk in enumerate(instance.disks):
2990 per_node_disks[pnode].append((instance, idx, disk))
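# query each primary node for the current sizes of the disks it hosts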
2993 for node, dskl in per_node_disks.items():
2994 newl = [v[2].Copy() for v in dskl]
2996 self.cfg.SetDiskID(dsk, node)
2997 result = self.rpc.call_blockdev_getsize(node, newl)
2999 self.LogWarning("Failure in blockdev_getsize call to node"
3000 " %s, ignoring", node)
3002 if len(result.payload) != len(dskl):
3003 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3004 " result.payload=%s", node, len(dskl), result.payload)
3005 self.LogWarning("Invalid result from node %s, ignoring node results",
3008 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3010 self.LogWarning("Disk %d of instance %s did not return size"
3011 " information, ignoring", idx, instance.name)
3013 if not isinstance(size, (int, long)):
3014 self.LogWarning("Disk %d of instance %s did not return valid"
3015 " size information, ignoring", idx, instance.name)
3018 if size != disk.size:
3019 self.LogInfo("Disk %d of instance %s has mismatched size,"
3020 " correcting: recorded %d, actual %d", idx,
3021 instance.name, disk.size, size)
3023 self.cfg.Update(instance, feedback_fn)
3024 changed.append((instance.name, idx, size))
3025 if self._EnsureChildSizes(disk):
3026 self.cfg.Update(instance, feedback_fn)
3027 changed.append((instance.name, idx, disk.size))
3031 class LUClusterRename(LogicalUnit):
3032 """Rename the cluster.
3035 HPATH = "cluster-rename"
3036 HTYPE = constants.HTYPE_CLUSTER
3038 def BuildHooksEnv(self):
3043 "OP_TARGET": self.cfg.GetClusterName(),
3044 "NEW_NAME": self.op.name,
3047 def BuildHooksNodes(self):
3048 """Build hooks nodes.
3051 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3053 def CheckPrereq(self):
3054 """Verify that the passed name is a valid one.
3057 hostname = netutils.GetHostname(name=self.op.name,
3058 family=self.cfg.GetPrimaryIPFamily())
3060 new_name = hostname.name
3061 self.ip = new_ip = hostname.ip
3062 old_name = self.cfg.GetClusterName()
3063 old_ip = self.cfg.GetMasterIP()
3064 if new_name == old_name and new_ip == old_ip:
3065 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3066 " cluster has changed",
3068 if new_ip != old_ip:
3069 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3070 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3071 " reachable on the network" %
3072 new_ip, errors.ECODE_NOTUNIQUE)
3074 self.op.name = new_name
3076 def Exec(self, feedback_fn):
3077 """Rename the cluster.
3080 clustername = self.op.name
3083 # shutdown the master IP
3084 master = self.cfg.GetMasterNode()
3085 result = self.rpc.call_node_stop_master(master, False)
3086 result.Raise("Could not disable the master role")
3089 cluster = self.cfg.GetClusterInfo()
3090 cluster.cluster_name = clustername
3091 cluster.master_ip = ip
3092 self.cfg.Update(cluster, feedback_fn)
3094 # update the known hosts file
3095 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3096 node_list = self.cfg.GetOnlineNodeList()
3098 node_list.remove(master)
3101 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3103 result = self.rpc.call_node_start_master(master, False, False)
3104 msg = result.fail_msg
3106 self.LogWarning("Could not re-enable the master role on"
3107 " the master, please restart manually: %s", msg)
3112 class LUClusterSetParams(LogicalUnit):
3113 """Change the parameters of the cluster.
3116 HPATH = "cluster-modify"
3117 HTYPE = constants.HTYPE_CLUSTER
3120 def CheckArguments(self):
3124 if self.op.uid_pool:
3125 uidpool.CheckUidPool(self.op.uid_pool)
3127 if self.op.add_uids:
3128 uidpool.CheckUidPool(self.op.add_uids)
3130 if self.op.remove_uids:
3131 uidpool.CheckUidPool(self.op.remove_uids)
3133 def ExpandNames(self):
3134 # FIXME: in the future maybe other cluster params won't require checking on
3135 # all nodes to be modified.
3136 self.needed_locks = {
3137 locking.LEVEL_NODE: locking.ALL_SET,
3139 self.share_locks[locking.LEVEL_NODE] = 1
3141 def BuildHooksEnv(self):
3146 "OP_TARGET": self.cfg.GetClusterName(),
3147 "NEW_VG_NAME": self.op.vg_name,
3150 def BuildHooksNodes(self):
3151 """Build hooks nodes.
3154 mn = self.cfg.GetMasterNode()
3157 def CheckPrereq(self):
3158 """Check prerequisites.
3160 This checks whether the given parameters don't conflict and
3161 whether the given volume group is valid.
3164 if self.op.vg_name is not None and not self.op.vg_name:
3165 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3166 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3167 " instances exist", errors.ECODE_INVAL)
3169 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3170 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3171 raise errors.OpPrereqError("Cannot disable drbd helper while"
3172 " drbd-based instances exist",
3175 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3177 # if vg_name not None, checks given volume group on all nodes
3179 vglist = self.rpc.call_vg_list(node_list)
3180 for node in node_list:
3181 msg = vglist[node].fail_msg
3183 # ignoring down node
3184 self.LogWarning("Error while gathering data on node %s"
3185 " (ignoring node): %s", node, msg)
3187 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3189 constants.MIN_VG_SIZE)
3191 raise errors.OpPrereqError("Error on node '%s': %s" %
3192 (node, vgstatus), errors.ECODE_ENVIRON)
3194 if self.op.drbd_helper:
3195 # checks given drbd helper on all nodes
3196 helpers = self.rpc.call_drbd_helper(node_list)
3197 for node in node_list:
3198 ninfo = self.cfg.GetNodeInfo(node)
3200 self.LogInfo("Not checking drbd helper on offline node %s", node)
3202 msg = helpers[node].fail_msg
3204 raise errors.OpPrereqError("Error checking drbd helper on node"
3205 " '%s': %s" % (node, msg),
3206 errors.ECODE_ENVIRON)
3207 node_helper = helpers[node].payload
3208 if node_helper != self.op.drbd_helper:
3209 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3210 (node, node_helper), errors.ECODE_ENVIRON)
3212 self.cluster = cluster = self.cfg.GetClusterInfo()
3213 # validate params changes
3214 if self.op.beparams:
3215 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3216 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3218 if self.op.ndparams:
3219 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3220 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3222 # TODO: we need a more general way to handle resetting
3223 # cluster-level parameters to default values
3224 if self.new_ndparams["oob_program"] == "":
3225 self.new_ndparams["oob_program"] = \
3226 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3228 if self.op.nicparams:
3229 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3230 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3231 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3234 # check all instances for consistency
3235 for instance in self.cfg.GetAllInstancesInfo().values():
3236 for nic_idx, nic in enumerate(instance.nics):
3237 params_copy = copy.deepcopy(nic.nicparams)
3238 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3240 # check parameter syntax
3242 objects.NIC.CheckParameterSyntax(params_filled)
3243 except errors.ConfigurationError, err:
3244 nic_errors.append("Instance %s, nic/%d: %s" %
3245 (instance.name, nic_idx, err))
3247 # if we're moving instances to routed, check that they have an ip
3248 target_mode = params_filled[constants.NIC_MODE]
3249 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3250 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3251 " address" % (instance.name, nic_idx))
3253 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3254 "\n".join(nic_errors))
3256 # hypervisor list/parameters
3257 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3258 if self.op.hvparams:
3259 for hv_name, hv_dict in self.op.hvparams.items():
3260 if hv_name not in self.new_hvparams:
3261 self.new_hvparams[hv_name] = hv_dict
3263 self.new_hvparams[hv_name].update(hv_dict)
3265 # os hypervisor parameters
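# (per-OS settings are merged on top of the existing ones rather than replaced wholesale)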
3266 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3268 for os_name, hvs in self.op.os_hvp.items():
3269 if os_name not in self.new_os_hvp:
3270 self.new_os_hvp[os_name] = hvs
3272 for hv_name, hv_dict in hvs.items():
3273 if hv_name not in self.new_os_hvp[os_name]:
3274 self.new_os_hvp[os_name][hv_name] = hv_dict
3276 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3279 self.new_osp = objects.FillDict(cluster.osparams, {})
3280 if self.op.osparams:
3281 for os_name, osp in self.op.osparams.items():
3282 if os_name not in self.new_osp:
3283 self.new_osp[os_name] = {}
3285 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3288 if not self.new_osp[os_name]:
3289 # we removed all parameters
3290 del self.new_osp[os_name]
3292 # check the parameter validity (remote check)
3293 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3294 os_name, self.new_osp[os_name])
3296 # changes to the hypervisor list
3297 if self.op.enabled_hypervisors is not None:
3298 self.hv_list = self.op.enabled_hypervisors
3299 for hv in self.hv_list:
3300 # if the hypervisor doesn't already exist in the cluster
3301 # hvparams, we initialize it to empty, and then (in both
3302 # cases) we make sure to fill the defaults, as we might not
3303 # have a complete defaults list if the hypervisor wasn't enabled before
3305 if hv not in new_hvp:
3307 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3308 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3310 self.hv_list = cluster.enabled_hypervisors
3312 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3313 # either the enabled list has changed, or the parameters have, validate
3314 for hv_name, hv_params in self.new_hvparams.items():
3315 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3316 (self.op.enabled_hypervisors and
3317 hv_name in self.op.enabled_hypervisors)):
3318 # either this is a new hypervisor, or its parameters have changed
3319 hv_class = hypervisor.GetHypervisor(hv_name)
3320 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3321 hv_class.CheckParameterSyntax(hv_params)
3322 _CheckHVParams(self, node_list, hv_name, hv_params)
3325 # no need to check any newly-enabled hypervisors, since the
3326 # defaults have already been checked in the above code-block
3327 for os_name, os_hvp in self.new_os_hvp.items():
3328 for hv_name, hv_params in os_hvp.items():
3329 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3330 # we need to fill in the new os_hvp on top of the actual hv_p
3331 cluster_defaults = self.new_hvparams.get(hv_name, {})
3332 new_osp = objects.FillDict(cluster_defaults, hv_params)
3333 hv_class = hypervisor.GetHypervisor(hv_name)
3334 hv_class.CheckParameterSyntax(new_osp)
3335 _CheckHVParams(self, node_list, hv_name, new_osp)
3337 if self.op.default_iallocator:
3338 alloc_script = utils.FindFile(self.op.default_iallocator,
3339 constants.IALLOCATOR_SEARCH_PATH,
3341 if alloc_script is None:
3342 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3343 " specified" % self.op.default_iallocator,
3346 def Exec(self, feedback_fn):
3347 """Change the parameters of the cluster.
3350 if self.op.vg_name is not None:
3351 new_volume = self.op.vg_name
3354 if new_volume != self.cfg.GetVGName():
3355 self.cfg.SetVGName(new_volume)
3357 feedback_fn("Cluster LVM configuration already in desired"
3358 " state, not changing")
3359 if self.op.drbd_helper is not None:
3360 new_helper = self.op.drbd_helper
3363 if new_helper != self.cfg.GetDRBDHelper():
3364 self.cfg.SetDRBDHelper(new_helper)
3366 feedback_fn("Cluster DRBD helper already in desired state,"
3368 if self.op.hvparams:
3369 self.cluster.hvparams = self.new_hvparams
3371 self.cluster.os_hvp = self.new_os_hvp
3372 if self.op.enabled_hypervisors is not None:
3373 self.cluster.hvparams = self.new_hvparams
3374 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3375 if self.op.beparams:
3376 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3377 if self.op.nicparams:
3378 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3379 if self.op.osparams:
3380 self.cluster.osparams = self.new_osp
3381 if self.op.ndparams:
3382 self.cluster.ndparams = self.new_ndparams
3384 if self.op.candidate_pool_size is not None:
3385 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3386 # we need to update the pool size here, otherwise the save will fail
3387 _AdjustCandidatePool(self, [])
3389 if self.op.maintain_node_health is not None:
3390 self.cluster.maintain_node_health = self.op.maintain_node_health
3392 if self.op.prealloc_wipe_disks is not None:
3393 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3395 if self.op.add_uids is not None:
3396 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3398 if self.op.remove_uids is not None:
3399 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3401 if self.op.uid_pool is not None:
3402 self.cluster.uid_pool = self.op.uid_pool
3404 if self.op.default_iallocator is not None:
3405 self.cluster.default_iallocator = self.op.default_iallocator
3407 if self.op.reserved_lvs is not None:
3408 self.cluster.reserved_lvs = self.op.reserved_lvs
3410 def helper_os(aname, mods, desc):
3412 lst = getattr(self.cluster, aname)
3413 for key, val in mods:
3414 if key == constants.DDM_ADD:
3416 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3419 elif key == constants.DDM_REMOVE:
3423 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3425 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3427 if self.op.hidden_os:
3428 helper_os("hidden_os", self.op.hidden_os, "hidden")
3430 if self.op.blacklisted_os:
3431 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3433 if self.op.master_netdev:
3434 master = self.cfg.GetMasterNode()
3435 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3436 self.cluster.master_netdev)
3437 result = self.rpc.call_node_stop_master(master, False)
3438 result.Raise("Could not disable the master ip")
3439 feedback_fn("Changing master_netdev from %s to %s" %
3440 (self.cluster.master_netdev, self.op.master_netdev))
3441 self.cluster.master_netdev = self.op.master_netdev
3443 self.cfg.Update(self.cluster, feedback_fn)
3445 if self.op.master_netdev:
3446 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3447 self.op.master_netdev)
3448 result = self.rpc.call_node_start_master(master, False, False)
3450 self.LogWarning("Could not re-enable the master ip on"
3451 " the master, please restart manually: %s",
3455 def _UploadHelper(lu, nodes, fname):
3456 """Helper for uploading a file and showing warnings.
3459 if os.path.exists(fname):
3460 result = lu.rpc.call_upload_file(nodes, fname)
3461 for to_node, to_result in result.items():
3462 msg = to_result.fail_msg
3464 msg = ("Copy of file %s to node %s failed: %s" %
3465 (fname, to_node, msg))
3466 lu.proc.LogWarning(msg)
3469 def _ComputeAncillaryFiles(cluster, redist):
3470 """Compute files external to Ganeti which need to be consistent.
3472 @type redist: boolean
3473 @param redist: Whether to include files which need to be redistributed
3476 # Compute files for all nodes
3478 constants.SSH_KNOWN_HOSTS_FILE,
3479 constants.CONFD_HMAC_KEY,
3480 constants.CLUSTER_DOMAIN_SECRET_FILE,
3484 files_all.update(constants.ALL_CERT_FILES)
3485 files_all.update(ssconf.SimpleStore().GetFileList())
3487 if cluster.modify_etc_hosts:
3488 files_all.add(constants.ETC_HOSTS)
3490 # Files which must either exist on all nodes or on none
3491 files_all_opt = set([
3492 constants.RAPI_USERS_FILE,
3495 # Files which should only be on master candidates
3498 files_mc.add(constants.CLUSTER_CONF_FILE)
3500 # Files which should only be on VM-capable nodes
3501 files_vm = set(filename
3502 for hv_name in cluster.enabled_hypervisors
3503 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3505 # Filenames must be unique
3506 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3507 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3508 "Found file listed in more than one file list"
3510 return (files_all, files_all_opt, files_mc, files_vm)
3513 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3514 """Distribute additional files which are part of the cluster configuration.
3516 ConfigWriter takes care of distributing the config and ssconf files, but
3517 there are more files which should be distributed to all nodes. This function
3518 makes sure those are copied.
3520 @param lu: calling logical unit
3521 @param additional_nodes: list of nodes not in the config to distribute to
3522 @type additional_vm: boolean
3523 @param additional_vm: whether the additional nodes are vm-capable or not
3526 # Gather target nodes
3527 cluster = lu.cfg.GetClusterInfo()
3528 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3530 online_nodes = lu.cfg.GetOnlineNodeList()
3531 vm_nodes = lu.cfg.GetVmCapableNodeList()
3533 if additional_nodes is not None:
3534 online_nodes.extend(additional_nodes)
3536 vm_nodes.extend(additional_nodes)
3538 # Never distribute to master node
3539 for nodelist in [online_nodes, vm_nodes]:
3540 if master_info.name in nodelist:
3541 nodelist.remove(master_info.name)
3544 (files_all, files_all_opt, files_mc, files_vm) = \
3545 _ComputeAncillaryFiles(cluster, True)
3547 # Never re-distribute configuration file from here
3548 assert not (constants.CLUSTER_CONF_FILE in files_all or
3549 constants.CLUSTER_CONF_FILE in files_vm)
3550 assert not files_mc, "Master candidates not handled in this function"
3553 (online_nodes, files_all),
3554 (online_nodes, files_all_opt),
3555 (vm_nodes, files_vm),
3559 for (node_list, files) in filemap:
3561 _UploadHelper(lu, node_list, fname)
3564 class LUClusterRedistConf(NoHooksLU):
3565 """Force the redistribution of cluster configuration.
3567 This is a very simple LU.
3572 def ExpandNames(self):
3573 self.needed_locks = {
3574 locking.LEVEL_NODE: locking.ALL_SET,
3576 self.share_locks[locking.LEVEL_NODE] = 1
3578 def Exec(self, feedback_fn):
3579 """Redistribute the configuration.
3582 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3583 _RedistributeAncillaryFiles(self)
3586 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3587 """Sleep and poll for an instance's disk to sync.
3590 if not instance.disks or disks is not None and not disks:
3593 disks = _ExpandCheckDisks(instance, disks)
3596 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3598 node = instance.primary_node
3601 lu.cfg.SetDiskID(dev, node)
3603 # TODO: Convert to utils.Retry
3606 degr_retries = 10 # in seconds, as we sleep 1 second each time
3610 cumul_degraded = False
3611 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3612 msg = rstats.fail_msg
3614 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3617 raise errors.RemoteError("Can't contact node %s for mirror data,"
3618 " aborting." % node)
3621 rstats = rstats.payload
3623 for i, mstat in enumerate(rstats):
3625 lu.LogWarning("Can't compute data for node %s/%s",
3626 node, disks[i].iv_name)
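# a disk that is degraded and reports no sync percentage keeps the overall state degraded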
3629 cumul_degraded = (cumul_degraded or
3630 (mstat.is_degraded and mstat.sync_percent is None))
3631 if mstat.sync_percent is not None:
3633 if mstat.estimated_time is not None:
3634 rem_time = ("%s remaining (estimated)" %
3635 utils.FormatSeconds(mstat.estimated_time))
3636 max_time = mstat.estimated_time
3638 rem_time = "no time estimate"
3639 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3640 (disks[i].iv_name, mstat.sync_percent, rem_time))
3642 # if we're done but degraded, let's do a few small retries, to
3643 # make sure we see a stable and not transient situation; therefore
3644 # we force restart of the loop
3645 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3646 logging.info("Degraded disks found, %d retries left", degr_retries)
3654 time.sleep(min(60, max_time))
3657 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3658 return not cumul_degraded
3661 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3662 """Check that mirrors are not degraded.
3664 The ldisk parameter, if True, will change the test from the
3665 is_degraded attribute (which represents overall non-ok status for
3666 the device(s)) to the ldisk (representing the local storage status).
3669 lu.cfg.SetDiskID(dev, node)
3673 if on_primary or dev.AssembleOnSecondary():
3674 rstats = lu.rpc.call_blockdev_find(node, dev)
3675 msg = rstats.fail_msg
3677 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3679 elif not rstats.payload:
3680 lu.LogWarning("Can't find disk on node %s", node)
3684 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3686 result = result and not rstats.payload.is_degraded
3689 for child in dev.children:
3690 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3695 class LUOobCommand(NoHooksLU):
3696 """Logical unit for OOB handling.
3700 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3702 def ExpandNames(self):
3703 """Gather locks we need.
3706 if self.op.node_names:
3707 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3708 lock_names = self.op.node_names
3710 lock_names = locking.ALL_SET
3712 self.needed_locks = {
3713 locking.LEVEL_NODE: lock_names,
3716 def CheckPrereq(self):
3717 """Check prerequisites.
3720 - the node exists in the configuration
3723 Any errors are signaled by raising errors.OpPrereqError.
3727 self.master_node = self.cfg.GetMasterNode()
3729 assert self.op.power_delay >= 0.0
3731 if self.op.node_names:
3732 if (self.op.command in self._SKIP_MASTER and
3733 self.master_node in self.op.node_names):
3734 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3735 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3737 if master_oob_handler:
3738 additional_text = ("run '%s %s %s' if you want to operate on the"
3739 " master regardless") % (master_oob_handler,
3743 additional_text = "it does not support out-of-band operations"
3745 raise errors.OpPrereqError(("Operating on the master node %s is not"
3746 " allowed for %s; %s") %
3747 (self.master_node, self.op.command,
3748 additional_text), errors.ECODE_INVAL)
3750 self.op.node_names = self.cfg.GetNodeList()
3751 if self.op.command in self._SKIP_MASTER:
3752 self.op.node_names.remove(self.master_node)
3754 if self.op.command in self._SKIP_MASTER:
3755 assert self.master_node not in self.op.node_names
3757 for node_name in self.op.node_names:
3758 node = self.cfg.GetNodeInfo(node_name)
3761 raise errors.OpPrereqError("Node %s not found" % node_name,
3764 self.nodes.append(node)
3766 if (not self.op.ignore_status and
3767 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3768 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3769 " not marked offline") % node_name,
3772 def Exec(self, feedback_fn):
3773 """Execute OOB and return result if we expect any.
3776 master_node = self.master_node
3779 for idx, node in enumerate(utils.NiceSort(self.nodes,
3780 key=lambda node: node.name)):
3781 node_entry = [(constants.RS_NORMAL, node.name)]
3782 ret.append(node_entry)
3784 oob_program = _SupportsOob(self.cfg, node)
3787 node_entry.append((constants.RS_UNAVAIL, None))
3790 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3791 self.op.command, oob_program, node.name)
3792 result = self.rpc.call_run_oob(master_node, oob_program,
3793 self.op.command, node.name,
3797 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3798 node.name, result.fail_msg)
3799 node_entry.append((constants.RS_NODATA, None))
3802 self._CheckPayload(result)
3803 except errors.OpExecError, err:
3804 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3806 node_entry.append((constants.RS_NODATA, None))
3808 if self.op.command == constants.OOB_HEALTH:
3809 # For health we should log important events
3810 for item, status in result.payload:
3811 if status in [constants.OOB_STATUS_WARNING,
3812 constants.OOB_STATUS_CRITICAL]:
3813 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3814 item, node.name, status)
3816 if self.op.command == constants.OOB_POWER_ON:
3818 elif self.op.command == constants.OOB_POWER_OFF:
3819 node.powered = False
3820 elif self.op.command == constants.OOB_POWER_STATUS:
3821 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3822 if powered != node.powered:
3823 logging.warning(("Recorded power state (%s) of node '%s' does not"
3824 " match actual power state (%s)"), node.powered,
3827 # For configuration changing commands we should update the node
3828 if self.op.command in (constants.OOB_POWER_ON,
3829 constants.OOB_POWER_OFF):
3830 self.cfg.Update(node, feedback_fn)
3832 node_entry.append((constants.RS_NORMAL, result.payload))
3834 if (self.op.command == constants.OOB_POWER_ON and
3835 idx < len(self.nodes) - 1):
3836 time.sleep(self.op.power_delay)
3840 def _CheckPayload(self, result):
3841 """Checks if the payload is valid.
3843 @param result: RPC result
3844 @raises errors.OpExecError: If payload is not valid
3848 if self.op.command == constants.OOB_HEALTH:
3849 if not isinstance(result.payload, list):
3850 errs.append("command 'health' is expected to return a list but got %s" %
3851 type(result.payload))
3853 for item, status in result.payload:
3854 if status not in constants.OOB_STATUSES:
3855 errs.append("health item '%s' has invalid status '%s'" %
3858 if self.op.command == constants.OOB_POWER_STATUS:
3859 if not isinstance(result.payload, dict):
3860 errs.append("power-status is expected to return a dict but got %s" %
3861 type(result.payload))
3863 if self.op.command in [
3864 constants.OOB_POWER_ON,
3865 constants.OOB_POWER_OFF,
3866 constants.OOB_POWER_CYCLE,
3868 if result.payload is not None:
3869 errs.append("%s is expected to not return payload but got '%s'" %
3870 (self.op.command, result.payload))
3873 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3874 utils.CommaJoin(errs))
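
# _CheckPayload above collects all problems into a list and raises once at
# the end. A minimal sketch of that style for a power-status-like payload;
# the "powered" key is purely illustrative (the real code uses
# constants.OOB_POWER_STATUS_POWERED).
def _ExampleValidatePowerStatusPayload(payload):
  """Returns a list of error strings; an empty list means the payload is OK."""
  errs = []
  if not isinstance(payload, dict):
    errs.append("expected a dict but got %s" % type(payload))
  elif not isinstance(payload.get("powered"), bool):
    errs.append("missing or non-boolean 'powered' entry")
  return errs
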
3876 class _OsQuery(_QueryBase):
3877 FIELDS = query.OS_FIELDS
3879 def ExpandNames(self, lu):
3880 # Lock all nodes in shared mode
3881 # Temporary removal of locks, should be reverted later
3882 # TODO: reintroduce locks when they are lighter-weight
3883 lu.needed_locks = {}
3884 #self.share_locks[locking.LEVEL_NODE] = 1
3885 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3887 # The following variables interact with _QueryBase._GetNames
3889 self.wanted = self.names
3891 self.wanted = locking.ALL_SET
3893 self.do_locking = self.use_locking
3895 def DeclareLocks(self, lu, level):
3899 def _DiagnoseByOS(rlist):
3900 """Remaps a per-node return list into a per-os per-node dictionary
3902 @param rlist: a map with node names as keys and OS objects as values
3905 @return: a dictionary with osnames as keys and as value another
3906 map, with nodes as keys and tuples of (path, status, diagnose,
3907 variants, parameters, api_versions) as values, eg::
3909 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3910 (/srv/..., False, "invalid api")],
3911 "node2": [(/srv/..., True, "", [], [])]}
3916 # we build here the list of nodes that didn't fail the RPC (at RPC
3917 # level), so that nodes with a non-responding node daemon don't
3918 # make all OSes invalid
3919 good_nodes = [node_name for node_name in rlist
3920 if not rlist[node_name].fail_msg]
3921 for node_name, nr in rlist.items():
3922 if nr.fail_msg or not nr.payload:
3924 for (name, path, status, diagnose, variants,
3925 params, api_versions) in nr.payload:
3926 if name not in all_os:
3927 # build a list of nodes for this os containing empty lists
3928 # for each node in node_list
3930 for nname in good_nodes:
3931 all_os[name][nname] = []
3932 # convert params from [name, help] to (name, help)
3933 params = [tuple(v) for v in params]
3934 all_os[name][node_name].append((path, status, diagnose,
3935 variants, params, api_versions))
3938 def _GetQueryData(self, lu):
3939 """Computes the list of OSes and their attributes.
3942 # Locking is not used
3943 assert not (compat.any(lu.glm.is_owned(level)
3944 for level in locking.LEVELS
3945 if level != locking.LEVEL_CLUSTER) or
3946 self.do_locking or self.use_locking)
3948 valid_nodes = [node.name
3949 for node in lu.cfg.GetAllNodesInfo().values()
3950 if not node.offline and node.vm_capable]
3951 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3952 cluster = lu.cfg.GetClusterInfo()
3956 for (os_name, os_data) in pol.items():
3957 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3958 hidden=(os_name in cluster.hidden_os),
3959 blacklisted=(os_name in cluster.blacklisted_os))
3963 api_versions = set()
3965 for idx, osl in enumerate(os_data.values()):
3966 info.valid = bool(info.valid and osl and osl[0][1])
3970 (node_variants, node_params, node_api) = osl[0][3:6]
3973 variants.update(node_variants)
3974 parameters.update(node_params)
3975 api_versions.update(node_api)
3977 # Filter out inconsistent values
3978 variants.intersection_update(node_variants)
3979 parameters.intersection_update(node_params)
3980 api_versions.intersection_update(node_api)
3982 info.variants = list(variants)
3983 info.parameters = list(parameters)
3984 info.api_versions = list(api_versions)
3986 data[os_name] = info
3988 # Prepare data in requested order
3989 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
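
# The variant/parameter/API-version handling above seeds the sets from the
# first node and then intersects them with every other node, so only values
# that all nodes agree on survive. A standalone sketch of that reduction
# (hypothetical helper, plain Python only):
def _ExampleCommonValues(per_node_values):
  """Returns the set of values present in every entry of per_node_values."""
  common = None
  for values in per_node_values:
    if common is None:
      common = set(values)
    else:
      common.intersection_update(values)
  return common or set()
# e.g. _ExampleCommonValues([["a", "b"], ["b", "c"]]) == set(["b"])
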
3993 class LUOsDiagnose(NoHooksLU):
3994 """Logical unit for OS diagnose/query.
4000 def _BuildFilter(fields, names):
4001 """Builds a filter for querying OSes.
4004 name_filter = qlang.MakeSimpleFilter("name", names)
4006 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4007 # respective field is not requested
4008 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4009 for fname in ["hidden", "blacklisted"]
4010 if fname not in fields]
4011 if "valid" not in fields:
4012 status_filter.append([qlang.OP_TRUE, "valid"])
4015 status_filter.insert(0, qlang.OP_AND)
4017 status_filter = None
4019 if name_filter and status_filter:
4020 return [qlang.OP_AND, name_filter, status_filter]
4024 return status_filter
4026 def CheckArguments(self):
4027 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4028 self.op.output_fields, False)
4030 def ExpandNames(self):
4031 self.oq.ExpandNames(self)
4033 def Exec(self, feedback_fn):
4034 return self.oq.OldStyleQuery(self)
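
# _BuildFilter above combines an optional name filter with an optional
# status filter. A sketch of that combination logic with plain strings in
# place of the qlang.OP_* constants (illustrative only):
def _ExampleCombineFilters(name_filter, status_filter):
  """Returns a single combined filter, or None if neither part is set."""
  if name_filter and status_filter:
    return ["&", name_filter, status_filter]
  elif name_filter:
    return name_filter
  else:
    return status_filter
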
4037 class LUNodeRemove(LogicalUnit):
4038 """Logical unit for removing a node.
4041 HPATH = "node-remove"
4042 HTYPE = constants.HTYPE_NODE
4044 def BuildHooksEnv(self):
4047 This doesn't run on the target node in the pre phase as a failed
4048 node would then be impossible to remove.
4052 "OP_TARGET": self.op.node_name,
4053 "NODE_NAME": self.op.node_name,
4056 def BuildHooksNodes(self):
4057 """Build hooks nodes.
4060 all_nodes = self.cfg.GetNodeList()
4062 all_nodes.remove(self.op.node_name)
4064 logging.warning("Node '%s', which is about to be removed, was not found"
4065 " in the list of all nodes", self.op.node_name)
4066 return (all_nodes, all_nodes)
4068 def CheckPrereq(self):
4069 """Check prerequisites.
4072 - the node exists in the configuration
4073 - it does not have primary or secondary instances
4074 - it's not the master
4076 Any errors are signaled by raising errors.OpPrereqError.
4079 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4080 node = self.cfg.GetNodeInfo(self.op.node_name)
4081 assert node is not None
4083 instance_list = self.cfg.GetInstanceList()
4085 masternode = self.cfg.GetMasterNode()
4086 if node.name == masternode:
4087 raise errors.OpPrereqError("Node is the master node, failover to another"
4088 " node is required", errors.ECODE_INVAL)
4090 for instance_name in instance_list:
4091 instance = self.cfg.GetInstanceInfo(instance_name)
4092 if node.name in instance.all_nodes:
4093 raise errors.OpPrereqError("Instance %s is still running on the node,"
4094 " please remove it first" % instance_name,
4096 self.op.node_name = node.name
4099 def Exec(self, feedback_fn):
4100 """Removes the node from the cluster.
4104 logging.info("Stopping the node daemon and removing configs from node %s",
4107 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4109 # Promote nodes to master candidate as needed
4110 _AdjustCandidatePool(self, exceptions=[node.name])
4111 self.context.RemoveNode(node.name)
4113 # Run post hooks on the node before it's removed
4114 _RunPostHook(self, node.name)
4116 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4117 msg = result.fail_msg
4119 self.LogWarning("Errors encountered on the remote node while leaving"
4120 " the cluster: %s", msg)
4122 # Remove node from our /etc/hosts
4123 if self.cfg.GetClusterInfo().modify_etc_hosts:
4124 master_node = self.cfg.GetMasterNode()
4125 result = self.rpc.call_etc_hosts_modify(master_node,
4126 constants.ETC_HOSTS_REMOVE,
4128 result.Raise("Can't update hosts file with new host data")
4129 _RedistributeAncillaryFiles(self)
4132 class _NodeQuery(_QueryBase):
4133 FIELDS = query.NODE_FIELDS
4135 def ExpandNames(self, lu):
4136 lu.needed_locks = {}
4137 lu.share_locks[locking.LEVEL_NODE] = 1
4140 self.wanted = _GetWantedNodes(lu, self.names)
4142 self.wanted = locking.ALL_SET
4144 self.do_locking = (self.use_locking and
4145 query.NQ_LIVE in self.requested_data)
4148 # if we don't request only static fields, we need to lock the nodes
4149 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4151 def DeclareLocks(self, lu, level):
4154 def _GetQueryData(self, lu):
4155 """Computes the list of nodes and their attributes.
4158 all_info = lu.cfg.GetAllNodesInfo()
4160 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4162 # Gather data as requested
4163 if query.NQ_LIVE in self.requested_data:
4164 # filter out non-vm_capable nodes
4165 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4167 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4168 lu.cfg.GetHypervisorType())
4169 live_data = dict((name, nresult.payload)
4170 for (name, nresult) in node_data.items()
4171 if not nresult.fail_msg and nresult.payload)
4175 if query.NQ_INST in self.requested_data:
4176 node_to_primary = dict([(name, set()) for name in nodenames])
4177 node_to_secondary = dict([(name, set()) for name in nodenames])
4179 inst_data = lu.cfg.GetAllInstancesInfo()
4181 for inst in inst_data.values():
4182 if inst.primary_node in node_to_primary:
4183 node_to_primary[inst.primary_node].add(inst.name)
4184 for secnode in inst.secondary_nodes:
4185 if secnode in node_to_secondary:
4186 node_to_secondary[secnode].add(inst.name)
4188 node_to_primary = None
4189 node_to_secondary = None
4191 if query.NQ_OOB in self.requested_data:
4192 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4193 for name, node in all_info.iteritems())
4197 if query.NQ_GROUP in self.requested_data:
4198 groups = lu.cfg.GetAllNodeGroupsInfo()
4202 return query.NodeQueryData([all_info[name] for name in nodenames],
4203 live_data, lu.cfg.GetMasterNode(),
4204 node_to_primary, node_to_secondary, groups,
4205 oob_support, lu.cfg.GetClusterInfo())
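
# The NQ_INST branch above builds reverse maps from node names to the
# instances using them as primary or secondary node. A self-contained
# sketch of that mapping with hypothetical input shapes:
def _ExampleMapNodesToInstances(node_names, instances):
  """instances: iterable of (instance_name, primary_node, secondary_nodes)."""
  to_primary = dict((name, set()) for name in node_names)
  to_secondary = dict((name, set()) for name in node_names)
  for (iname, primary, secondaries) in instances:
    if primary in to_primary:
      to_primary[primary].add(iname)
    for snode in secondaries:
      if snode in to_secondary:
        to_secondary[snode].add(iname)
  return (to_primary, to_secondary)
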
4208 class LUNodeQuery(NoHooksLU):
4209 """Logical unit for querying nodes.
4212 # pylint: disable-msg=W0142
4215 def CheckArguments(self):
4216 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4217 self.op.output_fields, self.op.use_locking)
4219 def ExpandNames(self):
4220 self.nq.ExpandNames(self)
4222 def Exec(self, feedback_fn):
4223 return self.nq.OldStyleQuery(self)
4226 class LUNodeQueryvols(NoHooksLU):
4227 """Logical unit for getting volumes on node(s).
4231 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4232 _FIELDS_STATIC = utils.FieldSet("node")
4234 def CheckArguments(self):
4235 _CheckOutputFields(static=self._FIELDS_STATIC,
4236 dynamic=self._FIELDS_DYNAMIC,
4237 selected=self.op.output_fields)
4239 def ExpandNames(self):
4240 self.needed_locks = {}
4241 self.share_locks[locking.LEVEL_NODE] = 1
4242 if not self.op.nodes:
4243 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4245 self.needed_locks[locking.LEVEL_NODE] = \
4246 _GetWantedNodes(self, self.op.nodes)
4248 def Exec(self, feedback_fn):
4249 """Computes the list of volumes and their attributes.
4252 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4253 volumes = self.rpc.call_node_volumes(nodenames)
4255 ilist = [self.cfg.GetInstanceInfo(iname) for iname
4256 in self.cfg.GetInstanceList()]
4258 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
4261 for node in nodenames:
4262 nresult = volumes[node]
4265 msg = nresult.fail_msg
4267 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4270 node_vols = nresult.payload[:]
4271 node_vols.sort(key=lambda vol: vol['dev'])
4273 for vol in node_vols:
4275 for field in self.op.output_fields:
4278 elif field == "phys":
4282 elif field == "name":
4284 elif field == "size":
4285 val = int(float(vol['size']))
4286 elif field == "instance":
4288 if node not in lv_by_node[inst]:
4290 if vol['name'] in lv_by_node[inst][node]:
4296 raise errors.ParameterError(field)
4297 node_output.append(str(val))
4299 output.append(node_output)
4304 class LUNodeQueryStorage(NoHooksLU):
4305 """Logical unit for getting information on storage units on node(s).
4308 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4311 def CheckArguments(self):
4312 _CheckOutputFields(static=self._FIELDS_STATIC,
4313 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4314 selected=self.op.output_fields)
4316 def ExpandNames(self):
4317 self.needed_locks = {}
4318 self.share_locks[locking.LEVEL_NODE] = 1
4321 self.needed_locks[locking.LEVEL_NODE] = \
4322 _GetWantedNodes(self, self.op.nodes)
4324 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4326 def Exec(self, feedback_fn):
4327 """Computes the list of storage units and their attributes.
4330 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4332 # Always get name to sort by
4333 if constants.SF_NAME in self.op.output_fields:
4334 fields = self.op.output_fields[:]
4336 fields = [constants.SF_NAME] + self.op.output_fields
4338 # Never ask for node or type as it's only known to the LU
4339 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4340 while extra in fields:
4341 fields.remove(extra)
4343 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4344 name_idx = field_idx[constants.SF_NAME]
4346 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4347 data = self.rpc.call_storage_list(self.nodes,
4348 self.op.storage_type, st_args,
4349 self.op.name, fields)
4353 for node in utils.NiceSort(self.nodes):
4354 nresult = data[node]
4358 msg = nresult.fail_msg
4360 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4363 rows = dict([(row[name_idx], row) for row in nresult.payload])
4365 for name in utils.NiceSort(rows.keys()):
4370 for field in self.op.output_fields:
4371 if field == constants.SF_NODE:
4373 elif field == constants.SF_TYPE:
4374 val = self.op.storage_type
4375 elif field in field_idx:
4376 val = row[field_idx[field]]
4378 raise errors.ParameterError(field)
4387 class _InstanceQuery(_QueryBase):
4388 FIELDS = query.INSTANCE_FIELDS
4390 def ExpandNames(self, lu):
4391 lu.needed_locks = {}
4392 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4393 lu.share_locks[locking.LEVEL_NODE] = 1
4396 self.wanted = _GetWantedInstances(lu, self.names)
4398 self.wanted = locking.ALL_SET
4400 self.do_locking = (self.use_locking and
4401 query.IQ_LIVE in self.requested_data)
4403 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4404 lu.needed_locks[locking.LEVEL_NODE] = []
4405 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4407 def DeclareLocks(self, lu, level):
4408 if level == locking.LEVEL_NODE and self.do_locking:
4409 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4411 def _GetQueryData(self, lu):
4412 """Computes the list of instances and their attributes.
4415 cluster = lu.cfg.GetClusterInfo()
4416 all_info = lu.cfg.GetAllInstancesInfo()
4418 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4420 instance_list = [all_info[name] for name in instance_names]
4421 nodes = frozenset(itertools.chain(*(inst.all_nodes
4422 for inst in instance_list)))
4423 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4426 wrongnode_inst = set()
4428 # Gather data as requested
4429 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4431 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4433 result = node_data[name]
4435 # offline nodes will be in both lists
4436 assert result.fail_msg
4437 offline_nodes.append(name)
4439 bad_nodes.append(name)
4440 elif result.payload:
4441 for inst in result.payload:
4442 if inst in all_info:
4443 if all_info[inst].primary_node == name:
4444 live_data.update(result.payload)
4446 wrongnode_inst.add(inst)
4448 # orphan instance; we don't list it here as we don't
4449 # handle this case yet in the output of instance listing
4450 logging.warning("Orphan instance '%s' found on node %s",
4452 # else no instance is alive
4456 if query.IQ_DISKUSAGE in self.requested_data:
4457 disk_usage = dict((inst.name,
4458 _ComputeDiskSize(inst.disk_template,
4459 [{constants.IDISK_SIZE: disk.size}
4460 for disk in inst.disks]))
4461 for inst in instance_list)
4465 if query.IQ_CONSOLE in self.requested_data:
4467 for inst in instance_list:
4468 if inst.name in live_data:
4469 # Instance is running
4470 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4472 consinfo[inst.name] = None
4473 assert set(consinfo.keys()) == set(instance_names)
4477 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4478 disk_usage, offline_nodes, bad_nodes,
4479 live_data, wrongnode_inst, consinfo)
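
# The IQ_LIVE handling above only treats an instance as live when the node
# reporting it is also its configured primary; anything else is flagged as
# running on the wrong node. A hypothetical sketch of that classification:
def _ExampleClassifyRunningInstances(reported_by_node, configured_primary):
  """reported_by_node: dict node -> instance names seen running there.

  configured_primary: dict instance name -> its configured primary node.
  Returns a (live, wrongnode) pair of instance-name sets.

  """
  live = set()
  wrongnode = set()
  for (node, instances) in reported_by_node.items():
    for iname in instances:
      if configured_primary.get(iname) == node:
        live.add(iname)
      else:
        wrongnode.add(iname)
  return (live, wrongnode)
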
4482 class LUQuery(NoHooksLU):
4483 """Query for resources/items of a certain kind.
4486 # pylint: disable-msg=W0142
4489 def CheckArguments(self):
4490 qcls = _GetQueryImplementation(self.op.what)
4492 self.impl = qcls(self.op.filter, self.op.fields, False)
4494 def ExpandNames(self):
4495 self.impl.ExpandNames(self)
4497 def DeclareLocks(self, level):
4498 self.impl.DeclareLocks(self, level)
4500 def Exec(self, feedback_fn):
4501 return self.impl.NewStyleQuery(self)
4504 class LUQueryFields(NoHooksLU):
4505 """Query for resources/items of a certain kind.
4508 # pylint: disable-msg=W0142
4511 def CheckArguments(self):
4512 self.qcls = _GetQueryImplementation(self.op.what)
4514 def ExpandNames(self):
4515 self.needed_locks = {}
4517 def Exec(self, feedback_fn):
4518 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4521 class LUNodeModifyStorage(NoHooksLU):
4522 """Logical unit for modifying a storage volume on a node.
4527 def CheckArguments(self):
4528 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4530 storage_type = self.op.storage_type
4533 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4535 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4536 " modified" % storage_type,
4539 diff = set(self.op.changes.keys()) - modifiable
4541 raise errors.OpPrereqError("The following fields can not be modified for"
4542 " storage units of type '%s': %r" %
4543 (storage_type, list(diff)),
4546 def ExpandNames(self):
4547 self.needed_locks = {
4548 locking.LEVEL_NODE: self.op.node_name,
4551 def Exec(self, feedback_fn):
4552 """Modifies the storage unit on the given node.
4555 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4556 result = self.rpc.call_storage_modify(self.op.node_name,
4557 self.op.storage_type, st_args,
4558 self.op.name, self.op.changes)
4559 result.Raise("Failed to modify storage unit '%s' on %s" %
4560 (self.op.name, self.op.node_name))
4563 class LUNodeAdd(LogicalUnit):
4564 """Logical unit for adding a node to the cluster.
4568 HTYPE = constants.HTYPE_NODE
4569 _NFLAGS = ["master_capable", "vm_capable"]
4571 def CheckArguments(self):
4572 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4573 # validate/normalize the node name
4574 self.hostname = netutils.GetHostname(name=self.op.node_name,
4575 family=self.primary_ip_family)
4576 self.op.node_name = self.hostname.name
4578 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4579 raise errors.OpPrereqError("Cannot readd the master node",
4582 if self.op.readd and self.op.group:
4583 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4584 " being readded", errors.ECODE_INVAL)
4586 def BuildHooksEnv(self):
4589 This will run on all nodes before, and on all nodes + the new node after.
4593 "OP_TARGET": self.op.node_name,
4594 "NODE_NAME": self.op.node_name,
4595 "NODE_PIP": self.op.primary_ip,
4596 "NODE_SIP": self.op.secondary_ip,
4597 "MASTER_CAPABLE": str(self.op.master_capable),
4598 "VM_CAPABLE": str(self.op.vm_capable),
4601 def BuildHooksNodes(self):
4602 """Build hooks nodes.
4605 # Exclude added node
4606 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4607 post_nodes = pre_nodes + [self.op.node_name, ]
4609 return (pre_nodes, post_nodes)
4611 def CheckPrereq(self):
4612 """Check prerequisites.
4615 - the new node is not already in the config
4617 - its parameters (single/dual homed) matches the cluster
4619 Any errors are signaled by raising errors.OpPrereqError.
4623 hostname = self.hostname
4624 node = hostname.name
4625 primary_ip = self.op.primary_ip = hostname.ip
4626 if self.op.secondary_ip is None:
4627 if self.primary_ip_family == netutils.IP6Address.family:
4628 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4629 " IPv4 address must be given as secondary",
4631 self.op.secondary_ip = primary_ip
4633 secondary_ip = self.op.secondary_ip
4634 if not netutils.IP4Address.IsValid(secondary_ip):
4635 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4636 " address" % secondary_ip, errors.ECODE_INVAL)
4638 node_list = cfg.GetNodeList()
4639 if not self.op.readd and node in node_list:
4640 raise errors.OpPrereqError("Node %s is already in the configuration" %
4641 node, errors.ECODE_EXISTS)
4642 elif self.op.readd and node not in node_list:
4643 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4646 self.changed_primary_ip = False
4648 for existing_node_name in node_list:
4649 existing_node = cfg.GetNodeInfo(existing_node_name)
4651 if self.op.readd and node == existing_node_name:
4652 if existing_node.secondary_ip != secondary_ip:
4653 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4654 " address configuration as before",
4656 if existing_node.primary_ip != primary_ip:
4657 self.changed_primary_ip = True
4661 if (existing_node.primary_ip == primary_ip or
4662 existing_node.secondary_ip == primary_ip or
4663 existing_node.primary_ip == secondary_ip or
4664 existing_node.secondary_ip == secondary_ip):
4665 raise errors.OpPrereqError("New node ip address(es) conflict with"
4666 " existing node %s" % existing_node.name,
4667 errors.ECODE_NOTUNIQUE)
4669 # After this 'if' block, None is no longer a valid value for the
4670 # _capable op attributes
4672 old_node = self.cfg.GetNodeInfo(node)
4673 assert old_node is not None, "Can't retrieve locked node %s" % node
4674 for attr in self._NFLAGS:
4675 if getattr(self.op, attr) is None:
4676 setattr(self.op, attr, getattr(old_node, attr))
4678 for attr in self._NFLAGS:
4679 if getattr(self.op, attr) is None:
4680 setattr(self.op, attr, True)
4682 if self.op.readd and not self.op.vm_capable:
4683 pri, sec = cfg.GetNodeInstances(node)
4685 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4686 " flag set to false, but it already holds"
4687 " instances" % node,
4690 # check that the type of the node (single versus dual homed) is the
4691 # same as for the master
4692 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4693 master_singlehomed = myself.secondary_ip == myself.primary_ip
4694 newbie_singlehomed = secondary_ip == primary_ip
4695 if master_singlehomed != newbie_singlehomed:
4696 if master_singlehomed:
4697 raise errors.OpPrereqError("The master has no secondary ip but the"
4698 " new node has one",
4701 raise errors.OpPrereqError("The master has a secondary ip but the"
4702 " new node doesn't have one",
4705 # checks reachability
4706 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4707 raise errors.OpPrereqError("Node not reachable by ping",
4708 errors.ECODE_ENVIRON)
4710 if not newbie_singlehomed:
4711 # check reachability from my secondary ip to newbie's secondary ip
4712 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4713 source=myself.secondary_ip):
4714 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4715 " based ping to node daemon port",
4716 errors.ECODE_ENVIRON)
4723 if self.op.master_capable:
4724 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4726 self.master_candidate = False
4729 self.new_node = old_node
4731 node_group = cfg.LookupNodeGroup(self.op.group)
4732 self.new_node = objects.Node(name=node,
4733 primary_ip=primary_ip,
4734 secondary_ip=secondary_ip,
4735 master_candidate=self.master_candidate,
4736 offline=False, drained=False,
4739 if self.op.ndparams:
4740 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4742 def Exec(self, feedback_fn):
4743 """Adds the new node to the cluster.
4746 new_node = self.new_node
4747 node = new_node.name
4749 # We are adding a new node, so we assume it's powered
4750 new_node.powered = True
4752 # for re-adds, reset the offline/drained/master-candidate flags;
4753 # we need to reset here, otherwise offline would prevent RPC calls
4754 # later in the procedure; this also means that if the re-add
4755 # fails, we are left with a non-offlined, broken node
4757 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4758 self.LogInfo("Readding a node, the offline/drained flags were reset")
4759 # if we demote the node, we do cleanup later in the procedure
4760 new_node.master_candidate = self.master_candidate
4761 if self.changed_primary_ip:
4762 new_node.primary_ip = self.op.primary_ip
4764 # copy the master/vm_capable flags
4765 for attr in self._NFLAGS:
4766 setattr(new_node, attr, getattr(self.op, attr))
4768 # notify the user about any possible mc promotion
4769 if new_node.master_candidate:
4770 self.LogInfo("Node will be a master candidate")
4772 if self.op.ndparams:
4773 new_node.ndparams = self.op.ndparams
4775 new_node.ndparams = {}
4777 # check connectivity
4778 result = self.rpc.call_version([node])[node]
4779 result.Raise("Can't get version information from node %s" % node)
4780 if constants.PROTOCOL_VERSION == result.payload:
4781 logging.info("Communication to node %s fine, sw version %s match",
4782 node, result.payload)
4784 raise errors.OpExecError("Version mismatch master version %s,"
4785 " node version %s" %
4786 (constants.PROTOCOL_VERSION, result.payload))
4788 # Add node to our /etc/hosts, and add key to known_hosts
4789 if self.cfg.GetClusterInfo().modify_etc_hosts:
4790 master_node = self.cfg.GetMasterNode()
4791 result = self.rpc.call_etc_hosts_modify(master_node,
4792 constants.ETC_HOSTS_ADD,
4795 result.Raise("Can't update hosts file with new host data")
4797 if new_node.secondary_ip != new_node.primary_ip:
4798 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4801 node_verify_list = [self.cfg.GetMasterNode()]
4802 node_verify_param = {
4803 constants.NV_NODELIST: [node],
4804 # TODO: do a node-net-test as well?
4807 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4808 self.cfg.GetClusterName())
4809 for verifier in node_verify_list:
4810 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4811 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4813 for failed in nl_payload:
4814 feedback_fn("ssh/hostname verification failed"
4815 " (checking from %s): %s" %
4816 (verifier, nl_payload[failed]))
4817 raise errors.OpExecError("ssh/hostname verification failed")
4820 _RedistributeAncillaryFiles(self)
4821 self.context.ReaddNode(new_node)
4822 # make sure we redistribute the config
4823 self.cfg.Update(new_node, feedback_fn)
4824 # and make sure the new node will not have old files around
4825 if not new_node.master_candidate:
4826 result = self.rpc.call_node_demote_from_mc(new_node.name)
4827 msg = result.fail_msg
4829 self.LogWarning("Node failed to demote itself from master"
4830 " candidate status: %s" % msg)
4832 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4833 additional_vm=self.op.vm_capable)
4834 self.context.AddNode(new_node, self.proc.GetECId())
4837 class LUNodeSetParams(LogicalUnit):
4838 """Modifies the parameters of a node.
4840 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4841 to the node role (as _ROLE_*)
4842 @cvar _R2F: a dictionary from node role to tuples of flags
4843 @cvar _FLAGS: a list of attribute names corresponding to the flags
4846 HPATH = "node-modify"
4847 HTYPE = constants.HTYPE_NODE
4849 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4851 (True, False, False): _ROLE_CANDIDATE,
4852 (False, True, False): _ROLE_DRAINED,
4853 (False, False, True): _ROLE_OFFLINE,
4854 (False, False, False): _ROLE_REGULAR,
4856 _R2F = dict((v, k) for k, v in _F2R.items())
4857 _FLAGS = ["master_candidate", "drained", "offline"]
4859 def CheckArguments(self):
4860 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4861 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4862 self.op.master_capable, self.op.vm_capable,
4863 self.op.secondary_ip, self.op.ndparams]
4864 if all_mods.count(None) == len(all_mods):
4865 raise errors.OpPrereqError("Please pass at least one modification",
4867 if all_mods.count(True) > 1:
4868 raise errors.OpPrereqError("Can't set the node into more than one"
4869 " state at the same time",
4872 # Boolean value that tells us whether we might be demoting from MC
4873 self.might_demote = (self.op.master_candidate == False or
4874 self.op.offline == True or
4875 self.op.drained == True or
4876 self.op.master_capable == False)
4878 if self.op.secondary_ip:
4879 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4880 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4881 " address" % self.op.secondary_ip,
4884 self.lock_all = self.op.auto_promote and self.might_demote
4885 self.lock_instances = self.op.secondary_ip is not None
4887 def ExpandNames(self):
4889 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4891 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4893 if self.lock_instances:
4894 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4896 def DeclareLocks(self, level):
4897 # If we have locked all instances, before waiting to lock nodes, release
4898 # all the ones living on nodes unrelated to the current operation.
4899 if level == locking.LEVEL_NODE and self.lock_instances:
4900 self.affected_instances = []
4901 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4904 # Build list of instances to release
4905 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4906 instance = self.context.cfg.GetInstanceInfo(instance_name)
4907 if (instance.disk_template in constants.DTS_INT_MIRROR and
4908 self.op.node_name in instance.all_nodes):
4909 instances_keep.append(instance_name)
4910 self.affected_instances.append(instance)
4912 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4914 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4915 set(instances_keep))
4917 def BuildHooksEnv(self):
4920 This runs on the master node.
4924 "OP_TARGET": self.op.node_name,
4925 "MASTER_CANDIDATE": str(self.op.master_candidate),
4926 "OFFLINE": str(self.op.offline),
4927 "DRAINED": str(self.op.drained),
4928 "MASTER_CAPABLE": str(self.op.master_capable),
4929 "VM_CAPABLE": str(self.op.vm_capable),
4932 def BuildHooksNodes(self):
4933 """Build hooks nodes.
4936 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4939 def CheckPrereq(self):
4940 """Check prerequisites.
4942 This checks the requested flag and parameter changes against the current node state.
4945 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4947 if (self.op.master_candidate is not None or
4948 self.op.drained is not None or
4949 self.op.offline is not None):
4950 # we can't change the master's node flags
4951 if self.op.node_name == self.cfg.GetMasterNode():
4952 raise errors.OpPrereqError("The master role can be changed"
4953 " only via master-failover",
4956 if self.op.master_candidate and not node.master_capable:
4957 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4958 " it a master candidate" % node.name,
4961 if self.op.vm_capable == False:
4962 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4964 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4965 " the vm_capable flag" % node.name,
4968 if node.master_candidate and self.might_demote and not self.lock_all:
4969 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4970 # check if after removing the current node, we're missing master candidates
4972 (mc_remaining, mc_should, _) = \
4973 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4974 if mc_remaining < mc_should:
4975 raise errors.OpPrereqError("Not enough master candidates, please"
4976 " pass auto promote option to allow"
4977 " promotion", errors.ECODE_STATE)
4979 self.old_flags = old_flags = (node.master_candidate,
4980 node.drained, node.offline)
4981 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4982 self.old_role = old_role = self._F2R[old_flags]
4984 # Check for ineffective changes
4985 for attr in self._FLAGS:
4986 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4987 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4988 setattr(self.op, attr, None)
4990 # Past this point, any flag change to False means a transition
4991 # away from the respective state, as only real changes are kept
4993 # TODO: We might query the real power state if it supports OOB
4994 if _SupportsOob(self.cfg, node):
4995 if self.op.offline is False and not (node.powered or
4996 self.op.powered == True):
4997 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4998 " offline status can be reset") %
5000 elif self.op.powered is not None:
5001 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5002 " as it does not support out-of-band"
5003 " handling") % self.op.node_name)
5005 # If we're being deofflined/drained, we'll MC ourself if needed
5006 if (self.op.drained == False or self.op.offline == False or
5007 (self.op.master_capable and not node.master_capable)):
5008 if _DecideSelfPromotion(self):
5009 self.op.master_candidate = True
5010 self.LogInfo("Auto-promoting node to master candidate")
5012 # If we're no longer master capable, we'll demote ourselves from MC
5013 if self.op.master_capable == False and node.master_candidate:
5014 self.LogInfo("Demoting from master candidate")
5015 self.op.master_candidate = False
5018 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5019 if self.op.master_candidate:
5020 new_role = self._ROLE_CANDIDATE
5021 elif self.op.drained:
5022 new_role = self._ROLE_DRAINED
5023 elif self.op.offline:
5024 new_role = self._ROLE_OFFLINE
5025 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5026 # False is still in new flags, which means we're un-setting (the offline/drained/mc-candidate flag)
5028 new_role = self._ROLE_REGULAR
5029 else: # no new flags, nothing, keep old role
5032 self.new_role = new_role
5034 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5035 # Trying to transition out of offline status
5036 result = self.rpc.call_version([node.name])[node.name]
5038 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5039 " to report its version: %s" %
5040 (node.name, result.fail_msg),
5043 self.LogWarning("Transitioning node from offline to online state"
5044 " without using re-add. Please make sure the node is healthy!")
5047 if self.op.secondary_ip:
5048 # Ok even without locking, because this can't be changed by any LU
5049 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5050 master_singlehomed = master.secondary_ip == master.primary_ip
5051 if master_singlehomed and self.op.secondary_ip:
5052 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5053 " homed cluster", errors.ECODE_INVAL)
5056 if self.affected_instances:
5057 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5058 " node has instances (%s) configured"
5059 " to use it" % self.affected_instances)
5061 # On online nodes, check that no instances are running, and that
5062 # the node has the new ip and we can reach it.
5063 for instance in self.affected_instances:
5064 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5066 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5067 if master.name != node.name:
5068 # check reachability from master secondary ip to new secondary ip
5069 if not netutils.TcpPing(self.op.secondary_ip,
5070 constants.DEFAULT_NODED_PORT,
5071 source=master.secondary_ip):
5072 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5073 " based ping to node daemon port",
5074 errors.ECODE_ENVIRON)
5076 if self.op.ndparams:
5077 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5078 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5079 self.new_ndparams = new_ndparams
5081 def Exec(self, feedback_fn):
5086 old_role = self.old_role
5087 new_role = self.new_role
5091 if self.op.ndparams:
5092 node.ndparams = self.new_ndparams
5094 if self.op.powered is not None:
5095 node.powered = self.op.powered
5097 for attr in ["master_capable", "vm_capable"]:
5098 val = getattr(self.op, attr)
5100 setattr(node, attr, val)
5101 result.append((attr, str(val)))
5103 if new_role != old_role:
5104 # Tell the node to demote itself, if no longer MC and not offline
5105 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5106 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5108 self.LogWarning("Node failed to demote itself: %s", msg)
5110 new_flags = self._R2F[new_role]
5111 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5113 result.append((desc, str(nf)))
5114 (node.master_candidate, node.drained, node.offline) = new_flags
5116 # we locked all nodes, we adjust the CP before updating this node
5118 _AdjustCandidatePool(self, [node.name])
5120 if self.op.secondary_ip:
5121 node.secondary_ip = self.op.secondary_ip
5122 result.append(("secondary_ip", self.op.secondary_ip))
5124 # this will trigger configuration file update, if needed
5125 self.cfg.Update(node, feedback_fn)
5127 # this will trigger job queue propagation or cleanup if the mc flag changed
5129 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5130 self.context.ReaddNode(node)
5135 class LUNodePowercycle(NoHooksLU):
5136 """Powercycles a node.
5141 def CheckArguments(self):
5142 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5143 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5144 raise errors.OpPrereqError("The node is the master and the force"
5145 " parameter was not set",
5148 def ExpandNames(self):
5149 """Locking for PowercycleNode.
5151 This is a last-resort option and shouldn't block on other
5152 jobs. Therefore, we grab no locks.
5155 self.needed_locks = {}
5157 def Exec(self, feedback_fn):
5161 result = self.rpc.call_node_powercycle(self.op.node_name,
5162 self.cfg.GetHypervisorType())
5163 result.Raise("Failed to schedule the reboot")
5164 return result.payload
5167 class LUClusterQuery(NoHooksLU):
5168 """Query cluster configuration.
5173 def ExpandNames(self):
5174 self.needed_locks = {}
5176 def Exec(self, feedback_fn):
5177 """Return cluster config.
5180 cluster = self.cfg.GetClusterInfo()
5183 # Filter just for enabled hypervisors
5184 for os_name, hv_dict in cluster.os_hvp.items():
5185 os_hvp[os_name] = {}
5186 for hv_name, hv_params in hv_dict.items():
5187 if hv_name in cluster.enabled_hypervisors:
5188 os_hvp[os_name][hv_name] = hv_params
5190 # Convert ip_family to ip_version
5191 primary_ip_version = constants.IP4_VERSION
5192 if cluster.primary_ip_family == netutils.IP6Address.family:
5193 primary_ip_version = constants.IP6_VERSION
5196 "software_version": constants.RELEASE_VERSION,
5197 "protocol_version": constants.PROTOCOL_VERSION,
5198 "config_version": constants.CONFIG_VERSION,
5199 "os_api_version": max(constants.OS_API_VERSIONS),
5200 "export_version": constants.EXPORT_VERSION,
5201 "architecture": (platform.architecture()[0], platform.machine()),
5202 "name": cluster.cluster_name,
5203 "master": cluster.master_node,
5204 "default_hypervisor": cluster.enabled_hypervisors[0],
5205 "enabled_hypervisors": cluster.enabled_hypervisors,
5206 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5207 for hypervisor_name in cluster.enabled_hypervisors]),
5209 "beparams": cluster.beparams,
5210 "osparams": cluster.osparams,
5211 "nicparams": cluster.nicparams,
5212 "ndparams": cluster.ndparams,
5213 "candidate_pool_size": cluster.candidate_pool_size,
5214 "master_netdev": cluster.master_netdev,
5215 "volume_group_name": cluster.volume_group_name,
5216 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5217 "file_storage_dir": cluster.file_storage_dir,
5218 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5219 "maintain_node_health": cluster.maintain_node_health,
5220 "ctime": cluster.ctime,
5221 "mtime": cluster.mtime,
5222 "uuid": cluster.uuid,
5223 "tags": list(cluster.GetTags()),
5224 "uid_pool": cluster.uid_pool,
5225 "default_iallocator": cluster.default_iallocator,
5226 "reserved_lvs": cluster.reserved_lvs,
5227 "primary_ip_version": primary_ip_version,
5228 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5229 "hidden_os": cluster.hidden_os,
5230 "blacklisted_os": cluster.blacklisted_os,
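
# The os_hvp handling above keeps, per OS, only the hypervisor parameter
# overrides whose hypervisor is enabled on the cluster. A short standalone
# sketch of that filtering (hypothetical helper):
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Returns os_hvp restricted to the enabled hypervisors."""
  filtered = {}
  for (os_name, hv_dict) in os_hvp.items():
    filtered[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in hv_dict.items()
                             if hv_name in enabled_hypervisors)
  return filtered
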
5236 class LUClusterConfigQuery(NoHooksLU):
5237 """Return configuration values.
5241 _FIELDS_DYNAMIC = utils.FieldSet()
5242 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5243 "watcher_pause", "volume_group_name")
5245 def CheckArguments(self):
5246 _CheckOutputFields(static=self._FIELDS_STATIC,
5247 dynamic=self._FIELDS_DYNAMIC,
5248 selected=self.op.output_fields)
5250 def ExpandNames(self):
5251 self.needed_locks = {}
5253 def Exec(self, feedback_fn):
5254 """Dump a representation of the cluster config to the standard output.
5258 for field in self.op.output_fields:
5259 if field == "cluster_name":
5260 entry = self.cfg.GetClusterName()
5261 elif field == "master_node":
5262 entry = self.cfg.GetMasterNode()
5263 elif field == "drain_flag":
5264 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5265 elif field == "watcher_pause":
5266 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5267 elif field == "volume_group_name":
5268 entry = self.cfg.GetVGName()
5270 raise errors.ParameterError(field)
5271 values.append(entry)
5275 class LUInstanceActivateDisks(NoHooksLU):
5276 """Bring up an instance's disks.
5281 def ExpandNames(self):
5282 self._ExpandAndLockInstance()
5283 self.needed_locks[locking.LEVEL_NODE] = []
5284 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5286 def DeclareLocks(self, level):
5287 if level == locking.LEVEL_NODE:
5288 self._LockInstancesNodes()
5290 def CheckPrereq(self):
5291 """Check prerequisites.
5293 This checks that the instance is in the cluster.
5296 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5297 assert self.instance is not None, \
5298 "Cannot retrieve locked instance %s" % self.op.instance_name
5299 _CheckNodeOnline(self, self.instance.primary_node)
5301 def Exec(self, feedback_fn):
5302 """Activate the disks.
5305 disks_ok, disks_info = \
5306 _AssembleInstanceDisks(self, self.instance,
5307 ignore_size=self.op.ignore_size)
5309 raise errors.OpExecError("Cannot activate block devices")
5314 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5316 """Prepare the block devices for an instance.
5318 This sets up the block devices on all nodes.
5320 @type lu: L{LogicalUnit}
5321 @param lu: the logical unit on whose behalf we execute
5322 @type instance: L{objects.Instance}
5323 @param instance: the instance for whose disks we assemble
5324 @type disks: list of L{objects.Disk} or None
5325 @param disks: which disks to assemble (or all, if None)
5326 @type ignore_secondaries: boolean
5327 @param ignore_secondaries: if true, errors on secondary nodes
5328 won't result in an error return from the function
5329 @type ignore_size: boolean
5330 @param ignore_size: if true, the current known size of the disk
5331 will not be used during the disk activation, useful for cases
5332 when the size is wrong
5333 @return: False if the operation failed, otherwise a list of
5334 (host, instance_visible_name, node_visible_name)
5335 with the mapping from node devices to instance devices
5340 iname = instance.name
5341 disks = _ExpandCheckDisks(instance, disks)
5343 # With the two-pass mechanism we try to reduce the window of
5344 # opportunity for the race condition of switching DRBD to primary
5345 # before handshaking has occurred, but we do not eliminate it
5347 # The proper fix would be to wait (with some limits) until the
5348 # connection has been made and drbd transitions from WFConnection
5349 # into any other network-connected state (Connected, SyncTarget, etc.)
5352 # 1st pass, assemble on all nodes in secondary mode
5353 for idx, inst_disk in enumerate(disks):
5354 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5356 node_disk = node_disk.Copy()
5357 node_disk.UnsetSize()
5358 lu.cfg.SetDiskID(node_disk, node)
5359 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5360 msg = result.fail_msg
5362 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5363 " (is_primary=False, pass=1): %s",
5364 inst_disk.iv_name, node, msg)
5365 if not ignore_secondaries:
5368 # FIXME: race condition on drbd migration to primary
5370 # 2nd pass, do only the primary node
5371 for idx, inst_disk in enumerate(disks):
5374 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5375 if node != instance.primary_node:
5378 node_disk = node_disk.Copy()
5379 node_disk.UnsetSize()
5380 lu.cfg.SetDiskID(node_disk, node)
5381 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5382 msg = result.fail_msg
5384 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5385 " (is_primary=True, pass=2): %s",
5386 inst_disk.iv_name, node, msg)
5389 dev_path = result.payload
5391 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5393 # leave the disks configured for the primary node
5394 # this is a workaround that would be fixed better by
5395 # improving the logical/physical id handling
5397 lu.cfg.SetDiskID(disk, instance.primary_node)
5399 return disks_ok, device_info
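
# _AssembleInstanceDisks above uses two passes: every node first assembles
# the devices in secondary mode, and only then is the primary node switched
# to primary mode, which narrows the DRBD handshake race described in the
# comments. A minimal sketch of that ordering with hypothetical callables:
def _ExampleTwoPassAssemble(disks_by_node, primary_node, assemble_fn):
  """disks_by_node: dict node -> disks; assemble_fn(node, disk, as_primary)."""
  for (node, node_disks) in disks_by_node.items():
    for disk in node_disks:
      assemble_fn(node, disk, False)       # pass 1: secondary mode everywhere
  for disk in disks_by_node.get(primary_node, []):
    assemble_fn(primary_node, disk, True)  # pass 2: only the primary node
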
5402 def _StartInstanceDisks(lu, instance, force):
5403 """Start the disks of an instance.
5406 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5407 ignore_secondaries=force)
5409 _ShutdownInstanceDisks(lu, instance)
5410 if force is not None and not force:
5411 lu.proc.LogWarning("", hint="If the message above refers to a"
5413 " you can retry the operation using '--force'.")
5414 raise errors.OpExecError("Disk consistency error")
5417 class LUInstanceDeactivateDisks(NoHooksLU):
5418 """Shutdown an instance's disks.
5423 def ExpandNames(self):
5424 self._ExpandAndLockInstance()
5425 self.needed_locks[locking.LEVEL_NODE] = []
5426 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5428 def DeclareLocks(self, level):
5429 if level == locking.LEVEL_NODE:
5430 self._LockInstancesNodes()
5432 def CheckPrereq(self):
5433 """Check prerequisites.
5435 This checks that the instance is in the cluster.
5438 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5439 assert self.instance is not None, \
5440 "Cannot retrieve locked instance %s" % self.op.instance_name
5442 def Exec(self, feedback_fn):
5443 """Deactivate the disks
5446 instance = self.instance
5448 _ShutdownInstanceDisks(self, instance)
5450 _SafeShutdownInstanceDisks(self, instance)
5453 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5454 """Shutdown block devices of an instance.
5456 This function checks if an instance is running, before calling
5457 _ShutdownInstanceDisks.
5460 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5461 _ShutdownInstanceDisks(lu, instance, disks=disks)
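
# _SafeShutdownInstanceDisks above refuses to touch the block devices while
# the instance is still reported as running. A hypothetical sketch of that
# guard, with the checks reduced to plain callbacks:
def _ExampleSafeShutdown(is_running_fn, shutdown_fn):
  """Calls shutdown_fn only if is_running_fn reports the instance as down."""
  if is_running_fn():
    raise errors.OpExecError("Cannot shutdown disks: instance is running")
  return shutdown_fn()
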
5464 def _ExpandCheckDisks(instance, disks):
5465 """Return the instance disks selected by the disks list
5467 @type disks: list of L{objects.Disk} or None
5468 @param disks: selected disks
5469 @rtype: list of L{objects.Disk}
5470 @return: selected instance disks to act on
5474 return instance.disks
5476 if not set(disks).issubset(instance.disks):
5477 raise errors.ProgrammerError("Can only act on disks belonging to the"
5482 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5483 """Shutdown block devices of an instance.
5485 This does the shutdown on all nodes of the instance.
5487 If ignore_primary is false, errors on the primary node make the function return failure.
5492 disks = _ExpandCheckDisks(instance, disks)
5495 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5496 lu.cfg.SetDiskID(top_disk, node)
5497 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5498 msg = result.fail_msg
5500 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5501 disk.iv_name, node, msg)
5502 if ((node == instance.primary_node and not ignore_primary) or
5503 (node != instance.primary_node and not result.offline)):
5508 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5509 """Checks if a node has enough free memory.
5511 This function checks if a given node has the needed amount of free
5512 memory. In case the node has less memory or we cannot get the
5513 information from the node, this function raises an OpPrereqError exception.
5516 @type lu: C{LogicalUnit}
5517 @param lu: a logical unit from which we get configuration data
5519 @param node: the node to check
5520 @type reason: C{str}
5521 @param reason: string to use in the error message
5522 @type requested: C{int}
5523 @param requested: the amount of memory in MiB to check for
5524 @type hypervisor_name: C{str}
5525 @param hypervisor_name: the hypervisor to ask for memory stats
5526 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5527 we cannot check the node
5530 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5531 nodeinfo[node].Raise("Can't get data from node %s" % node,
5532 prereq=True, ecode=errors.ECODE_ENVIRON)
5533 free_mem = nodeinfo[node].payload.get('memory_free', None)
5534 if not isinstance(free_mem, int):
5535 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5536 " was '%s'" % (node, free_mem),
5537 errors.ECODE_ENVIRON)
5538 if requested > free_mem:
5539 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5540 " needed %s MiB, available %s MiB" %
5541 (node, reason, requested, free_mem),
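
# Both the memory check above and the disk checks below follow the same
# defensive pattern for RPC payloads: fetch the field, verify it is an
# integer, and only then compare it against the requested amount. A small
# sketch of that pattern (hypothetical helper):
def _ExampleGetIntFromPayload(payload, key):
  """Returns payload[key] if it is an integer, else raises OpPrereqError."""
  value = payload.get(key, None)
  if not isinstance(value, int):
    raise errors.OpPrereqError("Expected an integer for '%s', got '%s'" %
                               (key, value), errors.ECODE_ENVIRON)
  return value
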
5545 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5546 """Checks if nodes have enough free disk space in all the VGs.
5548 This function checks if all given nodes have the needed amount of
5549 free disk. In case any node has less disk or we cannot get the
5550 information from the node, this function raises an OpPrereqError exception.
5553 @type lu: C{LogicalUnit}
5554 @param lu: a logical unit from which we get configuration data
5555 @type nodenames: C{list}
5556 @param nodenames: the list of node names to check
5557 @type req_sizes: C{dict}
5558 @param req_sizes: the hash of vg and corresponding amount of disk in
5560 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5561 or we cannot check the node
5564 for vg, req_size in req_sizes.items():
5565 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
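
# _CheckNodesFreeDiskPerVG above expects req_sizes to map each volume group
# to the total amount of disk (in MiB) requested from it. A sketch of how
# such a mapping could be built from per-disk requirements (hypothetical):
def _ExampleBuildReqSizes(disk_requirements):
  """disk_requirements: iterable of (vg_name, size_in_mib) pairs."""
  req_sizes = {}
  for (vg, size) in disk_requirements:
    req_sizes[vg] = req_sizes.get(vg, 0) + size
  return req_sizes
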
5568 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5569 """Checks if nodes have enough free disk space in the specified VG.
5571 This function checks if all given nodes have the needed amount of
5572 free disk. In case any node has less disk or we cannot get the
5573 information from the node, this function raises an OpPrereqError exception.
5576 @type lu: C{LogicalUnit}
5577 @param lu: a logical unit from which we get configuration data
5578 @type nodenames: C{list}
5579 @param nodenames: the list of node names to check
5581 @param vg: the volume group to check
5582 @type requested: C{int}
5583 @param requested: the amount of disk in MiB to check for
5584 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5585 or we cannot check the node
5588 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5589 for node in nodenames:
5590 info = nodeinfo[node]
5591 info.Raise("Cannot get current information from node %s" % node,
5592 prereq=True, ecode=errors.ECODE_ENVIRON)
5593 vg_free = info.payload.get("vg_free", None)
5594 if not isinstance(vg_free, int):
5595 raise errors.OpPrereqError("Can't compute free disk space on node"
5596 " %s for vg %s, result was '%s'" %
5597 (node, vg, vg_free), errors.ECODE_ENVIRON)
5598 if requested > vg_free:
5599 raise errors.OpPrereqError("Not enough disk space on target node %s"
5600 " vg %s: required %d MiB, available %d MiB" %
5601 (node, vg, requested, vg_free),
5605 class LUInstanceStartup(LogicalUnit):
5606 """Starts an instance.
5609 HPATH = "instance-start"
5610 HTYPE = constants.HTYPE_INSTANCE
5613 def CheckArguments(self):
5615 if self.op.beparams:
5616 # fill the beparams dict
5617 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5619 def ExpandNames(self):
5620 self._ExpandAndLockInstance()
5622 def BuildHooksEnv(self):
5625 This runs on master, primary and secondary nodes of the instance.
5629 "FORCE": self.op.force,
5632 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5636 def BuildHooksNodes(self):
5637 """Build hooks nodes.
5640 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5643 def CheckPrereq(self):
5644 """Check prerequisites.
5646 This checks that the instance is in the cluster.
5649 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5650 assert self.instance is not None, \
5651 "Cannot retrieve locked instance %s" % self.op.instance_name
5654 if self.op.hvparams:
5655 # check hypervisor parameter syntax (locally)
5656 cluster = self.cfg.GetClusterInfo()
5657 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5658 filled_hvp = cluster.FillHV(instance)
5659 filled_hvp.update(self.op.hvparams)
5660 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5661 hv_type.CheckParameterSyntax(filled_hvp)
5662 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5664 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5666 if self.primary_offline and self.op.ignore_offline_nodes:
5667 self.proc.LogWarning("Ignoring offline primary node")
5669 if self.op.hvparams or self.op.beparams:
5670 self.proc.LogWarning("Overridden parameters are ignored")
5672 _CheckNodeOnline(self, instance.primary_node)
5674 bep = self.cfg.GetClusterInfo().FillBE(instance)
5676 # check bridges existence
5677 _CheckInstanceBridgesExist(self, instance)
5679 remote_info = self.rpc.call_instance_info(instance.primary_node,
5681 instance.hypervisor)
5682 remote_info.Raise("Error checking node %s" % instance.primary_node,
5683 prereq=True, ecode=errors.ECODE_ENVIRON)
5684 if not remote_info.payload: # not running already
5685 _CheckNodeFreeMemory(self, instance.primary_node,
5686 "starting instance %s" % instance.name,
5687 bep[constants.BE_MEMORY], instance.hypervisor)
5689 def Exec(self, feedback_fn):
5690 """Start the instance.
5693 instance = self.instance
5694 force = self.op.force
5696 if not self.op.no_remember:
5697 self.cfg.MarkInstanceUp(instance.name)
5699 if self.primary_offline:
5700 assert self.op.ignore_offline_nodes
5701 self.proc.LogInfo("Primary node offline, marked instance as started")
5703 node_current = instance.primary_node
5705 _StartInstanceDisks(self, instance, force)
5707 result = self.rpc.call_instance_start(node_current, instance,
5708 self.op.hvparams, self.op.beparams,
5709 self.op.startup_paused)
5710 msg = result.fail_msg
5712 _ShutdownInstanceDisks(self, instance)
5713 raise errors.OpExecError("Could not start instance: %s" % msg)
5716 class LUInstanceReboot(LogicalUnit):
5717 """Reboot an instance.
5720 HPATH = "instance-reboot"
5721 HTYPE = constants.HTYPE_INSTANCE
5724 def ExpandNames(self):
5725 self._ExpandAndLockInstance()
5727 def BuildHooksEnv(self):
5730 This runs on master, primary and secondary nodes of the instance.
5734 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5735 "REBOOT_TYPE": self.op.reboot_type,
5736 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5739 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5743 def BuildHooksNodes(self):
5744 """Build hooks nodes.
5747 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5750 def CheckPrereq(self):
5751 """Check prerequisites.
5753 This checks that the instance is in the cluster.
5756 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5757 assert self.instance is not None, \
5758 "Cannot retrieve locked instance %s" % self.op.instance_name
5760 _CheckNodeOnline(self, instance.primary_node)
5762 # check bridges existence
5763 _CheckInstanceBridgesExist(self, instance)
5765 def Exec(self, feedback_fn):
5766 """Reboot the instance.
5769 instance = self.instance
5770 ignore_secondaries = self.op.ignore_secondaries
5771 reboot_type = self.op.reboot_type
5773 remote_info = self.rpc.call_instance_info(instance.primary_node,
5775 instance.hypervisor)
5776 remote_info.Raise("Error checking node %s" % instance.primary_node)
5777 instance_running = bool(remote_info.payload)
5779 node_current = instance.primary_node
5781 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5782 constants.INSTANCE_REBOOT_HARD]:
5783 for disk in instance.disks:
5784 self.cfg.SetDiskID(disk, node_current)
5785 result = self.rpc.call_instance_reboot(node_current, instance,
5787 self.op.shutdown_timeout)
5788 result.Raise("Could not reboot instance")
5790 if instance_running:
5791 result = self.rpc.call_instance_shutdown(node_current, instance,
5792 self.op.shutdown_timeout)
5793 result.Raise("Could not shutdown instance for full reboot")
5794 _ShutdownInstanceDisks(self, instance)
5796 self.LogInfo("Instance %s was already stopped, starting now",
5798 _StartInstanceDisks(self, instance, ignore_secondaries)
5799 result = self.rpc.call_instance_start(node_current, instance,
5801 msg = result.fail_msg
5803 _ShutdownInstanceDisks(self, instance)
5804 raise errors.OpExecError("Could not start instance for"
5805 " full reboot: %s" % msg)
5807 self.cfg.MarkInstanceUp(instance.name)
5810 class LUInstanceShutdown(LogicalUnit):
5811 """Shutdown an instance.
5814 HPATH = "instance-stop"
5815 HTYPE = constants.HTYPE_INSTANCE
5818 def ExpandNames(self):
5819 self._ExpandAndLockInstance()
5821 def BuildHooksEnv(self):
5824 This runs on master, primary and secondary nodes of the instance.
5827 env = _BuildInstanceHookEnvByObject(self, self.instance)
5828 env["TIMEOUT"] = self.op.timeout
5831 def BuildHooksNodes(self):
5832 """Build hooks nodes.
5835 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5838 def CheckPrereq(self):
5839 """Check prerequisites.
5841 This checks that the instance is in the cluster.
5844 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5845 assert self.instance is not None, \
5846 "Cannot retrieve locked instance %s" % self.op.instance_name
5848 self.primary_offline = \
5849 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5851 if self.primary_offline and self.op.ignore_offline_nodes:
5852 self.proc.LogWarning("Ignoring offline primary node")
5854 _CheckNodeOnline(self, self.instance.primary_node)
5856 def Exec(self, feedback_fn):
5857 """Shutdown the instance.
5860 instance = self.instance
5861 node_current = instance.primary_node
5862 timeout = self.op.timeout
5864 if not self.op.no_remember:
5865 self.cfg.MarkInstanceDown(instance.name)
5867 if self.primary_offline:
5868 assert self.op.ignore_offline_nodes
5869 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5871 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5872 msg = result.fail_msg
5874 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5876 _ShutdownInstanceDisks(self, instance)
5879 class LUInstanceReinstall(LogicalUnit):
5880 """Reinstall an instance.
5883 HPATH = "instance-reinstall"
5884 HTYPE = constants.HTYPE_INSTANCE
5887 def ExpandNames(self):
5888 self._ExpandAndLockInstance()
5890 def BuildHooksEnv(self):
5893 This runs on master, primary and secondary nodes of the instance.
5896 return _BuildInstanceHookEnvByObject(self, self.instance)
5898 def BuildHooksNodes(self):
5899 """Build hooks nodes.
5902 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5905 def CheckPrereq(self):
5906 """Check prerequisites.
5908 This checks that the instance is in the cluster and is not running.
5911 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5912 assert instance is not None, \
5913 "Cannot retrieve locked instance %s" % self.op.instance_name
5914 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5915 " offline, cannot reinstall")
5916 for node in instance.secondary_nodes:
5917 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5918 " cannot reinstall")
5920 if instance.disk_template == constants.DT_DISKLESS:
5921 raise errors.OpPrereqError("Instance '%s' has no disks" %
5922 self.op.instance_name,
5924 _CheckInstanceDown(self, instance, "cannot reinstall")
5926 if self.op.os_type is not None:
5928 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5929 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5930 instance_os = self.op.os_type
5932 instance_os = instance.os
5934 nodelist = list(instance.all_nodes)
5936 if self.op.osparams:
5937 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5938 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5939 self.os_inst = i_osdict # the new dict (without defaults)
5943 self.instance = instance
5945 def Exec(self, feedback_fn):
5946 """Reinstall the instance.
5949 inst = self.instance
5951 if self.op.os_type is not None:
5952 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5953 inst.os = self.op.os_type
5954 # Write to configuration
5955 self.cfg.Update(inst, feedback_fn)
5957 _StartInstanceDisks(self, inst, None)
5959 feedback_fn("Running the instance OS create scripts...")
5960 # FIXME: pass debug option from opcode to backend
5961 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5962 self.op.debug_level,
5963 osparams=self.os_inst)
5964 result.Raise("Could not install OS for instance %s on node %s" %
5965 (inst.name, inst.primary_node))
5967 _ShutdownInstanceDisks(self, inst)
5970 class LUInstanceRecreateDisks(LogicalUnit):
5971 """Recreate an instance's missing disks.
5974 HPATH = "instance-recreate-disks"
5975 HTYPE = constants.HTYPE_INSTANCE
5978 def CheckArguments(self):
5979 # normalise the disk list
5980 self.op.disks = sorted(frozenset(self.op.disks))
5982 def ExpandNames(self):
5983 self._ExpandAndLockInstance()
5984 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5986 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5987 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5989 self.needed_locks[locking.LEVEL_NODE] = []
5991 def DeclareLocks(self, level):
5992 if level == locking.LEVEL_NODE:
5993 # if we replace the nodes, we only need to lock the old primary,
5994 # otherwise we need to lock all nodes for disk re-creation
5995 primary_only = bool(self.op.nodes)
5996 self._LockInstancesNodes(primary_only=primary_only)
5998 def BuildHooksEnv(self):
6001 This runs on master, primary and secondary nodes of the instance.
6004 return _BuildInstanceHookEnvByObject(self, self.instance)
6006 def BuildHooksNodes(self):
6007 """Build hooks nodes.
6010 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6013 def CheckPrereq(self):
6014 """Check prerequisites.
6016 This checks that the instance is in the cluster and is not running.
6019 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6020 assert instance is not None, \
6021 "Cannot retrieve locked instance %s" % self.op.instance_name
6023 if len(self.op.nodes) != len(instance.all_nodes):
6024 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6025 " %d replacement nodes were specified" %
6026 (instance.name, len(instance.all_nodes),
6027 len(self.op.nodes)),
6029 assert instance.disk_template != constants.DT_DRBD8 or \
6030 len(self.op.nodes) == 2
6031 assert instance.disk_template != constants.DT_PLAIN or \
6032 len(self.op.nodes) == 1
6033 primary_node = self.op.nodes[0]
6035 primary_node = instance.primary_node
6036 _CheckNodeOnline(self, primary_node)
6038 if instance.disk_template == constants.DT_DISKLESS:
6039 raise errors.OpPrereqError("Instance '%s' has no disks" %
6040 self.op.instance_name, errors.ECODE_INVAL)
6041 # if we replace nodes *and* the old primary is offline, we don't
6043 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6044 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6045 if not (self.op.nodes and old_pnode.offline):
6046 _CheckInstanceDown(self, instance, "cannot recreate disks")
6048 if not self.op.disks:
6049 self.op.disks = range(len(instance.disks))
6051 for idx in self.op.disks:
6052 if idx >= len(instance.disks):
6053 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6055 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6056 raise errors.OpPrereqError("Can't recreate disks partially and"
6057 " change the nodes at the same time",
6059 self.instance = instance
6061 def Exec(self, feedback_fn):
6062 """Recreate the disks.
6065 instance = self.instance
6068 mods = [] # keeps track of needed logical_id changes
6070 for idx, disk in enumerate(instance.disks):
6071 if idx not in self.op.disks: # disk idx has not been passed in
6074 # update secondaries for disks, if needed
6076 if disk.dev_type == constants.LD_DRBD8:
6077 # need to update the nodes and minors
6078 assert len(self.op.nodes) == 2
6079 assert len(disk.logical_id) == 6 # otherwise disk internals
6081 (_, _, old_port, _, _, old_secret) = disk.logical_id
6082 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6083 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6084 new_minors[0], new_minors[1], old_secret)
6085 assert len(disk.logical_id) == len(new_id)
6086 mods.append((idx, new_id))
6088 # now that we have passed all asserts above, we can apply the mods
6089 # in a single run (to avoid partial changes)
6090 for idx, new_id in mods:
6091 instance.disks[idx].logical_id = new_id
6093 # change primary node, if needed
6095 instance.primary_node = self.op.nodes[0]
6096 self.LogWarning("Changing the instance's nodes, you will have to"
6097 " remove any disks left on the older nodes manually")
6100 self.cfg.Update(instance, feedback_fn)
6102 _CreateDisks(self, instance, to_skip=to_skip)
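# Illustrative sketch of the DRBD8 logical_id rewrite performed above (node
# names, port, minors and secret are hypothetical values): an old id of
#   ("node1", "node2", 11000, 0, 1, "secret")
# recreated on nodes node3/node4 with freshly allocated minors 2 and 3 becomes
#   ("node3", "node4", 11000, 2, 3, "secret")
# i.e. the nodes and minors change while the port and shared secret are kept.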
6105 class LUInstanceRename(LogicalUnit):
6106 """Rename an instance.
6109 HPATH = "instance-rename"
6110 HTYPE = constants.HTYPE_INSTANCE
6112 def CheckArguments(self):
6116 if self.op.ip_check and not self.op.name_check:
6117 # TODO: make the ip check more flexible and not depend on the name check
6118 raise errors.OpPrereqError("IP address check requires a name check",
6121 def BuildHooksEnv(self):
6124 This runs on master, primary and secondary nodes of the instance.
6127 env = _BuildInstanceHookEnvByObject(self, self.instance)
6128 env["INSTANCE_NEW_NAME"] = self.op.new_name
6131 def BuildHooksNodes(self):
6132 """Build hooks nodes.
6135 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6138 def CheckPrereq(self):
6139 """Check prerequisites.
6141 This checks that the instance is in the cluster and is not running.
6144 self.op.instance_name = _ExpandInstanceName(self.cfg,
6145 self.op.instance_name)
6146 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6147 assert instance is not None
6148 _CheckNodeOnline(self, instance.primary_node)
6149 _CheckInstanceDown(self, instance, "cannot rename")
6150 self.instance = instance
6152 new_name = self.op.new_name
6153 if self.op.name_check:
6154 hostname = netutils.GetHostname(name=new_name)
6155 if hostname != new_name:
6156 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6158 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6159 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6160 " same as given hostname '%s'") %
6161 (hostname.name, self.op.new_name),
6163 new_name = self.op.new_name = hostname.name
6164 if (self.op.ip_check and
6165 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6166 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6167 (hostname.ip, new_name),
6168 errors.ECODE_NOTUNIQUE)
6170 instance_list = self.cfg.GetInstanceList()
6171 if new_name in instance_list and new_name != instance.name:
6172 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6173 new_name, errors.ECODE_EXISTS)
6175 def Exec(self, feedback_fn):
6176 """Rename the instance.
6179 inst = self.instance
6180 old_name = inst.name
6182 rename_file_storage = False
6183 if (inst.disk_template in constants.DTS_FILEBASED and
6184 self.op.new_name != inst.name):
6185 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6186 rename_file_storage = True
6188 self.cfg.RenameInstance(inst.name, self.op.new_name)
6189 # Change the instance lock. This is definitely safe while we hold the BGL.
6190 # Otherwise the new lock would have to be added in acquired mode.
6192 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6193 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6195 # re-read the instance from the configuration after rename
6196 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6198 if rename_file_storage:
6199 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6200 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6201 old_file_storage_dir,
6202 new_file_storage_dir)
6203 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6204 " (but the instance has been renamed in Ganeti)" %
6205 (inst.primary_node, old_file_storage_dir,
6206 new_file_storage_dir))
6208 _StartInstanceDisks(self, inst, None)
6210 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6211 old_name, self.op.debug_level)
6212 msg = result.fail_msg
6214 msg = ("Could not run OS rename script for instance %s on node %s"
6215 " (but the instance has been renamed in Ganeti): %s" %
6216 (inst.name, inst.primary_node, msg))
6217 self.proc.LogWarning(msg)
6219 _ShutdownInstanceDisks(self, inst)
6224 class LUInstanceRemove(LogicalUnit):
6225 """Remove an instance.
6228 HPATH = "instance-remove"
6229 HTYPE = constants.HTYPE_INSTANCE
6232 def ExpandNames(self):
6233 self._ExpandAndLockInstance()
6234 self.needed_locks[locking.LEVEL_NODE] = []
6235 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6237 def DeclareLocks(self, level):
6238 if level == locking.LEVEL_NODE:
6239 self._LockInstancesNodes()
6241 def BuildHooksEnv(self):
6244 This runs on master, primary and secondary nodes of the instance.
6247 env = _BuildInstanceHookEnvByObject(self, self.instance)
6248 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6251 def BuildHooksNodes(self):
6252 """Build hooks nodes.
6255 nl = [self.cfg.GetMasterNode()]
6256 nl_post = list(self.instance.all_nodes) + nl
6257 return (nl, nl_post)
6259 def CheckPrereq(self):
6260 """Check prerequisites.
6262 This checks that the instance is in the cluster.
6265 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6266 assert self.instance is not None, \
6267 "Cannot retrieve locked instance %s" % self.op.instance_name
6269 def Exec(self, feedback_fn):
6270 """Remove the instance.
6273 instance = self.instance
6274 logging.info("Shutting down instance %s on node %s",
6275 instance.name, instance.primary_node)
6277 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6278 self.op.shutdown_timeout)
6279 msg = result.fail_msg
6281 if self.op.ignore_failures:
6282 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6284 raise errors.OpExecError("Could not shutdown instance %s on"
6286 (instance.name, instance.primary_node, msg))
6288 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6291 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6292 """Utility function to remove an instance.
6295 logging.info("Removing block devices for instance %s", instance.name)
6297 if not _RemoveDisks(lu, instance):
6298 if not ignore_failures:
6299 raise errors.OpExecError("Can't remove instance's disks")
6300 feedback_fn("Warning: can't remove instance's disks")
6302 logging.info("Removing instance %s out of cluster config", instance.name)
6304 lu.cfg.RemoveInstance(instance.name)
6306 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6307 "Instance lock removal conflict"
6309 # Remove lock for the instance
6310 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6313 class LUInstanceQuery(NoHooksLU):
6314 """Logical unit for querying instances.
6317 # pylint: disable-msg=W0142
6320 def CheckArguments(self):
6321 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6322 self.op.output_fields, self.op.use_locking)
6324 def ExpandNames(self):
6325 self.iq.ExpandNames(self)
6327 def DeclareLocks(self, level):
6328 self.iq.DeclareLocks(self, level)
6330 def Exec(self, feedback_fn):
6331 return self.iq.OldStyleQuery(self)
6334 class LUInstanceFailover(LogicalUnit):
6335 """Failover an instance.
6338 HPATH = "instance-failover"
6339 HTYPE = constants.HTYPE_INSTANCE
6342 def CheckArguments(self):
6343 """Check the arguments.
6346 self.iallocator = getattr(self.op, "iallocator", None)
6347 self.target_node = getattr(self.op, "target_node", None)
6349 def ExpandNames(self):
6350 self._ExpandAndLockInstance()
6352 if self.op.target_node is not None:
6353 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6355 self.needed_locks[locking.LEVEL_NODE] = []
6356 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6358 ignore_consistency = self.op.ignore_consistency
6359 shutdown_timeout = self.op.shutdown_timeout
6360 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6363 ignore_consistency=ignore_consistency,
6364 shutdown_timeout=shutdown_timeout)
6365 self.tasklets = [self._migrater]
6367 def DeclareLocks(self, level):
6368 if level == locking.LEVEL_NODE:
6369 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6370 if instance.disk_template in constants.DTS_EXT_MIRROR:
6371 if self.op.target_node is None:
6372 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6374 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6375 self.op.target_node]
6376 del self.recalculate_locks[locking.LEVEL_NODE]
6378 self._LockInstancesNodes()
6380 def BuildHooksEnv(self):
6383 This runs on master, primary and secondary nodes of the instance.
6386 instance = self._migrater.instance
6387 source_node = instance.primary_node
6388 target_node = self.op.target_node
6390 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6391 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6392 "OLD_PRIMARY": source_node,
6393 "NEW_PRIMARY": target_node,
6396 if instance.disk_template in constants.DTS_INT_MIRROR:
6397 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6398 env["NEW_SECONDARY"] = source_node
6400 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6402 env.update(_BuildInstanceHookEnvByObject(self, instance))
6406 def BuildHooksNodes(self):
6407 """Build hooks nodes.
6410 instance = self._migrater.instance
6411 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6412 return (nl, nl + [instance.primary_node])
6415 class LUInstanceMigrate(LogicalUnit):
6416 """Migrate an instance.
6418 This is migration without shutting down the instance; failover, by
6419 contrast, is done with a shutdown.
6422 HPATH = "instance-migrate"
6423 HTYPE = constants.HTYPE_INSTANCE
6426 def ExpandNames(self):
6427 self._ExpandAndLockInstance()
6429 if self.op.target_node is not None:
6430 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6432 self.needed_locks[locking.LEVEL_NODE] = []
6433 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6435 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6436 cleanup=self.op.cleanup,
6438 fallback=self.op.allow_failover)
6439 self.tasklets = [self._migrater]
6441 def DeclareLocks(self, level):
6442 if level == locking.LEVEL_NODE:
6443 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6444 if instance.disk_template in constants.DTS_EXT_MIRROR:
6445 if self.op.target_node is None:
6446 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6448 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6449 self.op.target_node]
6450 del self.recalculate_locks[locking.LEVEL_NODE]
6452 self._LockInstancesNodes()
6454 def BuildHooksEnv(self):
6457 This runs on master, primary and secondary nodes of the instance.
6460 instance = self._migrater.instance
6461 source_node = instance.primary_node
6462 target_node = self.op.target_node
6463 env = _BuildInstanceHookEnvByObject(self, instance)
6465 "MIGRATE_LIVE": self._migrater.live,
6466 "MIGRATE_CLEANUP": self.op.cleanup,
6467 "OLD_PRIMARY": source_node,
6468 "NEW_PRIMARY": target_node,
6471 if instance.disk_template in constants.DTS_INT_MIRROR:
6472 env["OLD_SECONDARY"] = target_node
6473 env["NEW_SECONDARY"] = source_node
6475 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6479 def BuildHooksNodes(self):
6480 """Build hooks nodes.
6483 instance = self._migrater.instance
6484 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6485 return (nl, nl + [instance.primary_node])
6488 class LUInstanceMove(LogicalUnit):
6489 """Move an instance by data-copying.
6492 HPATH = "instance-move"
6493 HTYPE = constants.HTYPE_INSTANCE
6496 def ExpandNames(self):
6497 self._ExpandAndLockInstance()
6498 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6499 self.op.target_node = target_node
6500 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6501 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6503 def DeclareLocks(self, level):
6504 if level == locking.LEVEL_NODE:
6505 self._LockInstancesNodes(primary_only=True)
6507 def BuildHooksEnv(self):
6510 This runs on master, primary and secondary nodes of the instance.
6514 "TARGET_NODE": self.op.target_node,
6515 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6517 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6520 def BuildHooksNodes(self):
6521 """Build hooks nodes.
6525 self.cfg.GetMasterNode(),
6526 self.instance.primary_node,
6527 self.op.target_node,
6531 def CheckPrereq(self):
6532 """Check prerequisites.
6534 This checks that the instance is in the cluster.
6537 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6538 assert self.instance is not None, \
6539 "Cannot retrieve locked instance %s" % self.op.instance_name
6541 node = self.cfg.GetNodeInfo(self.op.target_node)
6542 assert node is not None, \
6543 "Cannot retrieve locked node %s" % self.op.target_node
6545 self.target_node = target_node = node.name
6547 if target_node == instance.primary_node:
6548 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6549 (instance.name, target_node),
6552 bep = self.cfg.GetClusterInfo().FillBE(instance)
6554 for idx, dsk in enumerate(instance.disks):
6555 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6556 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6557 " cannot copy" % idx, errors.ECODE_STATE)
6559 _CheckNodeOnline(self, target_node)
6560 _CheckNodeNotDrained(self, target_node)
6561 _CheckNodeVmCapable(self, target_node)
6563 if instance.admin_up:
6564 # check memory requirements on the target node
6565 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6566 instance.name, bep[constants.BE_MEMORY],
6567 instance.hypervisor)
6569 self.LogInfo("Not checking memory on the secondary node as"
6570 " instance will not be started")
6572 # check bridge existence
6573 _CheckInstanceBridgesExist(self, instance, node=target_node)
6575 def Exec(self, feedback_fn):
6576 """Move an instance.
6578 The move is done by shutting it down on its present node, copying
6579 the data over (slow) and starting it on the new node.
6582 instance = self.instance
6584 source_node = instance.primary_node
6585 target_node = self.target_node
6587 self.LogInfo("Shutting down instance %s on source node %s",
6588 instance.name, source_node)
6590 result = self.rpc.call_instance_shutdown(source_node, instance,
6591 self.op.shutdown_timeout)
6592 msg = result.fail_msg
6594 if self.op.ignore_consistency:
6595 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6596 " Proceeding anyway. Please make sure node"
6597 " %s is down. Error details: %s",
6598 instance.name, source_node, source_node, msg)
6600 raise errors.OpExecError("Could not shutdown instance %s on"
6602 (instance.name, source_node, msg))
6604 # create the target disks
6606 _CreateDisks(self, instance, target_node=target_node)
6607 except errors.OpExecError:
6608 self.LogWarning("Device creation failed, reverting...")
6610 _RemoveDisks(self, instance, target_node=target_node)
6612 self.cfg.ReleaseDRBDMinors(instance.name)
6615 cluster_name = self.cfg.GetClusterInfo().cluster_name
6618 # activate, get path, copy the data over
6619 for idx, disk in enumerate(instance.disks):
6620 self.LogInfo("Copying data for disk %d", idx)
6621 result = self.rpc.call_blockdev_assemble(target_node, disk,
6622 instance.name, True, idx)
6624 self.LogWarning("Can't assemble newly created disk %d: %s",
6625 idx, result.fail_msg)
6626 errs.append(result.fail_msg)
6628 dev_path = result.payload
6629 result = self.rpc.call_blockdev_export(source_node, disk,
6630 target_node, dev_path,
6633 self.LogWarning("Can't copy data over for disk %d: %s",
6634 idx, result.fail_msg)
6635 errs.append(result.fail_msg)
6639 self.LogWarning("Some disks failed to copy, aborting")
6641 _RemoveDisks(self, instance, target_node=target_node)
6643 self.cfg.ReleaseDRBDMinors(instance.name)
6644 raise errors.OpExecError("Errors during disk copy: %s" %
6647 instance.primary_node = target_node
6648 self.cfg.Update(instance, feedback_fn)
6650 self.LogInfo("Removing the disks on the original node")
6651 _RemoveDisks(self, instance, target_node=source_node)
6653 # Only start the instance if it's marked as up
6654 if instance.admin_up:
6655 self.LogInfo("Starting instance %s on node %s",
6656 instance.name, target_node)
6658 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6659 ignore_secondaries=True)
6661 _ShutdownInstanceDisks(self, instance)
6662 raise errors.OpExecError("Can't activate the instance's disks")
6664 result = self.rpc.call_instance_start(target_node, instance,
6666 msg = result.fail_msg
6668 _ShutdownInstanceDisks(self, instance)
6669 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6670 (instance.name, target_node, msg))
6673 class LUNodeMigrate(LogicalUnit):
6674 """Migrate all instances from a node.
6677 HPATH = "node-migrate"
6678 HTYPE = constants.HTYPE_NODE
6681 def CheckArguments(self):
6684 def ExpandNames(self):
6685 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6687 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6688 self.needed_locks = {
6689 locking.LEVEL_NODE: [self.op.node_name],
6692 def BuildHooksEnv(self):
6695 This runs on the master, the primary and all the secondaries.
6699 "NODE_NAME": self.op.node_name,
6702 def BuildHooksNodes(self):
6703 """Build hooks nodes.
6706 nl = [self.cfg.GetMasterNode()]
6709 def CheckPrereq(self):
6712 def Exec(self, feedback_fn):
6713 # Prepare jobs for migrating instances
6715 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6718 iallocator=self.op.iallocator,
6719 target_node=self.op.target_node)]
6720 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6723 # TODO: Run iallocator in this opcode and pass correct placement options to
6724 # OpInstanceMigrate. Since other jobs can modify the cluster between
6725 # running the iallocator and the actual migration, a good consistency model
6726 # will have to be found.
6728 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6729 frozenset([self.op.node_name]))
6731 return ResultWithJobs(jobs)
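# Illustrative example (instance names are hypothetical): for a node with two
# primary instances "inst1" and "inst2", `jobs` is a list of two single-opcode
# lists, e.g.
#   [[OpInstanceMigrate(instance_name="inst1", ...)],
#    [OpInstanceMigrate(instance_name="inst2", ...)]]
# so each instance is migrated in its own job.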
6734 class TLMigrateInstance(Tasklet):
6735 """Tasklet class for instance migration.
6738 @ivar live: whether the migration will be done live or non-live;
6739 this variable is initialized only after CheckPrereq has run
6740 @type cleanup: boolean
6741 @ivar cleanup: Whether we clean up from a failed migration
6742 @type iallocator: string
6743 @ivar iallocator: The iallocator used to determine target_node
6744 @type target_node: string
6745 @ivar target_node: If given, the target_node to reallocate the instance to
6746 @type failover: boolean
6747 @ivar failover: Whether operation results in failover or migration
6748 @type fallback: boolean
6749 @ivar fallback: Whether fallback to failover is allowed if migration not
6751 @type ignore_consistency: boolean
6752 @ivar ignore_consistency: Whether we should ignore consistency between source
6754 @type shutdown_timeout: int
6755 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
6758 def __init__(self, lu, instance_name, cleanup=False,
6759 failover=False, fallback=False,
6760 ignore_consistency=False,
6761 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6762 """Initializes this class.
6765 Tasklet.__init__(self, lu)
6768 self.instance_name = instance_name
6769 self.cleanup = cleanup
6770 self.live = False # will be overridden later
6771 self.failover = failover
6772 self.fallback = fallback
6773 self.ignore_consistency = ignore_consistency
6774 self.shutdown_timeout = shutdown_timeout
6776 def CheckPrereq(self):
6777 """Check prerequisites.
6779 This checks that the instance is in the cluster.
6782 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6783 instance = self.cfg.GetInstanceInfo(instance_name)
6784 assert instance is not None
6785 self.instance = instance
6787 if (not self.cleanup and not instance.admin_up and not self.failover and
6789 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6791 self.failover = True
6793 if instance.disk_template not in constants.DTS_MIRRORED:
6798 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6799 " %s" % (instance.disk_template, text),
6802 if instance.disk_template in constants.DTS_EXT_MIRROR:
6803 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6805 if self.lu.op.iallocator:
6806 self._RunAllocator()
6808 # We set self.target_node as it is required by
6810 self.target_node = self.lu.op.target_node
6812 # self.target_node is already populated, either directly or by the
6814 target_node = self.target_node
6815 if self.target_node == instance.primary_node:
6816 raise errors.OpPrereqError("Cannot migrate instance %s"
6817 " to its primary (%s)" %
6818 (instance.name, instance.primary_node))
6820 if len(self.lu.tasklets) == 1:
6821 # It is safe to release locks only when we're the only tasklet
6823 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6824 keep=[instance.primary_node, self.target_node])
6827 secondary_nodes = instance.secondary_nodes
6828 if not secondary_nodes:
6829 raise errors.ConfigurationError("No secondary node but using"
6830 " %s disk template" %
6831 instance.disk_template)
6832 target_node = secondary_nodes[0]
6833 if self.lu.op.iallocator or (self.lu.op.target_node and
6834 self.lu.op.target_node != target_node):
6836 text = "failed over"
6839 raise errors.OpPrereqError("Instances with disk template %s cannot"
6840 " be %s to arbitrary nodes"
6841 " (neither an iallocator nor a target"
6842 " node can be passed)" %
6843 (instance.disk_template, text),
6846 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6848 # check memory requirements on the secondary node
6849 if not self.failover or instance.admin_up:
6850 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6851 instance.name, i_be[constants.BE_MEMORY],
6852 instance.hypervisor)
6854 self.lu.LogInfo("Not checking memory on the secondary node as"
6855 " instance will not be started")
6857 # check bridge existence
6858 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6860 if not self.cleanup:
6861 _CheckNodeNotDrained(self.lu, target_node)
6862 if not self.failover:
6863 result = self.rpc.call_instance_migratable(instance.primary_node,
6865 if result.fail_msg and self.fallback:
6866 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6868 self.failover = True
6870 result.Raise("Can't migrate, please use failover",
6871 prereq=True, ecode=errors.ECODE_STATE)
6873 assert not (self.failover and self.cleanup)
6875 if not self.failover:
6876 if self.lu.op.live is not None and self.lu.op.mode is not None:
6877 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6878 " parameters are accepted",
6880 if self.lu.op.live is not None:
6882 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6884 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6885 # reset the 'live' parameter to None so that repeated
6886 # invocations of CheckPrereq do not raise an exception
6887 self.lu.op.live = None
6888 elif self.lu.op.mode is None:
6889 # read the default value from the hypervisor
6890 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6892 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6894 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6896 # Failover is never live
6899 def _RunAllocator(self):
6900 """Run the allocator based on input opcode.
6903 ial = IAllocator(self.cfg, self.rpc,
6904 mode=constants.IALLOCATOR_MODE_RELOC,
6905 name=self.instance_name,
6906 # TODO See why hail breaks with a single node below
6907 relocate_from=[self.instance.primary_node,
6908 self.instance.primary_node],
6911 ial.Run(self.lu.op.iallocator)
6914 raise errors.OpPrereqError("Can't compute nodes using"
6915 " iallocator '%s': %s" %
6916 (self.lu.op.iallocator, ial.info),
6918 if len(ial.result) != ial.required_nodes:
6919 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6920 " of nodes (%s), required %s" %
6921 (self.lu.op.iallocator, len(ial.result),
6922 ial.required_nodes), errors.ECODE_FAULT)
6923 self.target_node = ial.result[0]
6924 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6925 self.instance_name, self.lu.op.iallocator,
6926 utils.CommaJoin(ial.result))
6928 def _WaitUntilSync(self):
6929 """Poll with custom rpc for disk sync.
6931 This uses our own step-based rpc call.
6934 self.feedback_fn("* wait until resync is done")
6938 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6940 self.instance.disks)
6942 for node, nres in result.items():
6943 nres.Raise("Cannot resync disks on node %s" % node)
6944 node_done, node_percent = nres.payload
6945 all_done = all_done and node_done
6946 if node_percent is not None:
6947 min_percent = min(min_percent, node_percent)
6949 if min_percent < 100:
6950 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6953 def _EnsureSecondary(self, node):
6954 """Demote a node to secondary.
6957 self.feedback_fn("* switching node %s to secondary mode" % node)
6959 for dev in self.instance.disks:
6960 self.cfg.SetDiskID(dev, node)
6962 result = self.rpc.call_blockdev_close(node, self.instance.name,
6963 self.instance.disks)
6964 result.Raise("Cannot change disk to secondary on node %s" % node)
6966 def _GoStandalone(self):
6967 """Disconnect from the network.
6970 self.feedback_fn("* changing into standalone mode")
6971 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6972 self.instance.disks)
6973 for node, nres in result.items():
6974 nres.Raise("Cannot disconnect disks node %s" % node)
6976 def _GoReconnect(self, multimaster):
6977 """Reconnect to the network.
6983 msg = "single-master"
6984 self.feedback_fn("* changing disks into %s mode" % msg)
6985 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6986 self.instance.disks,
6987 self.instance.name, multimaster)
6988 for node, nres in result.items():
6989 nres.Raise("Cannot change disks config on node %s" % node)
6991 def _ExecCleanup(self):
6992 """Try to clean up after a failed migration.
6994 The cleanup is done by:
6995 - check that the instance is running only on one node
6996 (and update the config if needed)
6997 - change disks on its secondary node to secondary
6998 - wait until disks are fully synchronized
6999 - disconnect from the network
7000 - change disks into single-master mode
7001 - wait again until disks are fully synchronized
7004 instance = self.instance
7005 target_node = self.target_node
7006 source_node = self.source_node
7008 # check running on only one node
7009 self.feedback_fn("* checking where the instance actually runs"
7010 " (if this hangs, the hypervisor might be in"
7012 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7013 for node, result in ins_l.items():
7014 result.Raise("Can't contact node %s" % node)
7016 runningon_source = instance.name in ins_l[source_node].payload
7017 runningon_target = instance.name in ins_l[target_node].payload
7019 if runningon_source and runningon_target:
7020 raise errors.OpExecError("Instance seems to be running on two nodes,"
7021 " or the hypervisor is confused; you will have"
7022 " to ensure manually that it runs only on one"
7023 " and restart this operation")
7025 if not (runningon_source or runningon_target):
7026 raise errors.OpExecError("Instance does not seem to be running at all;"
7027 " in this case it's safer to repair by"
7028 " running 'gnt-instance stop' to ensure disk"
7029 " shutdown, and then restarting it")
7031 if runningon_target:
7032 # the migration has actually succeeded, we need to update the config
7033 self.feedback_fn("* instance running on secondary node (%s),"
7034 " updating config" % target_node)
7035 instance.primary_node = target_node
7036 self.cfg.Update(instance, self.feedback_fn)
7037 demoted_node = source_node
7039 self.feedback_fn("* instance confirmed to be running on its"
7040 " primary node (%s)" % source_node)
7041 demoted_node = target_node
7043 if instance.disk_template in constants.DTS_INT_MIRROR:
7044 self._EnsureSecondary(demoted_node)
7046 self._WaitUntilSync()
7047 except errors.OpExecError:
7048 # we ignore errors here, since if the device is standalone, it
7049 # won't be able to sync
7051 self._GoStandalone()
7052 self._GoReconnect(False)
7053 self._WaitUntilSync()
7055 self.feedback_fn("* done")
7057 def _RevertDiskStatus(self):
7058 """Try to revert the disk status after a failed migration.
7061 target_node = self.target_node
7062 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7066 self._EnsureSecondary(target_node)
7067 self._GoStandalone()
7068 self._GoReconnect(False)
7069 self._WaitUntilSync()
7070 except errors.OpExecError, err:
7071 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7072 " please try to recover the instance manually;"
7073 " error '%s'" % str(err))
7075 def _AbortMigration(self):
7076 """Call the hypervisor code to abort a started migration.
7079 instance = self.instance
7080 target_node = self.target_node
7081 migration_info = self.migration_info
7083 abort_result = self.rpc.call_finalize_migration(target_node,
7087 abort_msg = abort_result.fail_msg
7089 logging.error("Aborting migration failed on target node %s: %s",
7090 target_node, abort_msg)
7091 # Don't raise an exception here, as we still have to try to revert the
7092 # disk status, even if this step failed.
7094 def _ExecMigration(self):
7095 """Migrate an instance.
7097 The migration is done by:
7098 - change the disks into dual-master mode
7099 - wait until disks are fully synchronized again
7100 - migrate the instance
7101 - change disks on the new secondary node (the old primary) to secondary
7102 - wait until disks are fully synchronized
7103 - change disks into single-master mode
7106 instance = self.instance
7107 target_node = self.target_node
7108 source_node = self.source_node
7110 self.feedback_fn("* checking disk consistency between source and target")
7111 for dev in instance.disks:
7112 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7113 raise errors.OpExecError("Disk %s is degraded or not fully"
7114 " synchronized on target node,"
7115 " aborting migration" % dev.iv_name)
7117 # First get the migration information from the remote node
7118 result = self.rpc.call_migration_info(source_node, instance)
7119 msg = result.fail_msg
7121 log_err = ("Failed fetching source migration information from %s: %s" %
7123 logging.error(log_err)
7124 raise errors.OpExecError(log_err)
7126 self.migration_info = migration_info = result.payload
7128 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7129 # Then switch the disks to master/master mode
7130 self._EnsureSecondary(target_node)
7131 self._GoStandalone()
7132 self._GoReconnect(True)
7133 self._WaitUntilSync()
7135 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7136 result = self.rpc.call_accept_instance(target_node,
7139 self.nodes_ip[target_node])
7141 msg = result.fail_msg
7143 logging.error("Instance pre-migration failed, trying to revert"
7144 " disk status: %s", msg)
7145 self.feedback_fn("Pre-migration failed, aborting")
7146 self._AbortMigration()
7147 self._RevertDiskStatus()
7148 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7149 (instance.name, msg))
7151 self.feedback_fn("* migrating instance to %s" % target_node)
7152 result = self.rpc.call_instance_migrate(source_node, instance,
7153 self.nodes_ip[target_node],
7155 msg = result.fail_msg
7157 logging.error("Instance migration failed, trying to revert"
7158 " disk status: %s", msg)
7159 self.feedback_fn("Migration failed, aborting")
7160 self._AbortMigration()
7161 self._RevertDiskStatus()
7162 raise errors.OpExecError("Could not migrate instance %s: %s" %
7163 (instance.name, msg))
7165 instance.primary_node = target_node
7166 # distribute new instance config to the other nodes
7167 self.cfg.Update(instance, self.feedback_fn)
7169 result = self.rpc.call_finalize_migration(target_node,
7173 msg = result.fail_msg
7175 logging.error("Instance migration succeeded, but finalization failed:"
7177 raise errors.OpExecError("Could not finalize instance migration: %s" %
7180 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7181 self._EnsureSecondary(source_node)
7182 self._WaitUntilSync()
7183 self._GoStandalone()
7184 self._GoReconnect(False)
7185 self._WaitUntilSync()
7187 self.feedback_fn("* done")
7189 def _ExecFailover(self):
7190 """Failover an instance.
7192 The failover is done by shutting it down on its present node and
7193 starting it on the secondary.
7196 instance = self.instance
7197 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7199 source_node = instance.primary_node
7200 target_node = self.target_node
7202 if instance.admin_up:
7203 self.feedback_fn("* checking disk consistency between source and target")
7204 for dev in instance.disks:
7205 # for drbd, these are drbd over lvm
7206 if not _CheckDiskConsistency(self, dev, target_node, False):
7207 if not self.ignore_consistency:
7208 raise errors.OpExecError("Disk %s is degraded on target node,"
7209 " aborting failover" % dev.iv_name)
7211 self.feedback_fn("* not checking disk consistency as instance is not"
7214 self.feedback_fn("* shutting down instance on source node")
7215 logging.info("Shutting down instance %s on node %s",
7216 instance.name, source_node)
7218 result = self.rpc.call_instance_shutdown(source_node, instance,
7219 self.shutdown_timeout)
7220 msg = result.fail_msg
7222 if self.ignore_consistency or primary_node.offline:
7223 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7224 " proceeding anyway; please make sure node"
7225 " %s is down; error details: %s",
7226 instance.name, source_node, source_node, msg)
7228 raise errors.OpExecError("Could not shutdown instance %s on"
7230 (instance.name, source_node, msg))
7232 self.feedback_fn("* deactivating the instance's disks on source node")
7233 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7234 raise errors.OpExecError("Can't shut down the instance's disks.")
7236 instance.primary_node = target_node
7237 # distribute new instance config to the other nodes
7238 self.cfg.Update(instance, self.feedback_fn)
7240 # Only start the instance if it's marked as up
7241 if instance.admin_up:
7242 self.feedback_fn("* activating the instance's disks on target node")
7243 logging.info("Starting instance %s on node %s",
7244 instance.name, target_node)
7246 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7247 ignore_secondaries=True)
7249 _ShutdownInstanceDisks(self, instance)
7250 raise errors.OpExecError("Can't activate the instance's disks")
7252 self.feedback_fn("* starting the instance on the target node")
7253 result = self.rpc.call_instance_start(target_node, instance, None, None)
7254 msg = result.fail_msg
7256 _ShutdownInstanceDisks(self, instance)
7257 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7258 (instance.name, target_node, msg))
7260 def Exec(self, feedback_fn):
7261 """Perform the migration.
7264 self.feedback_fn = feedback_fn
7265 self.source_node = self.instance.primary_node
7267 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7268 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7269 self.target_node = self.instance.secondary_nodes[0]
7270 # Otherwise self.target_node has been populated either
7271 # directly, or through an iallocator.
7273 self.all_nodes = [self.source_node, self.target_node]
7275 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7276 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7280 feedback_fn("Failover instance %s" % self.instance.name)
7281 self._ExecFailover()
7283 feedback_fn("Migrating instance %s" % self.instance.name)
7286 return self._ExecCleanup()
7288 return self._ExecMigration()
7291 def _CreateBlockDev(lu, node, instance, device, force_create,
7293 """Create a tree of block devices on a given node.
7295 If this device type has to be created on secondaries, create it and
7298 If not, just recurse to children keeping the same 'force' value.
7300 @param lu: the lu on whose behalf we execute
7301 @param node: the node on which to create the device
7302 @type instance: L{objects.Instance}
7303 @param instance: the instance which owns the device
7304 @type device: L{objects.Disk}
7305 @param device: the device to create
7306 @type force_create: boolean
7307 @param force_create: whether to force creation of this device; this
7308 will be changed to True whenever we find a device which has the
7309 CreateOnSecondary() attribute
7310 @param info: the extra 'metadata' we should attach to the device
7311 (this will be represented as a LVM tag)
7312 @type force_open: boolean
7313 @param force_open: this parameter will be passed to the
7314 L{backend.BlockdevCreate} function where it specifies
7315 whether we run on primary or not, and it affects both
7316 the child assembly and the device's own Open() execution
7319 if device.CreateOnSecondary():
7323 for child in device.children:
7324 _CreateBlockDev(lu, node, instance, child, force_create,
7327 if not force_create:
7330 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7333 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7334 """Create a single block device on a given node.
7336 This will not recurse over children of the device, so they must be
7339 @param lu: the lu on whose behalf we execute
7340 @param node: the node on which to create the device
7341 @type instance: L{objects.Instance}
7342 @param instance: the instance which owns the device
7343 @type device: L{objects.Disk}
7344 @param device: the device to create
7345 @param info: the extra 'metadata' we should attach to the device
7346 (this will be represented as a LVM tag)
7347 @type force_open: boolean
7348 @param force_open: this parameter will be passed to the
7349 L{backend.BlockdevCreate} function where it specifies
7350 whether we run on primary or not, and it affects both
7351 the child assembly and the device's own Open() execution
7354 lu.cfg.SetDiskID(device, node)
7355 result = lu.rpc.call_blockdev_create(node, device, device.size,
7356 instance.name, force_open, info)
7357 result.Raise("Can't create block device %s on"
7358 " node %s for instance %s" % (device, node, instance.name))
7359 if device.physical_id is None:
7360 device.physical_id = result.payload
7363 def _GenerateUniqueNames(lu, exts):
7364 """Generate a suitable LV name.
7366 This will generate a logical volume name for the given instance.
7371 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7372 results.append("%s%s" % (new_id, val))
7376 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7377 iv_name, p_minor, s_minor):
7378 """Generate a drbd8 device complete with its children.
7381 assert len(vgnames) == len(names) == 2
7382 port = lu.cfg.AllocatePort()
7383 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7384 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7385 logical_id=(vgnames[0], names[0]))
7386 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7387 logical_id=(vgnames[1], names[1]))
7388 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7389 logical_id=(primary, secondary, port,
7392 children=[dev_data, dev_meta],
7397 def _GenerateDiskTemplate(lu, template_name,
7398 instance_name, primary_node,
7399 secondary_nodes, disk_info,
7400 file_storage_dir, file_driver,
7401 base_index, feedback_fn):
7402 """Generate the entire disk layout for a given template type.
7405 #TODO: compute space requirements
7407 vgname = lu.cfg.GetVGName()
7408 disk_count = len(disk_info)
7410 if template_name == constants.DT_DISKLESS:
7412 elif template_name == constants.DT_PLAIN:
7413 if len(secondary_nodes) != 0:
7414 raise errors.ProgrammerError("Wrong template configuration")
7416 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7417 for i in range(disk_count)])
7418 for idx, disk in enumerate(disk_info):
7419 disk_index = idx + base_index
7420 vg = disk.get(constants.IDISK_VG, vgname)
7421 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7422 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7423 size=disk[constants.IDISK_SIZE],
7424 logical_id=(vg, names[idx]),
7425 iv_name="disk/%d" % disk_index,
7426 mode=disk[constants.IDISK_MODE])
7427 disks.append(disk_dev)
7428 elif template_name == constants.DT_DRBD8:
7429 if len(secondary_nodes) != 1:
7430 raise errors.ProgrammerError("Wrong template configuration")
7431 remote_node = secondary_nodes[0]
7432 minors = lu.cfg.AllocateDRBDMinor(
7433 [primary_node, remote_node] * len(disk_info), instance_name)
7436 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7437 for i in range(disk_count)]):
7438 names.append(lv_prefix + "_data")
7439 names.append(lv_prefix + "_meta")
7440 for idx, disk in enumerate(disk_info):
7441 disk_index = idx + base_index
7442 data_vg = disk.get(constants.IDISK_VG, vgname)
7443 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7444 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7445 disk[constants.IDISK_SIZE],
7447 names[idx * 2:idx * 2 + 2],
7448 "disk/%d" % disk_index,
7449 minors[idx * 2], minors[idx * 2 + 1])
7450 disk_dev.mode = disk[constants.IDISK_MODE]
7451 disks.append(disk_dev)
7452 elif template_name == constants.DT_FILE:
7453 if len(secondary_nodes) != 0:
7454 raise errors.ProgrammerError("Wrong template configuration")
7456 opcodes.RequireFileStorage()
7458 for idx, disk in enumerate(disk_info):
7459 disk_index = idx + base_index
7460 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7461 size=disk[constants.IDISK_SIZE],
7462 iv_name="disk/%d" % disk_index,
7463 logical_id=(file_driver,
7464 "%s/disk%d" % (file_storage_dir,
7466 mode=disk[constants.IDISK_MODE])
7467 disks.append(disk_dev)
7468 elif template_name == constants.DT_SHARED_FILE:
7469 if len(secondary_nodes) != 0:
7470 raise errors.ProgrammerError("Wrong template configuration")
7472 opcodes.RequireSharedFileStorage()
7474 for idx, disk in enumerate(disk_info):
7475 disk_index = idx + base_index
7476 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7477 size=disk[constants.IDISK_SIZE],
7478 iv_name="disk/%d" % disk_index,
7479 logical_id=(file_driver,
7480 "%s/disk%d" % (file_storage_dir,
7482 mode=disk[constants.IDISK_MODE])
7483 disks.append(disk_dev)
7484 elif template_name == constants.DT_BLOCK:
7485 if len(secondary_nodes) != 0:
7486 raise errors.ProgrammerError("Wrong template configuration")
7488 for idx, disk in enumerate(disk_info):
7489 disk_index = idx + base_index
7490 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7491 size=disk[constants.IDISK_SIZE],
7492 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7493 disk[constants.IDISK_ADOPT]),
7494 iv_name="disk/%d" % disk_index,
7495 mode=disk[constants.IDISK_MODE])
7496 disks.append(disk_dev)
7499 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7503 def _GetInstanceInfoText(instance):
7504 """Compute the text that should be added to the disk's metadata.
7507 return "originstname+%s" % instance.name
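# Illustrative example (hypothetical instance name): an instance called
# "web1.example.com" gets the text "originstname+web1.example.com" attached
# to its disks, e.g. as an LVM tag on LV-based disks.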
7510 def _CalcEta(time_taken, written, total_size):
7511 """Calculates the ETA based on size written and total size.
7513 @param time_taken: The time taken so far
7514 @param written: amount written so far
7515 @param total_size: The total size of data to be written
7516 @return: The remaining time in seconds
7519 avg_time = time_taken / float(written)
7520 return (total_size - written) * avg_time
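# Worked example (illustrative numbers only): if 256 MiB out of 1024 MiB have
# been written in 60 seconds, the average time per unit is 60.0 / 256, so
#   _CalcEta(60.0, 256, 1024) == (1024 - 256) * (60.0 / 256) == 180.0
# i.e. roughly three more minutes are needed at the current rate.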
7523 def _WipeDisks(lu, instance):
7524 """Wipes instance disks.
7526 @type lu: L{LogicalUnit}
7527 @param lu: the logical unit on whose behalf we execute
7528 @type instance: L{objects.Instance}
7529 @param instance: the instance whose disks we should wipe
7530 @return: the success of the wipe
7533 node = instance.primary_node
7535 for device in instance.disks:
7536 lu.cfg.SetDiskID(device, node)
7538 logging.info("Pause sync of instance %s disks", instance.name)
7539 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7541 for idx, success in enumerate(result.payload):
7542 if not success:
7543 logging.warn("pause-sync of instance %s for disk %d failed",
7544 instance.name, idx)
7546 try:
7547 for idx, device in enumerate(instance.disks):
7548 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7549 # MAX_WIPE_CHUNK at max
7550 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7551 constants.MIN_WIPE_CHUNK_PERCENT)
7552 # we _must_ make this an int, otherwise rounding errors will occur
7554 wipe_chunk_size = int(wipe_chunk_size)
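# Illustrative example (assuming the usual constants of a 10% minimum chunk
# and a cap of roughly 1 GiB): a 100 GiB volume is wiped in chunks of about
# 1 GiB, i.e. on the order of a hundred blockdev_wipe RPC calls.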
7556 lu.LogInfo("* Wiping disk %d", idx)
7557 logging.info("Wiping disk %d for instance %s, node %s using"
7558 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7560 offset = 0
7561 size = device.size
7562 last_output = 0
7563 start_time = time.time()
7565 while offset < size:
7566 wipe_size = min(wipe_chunk_size, size - offset)
7567 logging.debug("Wiping disk %d, offset %s, chunk %s",
7568 idx, offset, wipe_size)
7569 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7570 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7571 (idx, offset, wipe_size))
7572 now = time.time()
7573 offset += wipe_size
7574 if now - last_output >= 60:
7575 eta = _CalcEta(now - start_time, offset, size)
7576 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7577 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7578 last_output = now
7579 finally:
7580 logging.info("Resume sync of instance %s disks", instance.name)
7582 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7584 for idx, success in enumerate(result.payload):
7585 if not success:
7586 lu.LogWarning("Resume sync of disk %d failed, please have a"
7587 " look at the status and troubleshoot the issue", idx)
7588 logging.warn("resume-sync of instance %s for disk %d failed",
7589 instance.name, idx)
7592 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7593 """Create all disks for an instance.
7595 This abstracts away some work from AddInstance.
7597 @type lu: L{LogicalUnit}
7598 @param lu: the logical unit on whose behalf we execute
7599 @type instance: L{objects.Instance}
7600 @param instance: the instance whose disks we should create
7602 @param to_skip: list of indices to skip
7603 @type target_node: string
7604 @param target_node: if passed, overrides the target node for creation
7606 @return: the success of the creation
7609 info = _GetInstanceInfoText(instance)
7610 if target_node is None:
7611 pnode = instance.primary_node
7612 all_nodes = instance.all_nodes
7613 else:
7614 pnode = target_node
7615 all_nodes = [pnode]
7617 if instance.disk_template in constants.DTS_FILEBASED:
7618 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7619 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7621 result.Raise("Failed to create directory '%s' on"
7622 " node %s" % (file_storage_dir, pnode))
7624 # Note: this needs to be kept in sync with adding of disks in
7625 # LUInstanceSetParams
7626 for idx, device in enumerate(instance.disks):
7627 if to_skip and idx in to_skip:
7628 continue
7629 logging.info("Creating volume %s for instance %s",
7630 device.iv_name, instance.name)
7632 for node in all_nodes:
7633 f_create = node == pnode
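# f_create is true only on the primary node; it is passed twice to
# _CreateBlockDev below (presumably as the force-create and force-open flags),
# so the device is only forcibly created and opened on the primary.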
7634 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7637 def _RemoveDisks(lu, instance, target_node=None):
7638 """Remove all disks for an instance.
7640 This abstracts away some work from `AddInstance()` and
7641 `RemoveInstance()`. Note that in case some of the devices couldn't
7642 be removed, the removal will continue with the other ones (compare
7643 with `_CreateDisks()`).
7645 @type lu: L{LogicalUnit}
7646 @param lu: the logical unit on whose behalf we execute
7647 @type instance: L{objects.Instance}
7648 @param instance: the instance whose disks we should remove
7649 @type target_node: string
7650 @param target_node: used to override the node on which to remove the disks
7652 @return: the success of the removal
7655 logging.info("Removing block devices for instance %s", instance.name)
7658 for device in instance.disks:
7659 if target_node:
7660 edata = [(target_node, device)]
7661 else:
7662 edata = device.ComputeNodeTree(instance.primary_node)
7663 for node, disk in edata:
7664 lu.cfg.SetDiskID(disk, node)
7665 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7666 if msg:
7667 lu.LogWarning("Could not remove block device %s on node %s,"
7668 " continuing anyway: %s", device.iv_name, node, msg)
7671 if instance.disk_template == constants.DT_FILE:
7672 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7673 if target_node:
7674 tgt = target_node
7675 else:
7676 tgt = instance.primary_node
7677 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7678 if result.fail_msg:
7679 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7680 file_storage_dir, instance.primary_node, result.fail_msg)
7686 def _ComputeDiskSizePerVG(disk_template, disks):
7687 """Compute disk size requirements in the volume group
7690 def _compute(disks, payload):
7691 """Universal algorithm.
7694 vgs = {}
7695 for disk in disks:
7696 vgs[disk[constants.IDISK_VG]] = \
7697 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7699 return vgs
7701 # Required free disk space as a function of disk and swap space
7702 req_size_dict = {
7703 constants.DT_DISKLESS: {},
7704 constants.DT_PLAIN: _compute(disks, 0),
7705 # 128 MB are added for drbd metadata for each disk
7706 constants.DT_DRBD8: _compute(disks, 128),
7707 constants.DT_FILE: {},
7708 constants.DT_SHARED_FILE: {},
7709 }
7711 if disk_template not in req_size_dict:
7712 raise errors.ProgrammerError("Disk template '%s' size requirement"
7713 " is unknown" % disk_template)
7715 return req_size_dict[disk_template]
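# Illustrative example (hypothetical values): for DT_DRBD8 with two 1024 MiB
# disks in volume group "xenvg", _compute(disks, 128) yields
# {"xenvg": 2 * (1024 + 128)} = {"xenvg": 2304}, i.e. 128 MiB of DRBD metadata
# is reserved per disk on top of the requested sizes.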
7718 def _ComputeDiskSize(disk_template, disks):
7719 """Compute disk size requirements in the volume group
7722 # Required free disk space as a function of disk and swap space
7723 req_size_dict = {
7724 constants.DT_DISKLESS: None,
7725 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7726 # 128 MB are added for drbd metadata for each disk
7727 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7728 constants.DT_FILE: None,
7729 constants.DT_SHARED_FILE: 0,
7730 constants.DT_BLOCK: 0,
7731 }
7733 if disk_template not in req_size_dict:
7734 raise errors.ProgrammerError("Disk template '%s' size requirement"
7735 " is unknown" % disk_template)
7737 return req_size_dict[disk_template]
7740 def _FilterVmNodes(lu, nodenames):
7741 """Filters out non-vm_capable nodes from a list.
7743 @type lu: L{LogicalUnit}
7744 @param lu: the logical unit for which we check
7745 @type nodenames: list
7746 @param nodenames: the list of nodes on which we should check
7748 @return: the list of vm-capable nodes
7751 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7752 return [name for name in nodenames if name not in vm_nodes]
7755 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7756 """Hypervisor parameter validation.
7758 This function abstracts the hypervisor parameter validation to be
7759 used in both instance create and instance modify.
7761 @type lu: L{LogicalUnit}
7762 @param lu: the logical unit for which we check
7763 @type nodenames: list
7764 @param nodenames: the list of nodes on which we should check
7765 @type hvname: string
7766 @param hvname: the name of the hypervisor we should use
7767 @type hvparams: dict
7768 @param hvparams: the parameters which we need to check
7769 @raise errors.OpPrereqError: if the parameters are not valid
7772 nodenames = _FilterVmNodes(lu, nodenames)
7773 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7774 hvname,
7775 hvparams)
7776 for node in nodenames:
7777 info = hvinfo[node]
7778 if info.offline:
7779 continue
7780 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7783 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7784 """OS parameters validation.
7786 @type lu: L{LogicalUnit}
7787 @param lu: the logical unit for which we check
7788 @type required: boolean
7789 @param required: whether the validation should fail if the OS is not found
7791 @type nodenames: list
7792 @param nodenames: the list of nodes on which we should check
7793 @type osname: string
7794 @param osname: the name of the OS we should use
7795 @type osparams: dict
7796 @param osparams: the parameters which we need to check
7797 @raise errors.OpPrereqError: if the parameters are not valid
7800 nodenames = _FilterVmNodes(lu, nodenames)
7801 result = lu.rpc.call_os_validate(required, nodenames, osname,
7802 [constants.OS_VALIDATE_PARAMETERS],
7803 osparams)
7804 for node, nres in result.items():
7805 # we don't check for offline cases since this should be run only
7806 # against the master node and/or an instance's nodes
7807 nres.Raise("OS Parameters validation failed on node %s" % node)
7808 if not nres.payload:
7809 lu.LogInfo("OS %s not found on node %s, validation skipped",
7810 osname, node)
7813 class LUInstanceCreate(LogicalUnit):
7814 """Create an instance.
7817 HPATH = "instance-add"
7818 HTYPE = constants.HTYPE_INSTANCE
7821 def CheckArguments(self):
7825 # do not require name_check to ease forward/backward compatibility
7827 if self.op.no_install and self.op.start:
7828 self.LogInfo("No-installation mode selected, disabling startup")
7829 self.op.start = False
7830 # validate/normalize the instance name
7831 self.op.instance_name = \
7832 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7834 if self.op.ip_check and not self.op.name_check:
7835 # TODO: make the ip check more flexible and not depend on the name check
7836 raise errors.OpPrereqError("Cannot do IP address check without a name"
7837 " check", errors.ECODE_INVAL)
7839 # check nics' parameter names
7840 for nic in self.op.nics:
7841 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7843 # check disks: parameter names and consistent adopt/no-adopt strategy
7844 has_adopt = has_no_adopt = False
7845 for disk in self.op.disks:
7846 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7847 if constants.IDISK_ADOPT in disk:
7848 has_adopt = True
7849 else:
7850 has_no_adopt = True
7851 if has_adopt and has_no_adopt:
7852 raise errors.OpPrereqError("Either all disks are adopted or none is",
7853 errors.ECODE_INVAL)
7854 if has_adopt:
7855 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7856 raise errors.OpPrereqError("Disk adoption is not supported for the"
7857 " '%s' disk template" %
7858 self.op.disk_template,
7860 if self.op.iallocator is not None:
7861 raise errors.OpPrereqError("Disk adoption not allowed with an"
7862 " iallocator script", errors.ECODE_INVAL)
7863 if self.op.mode == constants.INSTANCE_IMPORT:
7864 raise errors.OpPrereqError("Disk adoption not allowed for"
7865 " instance import", errors.ECODE_INVAL)
7867 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7868 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7869 " but no 'adopt' parameter given" %
7870 self.op.disk_template,
7873 self.adopt_disks = has_adopt
7875 # instance name verification
7876 if self.op.name_check:
7877 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7878 self.op.instance_name = self.hostname1.name
7879 # used in CheckPrereq for ip ping check
7880 self.check_ip = self.hostname1.ip
7881 else:
7882 self.check_ip = None
7884 # file storage checks
7885 if (self.op.file_driver and
7886 not self.op.file_driver in constants.FILE_DRIVER):
7887 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7888 self.op.file_driver, errors.ECODE_INVAL)
7890 if self.op.disk_template == constants.DT_FILE:
7891 opcodes.RequireFileStorage()
7892 elif self.op.disk_template == constants.DT_SHARED_FILE:
7893 opcodes.RequireSharedFileStorage()
7895 ### Node/iallocator related checks
7896 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7898 if self.op.pnode is not None:
7899 if self.op.disk_template in constants.DTS_INT_MIRROR:
7900 if self.op.snode is None:
7901 raise errors.OpPrereqError("The networked disk templates need"
7902 " a mirror node", errors.ECODE_INVAL)
7903 elif self.op.snode:
7904 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7905 " template")
7906 self.op.snode = None
7908 self._cds = _GetClusterDomainSecret()
7910 if self.op.mode == constants.INSTANCE_IMPORT:
7911 # On import force_variant must be True, because if we forced it at
7912 # initial install, our only chance when importing it back is that it works
7914 self.op.force_variant = True
7916 if self.op.no_install:
7917 self.LogInfo("No-installation mode has no effect during import")
7919 elif self.op.mode == constants.INSTANCE_CREATE:
7920 if self.op.os_type is None:
7921 raise errors.OpPrereqError("No guest OS specified",
7923 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7924 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7925 " installation" % self.op.os_type,
7927 if self.op.disk_template is None:
7928 raise errors.OpPrereqError("No disk template specified",
7931 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7932 # Check handshake to ensure both clusters have the same domain secret
7933 src_handshake = self.op.source_handshake
7934 if not src_handshake:
7935 raise errors.OpPrereqError("Missing source handshake",
7938 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7939 src_handshake)
7940 if errmsg:
7941 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7944 # Load and check source CA
7945 self.source_x509_ca_pem = self.op.source_x509_ca
7946 if not self.source_x509_ca_pem:
7947 raise errors.OpPrereqError("Missing source X509 CA",
7948 errors.ECODE_INVAL)
7950 try:
7951 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7952 self._cds)
7953 except OpenSSL.crypto.Error, err:
7954 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7955 (err, ), errors.ECODE_INVAL)
7957 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7958 if errcode is not None:
7959 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7962 self.source_x509_ca = cert
7964 src_instance_name = self.op.source_instance_name
7965 if not src_instance_name:
7966 raise errors.OpPrereqError("Missing source instance name",
7969 self.source_instance_name = \
7970 netutils.GetHostname(name=src_instance_name).name
7972 else:
7973 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7974 self.op.mode, errors.ECODE_INVAL)
7976 def ExpandNames(self):
7977 """ExpandNames for CreateInstance.
7979 Figure out the right locks for instance creation.
7982 self.needed_locks = {}
7984 instance_name = self.op.instance_name
7985 # this is just a preventive check, but someone might still add this
7986 # instance in the meantime, and creation will fail at lock-add time
7987 if instance_name in self.cfg.GetInstanceList():
7988 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7989 instance_name, errors.ECODE_EXISTS)
7991 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7993 if self.op.iallocator:
7994 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7995 else:
7996 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7997 nodelist = [self.op.pnode]
7998 if self.op.snode is not None:
7999 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8000 nodelist.append(self.op.snode)
8001 self.needed_locks[locking.LEVEL_NODE] = nodelist
8003 # in case of import lock the source node too
8004 if self.op.mode == constants.INSTANCE_IMPORT:
8005 src_node = self.op.src_node
8006 src_path = self.op.src_path
8008 if src_path is None:
8009 self.op.src_path = src_path = self.op.instance_name
8011 if src_node is None:
8012 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8013 self.op.src_node = None
8014 if os.path.isabs(src_path):
8015 raise errors.OpPrereqError("Importing an instance from an absolute"
8016 " path requires a source node option",
8017 errors.ECODE_INVAL)
8018 else:
8019 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8020 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8021 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8022 if not os.path.isabs(src_path):
8023 self.op.src_path = src_path = \
8024 utils.PathJoin(constants.EXPORT_DIR, src_path)
8026 def _RunAllocator(self):
8027 """Run the allocator based on input opcode.
8030 nics = [n.ToDict() for n in self.nics]
8031 ial = IAllocator(self.cfg, self.rpc,
8032 mode=constants.IALLOCATOR_MODE_ALLOC,
8033 name=self.op.instance_name,
8034 disk_template=self.op.disk_template,
8037 vcpus=self.be_full[constants.BE_VCPUS],
8038 memory=self.be_full[constants.BE_MEMORY],
8041 hypervisor=self.op.hypervisor,
8044 ial.Run(self.op.iallocator)
8046 if not ial.success:
8047 raise errors.OpPrereqError("Can't compute nodes using"
8048 " iallocator '%s': %s" %
8049 (self.op.iallocator, ial.info),
8051 if len(ial.result) != ial.required_nodes:
8052 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8053 " of nodes (%s), required %s" %
8054 (self.op.iallocator, len(ial.result),
8055 ial.required_nodes), errors.ECODE_FAULT)
8056 self.op.pnode = ial.result[0]
8057 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8058 self.op.instance_name, self.op.iallocator,
8059 utils.CommaJoin(ial.result))
8060 if ial.required_nodes == 2:
8061 self.op.snode = ial.result[1]
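# For templates that need an internal mirror the allocator is expected to
# return two nodes (primary first, then the secondary); all other templates
# only use the first result.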
8063 def BuildHooksEnv(self):
8066 This runs on master, primary and secondary nodes of the instance.
8069 env = {
8070 "ADD_MODE": self.op.mode,
8071 }
8072 if self.op.mode == constants.INSTANCE_IMPORT:
8073 env["SRC_NODE"] = self.op.src_node
8074 env["SRC_PATH"] = self.op.src_path
8075 env["SRC_IMAGES"] = self.src_images
8077 env.update(_BuildInstanceHookEnv(
8078 name=self.op.instance_name,
8079 primary_node=self.op.pnode,
8080 secondary_nodes=self.secondaries,
8081 status=self.op.start,
8082 os_type=self.op.os_type,
8083 memory=self.be_full[constants.BE_MEMORY],
8084 vcpus=self.be_full[constants.BE_VCPUS],
8085 nics=_NICListToTuple(self, self.nics),
8086 disk_template=self.op.disk_template,
8087 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8088 for d in self.disks],
8091 hypervisor_name=self.op.hypervisor,
8092 ))
8094 return env
8097 def BuildHooksNodes(self):
8098 """Build hooks nodes.
8101 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8103 return nl, nl
8104 def _ReadExportInfo(self):
8105 """Reads the export information from disk.
8107 It will override the opcode source node and path with the actual
8108 information, if these two were not specified before.
8110 @return: the export information
8113 assert self.op.mode == constants.INSTANCE_IMPORT
8115 src_node = self.op.src_node
8116 src_path = self.op.src_path
8118 if src_node is None:
8119 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8120 exp_list = self.rpc.call_export_list(locked_nodes)
8121 found = False
8122 for node in exp_list:
8123 if exp_list[node].fail_msg:
8124 continue
8125 if src_path in exp_list[node].payload:
8126 found = True
8127 self.op.src_node = src_node = node
8128 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8129 src_path)
8130 break
8131 if not found:
8132 raise errors.OpPrereqError("No export found for relative path %s" %
8133 src_path, errors.ECODE_INVAL)
8135 _CheckNodeOnline(self, src_node)
8136 result = self.rpc.call_export_info(src_node, src_path)
8137 result.Raise("No export or invalid export found in dir %s" % src_path)
8139 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8140 if not export_info.has_section(constants.INISECT_EXP):
8141 raise errors.ProgrammerError("Corrupted export config",
8142 errors.ECODE_ENVIRON)
8144 ei_version = export_info.get(constants.INISECT_EXP, "version")
8145 if (int(ei_version) != constants.EXPORT_VERSION):
8146 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8147 (ei_version, constants.EXPORT_VERSION),
8148 errors.ECODE_ENVIRON)
8151 def _ReadExportParams(self, einfo):
8152 """Use export parameters as defaults.
8154 In case the opcode doesn't specify (as in override) some instance
8155 parameters, then try to use them from the export information, if the export declares them.
8159 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8161 if self.op.disk_template is None:
8162 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8163 self.op.disk_template = einfo.get(constants.INISECT_INS,
8166 raise errors.OpPrereqError("No disk template specified and the export"
8167 " is missing the disk_template information",
8170 if not self.op.disks:
8171 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8173 # TODO: import the disk iv_name too
8174 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8175 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8176 disks.append({constants.IDISK_SIZE: disk_sz})
8177 self.op.disks = disks
8179 raise errors.OpPrereqError("No disk info specified and the export"
8180 " is missing the disk information",
8183 if (not self.op.nics and
8184 einfo.has_option(constants.INISECT_INS, "nic_count")):
8186 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8188 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8189 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8194 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8195 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8197 if (self.op.hypervisor is None and
8198 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8199 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8201 if einfo.has_section(constants.INISECT_HYP):
8202 # use the export parameters but do not override the ones
8203 # specified by the user
8204 for name, value in einfo.items(constants.INISECT_HYP):
8205 if name not in self.op.hvparams:
8206 self.op.hvparams[name] = value
8208 if einfo.has_section(constants.INISECT_BEP):
8209 # use the parameters, without overriding
8210 for name, value in einfo.items(constants.INISECT_BEP):
8211 if name not in self.op.beparams:
8212 self.op.beparams[name] = value
8214 # try to read the parameters old style, from the main section
8215 for name in constants.BES_PARAMETERS:
8216 if (name not in self.op.beparams and
8217 einfo.has_option(constants.INISECT_INS, name)):
8218 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8220 if einfo.has_section(constants.INISECT_OSP):
8221 # use the parameters, without overriding
8222 for name, value in einfo.items(constants.INISECT_OSP):
8223 if name not in self.op.osparams:
8224 self.op.osparams[name] = value
8226 def _RevertToDefaults(self, cluster):
8227 """Revert the instance parameters to the default values.
8231 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8232 for name in self.op.hvparams.keys():
8233 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8234 del self.op.hvparams[name]
8236 be_defs = cluster.SimpleFillBE({})
8237 for name in self.op.beparams.keys():
8238 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8239 del self.op.beparams[name]
8241 nic_defs = cluster.SimpleFillNIC({})
8242 for nic in self.op.nics:
8243 for name in constants.NICS_PARAMETERS:
8244 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8247 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8248 for name in self.op.osparams.keys():
8249 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8250 del self.op.osparams[name]
8252 def _CalculateFileStorageDir(self):
8253 """Calculate final instance file storage dir.
8256 # file storage dir calculation/check
8257 self.instance_file_storage_dir = None
8258 if self.op.disk_template in constants.DTS_FILEBASED:
8259 # build the full file storage dir path
8262 if self.op.disk_template == constants.DT_SHARED_FILE:
8263 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8265 get_fsd_fn = self.cfg.GetFileStorageDir
8267 cfg_storagedir = get_fsd_fn()
8268 if not cfg_storagedir:
8269 raise errors.OpPrereqError("Cluster file storage dir not defined")
8270 joinargs.append(cfg_storagedir)
8272 if self.op.file_storage_dir is not None:
8273 joinargs.append(self.op.file_storage_dir)
8275 joinargs.append(self.op.instance_name)
8277 # pylint: disable-msg=W0142
8278 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
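# Illustrative example (hypothetical paths): with a cluster storage dir of
# /srv/ganeti/file-storage, an opcode file_storage_dir of "mydir" and an
# instance named inst1.example.com, the final directory becomes
# /srv/ganeti/file-storage/mydir/inst1.example.com.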
8280 def CheckPrereq(self):
8281 """Check prerequisites.
8284 self._CalculateFileStorageDir()
8286 if self.op.mode == constants.INSTANCE_IMPORT:
8287 export_info = self._ReadExportInfo()
8288 self._ReadExportParams(export_info)
8290 if (not self.cfg.GetVGName() and
8291 self.op.disk_template not in constants.DTS_NOT_LVM):
8292 raise errors.OpPrereqError("Cluster does not support lvm-based"
8293 " instances", errors.ECODE_STATE)
8295 if self.op.hypervisor is None:
8296 self.op.hypervisor = self.cfg.GetHypervisorType()
8298 cluster = self.cfg.GetClusterInfo()
8299 enabled_hvs = cluster.enabled_hypervisors
8300 if self.op.hypervisor not in enabled_hvs:
8301 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8302 " cluster (%s)" % (self.op.hypervisor,
8303 ",".join(enabled_hvs)),
8306 # Check tag validity
8307 for tag in self.op.tags:
8308 objects.TaggableObject.ValidateTag(tag)
8310 # check hypervisor parameter syntax (locally)
8311 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8312 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8314 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8315 hv_type.CheckParameterSyntax(filled_hvp)
8316 self.hv_full = filled_hvp
8317 # check that we don't specify global parameters on an instance
8318 _CheckGlobalHvParams(self.op.hvparams)
8320 # fill and remember the beparams dict
8321 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8322 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8324 # build os parameters
8325 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8327 # now that hvp/bep are in final format, let's reset to defaults, if instructed
8329 if self.op.identify_defaults:
8330 self._RevertToDefaults(cluster)
8332 # NIC buildup
8333 self.nics = []
8334 for idx, nic in enumerate(self.op.nics):
8335 nic_mode_req = nic.get(constants.INIC_MODE, None)
8336 nic_mode = nic_mode_req
8337 if nic_mode is None:
8338 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8340 # in routed mode, for the first nic, the default ip is 'auto'
8341 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8342 default_ip_mode = constants.VALUE_AUTO
8344 default_ip_mode = constants.VALUE_NONE
8346 # ip validity checks
8347 ip = nic.get(constants.INIC_IP, default_ip_mode)
8348 if ip is None or ip.lower() == constants.VALUE_NONE:
8349 nic_ip = None
8350 elif ip.lower() == constants.VALUE_AUTO:
8351 if not self.op.name_check:
8352 raise errors.OpPrereqError("IP address set to auto but name checks"
8353 " have been skipped",
8355 nic_ip = self.hostname1.ip
8356 else:
8357 if not netutils.IPAddress.IsValid(ip):
8358 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8359 errors.ECODE_INVAL)
8360 nic_ip = ip
8362 # TODO: check the ip address for uniqueness
8363 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8364 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8367 # MAC address verification
8368 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8369 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8370 mac = utils.NormalizeAndValidateMac(mac)
8372 try:
8373 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8374 except errors.ReservationError:
8375 raise errors.OpPrereqError("MAC address %s already in use"
8376 " in cluster" % mac,
8377 errors.ECODE_NOTUNIQUE)
8379 # Build nic parameters
8380 link = nic.get(constants.INIC_LINK, None)
8381 nicparams = {}
8382 if nic_mode_req:
8383 nicparams[constants.NIC_MODE] = nic_mode_req
8384 if link:
8385 nicparams[constants.NIC_LINK] = link
8387 check_params = cluster.SimpleFillNIC(nicparams)
8388 objects.NIC.CheckParameterSyntax(check_params)
8389 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8391 # disk checks/pre-build
8392 default_vg = self.cfg.GetVGName()
8393 self.disks = []
8394 for disk in self.op.disks:
8395 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8396 if mode not in constants.DISK_ACCESS_SET:
8397 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8398 mode, errors.ECODE_INVAL)
8399 size = disk.get(constants.IDISK_SIZE, None)
8400 if size is None:
8401 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8402 try:
8403 size = int(size)
8404 except (TypeError, ValueError):
8405 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8408 data_vg = disk.get(constants.IDISK_VG, default_vg)
8409 new_disk = {
8410 constants.IDISK_SIZE: size,
8411 constants.IDISK_MODE: mode,
8412 constants.IDISK_VG: data_vg,
8413 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8414 }
8415 if constants.IDISK_ADOPT in disk:
8416 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8417 self.disks.append(new_disk)
8419 if self.op.mode == constants.INSTANCE_IMPORT:
8421 # Check that the new instance doesn't have less disks than the export
8422 instance_disks = len(self.disks)
8423 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8424 if instance_disks < export_disks:
8425 raise errors.OpPrereqError("Not enough disks to import."
8426 " (instance: %d, export: %d)" %
8427 (instance_disks, export_disks),
8431 for idx in range(export_disks):
8432 option = 'disk%d_dump' % idx
8433 if export_info.has_option(constants.INISECT_INS, option):
8434 # FIXME: are the old os-es, disk sizes, etc. useful?
8435 export_name = export_info.get(constants.INISECT_INS, option)
8436 image = utils.PathJoin(self.op.src_path, export_name)
8437 disk_images.append(image)
8439 disk_images.append(False)
8441 self.src_images = disk_images
8443 old_name = export_info.get(constants.INISECT_INS, 'name')
8445 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8446 except (TypeError, ValueError), err:
8447 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8448 " an integer: %s" % str(err),
8450 if self.op.instance_name == old_name:
8451 for idx, nic in enumerate(self.nics):
8452 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8453 nic_mac_ini = 'nic%d_mac' % idx
8454 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8456 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8458 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8459 if self.op.ip_check:
8460 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8461 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8462 (self.check_ip, self.op.instance_name),
8463 errors.ECODE_NOTUNIQUE)
8465 #### mac address generation
8466 # By generating here the mac address both the allocator and the hooks get
8467 # the real final mac address rather than the 'auto' or 'generate' value.
8468 # There is a race condition between the generation and the instance object
8469 # creation, which means that we know the mac is valid now, but we're not
8470 # sure it will be when we actually add the instance. If things go bad
8471 # adding the instance will abort because of a duplicate mac, and the
8472 # creation job will fail.
8473 for nic in self.nics:
8474 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8475 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8479 if self.op.iallocator is not None:
8480 self._RunAllocator()
8482 #### node related checks
8484 # check primary node
8485 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8486 assert self.pnode is not None, \
8487 "Cannot retrieve locked node %s" % self.op.pnode
8488 if pnode.offline:
8489 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8490 pnode.name, errors.ECODE_STATE)
8491 if pnode.drained:
8492 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8493 pnode.name, errors.ECODE_STATE)
8494 if not pnode.vm_capable:
8495 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8496 " '%s'" % pnode.name, errors.ECODE_STATE)
8498 self.secondaries = []
8500 # mirror node verification
8501 if self.op.disk_template in constants.DTS_INT_MIRROR:
8502 if self.op.snode == pnode.name:
8503 raise errors.OpPrereqError("The secondary node cannot be the"
8504 " primary node", errors.ECODE_INVAL)
8505 _CheckNodeOnline(self, self.op.snode)
8506 _CheckNodeNotDrained(self, self.op.snode)
8507 _CheckNodeVmCapable(self, self.op.snode)
8508 self.secondaries.append(self.op.snode)
8510 nodenames = [pnode.name] + self.secondaries
8512 if not self.adopt_disks:
8513 # Check lv size requirements, if not adopting
8514 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8515 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8517 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8518 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8519 disk[constants.IDISK_ADOPT])
8520 for disk in self.disks])
8521 if len(all_lvs) != len(self.disks):
8522 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8524 for lv_name in all_lvs:
8525 try:
8526 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8527 # to ReserveLV uses the same syntax
8528 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8529 except errors.ReservationError:
8530 raise errors.OpPrereqError("LV named %s used by another instance" %
8531 lv_name, errors.ECODE_NOTUNIQUE)
8533 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8534 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8536 node_lvs = self.rpc.call_lv_list([pnode.name],
8537 vg_names.payload.keys())[pnode.name]
8538 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8539 node_lvs = node_lvs.payload
8541 delta = all_lvs.difference(node_lvs.keys())
8542 if delta:
8543 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8544 utils.CommaJoin(delta),
8546 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8547 if online_lvs:
8548 raise errors.OpPrereqError("Online logical volumes found, cannot"
8549 " adopt: %s" % utils.CommaJoin(online_lvs),
8551 # update the size of disk based on what is found
8552 for dsk in self.disks:
8553 dsk[constants.IDISK_SIZE] = \
8554 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8555 dsk[constants.IDISK_ADOPT])][0]))
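# In other words, for adopted plain disks the size requested in the opcode is
# discarded and replaced by the actual size reported for the existing logical
# volume on the primary node.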
8557 elif self.op.disk_template == constants.DT_BLOCK:
8558 # Normalize and de-duplicate device paths
8559 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8560 for disk in self.disks])
8561 if len(all_disks) != len(self.disks):
8562 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8564 baddisks = [d for d in all_disks
8565 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8566 if baddisks:
8567 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8568 " cannot be adopted" %
8569 (", ".join(baddisks),
8570 constants.ADOPTABLE_BLOCKDEV_ROOT),
8573 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8574 list(all_disks))[pnode.name]
8575 node_disks.Raise("Cannot get block device information from node %s" %
8577 node_disks = node_disks.payload
8578 delta = all_disks.difference(node_disks.keys())
8579 if delta:
8580 raise errors.OpPrereqError("Missing block device(s): %s" %
8581 utils.CommaJoin(delta),
8583 for dsk in self.disks:
8584 dsk[constants.IDISK_SIZE] = \
8585 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8587 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8589 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8590 # check OS parameters (remotely)
8591 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8593 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8595 # memory check on primary node
8597 _CheckNodeFreeMemory(self, self.pnode.name,
8598 "creating instance %s" % self.op.instance_name,
8599 self.be_full[constants.BE_MEMORY],
8602 self.dry_run_result = list(nodenames)
8604 def Exec(self, feedback_fn):
8605 """Create and add the instance to the cluster.
8608 instance = self.op.instance_name
8609 pnode_name = self.pnode.name
8611 ht_kind = self.op.hypervisor
8612 if ht_kind in constants.HTS_REQ_PORT:
8613 network_port = self.cfg.AllocatePort()
8614 else:
8615 network_port = None
8617 disks = _GenerateDiskTemplate(self,
8618 self.op.disk_template,
8619 instance, pnode_name,
8622 self.instance_file_storage_dir,
8623 self.op.file_driver,
8627 iobj = objects.Instance(name=instance, os=self.op.os_type,
8628 primary_node=pnode_name,
8629 nics=self.nics, disks=disks,
8630 disk_template=self.op.disk_template,
8632 network_port=network_port,
8633 beparams=self.op.beparams,
8634 hvparams=self.op.hvparams,
8635 hypervisor=self.op.hypervisor,
8636 osparams=self.op.osparams,
8640 for tag in self.op.tags:
8643 if self.adopt_disks:
8644 if self.op.disk_template == constants.DT_PLAIN:
8645 # rename LVs to the newly-generated names; we need to construct
8646 # 'fake' LV disks with the old data, plus the new unique_id
8647 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8648 rename_to = []
8649 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8650 rename_to.append(t_dsk.logical_id)
8651 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8652 self.cfg.SetDiskID(t_dsk, pnode_name)
8653 result = self.rpc.call_blockdev_rename(pnode_name,
8654 zip(tmp_disks, rename_to))
8655 result.Raise("Failed to rename adopted LVs")
8657 feedback_fn("* creating instance disks...")
8658 try:
8659 _CreateDisks(self, iobj)
8660 except errors.OpExecError:
8661 self.LogWarning("Device creation failed, reverting...")
8662 try:
8663 _RemoveDisks(self, iobj)
8664 finally:
8665 self.cfg.ReleaseDRBDMinors(instance)
8666 raise
8668 feedback_fn("adding instance %s to cluster config" % instance)
8670 self.cfg.AddInstance(iobj, self.proc.GetECId())
8672 # Declare that we don't want to remove the instance lock anymore, as we've
8673 # added the instance to the config
8674 del self.remove_locks[locking.LEVEL_INSTANCE]
8676 if self.op.mode == constants.INSTANCE_IMPORT:
8677 # Release unused nodes
8678 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8679 else:
8680 # Release all nodes
8681 _ReleaseLocks(self, locking.LEVEL_NODE)
8683 disk_abort = False
8684 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8685 feedback_fn("* wiping instance disks...")
8686 try:
8687 _WipeDisks(self, iobj)
8688 except errors.OpExecError, err:
8689 logging.exception("Wiping disks failed")
8690 self.LogWarning("Wiping instance disks failed (%s)", err)
8691 disk_abort = True
8693 if disk_abort:
8694 # Something is already wrong with the disks, don't do anything else
8695 pass
8696 elif self.op.wait_for_sync:
8697 disk_abort = not _WaitForSync(self, iobj)
8698 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8699 # make sure the disks are not degraded (still sync-ing is ok)
8701 feedback_fn("* checking mirrors status")
8702 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8703 else:
8704 disk_abort = False
8706 if disk_abort:
8707 _RemoveDisks(self, iobj)
8708 self.cfg.RemoveInstance(iobj.name)
8709 # Make sure the instance lock gets removed
8710 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8711 raise errors.OpExecError("There are some degraded disks for"
8712 " this instance")
8714 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8715 if self.op.mode == constants.INSTANCE_CREATE:
8716 if not self.op.no_install:
8717 feedback_fn("* running the instance OS create scripts...")
8718 # FIXME: pass debug option from opcode to backend
8719 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8720 self.op.debug_level)
8721 result.Raise("Could not add os for instance %s"
8722 " on node %s" % (instance, pnode_name))
8724 elif self.op.mode == constants.INSTANCE_IMPORT:
8725 feedback_fn("* running the instance OS import scripts...")
8727 transfers = []
8729 for idx, image in enumerate(self.src_images):
8730 if not image:
8731 continue
8733 # FIXME: pass debug option from opcode to backend
8734 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8735 constants.IEIO_FILE, (image, ),
8736 constants.IEIO_SCRIPT,
8737 (iobj.disks[idx], idx),
8739 transfers.append(dt)
8741 import_result = \
8742 masterd.instance.TransferInstanceData(self, feedback_fn,
8743 self.op.src_node, pnode_name,
8744 self.pnode.secondary_ip,
8746 if not compat.all(import_result):
8747 self.LogWarning("Some disks for instance %s on node %s were not"
8748 " imported successfully" % (instance, pnode_name))
8750 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8751 feedback_fn("* preparing remote import...")
8752 # The source cluster will stop the instance before attempting to make a
8753 # connection. In some cases stopping an instance can take a long time,
8754 # hence the shutdown timeout is added to the connection timeout.
8755 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8756 self.op.source_shutdown_timeout)
8757 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8759 assert iobj.primary_node == self.pnode.name
8761 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8762 self.source_x509_ca,
8763 self._cds, timeouts)
8764 if not compat.all(disk_results):
8765 # TODO: Should the instance still be started, even if some disks
8766 # failed to import (valid for local imports, too)?
8767 self.LogWarning("Some disks for instance %s on node %s were not"
8768 " imported successfully" % (instance, pnode_name))
8770 # Run rename script on newly imported instance
8771 assert iobj.name == instance
8772 feedback_fn("Running rename script for %s" % instance)
8773 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8774 self.source_instance_name,
8775 self.op.debug_level)
8777 self.LogWarning("Failed to run rename script for %s on node"
8778 " %s: %s" % (instance, pnode_name, result.fail_msg))
8781 # also checked in the prereq part
8782 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8786 iobj.admin_up = True
8787 self.cfg.Update(iobj, feedback_fn)
8788 logging.info("Starting instance %s on node %s", instance, pnode_name)
8789 feedback_fn("* starting instance...")
8790 result = self.rpc.call_instance_start(pnode_name, iobj,
8792 result.Raise("Could not start instance")
8794 return list(iobj.all_nodes)
8797 class LUInstanceConsole(NoHooksLU):
8798 """Connect to an instance's console.
8800 This is somewhat special in that it returns the command line that
8801 you need to run on the master node in order to connect to the console.
8807 def ExpandNames(self):
8808 self._ExpandAndLockInstance()
8810 def CheckPrereq(self):
8811 """Check prerequisites.
8813 This checks that the instance is in the cluster.
8816 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8817 assert self.instance is not None, \
8818 "Cannot retrieve locked instance %s" % self.op.instance_name
8819 _CheckNodeOnline(self, self.instance.primary_node)
8821 def Exec(self, feedback_fn):
8822 """Connect to the console of an instance
8825 instance = self.instance
8826 node = instance.primary_node
8828 node_insts = self.rpc.call_instance_list([node],
8829 [instance.hypervisor])[node]
8830 node_insts.Raise("Can't get node information from %s" % node)
8832 if instance.name not in node_insts.payload:
8833 if instance.admin_up:
8834 state = constants.INSTST_ERRORDOWN
8836 state = constants.INSTST_ADMINDOWN
8837 raise errors.OpExecError("Instance %s is not running (state %s)" %
8838 (instance.name, state))
8840 logging.debug("Connecting to console of %s on %s", instance.name, node)
8842 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8845 def _GetInstanceConsole(cluster, instance):
8846 """Returns console information for an instance.
8848 @type cluster: L{objects.Cluster}
8849 @type instance: L{objects.Instance}
8853 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8854 # beparams and hvparams are passed separately, to avoid editing the
8855 # instance and then saving the defaults in the instance itself.
8856 hvparams = cluster.FillHV(instance)
8857 beparams = cluster.FillBE(instance)
8858 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8860 assert console.instance == instance.name
8861 assert console.Validate()
8863 return console.ToDict()
8866 class LUInstanceReplaceDisks(LogicalUnit):
8867 """Replace the disks of an instance.
8870 HPATH = "mirrors-replace"
8871 HTYPE = constants.HTYPE_INSTANCE
8874 def CheckArguments(self):
8875 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8878 def ExpandNames(self):
8879 self._ExpandAndLockInstance()
8881 assert locking.LEVEL_NODE not in self.needed_locks
8882 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8884 assert self.op.iallocator is None or self.op.remote_node is None, \
8885 "Conflicting options"
8887 if self.op.remote_node is not None:
8888 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8890 # Warning: do not remove the locking of the new secondary here
8891 # unless DRBD8.AddChildren is changed to work in parallel;
8892 # currently it doesn't since parallel invocations of
8893 # FindUnusedMinor will conflict
8894 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8895 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8897 self.needed_locks[locking.LEVEL_NODE] = []
8898 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8900 if self.op.iallocator is not None:
8901 # iallocator will select a new node in the same group
8902 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8904 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8905 self.op.iallocator, self.op.remote_node,
8906 self.op.disks, False, self.op.early_release)
8908 self.tasklets = [self.replacer]
8910 def DeclareLocks(self, level):
8911 if level == locking.LEVEL_NODEGROUP:
8912 assert self.op.remote_node is None
8913 assert self.op.iallocator is not None
8914 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8916 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8917 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8918 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8920 elif level == locking.LEVEL_NODE:
8921 if self.op.iallocator is not None:
8922 assert self.op.remote_node is None
8923 assert not self.needed_locks[locking.LEVEL_NODE]
8925 # Lock member nodes of all locked groups
8926 self.needed_locks[locking.LEVEL_NODE] = [node_name
8927 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8928 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8930 self._LockInstancesNodes()
8932 def BuildHooksEnv(self):
8935 This runs on the master, the primary and all the secondaries.
8938 instance = self.replacer.instance
8940 "MODE": self.op.mode,
8941 "NEW_SECONDARY": self.op.remote_node,
8942 "OLD_SECONDARY": instance.secondary_nodes[0],
8944 env.update(_BuildInstanceHookEnvByObject(self, instance))
8947 def BuildHooksNodes(self):
8948 """Build hooks nodes.
8951 instance = self.replacer.instance
8953 self.cfg.GetMasterNode(),
8954 instance.primary_node,
8956 if self.op.remote_node is not None:
8957 nl.append(self.op.remote_node)
8960 def CheckPrereq(self):
8961 """Check prerequisites.
8964 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8965 self.op.iallocator is None)
8967 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8969 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8970 if owned_groups != groups:
8971 raise errors.OpExecError("Node groups used by instance '%s' changed"
8972 " since lock was acquired, current list is %r,"
8973 " used to be '%s'" %
8974 (self.op.instance_name,
8975 utils.CommaJoin(groups),
8976 utils.CommaJoin(owned_groups)))
8978 return LogicalUnit.CheckPrereq(self)
8981 class TLReplaceDisks(Tasklet):
8982 """Replaces disks for an instance.
8984 Note: Locking is not within the scope of this class.
8987 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8988 disks, delay_iallocator, early_release):
8989 """Initializes this class.
8992 Tasklet.__init__(self, lu)
8995 self.instance_name = instance_name
8996 self.mode = mode
8997 self.iallocator_name = iallocator_name
8998 self.remote_node = remote_node
8999 self.disks = disks
9000 self.delay_iallocator = delay_iallocator
9001 self.early_release = early_release
9004 self.instance = None
9005 self.new_node = None
9006 self.target_node = None
9007 self.other_node = None
9008 self.remote_node_info = None
9009 self.node_secondary_ip = None
9012 def CheckArguments(mode, remote_node, iallocator):
9013 """Helper function for users of this class.
9016 # check for valid parameter combination
9017 if mode == constants.REPLACE_DISK_CHG:
9018 if remote_node is None and iallocator is None:
9019 raise errors.OpPrereqError("When changing the secondary either an"
9020 " iallocator script must be used or the"
9021 " new node given", errors.ECODE_INVAL)
9023 if remote_node is not None and iallocator is not None:
9024 raise errors.OpPrereqError("Give either the iallocator or the new"
9025 " secondary, not both", errors.ECODE_INVAL)
9027 elif remote_node is not None or iallocator is not None:
9028 # Not replacing the secondary
9029 raise errors.OpPrereqError("The iallocator and new node options can"
9030 " only be used when changing the"
9031 " secondary node", errors.ECODE_INVAL)
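# In short: REPLACE_DISK_CHG requires exactly one of remote_node or
# iallocator, while every other replacement mode must specify neither.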
9034 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9035 """Compute a new secondary node using an IAllocator.
9038 ial = IAllocator(lu.cfg, lu.rpc,
9039 mode=constants.IALLOCATOR_MODE_RELOC,
9041 relocate_from=relocate_from)
9043 ial.Run(iallocator_name)
9045 if not ial.success:
9046 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9047 " %s" % (iallocator_name, ial.info),
9050 if len(ial.result) != ial.required_nodes:
9051 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9052 " of nodes (%s), required %s" %
9054 len(ial.result), ial.required_nodes),
9057 remote_node_name = ial.result[0]
9059 lu.LogInfo("Selected new secondary for instance '%s': %s",
9060 instance_name, remote_node_name)
9062 return remote_node_name
9064 def _FindFaultyDisks(self, node_name):
9065 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9068 def _CheckDisksActivated(self, instance):
9069 """Checks if the instance disks are activated.
9071 @param instance: The instance to check disks
9072 @return: True if they are activated, False otherwise
9075 nodes = instance.all_nodes
9077 for idx, dev in enumerate(instance.disks):
9078 for node in nodes:
9079 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9080 self.cfg.SetDiskID(dev, node)
9082 result = self.rpc.call_blockdev_find(node, dev)
9084 if result.offline:
9085 continue
9086 elif result.fail_msg or not result.payload:
9087 return False
9089 return True
9091 def CheckPrereq(self):
9092 """Check prerequisites.
9094 This checks that the instance is in the cluster.
9097 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9098 assert instance is not None, \
9099 "Cannot retrieve locked instance %s" % self.instance_name
9101 if instance.disk_template != constants.DT_DRBD8:
9102 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9103 " instances", errors.ECODE_INVAL)
9105 if len(instance.secondary_nodes) != 1:
9106 raise errors.OpPrereqError("The instance has a strange layout,"
9107 " expected one secondary but found %d" %
9108 len(instance.secondary_nodes),
9111 if not self.delay_iallocator:
9112 self._CheckPrereq2()
9114 def _CheckPrereq2(self):
9115 """Check prerequisites, second part.
9117 This function should always be part of CheckPrereq. It was separated and is
9118 now called from Exec because during node evacuation iallocator was only
9119 called with an unmodified cluster model, not taking planned changes into account.
9123 instance = self.instance
9124 secondary_node = instance.secondary_nodes[0]
9126 if self.iallocator_name is None:
9127 remote_node = self.remote_node
9128 else:
9129 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9130 instance.name, instance.secondary_nodes)
9132 if remote_node is None:
9133 self.remote_node_info = None
9134 else:
9135 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9136 "Remote node '%s' is not locked" % remote_node
9138 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9139 assert self.remote_node_info is not None, \
9140 "Cannot retrieve locked node %s" % remote_node
9142 if remote_node == self.instance.primary_node:
9143 raise errors.OpPrereqError("The specified node is the primary node of"
9144 " the instance", errors.ECODE_INVAL)
9146 if remote_node == secondary_node:
9147 raise errors.OpPrereqError("The specified node is already the"
9148 " secondary node of the instance",
9151 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9152 constants.REPLACE_DISK_CHG):
9153 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9156 if self.mode == constants.REPLACE_DISK_AUTO:
9157 if not self._CheckDisksActivated(instance):
9158 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9159 " first" % self.instance_name,
9161 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9162 faulty_secondary = self._FindFaultyDisks(secondary_node)
9164 if faulty_primary and faulty_secondary:
9165 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9166 " one node and can not be repaired"
9167 " automatically" % self.instance_name,
9168 errors.ECODE_STATE)
9170 if faulty_primary:
9171 self.disks = faulty_primary
9172 self.target_node = instance.primary_node
9173 self.other_node = secondary_node
9174 check_nodes = [self.target_node, self.other_node]
9175 elif faulty_secondary:
9176 self.disks = faulty_secondary
9177 self.target_node = secondary_node
9178 self.other_node = instance.primary_node
9179 check_nodes = [self.target_node, self.other_node]
9180 else:
9181 self.disks = []
9182 check_nodes = []
9184 else:
9185 # Non-automatic modes
9186 if self.mode == constants.REPLACE_DISK_PRI:
9187 self.target_node = instance.primary_node
9188 self.other_node = secondary_node
9189 check_nodes = [self.target_node, self.other_node]
9191 elif self.mode == constants.REPLACE_DISK_SEC:
9192 self.target_node = secondary_node
9193 self.other_node = instance.primary_node
9194 check_nodes = [self.target_node, self.other_node]
9196 elif self.mode == constants.REPLACE_DISK_CHG:
9197 self.new_node = remote_node
9198 self.other_node = instance.primary_node
9199 self.target_node = secondary_node
9200 check_nodes = [self.new_node, self.other_node]
9202 _CheckNodeNotDrained(self.lu, remote_node)
9203 _CheckNodeVmCapable(self.lu, remote_node)
9205 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9206 assert old_node_info is not None
9207 if old_node_info.offline and not self.early_release:
9208 # doesn't make sense to delay the release
9209 self.early_release = True
9210 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9211 " early-release mode", secondary_node)
9213 else:
9214 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9215 self.mode)
9217 # If not specified all disks should be replaced
9218 if not self.disks:
9219 self.disks = range(len(self.instance.disks))
9221 for node in check_nodes:
9222 _CheckNodeOnline(self.lu, node)
9224 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9225 self.other_node,
9226 self.target_node]
9227 if node_name is not None)
9229 # Release unneeded node locks
9230 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9232 # Release any owned node group
9233 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9234 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9236 # Check whether disks are valid
9237 for disk_idx in self.disks:
9238 instance.FindDisk(disk_idx)
9240 # Get secondary node IP addresses
9241 self.node_secondary_ip = \
9242 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9243 for node_name in touched_nodes)
9245 def Exec(self, feedback_fn):
9246 """Execute disk replacement.
9248 This dispatches the disk replacement to the appropriate handler.
9251 if self.delay_iallocator:
9252 self._CheckPrereq2()
9255 # Verify owned locks before starting operation
9256 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9257 assert set(owned_locks) == set(self.node_secondary_ip), \
9258 ("Incorrect node locks, owning %s, expected %s" %
9259 (owned_locks, self.node_secondary_ip.keys()))
9261 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9262 assert list(owned_locks) == [self.instance_name], \
9263 "Instance '%s' not locked" % self.instance_name
9265 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9266 "Should not own any node group lock at this point"
9268 if not self.disks:
9269 feedback_fn("No disks need replacement")
9270 return
9272 feedback_fn("Replacing disk(s) %s for %s" %
9273 (utils.CommaJoin(self.disks), self.instance.name))
9275 activate_disks = (not self.instance.admin_up)
9277 # Activate the instance disks if we're replacing them on a down instance
9278 if activate_disks:
9279 _StartInstanceDisks(self.lu, self.instance, True)
9281 try:
9282 # Should we replace the secondary node?
9283 if self.new_node is not None:
9284 fn = self._ExecDrbd8Secondary
9285 else:
9286 fn = self._ExecDrbd8DiskOnly
9288 result = fn(feedback_fn)
9289 finally:
9290 # Deactivate the instance disks if we're replacing them on a
9291 # down instance
9292 if activate_disks:
9293 _SafeShutdownInstanceDisks(self.lu, self.instance)
9296 # Verify owned locks
9297 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9298 nodes = frozenset(self.node_secondary_ip)
9299 assert ((self.early_release and not owned_locks) or
9300 (not self.early_release and not (set(owned_locks) - nodes))), \
9301 ("Not owning the correct locks, early_release=%s, owned=%r,"
9302 " nodes=%r" % (self.early_release, owned_locks, nodes))
9306 def _CheckVolumeGroup(self, nodes):
9307 self.lu.LogInfo("Checking volume groups")
9309 vgname = self.cfg.GetVGName()
9311 # Make sure volume group exists on all involved nodes
9312 results = self.rpc.call_vg_list(nodes)
9313 if not results:
9314 raise errors.OpExecError("Can't list volume groups on the nodes")
9318 res.Raise("Error checking node %s" % node)
9319 if vgname not in res.payload:
9320 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9323 def _CheckDisksExistence(self, nodes):
9324 # Check disk existence
9325 for idx, dev in enumerate(self.instance.disks):
9326 if idx not in self.disks:
9330 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9331 self.cfg.SetDiskID(dev, node)
9333 result = self.rpc.call_blockdev_find(node, dev)
9335 msg = result.fail_msg
9336 if msg or not result.payload:
9338 msg = "disk not found"
9339 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9342 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9343 for idx, dev in enumerate(self.instance.disks):
9344 if idx not in self.disks:
9347 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9350 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9352 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9353 " replace disks for instance %s" %
9354 (node_name, self.instance.name))
9356 def _CreateNewStorage(self, node_name):
9357 """Create new storage on the primary or secondary node.
9359 This is only used for same-node replaces, not for changing the
9360 secondary node, hence we don't want to modify the existing disk.
9365 for idx, dev in enumerate(self.instance.disks):
9366 if idx not in self.disks:
9369 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9371 self.cfg.SetDiskID(dev, node_name)
9373 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9374 names = _GenerateUniqueNames(self.lu, lv_names)
9376 vg_data = dev.children[0].logical_id[0]
9377 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9378 logical_id=(vg_data, names[0]))
9379 vg_meta = dev.children[1].logical_id[0]
9380 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9381 logical_id=(vg_meta, names[1]))
9383 new_lvs = [lv_data, lv_meta]
9384 old_lvs = [child.Copy() for child in dev.children]
9385 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9387 # we pass force_create=True to force the LVM creation
9388 for new_lv in new_lvs:
9389 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9390 _GetInstanceInfoText(self.instance), False)
9394 def _CheckDevices(self, node_name, iv_names):
9395 for name, (dev, _, _) in iv_names.iteritems():
9396 self.cfg.SetDiskID(dev, node_name)
9398 result = self.rpc.call_blockdev_find(node_name, dev)
9400 msg = result.fail_msg
9401 if msg or not result.payload:
9403 msg = "disk not found"
9404 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9407 if result.payload.is_degraded:
9408 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9410 def _RemoveOldStorage(self, node_name, iv_names):
9411 for name, (_, old_lvs, _) in iv_names.iteritems():
9412 self.lu.LogInfo("Remove logical volumes for %s" % name)
9415 self.cfg.SetDiskID(lv, node_name)
9417 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9419 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9420 hint="remove unused LVs manually")
9422 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9423 """Replace a disk on the primary or secondary for DRBD 8.
9425 The algorithm for replace is quite complicated:
9427 1. for each disk to be replaced:
9429 1. create new LVs on the target node with unique names
9430 1. detach old LVs from the drbd device
9431 1. rename old LVs to name_replaced.<time_t>
9432 1. rename new LVs to old LVs
9433 1. attach the new LVs (with the old names now) to the drbd device
9435 1. wait for sync across all devices
9437 1. for each modified disk:
9439 1. remove old LVs (which have the name name_replaced.<time_t>)
9441 Failures are not very well handled.
9446 # Step: check device activation
9447 self.lu.LogStep(1, steps_total, "Check device existence")
9448 self._CheckDisksExistence([self.other_node, self.target_node])
9449 self._CheckVolumeGroup([self.target_node, self.other_node])
9451 # Step: check other node consistency
9452 self.lu.LogStep(2, steps_total, "Check peer consistency")
9453 self._CheckDisksConsistency(self.other_node,
9454 self.other_node == self.instance.primary_node,
9457 # Step: create new storage
9458 self.lu.LogStep(3, steps_total, "Allocate new storage")
9459 iv_names = self._CreateNewStorage(self.target_node)
9461 # Step: for each lv, detach+rename*2+attach
9462 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9463 for dev, old_lvs, new_lvs in iv_names.itervalues():
9464 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9466 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9468 result.Raise("Can't detach drbd from local storage on node"
9469 " %s for device %s" % (self.target_node, dev.iv_name))
9471 #cfg.Update(instance)
9473 # ok, we created the new LVs, so now we know we have the needed
9474 # storage; as such, we proceed on the target node to rename
9475 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9476 # using the assumption that logical_id == physical_id (which in
9477 # turn is the unique_id on that node)
9479 # FIXME(iustin): use a better name for the replaced LVs
9480 temp_suffix = int(time.time())
9481 ren_fn = lambda d, suff: (d.physical_id[0],
9482 d.physical_id[1] + "_replaced-%s" % suff)
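# Illustrative sketch, with hypothetical values: for an LV whose
# physical_id were ("xenvg", "disk0_data"), ren_fn would produce the
# temporary name used while the swap is performed:
#   >>> d_physical_id = ("xenvg", "disk0_data")
#   >>> suff = 1234567890
#   >>> (d_physical_id[0], d_physical_id[1] + "_replaced-%s" % suff)
#   ('xenvg', 'disk0_data_replaced-1234567890')
# The volume group part stays unchanged; only the LV name gets the suffix.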
9484 # Build the rename list based on what LVs exist on the node
9485 rename_old_to_new = []
9486 for to_ren in old_lvs:
9487 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9488 if not result.fail_msg and result.payload:
9490 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9492 self.lu.LogInfo("Renaming the old LVs on the target node")
9493 result = self.rpc.call_blockdev_rename(self.target_node,
9495 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9497 # Now we rename the new LVs to the old LVs
9498 self.lu.LogInfo("Renaming the new LVs on the target node")
9499 rename_new_to_old = [(new, old.physical_id)
9500 for old, new in zip(old_lvs, new_lvs)]
9501 result = self.rpc.call_blockdev_rename(self.target_node,
9503 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9505 # Intermediate steps of in memory modifications
9506 for old, new in zip(old_lvs, new_lvs):
9507 new.logical_id = old.logical_id
9508 self.cfg.SetDiskID(new, self.target_node)
9510 # We need to modify old_lvs so that removal later removes the
9511 # right LVs, not the newly added ones; note that old_lvs is a
9512 # copy here
9513 for disk in old_lvs:
9514 disk.logical_id = ren_fn(disk, temp_suffix)
9515 self.cfg.SetDiskID(disk, self.target_node)
9517 # Now that the new lvs have the old name, we can add them to the device
9518 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9519 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9521 msg = result.fail_msg
9523 for new_lv in new_lvs:
9524 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9527 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9528 hint=("cleanup manually the unused logical"
9530 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9533 if self.early_release:
9534 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9536 self._RemoveOldStorage(self.target_node, iv_names)
9537 # WARNING: we release both node locks here, do not do other RPCs
9538 # than WaitForSync to the primary node
9539 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9540 names=[self.target_node, self.other_node])
9543 # This can fail as the old devices are degraded and _WaitForSync
9544 # does a combined result over all disks, so we don't check its return value
9545 self.lu.LogStep(cstep, steps_total, "Sync devices")
9547 _WaitForSync(self.lu, self.instance)
9549 # Check all devices manually
9550 self._CheckDevices(self.instance.primary_node, iv_names)
9552 # Step: remove old storage
9553 if not self.early_release:
9554 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9556 self._RemoveOldStorage(self.target_node, iv_names)
9558 def _ExecDrbd8Secondary(self, feedback_fn):
9559 """Replace the secondary node for DRBD 8.
9561 The algorithm for replace is quite complicated:
9562 - for all disks of the instance:
9563 - create new LVs on the new node with same names
9564 - shutdown the drbd device on the old secondary
9565 - disconnect the drbd network on the primary
9566 - create the drbd device on the new secondary
9567 - network attach the drbd on the primary, using an artifice:
9568 the drbd code for Attach() will connect to the network if it
9569 finds a device which is connected to the good local disks but
9570 not network enabled
9571 - wait for sync across all devices
9572 - remove all disks from the old secondary
9574 Failures are not very well handled.
9579 # Step: check device activation
9580 self.lu.LogStep(1, steps_total, "Check device existence")
9581 self._CheckDisksExistence([self.instance.primary_node])
9582 self._CheckVolumeGroup([self.instance.primary_node])
9584 # Step: check other node consistency
9585 self.lu.LogStep(2, steps_total, "Check peer consistency")
9586 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9588 # Step: create new storage
9589 self.lu.LogStep(3, steps_total, "Allocate new storage")
9590 for idx, dev in enumerate(self.instance.disks):
9591 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9592 (self.new_node, idx))
9593 # we pass force_create=True to force LVM creation
9594 for new_lv in dev.children:
9595 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9596 _GetInstanceInfoText(self.instance), False)
9598 # Step 4: drbd minors and drbd setup changes
9599 # after this, we must manually remove the drbd minors on both the
9600 # error and the success paths
9601 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9602 minors = self.cfg.AllocateDRBDMinor([self.new_node
9603 for dev in self.instance.disks],
9605 logging.debug("Allocated minors %r", minors)
9608 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9609 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9610 (self.new_node, idx))
9611 # create new devices on new_node; note that we create two IDs:
9612 # one without port, so the drbd will be activated without
9613 # networking information on the new node at this stage, and one
9614 # with network, for the later activation in step 4
9615 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9616 if self.instance.primary_node == o_node1:
9619 assert self.instance.primary_node == o_node2, "Three-node instance?"
9622 new_alone_id = (self.instance.primary_node, self.new_node, None,
9623 p_minor, new_minor, o_secret)
9624 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9625 p_minor, new_minor, o_secret)
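# Illustrative sketch, with hypothetical values: a DRBD8 logical_id is the
# 6-tuple (nodeA, nodeB, port, minorA, minorB, secret), so the two IDs
# built above differ only in the port field:
#   >>> pnode, newnode, port = "node1.example.com", "node3.example.com", 11000
#   >>> p_minor, new_minor, secret = 0, 2, "s3cr3t"
#   >>> (pnode, newnode, None, p_minor, new_minor, secret)   # new_alone_id
#   ('node1.example.com', 'node3.example.com', None, 0, 2, 's3cr3t')
#   >>> (pnode, newnode, port, p_minor, new_minor, secret)   # new_net_id
#   ('node1.example.com', 'node3.example.com', 11000, 0, 2, 's3cr3t')
# The port-less variant lets the device be brought up standalone first;
# the networked variant is what gets stored once the primary reattaches.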
9627 iv_names[idx] = (dev, dev.children, new_net_id)
9628 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9630 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9631 logical_id=new_alone_id,
9632 children=dev.children,
9635 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9636 _GetInstanceInfoText(self.instance), False)
9637 except errors.GenericError:
9638 self.cfg.ReleaseDRBDMinors(self.instance.name)
9641 # We have new devices, shutdown the drbd on the old secondary
9642 for idx, dev in enumerate(self.instance.disks):
9643 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9644 self.cfg.SetDiskID(dev, self.target_node)
9645 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9647 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9648 "node: %s" % (idx, msg),
9649 hint=("Please cleanup this device manually as"
9650 " soon as possible"))
9652 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9653 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9654 self.node_secondary_ip,
9655 self.instance.disks)\
9656 [self.instance.primary_node]
9658 msg = result.fail_msg
9660 # detaches didn't succeed (unlikely)
9661 self.cfg.ReleaseDRBDMinors(self.instance.name)
9662 raise errors.OpExecError("Can't detach the disks from the network on"
9663 " old node: %s" % (msg,))
9665 # if we managed to detach at least one, we update all the disks of
9666 # the instance to point to the new secondary
9667 self.lu.LogInfo("Updating instance configuration")
9668 for dev, _, new_logical_id in iv_names.itervalues():
9669 dev.logical_id = new_logical_id
9670 self.cfg.SetDiskID(dev, self.instance.primary_node)
9672 self.cfg.Update(self.instance, feedback_fn)
9674 # and now perform the drbd attach
9675 self.lu.LogInfo("Attaching primary drbds to new secondary"
9676 " (standalone => connected)")
9677 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9679 self.node_secondary_ip,
9680 self.instance.disks,
9683 for to_node, to_result in result.items():
9684 msg = to_result.fail_msg
9686 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9688 hint=("please do a gnt-instance info to see the"
9689 " status of disks"))
9691 if self.early_release:
9692 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9694 self._RemoveOldStorage(self.target_node, iv_names)
9695 # WARNING: we release all node locks here, do not do other RPCs
9696 # than WaitForSync to the primary node
9697 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9698 names=[self.instance.primary_node,
9703 # This can fail as the old devices are degraded and _WaitForSync
9704 # does a combined result over all disks, so we don't check its return value
9705 self.lu.LogStep(cstep, steps_total, "Sync devices")
9707 _WaitForSync(self.lu, self.instance)
9709 # Check all devices manually
9710 self._CheckDevices(self.instance.primary_node, iv_names)
9712 # Step: remove old storage
9713 if not self.early_release:
9714 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9715 self._RemoveOldStorage(self.target_node, iv_names)
9718 class LURepairNodeStorage(NoHooksLU):
9719 """Repairs the volume group on a node.
9724 def CheckArguments(self):
9725 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9727 storage_type = self.op.storage_type
9729 if (constants.SO_FIX_CONSISTENCY not in
9730 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9731 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9732 " repaired" % storage_type,
9735 def ExpandNames(self):
9736 self.needed_locks = {
9737 locking.LEVEL_NODE: [self.op.node_name],
9740 def _CheckFaultyDisks(self, instance, node_name):
9741 """Ensure faulty disks abort the opcode or at least warn."""
9743 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9745 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9746 " node '%s'" % (instance.name, node_name),
9748 except errors.OpPrereqError, err:
9749 if self.op.ignore_consistency:
9750 self.proc.LogWarning(str(err.args[0]))
9754 def CheckPrereq(self):
9755 """Check prerequisites.
9758 # Check whether any instance on this node has faulty disks
9759 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9760 if not inst.admin_up:
9762 check_nodes = set(inst.all_nodes)
9763 check_nodes.discard(self.op.node_name)
9764 for inst_node_name in check_nodes:
9765 self._CheckFaultyDisks(inst, inst_node_name)
9767 def Exec(self, feedback_fn):
9768 feedback_fn("Repairing storage unit '%s' on %s ..." %
9769 (self.op.name, self.op.node_name))
9771 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9772 result = self.rpc.call_storage_execute(self.op.node_name,
9773 self.op.storage_type, st_args,
9775 constants.SO_FIX_CONSISTENCY)
9776 result.Raise("Failed to repair storage unit '%s' on %s" %
9777 (self.op.name, self.op.node_name))
9780 class LUNodeEvacuate(NoHooksLU):
9781 """Evacuates instances off a list of nodes.
9786 def CheckArguments(self):
9787 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9789 def ExpandNames(self):
9790 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9792 if self.op.remote_node is not None:
9793 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9794 assert self.op.remote_node
9796 if self.op.remote_node == self.op.node_name:
9797 raise errors.OpPrereqError("Can not use evacuated node as a new"
9798 " secondary node", errors.ECODE_INVAL)
9800 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9801 raise errors.OpPrereqError("Without the use of an iallocator only"
9802 " secondary instances can be evacuated",
9806 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9807 self.needed_locks = {
9808 locking.LEVEL_INSTANCE: [],
9809 locking.LEVEL_NODEGROUP: [],
9810 locking.LEVEL_NODE: [],
9813 if self.op.remote_node is None:
9814 # Iallocator will choose any node(s) in the same group
9815 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9817 group_nodes = frozenset([self.op.remote_node])
9819 # Determine nodes to be locked
9820 self.lock_nodes = set([self.op.node_name]) | group_nodes
9822 def _DetermineInstances(self):
9823 """Builds list of instances to operate on.
9826 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9828 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9829 # Primary instances only
9830 inst_fn = _GetNodePrimaryInstances
9831 assert self.op.remote_node is None, \
9832 "Evacuating primary instances requires iallocator"
9833 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9834 # Secondary instances only
9835 inst_fn = _GetNodeSecondaryInstances
9838 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9839 inst_fn = _GetNodeInstances
9841 return inst_fn(self.cfg, self.op.node_name)
9843 def DeclareLocks(self, level):
9844 if level == locking.LEVEL_INSTANCE:
9845 # Lock instances optimistically, needs verification once node and group
9846 # locks have been acquired
9847 self.needed_locks[locking.LEVEL_INSTANCE] = \
9848 set(i.name for i in self._DetermineInstances())
9850 elif level == locking.LEVEL_NODEGROUP:
9851 # Lock node groups optimistically, needs verification once nodes have
9852 # been acquired
9853 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9854 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9856 elif level == locking.LEVEL_NODE:
9857 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9859 def CheckPrereq(self):
9861 owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9862 owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9863 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9865 assert owned_nodes == self.lock_nodes
9867 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9868 if owned_groups != wanted_groups:
9869 raise errors.OpExecError("Node groups changed since locks were acquired,"
9870 " current groups are '%s', used to be '%s'" %
9871 (utils.CommaJoin(wanted_groups),
9872 utils.CommaJoin(owned_groups)))
9874 # Determine affected instances
9875 self.instances = self._DetermineInstances()
9876 self.instance_names = [i.name for i in self.instances]
9878 if set(self.instance_names) != owned_instances:
9879 raise errors.OpExecError("Instances on node '%s' changed since locks"
9880 " were acquired, current instances are '%s',"
9881 " used to be '%s'" %
9883 utils.CommaJoin(self.instance_names),
9884 utils.CommaJoin(owned_instances)))
9886 if self.instance_names:
9887 self.LogInfo("Evacuating instances from node '%s': %s",
9889 utils.CommaJoin(utils.NiceSort(self.instance_names)))
9891 self.LogInfo("No instances to evacuate from node '%s'",
9894 if self.op.remote_node is not None:
9895 for i in self.instances:
9896 if i.primary_node == self.op.remote_node:
9897 raise errors.OpPrereqError("Node %s is the primary node of"
9898 " instance %s, cannot use it as"
9900 (self.op.remote_node, i.name),
9903 def Exec(self, feedback_fn):
9904 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
9906 if not self.instance_names:
9907 # No instances to evacuate
9910 elif self.op.iallocator is not None:
9911 # TODO: Implement relocation to other group
9912 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
9913 evac_mode=self.op.mode,
9914 instances=list(self.instance_names))
9916 ial.Run(self.op.iallocator)
9919 raise errors.OpPrereqError("Can't compute node evacuation using"
9920 " iallocator '%s': %s" %
9921 (self.op.iallocator, ial.info),
9924 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
9926 elif self.op.remote_node is not None:
9927 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
9929 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
9930 remote_node=self.op.remote_node,
9932 mode=constants.REPLACE_DISK_CHG,
9933 early_release=self.op.early_release)]
9934 for instance_name in self.instance_names
9938 raise errors.ProgrammerError("No iallocator or remote node")
9940 return ResultWithJobs(jobs)
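# Illustrative sketch, with hypothetical instance names: in the
# remote_node branch above, "jobs" ends up as a list of single-opcode
# lists, i.e. one independent job per evacuated instance, roughly:
#   [[OpInstanceReplaceDisks(instance_name="inst1", remote_node="node2",
#                            mode=constants.REPLACE_DISK_CHG,
#                            early_release=False)],
#    [OpInstanceReplaceDisks(instance_name="inst2", remote_node="node2",
#                            mode=constants.REPLACE_DISK_CHG,
#                            early_release=False)]]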
9943 def _SetOpEarlyRelease(early_release, op):
9944 """Sets C{early_release} flag on opcodes if available.
9948 op.early_release = early_release
9949 except AttributeError:
9950 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
9955 def _NodeEvacDest(use_nodes, group, nodes):
9956 """Returns group or nodes depending on caller's choice.
9960 return utils.CommaJoin(nodes)
9965 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
9966 """Unpacks the result of change-group and node-evacuate iallocator requests.
9968 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
9969 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
9971 @type lu: L{LogicalUnit}
9972 @param lu: Logical unit instance
9973 @type alloc_result: tuple/list
9974 @param alloc_result: Result from iallocator
9975 @type early_release: bool
9976 @param early_release: Whether to release locks early if possible
9977 @type use_nodes: bool
9978 @param use_nodes: Whether to display node names instead of groups
9981 (moved, failed, jobs) = alloc_result
9984 lu.LogWarning("Unable to evacuate instances %s",
9985 utils.CommaJoin("%s (%s)" % (name, reason)
9986 for (name, reason) in failed))
9989 lu.LogInfo("Instances to be moved: %s",
9990 utils.CommaJoin("%s (to %s)" %
9991 (name, _NodeEvacDest(use_nodes, group, nodes))
9992 for (name, group, nodes) in moved))
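# Illustrative sketch, with hypothetical values: the iallocator result
# unpacked above has the shape (moved, failed, jobs), e.g.:
#   moved  = [("inst1", "group1", ["node2", "node3"])]
#   failed = [("inst2", "instance has no secondary node")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]]
# The serialized opcodes in "jobs" are turned back into opcode objects
# below via opcodes.OpCode.LoadOpCode, with the early_release flag
# applied through _SetOpEarlyRelease.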
9994 return [map(compat.partial(_SetOpEarlyRelease, early_release),
9995 map(opcodes.OpCode.LoadOpCode, ops))
9999 class LUInstanceGrowDisk(LogicalUnit):
10000 """Grow a disk of an instance.
10003 HPATH = "disk-grow"
10004 HTYPE = constants.HTYPE_INSTANCE
10007 def ExpandNames(self):
10008 self._ExpandAndLockInstance()
10009 self.needed_locks[locking.LEVEL_NODE] = []
10010 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10012 def DeclareLocks(self, level):
10013 if level == locking.LEVEL_NODE:
10014 self._LockInstancesNodes()
10016 def BuildHooksEnv(self):
10017 """Build hooks env.
10019 This runs on the master, the primary and all the secondaries.
10023 "DISK": self.op.disk,
10024 "AMOUNT": self.op.amount,
10026 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10029 def BuildHooksNodes(self):
10030 """Build hooks nodes.
10033 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10036 def CheckPrereq(self):
10037 """Check prerequisites.
10039 This checks that the instance is in the cluster.
10042 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10043 assert instance is not None, \
10044 "Cannot retrieve locked instance %s" % self.op.instance_name
10045 nodenames = list(instance.all_nodes)
10046 for node in nodenames:
10047 _CheckNodeOnline(self, node)
10049 self.instance = instance
10051 if instance.disk_template not in constants.DTS_GROWABLE:
10052 raise errors.OpPrereqError("Instance's disk layout does not support"
10053 " growing", errors.ECODE_INVAL)
10055 self.disk = instance.FindDisk(self.op.disk)
10057 if instance.disk_template not in (constants.DT_FILE,
10058 constants.DT_SHARED_FILE):
10059 # TODO: check the free disk space for file, when that feature will be
10060 # implemented
10061 _CheckNodesFreeDiskPerVG(self, nodenames,
10062 self.disk.ComputeGrowth(self.op.amount))
10064 def Exec(self, feedback_fn):
10065 """Execute disk grow.
10068 instance = self.instance
10071 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10073 raise errors.OpExecError("Cannot activate block device to grow")
10075 # First run all grow ops in dry-run mode
10076 for node in instance.all_nodes:
10077 self.cfg.SetDiskID(disk, node)
10078 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10079 result.Raise("Grow request failed to node %s" % node)
10081 # We know that (as far as we can test) operations across different
10082 # nodes will succeed; time to run it for real
10083 for node in instance.all_nodes:
10084 self.cfg.SetDiskID(disk, node)
10085 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10086 result.Raise("Grow request failed to node %s" % node)
10088 # TODO: Rewrite code to work properly
10089 # DRBD goes into sync mode for a short amount of time after executing the
10090 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10091 # calling "resize" in sync mode fails. Sleeping for a short amount of
10092 # time is a work-around.
10095 disk.RecordGrow(self.op.amount)
10096 self.cfg.Update(instance, feedback_fn)
10097 if self.op.wait_for_sync:
10098 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10100 self.proc.LogWarning("Disk sync-ing has not returned a good"
10101 " status; please check the instance")
10102 if not instance.admin_up:
10103 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10104 elif not instance.admin_up:
10105 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10106 " not supposed to be running because no wait for"
10107 " sync mode was requested")
10110 class LUInstanceQueryData(NoHooksLU):
10111 """Query runtime instance data.
10116 def ExpandNames(self):
10117 self.needed_locks = {}
10119 # Use locking if requested or when non-static information is wanted
10120 if not (self.op.static or self.op.use_locking):
10121 self.LogWarning("Non-static data requested, locks need to be acquired")
10122 self.op.use_locking = True
10124 if self.op.instances or not self.op.use_locking:
10125 # Expand instance names right here
10126 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10128 # Will use acquired locks
10129 self.wanted_names = None
10131 if self.op.use_locking:
10132 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10134 if self.wanted_names is None:
10135 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10137 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10139 self.needed_locks[locking.LEVEL_NODE] = []
10140 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10141 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10143 def DeclareLocks(self, level):
10144 if self.op.use_locking and level == locking.LEVEL_NODE:
10145 self._LockInstancesNodes()
10147 def CheckPrereq(self):
10148 """Check prerequisites.
10150 This only checks the optional instance list against the existing names.
10153 if self.wanted_names is None:
10154 assert self.op.use_locking, "Locking was not used"
10155 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10157 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10158 for name in self.wanted_names]
10160 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10161 """Returns the status of a block device
10164 if self.op.static or not node:
10167 self.cfg.SetDiskID(dev, node)
10169 result = self.rpc.call_blockdev_find(node, dev)
10173 result.Raise("Can't compute disk status for %s" % instance_name)
10175 status = result.payload
10179 return (status.dev_path, status.major, status.minor,
10180 status.sync_percent, status.estimated_time,
10181 status.is_degraded, status.ldisk_status)
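# Illustrative sketch, with hypothetical values: for a healthy, fully
# synced device the tuple returned above would look roughly like
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status), with sync_percent being None when no
# resync is in progress.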
10183 def _ComputeDiskStatus(self, instance, snode, dev):
10184 """Compute block device status.
10187 if dev.dev_type in constants.LDS_DRBD:
10188 # we change the snode then (otherwise we use the one passed in)
10189 if dev.logical_id[0] == instance.primary_node:
10190 snode = dev.logical_id[1]
10192 snode = dev.logical_id[0]
10194 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10195 instance.name, dev)
10196 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10199 dev_children = [self._ComputeDiskStatus(instance, snode, child)
10200 for child in dev.children]
10205 "iv_name": dev.iv_name,
10206 "dev_type": dev.dev_type,
10207 "logical_id": dev.logical_id,
10208 "physical_id": dev.physical_id,
10209 "pstatus": dev_pstatus,
10210 "sstatus": dev_sstatus,
10211 "children": dev_children,
10216 def Exec(self, feedback_fn):
10217 """Gather and return data"""
10220 cluster = self.cfg.GetClusterInfo()
10222 for instance in self.wanted_instances:
10223 if not self.op.static:
10224 remote_info = self.rpc.call_instance_info(instance.primary_node,
10226 instance.hypervisor)
10227 remote_info.Raise("Error checking node %s" % instance.primary_node)
10228 remote_info = remote_info.payload
10229 if remote_info and "state" in remote_info:
10230 remote_state = "up"
10232 remote_state = "down"
10234 remote_state = None
10235 if instance.admin_up:
10236 config_state = "up"
10238 config_state = "down"
10240 disks = [self._ComputeDiskStatus(instance, None, device)
10241 for device in instance.disks]
10243 result[instance.name] = {
10244 "name": instance.name,
10245 "config_state": config_state,
10246 "run_state": remote_state,
10247 "pnode": instance.primary_node,
10248 "snodes": instance.secondary_nodes,
10250 # this happens to be the same format used for hooks
10251 "nics": _NICListToTuple(self, instance.nics),
10252 "disk_template": instance.disk_template,
10254 "hypervisor": instance.hypervisor,
10255 "network_port": instance.network_port,
10256 "hv_instance": instance.hvparams,
10257 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10258 "be_instance": instance.beparams,
10259 "be_actual": cluster.FillBE(instance),
10260 "os_instance": instance.osparams,
10261 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10262 "serial_no": instance.serial_no,
10263 "mtime": instance.mtime,
10264 "ctime": instance.ctime,
10265 "uuid": instance.uuid,
10271 class LUInstanceSetParams(LogicalUnit):
10272 """Modifies an instances's parameters.
10275 HPATH = "instance-modify"
10276 HTYPE = constants.HTYPE_INSTANCE
10279 def CheckArguments(self):
10280 if not (self.op.nics or self.op.disks or self.op.disk_template or
10281 self.op.hvparams or self.op.beparams or self.op.os_name):
10282 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10284 if self.op.hvparams:
10285 _CheckGlobalHvParams(self.op.hvparams)
10289 for disk_op, disk_dict in self.op.disks:
10290 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10291 if disk_op == constants.DDM_REMOVE:
10292 disk_addremove += 1
10294 elif disk_op == constants.DDM_ADD:
10295 disk_addremove += 1
10297 if not isinstance(disk_op, int):
10298 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10299 if not isinstance(disk_dict, dict):
10300 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10301 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10303 if disk_op == constants.DDM_ADD:
10304 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10305 if mode not in constants.DISK_ACCESS_SET:
10306 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10307 errors.ECODE_INVAL)
10308 size = disk_dict.get(constants.IDISK_SIZE, None)
10310 raise errors.OpPrereqError("Required disk parameter size missing",
10311 errors.ECODE_INVAL)
10314 except (TypeError, ValueError), err:
10315 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10316 str(err), errors.ECODE_INVAL)
10317 disk_dict[constants.IDISK_SIZE] = size
10319 # modification of disk
10320 if constants.IDISK_SIZE in disk_dict:
10321 raise errors.OpPrereqError("Disk size change not possible, use"
10322 " grow-disk", errors.ECODE_INVAL)
10324 if disk_addremove > 1:
10325 raise errors.OpPrereqError("Only one disk add or remove operation"
10326 " supported at a time", errors.ECODE_INVAL)
10328 if self.op.disks and self.op.disk_template is not None:
10329 raise errors.OpPrereqError("Disk template conversion and other disk"
10330 " changes not supported at the same time",
10331 errors.ECODE_INVAL)
10333 if (self.op.disk_template and
10334 self.op.disk_template in constants.DTS_INT_MIRROR and
10335 self.op.remote_node is None):
10336 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10337 " one requires specifying a secondary node",
10338 errors.ECODE_INVAL)
10342 for nic_op, nic_dict in self.op.nics:
10343 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10344 if nic_op == constants.DDM_REMOVE:
10347 elif nic_op == constants.DDM_ADD:
10350 if not isinstance(nic_op, int):
10351 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10352 if not isinstance(nic_dict, dict):
10353 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10354 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10356 # nic_dict should be a dict
10357 nic_ip = nic_dict.get(constants.INIC_IP, None)
10358 if nic_ip is not None:
10359 if nic_ip.lower() == constants.VALUE_NONE:
10360 nic_dict[constants.INIC_IP] = None
10362 if not netutils.IPAddress.IsValid(nic_ip):
10363 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10364 errors.ECODE_INVAL)
10366 nic_bridge = nic_dict.get('bridge', None)
10367 nic_link = nic_dict.get(constants.INIC_LINK, None)
10368 if nic_bridge and nic_link:
10369 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10370 " at the same time", errors.ECODE_INVAL)
10371 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10372 nic_dict['bridge'] = None
10373 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10374 nic_dict[constants.INIC_LINK] = None
10376 if nic_op == constants.DDM_ADD:
10377 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10378 if nic_mac is None:
10379 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10381 if constants.INIC_MAC in nic_dict:
10382 nic_mac = nic_dict[constants.INIC_MAC]
10383 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10384 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10386 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10387 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10388 " modifying an existing nic",
10389 errors.ECODE_INVAL)
10391 if nic_addremove > 1:
10392 raise errors.OpPrereqError("Only one NIC add or remove operation"
10393 " supported at a time", errors.ECODE_INVAL)
10395 def ExpandNames(self):
10396 self._ExpandAndLockInstance()
10397 self.needed_locks[locking.LEVEL_NODE] = []
10398 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10400 def DeclareLocks(self, level):
10401 if level == locking.LEVEL_NODE:
10402 self._LockInstancesNodes()
10403 if self.op.disk_template and self.op.remote_node:
10404 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10405 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10407 def BuildHooksEnv(self):
10408 """Build hooks env.
10410 This runs on the master, primary and secondaries.
10414 if constants.BE_MEMORY in self.be_new:
10415 args['memory'] = self.be_new[constants.BE_MEMORY]
10416 if constants.BE_VCPUS in self.be_new:
10417 args['vcpus'] = self.be_new[constants.BE_VCPUS]
10418 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10419 # information at all.
10422 nic_override = dict(self.op.nics)
10423 for idx, nic in enumerate(self.instance.nics):
10424 if idx in nic_override:
10425 this_nic_override = nic_override[idx]
10427 this_nic_override = {}
10428 if constants.INIC_IP in this_nic_override:
10429 ip = this_nic_override[constants.INIC_IP]
10432 if constants.INIC_MAC in this_nic_override:
10433 mac = this_nic_override[constants.INIC_MAC]
10436 if idx in self.nic_pnew:
10437 nicparams = self.nic_pnew[idx]
10439 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10440 mode = nicparams[constants.NIC_MODE]
10441 link = nicparams[constants.NIC_LINK]
10442 args['nics'].append((ip, mac, mode, link))
10443 if constants.DDM_ADD in nic_override:
10444 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10445 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10446 nicparams = self.nic_pnew[constants.DDM_ADD]
10447 mode = nicparams[constants.NIC_MODE]
10448 link = nicparams[constants.NIC_LINK]
10449 args['nics'].append((ip, mac, mode, link))
10450 elif constants.DDM_REMOVE in nic_override:
10451 del args['nics'][-1]
10453 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10454 if self.op.disk_template:
10455 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10459 def BuildHooksNodes(self):
10460 """Build hooks nodes.
10463 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10466 def CheckPrereq(self):
10467 """Check prerequisites.
10469 This only checks the instance list against the existing names.
10472 # checking the new params on the primary/secondary nodes
10474 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10475 cluster = self.cluster = self.cfg.GetClusterInfo()
10476 assert self.instance is not None, \
10477 "Cannot retrieve locked instance %s" % self.op.instance_name
10478 pnode = instance.primary_node
10479 nodelist = list(instance.all_nodes)
10482 if self.op.os_name and not self.op.force:
10483 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10484 self.op.force_variant)
10485 instance_os = self.op.os_name
10487 instance_os = instance.os
10489 if self.op.disk_template:
10490 if instance.disk_template == self.op.disk_template:
10491 raise errors.OpPrereqError("Instance already has disk template %s" %
10492 instance.disk_template, errors.ECODE_INVAL)
10494 if (instance.disk_template,
10495 self.op.disk_template) not in self._DISK_CONVERSIONS:
10496 raise errors.OpPrereqError("Unsupported disk template conversion from"
10497 " %s to %s" % (instance.disk_template,
10498 self.op.disk_template),
10499 errors.ECODE_INVAL)
10500 _CheckInstanceDown(self, instance, "cannot change disk template")
10501 if self.op.disk_template in constants.DTS_INT_MIRROR:
10502 if self.op.remote_node == pnode:
10503 raise errors.OpPrereqError("Given new secondary node %s is the same"
10504 " as the primary node of the instance" %
10505 self.op.remote_node, errors.ECODE_STATE)
10506 _CheckNodeOnline(self, self.op.remote_node)
10507 _CheckNodeNotDrained(self, self.op.remote_node)
10508 # FIXME: here we assume that the old instance type is DT_PLAIN
10509 assert instance.disk_template == constants.DT_PLAIN
10510 disks = [{constants.IDISK_SIZE: d.size,
10511 constants.IDISK_VG: d.logical_id[0]}
10512 for d in instance.disks]
10513 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10514 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10516 # hvparams processing
10517 if self.op.hvparams:
10518 hv_type = instance.hypervisor
10519 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10520 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10521 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10524 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10525 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10526 self.hv_new = hv_new # the new actual values
10527 self.hv_inst = i_hvdict # the new dict (without defaults)
10529 self.hv_new = self.hv_inst = {}
10531 # beparams processing
10532 if self.op.beparams:
10533 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10535 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10536 be_new = cluster.SimpleFillBE(i_bedict)
10537 self.be_new = be_new # the new actual values
10538 self.be_inst = i_bedict # the new dict (without defaults)
10540 self.be_new = self.be_inst = {}
10541 be_old = cluster.FillBE(instance)
10543 # osparams processing
10544 if self.op.osparams:
10545 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10546 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10547 self.os_inst = i_osdict # the new dict (without defaults)
10553 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10554 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10555 mem_check_list = [pnode]
10556 if be_new[constants.BE_AUTO_BALANCE]:
10557 # either we changed auto_balance to yes or it was from before
10558 mem_check_list.extend(instance.secondary_nodes)
10559 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10560 instance.hypervisor)
10561 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10562 instance.hypervisor)
10563 pninfo = nodeinfo[pnode]
10564 msg = pninfo.fail_msg
10566 # Assume the primary node is unreachable and go ahead
10567 self.warn.append("Can't get info from primary node %s: %s" %
10569 elif not isinstance(pninfo.payload.get('memory_free', None), int):
10570 self.warn.append("Node data from primary node %s doesn't contain"
10571 " free memory information" % pnode)
10572 elif instance_info.fail_msg:
10573 self.warn.append("Can't get instance runtime information: %s" %
10574 instance_info.fail_msg)
10576 if instance_info.payload:
10577 current_mem = int(instance_info.payload['memory'])
10579 # Assume instance not running
10580 # (there is a slight race condition here, but it's not very probable,
10581 # and we have no other way to check)
10583 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10584 pninfo.payload['memory_free'])
10586 raise errors.OpPrereqError("This change will prevent the instance"
10587 " from starting, due to %d MB of memory"
10588 " missing on its primary node" % miss_mem,
10589 errors.ECODE_NORES)
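# Illustrative sketch, with hypothetical numbers: raising BE_MEMORY to
# 4096 MB while the instance currently uses 2048 MB and the primary node
# reports 1024 MB free gives
#   miss_mem = 4096 - 2048 - 1024 = 1024
# which is positive, so the change is refused; with 2048 MB free the
# result would be 0 and the change would be allowed to proceed.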
10591 if be_new[constants.BE_AUTO_BALANCE]:
10592 for node, nres in nodeinfo.items():
10593 if node not in instance.secondary_nodes:
10595 nres.Raise("Can't get info from secondary node %s" % node,
10596 prereq=True, ecode=errors.ECODE_STATE)
10597 if not isinstance(nres.payload.get('memory_free', None), int):
10598 raise errors.OpPrereqError("Secondary node %s didn't return free"
10599 " memory information" % node,
10600 errors.ECODE_STATE)
10601 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10602 raise errors.OpPrereqError("This change will prevent the instance"
10603 " from failover to its secondary node"
10604 " %s, due to not enough memory" % node,
10605 errors.ECODE_STATE)
10609 self.nic_pinst = {}
10610 for nic_op, nic_dict in self.op.nics:
10611 if nic_op == constants.DDM_REMOVE:
10612 if not instance.nics:
10613 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10614 errors.ECODE_INVAL)
10616 if nic_op != constants.DDM_ADD:
10618 if not instance.nics:
10619 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10620 " no NICs" % nic_op,
10621 errors.ECODE_INVAL)
10622 if nic_op < 0 or nic_op >= len(instance.nics):
10623 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10625 (nic_op, len(instance.nics) - 1),
10626 errors.ECODE_INVAL)
10627 old_nic_params = instance.nics[nic_op].nicparams
10628 old_nic_ip = instance.nics[nic_op].ip
10630 old_nic_params = {}
10633 update_params_dict = dict([(key, nic_dict[key])
10634 for key in constants.NICS_PARAMETERS
10635 if key in nic_dict])
10637 if 'bridge' in nic_dict:
10638 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10640 new_nic_params = _GetUpdatedParams(old_nic_params,
10641 update_params_dict)
10642 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10643 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10644 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10645 self.nic_pinst[nic_op] = new_nic_params
10646 self.nic_pnew[nic_op] = new_filled_nic_params
10647 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10649 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10650 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10651 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10653 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10655 self.warn.append(msg)
10657 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10658 if new_nic_mode == constants.NIC_MODE_ROUTED:
10659 if constants.INIC_IP in nic_dict:
10660 nic_ip = nic_dict[constants.INIC_IP]
10662 nic_ip = old_nic_ip
10664 raise errors.OpPrereqError('Cannot set the nic ip to None'
10665 ' on a routed nic', errors.ECODE_INVAL)
10666 if constants.INIC_MAC in nic_dict:
10667 nic_mac = nic_dict[constants.INIC_MAC]
10668 if nic_mac is None:
10669 raise errors.OpPrereqError('Cannot set the nic mac to None',
10670 errors.ECODE_INVAL)
10671 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10672 # otherwise generate the mac
10673 nic_dict[constants.INIC_MAC] = \
10674 self.cfg.GenerateMAC(self.proc.GetECId())
10676 # or validate/reserve the current one
10678 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10679 except errors.ReservationError:
10680 raise errors.OpPrereqError("MAC address %s already in use"
10681 " in cluster" % nic_mac,
10682 errors.ECODE_NOTUNIQUE)
10685 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10686 raise errors.OpPrereqError("Disk operations not supported for"
10687 " diskless instances",
10688 errors.ECODE_INVAL)
10689 for disk_op, _ in self.op.disks:
10690 if disk_op == constants.DDM_REMOVE:
10691 if len(instance.disks) == 1:
10692 raise errors.OpPrereqError("Cannot remove the last disk of"
10693 " an instance", errors.ECODE_INVAL)
10694 _CheckInstanceDown(self, instance, "cannot remove disks")
10696 if (disk_op == constants.DDM_ADD and
10697 len(instance.disks) >= constants.MAX_DISKS):
10698 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10699 " add more" % constants.MAX_DISKS,
10700 errors.ECODE_STATE)
10701 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10703 if disk_op < 0 or disk_op >= len(instance.disks):
10704 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10706 (disk_op, len(instance.disks)),
10707 errors.ECODE_INVAL)
10711 def _ConvertPlainToDrbd(self, feedback_fn):
10712 """Converts an instance from plain to drbd.
10715 feedback_fn("Converting template to drbd")
10716 instance = self.instance
10717 pnode = instance.primary_node
10718 snode = self.op.remote_node
10720 # create a fake disk info for _GenerateDiskTemplate
10721 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10722 constants.IDISK_VG: d.logical_id[0]}
10723 for d in instance.disks]
10724 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10725 instance.name, pnode, [snode],
10726 disk_info, None, None, 0, feedback_fn)
10727 info = _GetInstanceInfoText(instance)
10728 feedback_fn("Creating aditional volumes...")
10729 # first, create the missing data and meta devices
10730 for disk in new_disks:
10731 # unfortunately this is... not too nice
10732 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10734 for child in disk.children:
10735 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10736 # at this stage, all new LVs have been created, we can rename the
10737 # old ones
10738 feedback_fn("Renaming original volumes...")
10739 rename_list = [(o, n.children[0].logical_id)
10740 for (o, n) in zip(instance.disks, new_disks)]
10741 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10742 result.Raise("Failed to rename original LVs")
10744 feedback_fn("Initializing DRBD devices...")
10745 # all child devices are in place, we can now create the DRBD devices
10746 for disk in new_disks:
10747 for node in [pnode, snode]:
10748 f_create = node == pnode
10749 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10751 # at this point, the instance has been modified
10752 instance.disk_template = constants.DT_DRBD8
10753 instance.disks = new_disks
10754 self.cfg.Update(instance, feedback_fn)
10756 # disks are created, waiting for sync
10757 disk_abort = not _WaitForSync(self, instance,
10758 oneshot=not self.op.wait_for_sync)
10760 raise errors.OpExecError("There are some degraded disks for"
10761 " this instance, please cleanup manually")
10763 def _ConvertDrbdToPlain(self, feedback_fn):
10764 """Converts an instance from drbd to plain.
10767 instance = self.instance
10768 assert len(instance.secondary_nodes) == 1
10769 pnode = instance.primary_node
10770 snode = instance.secondary_nodes[0]
10771 feedback_fn("Converting template to plain")
10773 old_disks = instance.disks
10774 new_disks = [d.children[0] for d in old_disks]
10776 # copy over size and mode
10777 for parent, child in zip(old_disks, new_disks):
10778 child.size = parent.size
10779 child.mode = parent.mode
10781 # update instance structure
10782 instance.disks = new_disks
10783 instance.disk_template = constants.DT_PLAIN
10784 self.cfg.Update(instance, feedback_fn)
10786 feedback_fn("Removing volumes on the secondary node...")
10787 for disk in old_disks:
10788 self.cfg.SetDiskID(disk, snode)
10789 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10791 self.LogWarning("Could not remove block device %s on node %s,"
10792 " continuing anyway: %s", disk.iv_name, snode, msg)
10794 feedback_fn("Removing unneeded volumes on the primary node...")
10795 for idx, disk in enumerate(old_disks):
10796 meta = disk.children[1]
10797 self.cfg.SetDiskID(meta, pnode)
10798 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10800 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10801 " continuing anyway: %s", idx, pnode, msg)
10803 def Exec(self, feedback_fn):
10804 """Modifies an instance.
10806 All parameters take effect only at the next restart of the instance.
10809 # Process here the warnings from CheckPrereq, as we don't have a
10810 # feedback_fn there.
10811 for warn in self.warn:
10812 feedback_fn("WARNING: %s" % warn)
10815 instance = self.instance
10817 for disk_op, disk_dict in self.op.disks:
10818 if disk_op == constants.DDM_REMOVE:
10819 # remove the last disk
10820 device = instance.disks.pop()
10821 device_idx = len(instance.disks)
10822 for node, disk in device.ComputeNodeTree(instance.primary_node):
10823 self.cfg.SetDiskID(disk, node)
10824 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10826 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10827 " continuing anyway", device_idx, node, msg)
10828 result.append(("disk/%d" % device_idx, "remove"))
10829 elif disk_op == constants.DDM_ADD:
10831 if instance.disk_template in (constants.DT_FILE,
10832 constants.DT_SHARED_FILE):
10833 file_driver, file_path = instance.disks[0].logical_id
10834 file_path = os.path.dirname(file_path)
10836 file_driver = file_path = None
10837 disk_idx_base = len(instance.disks)
10838 new_disk = _GenerateDiskTemplate(self,
10839 instance.disk_template,
10840 instance.name, instance.primary_node,
10841 instance.secondary_nodes,
10845 disk_idx_base, feedback_fn)[0]
10846 instance.disks.append(new_disk)
10847 info = _GetInstanceInfoText(instance)
10849 logging.info("Creating volume %s for instance %s",
10850 new_disk.iv_name, instance.name)
10851 # Note: this needs to be kept in sync with _CreateDisks
10853 for node in instance.all_nodes:
10854 f_create = node == instance.primary_node
10856 _CreateBlockDev(self, node, instance, new_disk,
10857 f_create, info, f_create)
10858 except errors.OpExecError, err:
10859 self.LogWarning("Failed to create volume %s (%s) on"
10861 new_disk.iv_name, new_disk, node, err)
10862 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10863 (new_disk.size, new_disk.mode)))
10865 # change a given disk
10866 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10867 result.append(("disk.mode/%d" % disk_op,
10868 disk_dict[constants.IDISK_MODE]))
10870 if self.op.disk_template:
10871 r_shut = _ShutdownInstanceDisks(self, instance)
10873 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10874 " proceed with disk template conversion")
10875 mode = (instance.disk_template, self.op.disk_template)
10877 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10879 self.cfg.ReleaseDRBDMinors(instance.name)
10881 result.append(("disk_template", self.op.disk_template))
10884 for nic_op, nic_dict in self.op.nics:
10885 if nic_op == constants.DDM_REMOVE:
10886 # remove the last nic
10887 del instance.nics[-1]
10888 result.append(("nic.%d" % len(instance.nics), "remove"))
10889 elif nic_op == constants.DDM_ADD:
10890 # mac and bridge should be set by now
10891 mac = nic_dict[constants.INIC_MAC]
10892 ip = nic_dict.get(constants.INIC_IP, None)
10893 nicparams = self.nic_pinst[constants.DDM_ADD]
10894 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10895 instance.nics.append(new_nic)
10896 result.append(("nic.%d" % (len(instance.nics) - 1),
10897 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10898 (new_nic.mac, new_nic.ip,
10899 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10900 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10903 for key in (constants.INIC_MAC, constants.INIC_IP):
10904 if key in nic_dict:
10905 setattr(instance.nics[nic_op], key, nic_dict[key])
10906 if nic_op in self.nic_pinst:
10907 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10908 for key, val in nic_dict.iteritems():
10909 result.append(("nic.%s/%d" % (key, nic_op), val))
10912 if self.op.hvparams:
10913 instance.hvparams = self.hv_inst
10914 for key, val in self.op.hvparams.iteritems():
10915 result.append(("hv/%s" % key, val))
10918 if self.op.beparams:
10919 instance.beparams = self.be_inst
10920 for key, val in self.op.beparams.iteritems():
10921 result.append(("be/%s" % key, val))
10924 if self.op.os_name:
10925 instance.os = self.op.os_name
10928 if self.op.osparams:
10929 instance.osparams = self.os_inst
10930 for key, val in self.op.osparams.iteritems():
10931 result.append(("os/%s" % key, val))
10933 self.cfg.Update(instance, feedback_fn)
10937 _DISK_CONVERSIONS = {
10938 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10939 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10943 class LUBackupQuery(NoHooksLU):
10944 """Query the exports list
10949 def ExpandNames(self):
10950 self.needed_locks = {}
10951 self.share_locks[locking.LEVEL_NODE] = 1
10952 if not self.op.nodes:
10953 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10955 self.needed_locks[locking.LEVEL_NODE] = \
10956 _GetWantedNodes(self, self.op.nodes)
10958 def Exec(self, feedback_fn):
10959 """Compute the list of all the exported system images.
10962 @return: a dictionary with the structure node->(export-list)
10963 where export-list is a list of the instances exported on
10967 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10968 rpcresult = self.rpc.call_export_list(self.nodes)
10970 for node in rpcresult:
10971 if rpcresult[node].fail_msg:
10972 result[node] = False
10974 result[node] = rpcresult[node].payload
10979 class LUBackupPrepare(NoHooksLU):
10980 """Prepares an instance for an export and returns useful information.
10985 def ExpandNames(self):
10986 self._ExpandAndLockInstance()
10988 def CheckPrereq(self):
10989 """Check prerequisites.
10992 instance_name = self.op.instance_name
10994 self.instance = self.cfg.GetInstanceInfo(instance_name)
10995 assert self.instance is not None, \
10996 "Cannot retrieve locked instance %s" % self.op.instance_name
10997 _CheckNodeOnline(self, self.instance.primary_node)
10999 self._cds = _GetClusterDomainSecret()
11001 def Exec(self, feedback_fn):
11002 """Prepares an instance for an export.
11005 instance = self.instance
11007 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11008 salt = utils.GenerateSecret(8)
11010 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11011 result = self.rpc.call_x509_cert_create(instance.primary_node,
11012 constants.RIE_CERT_VALIDITY)
11013 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11015 (name, cert_pem) = result.payload
11017 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11021 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11022 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11024 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11030 class LUBackupExport(LogicalUnit):
11031 """Export an instance to an image in the cluster.
11034 HPATH = "instance-export"
11035 HTYPE = constants.HTYPE_INSTANCE
11038 def CheckArguments(self):
11039 """Check the arguments.
11042 self.x509_key_name = self.op.x509_key_name
11043 self.dest_x509_ca_pem = self.op.destination_x509_ca
11045 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11046 if not self.x509_key_name:
11047 raise errors.OpPrereqError("Missing X509 key name for encryption",
11048 errors.ECODE_INVAL)
11050 if not self.dest_x509_ca_pem:
11051 raise errors.OpPrereqError("Missing destination X509 CA",
11052 errors.ECODE_INVAL)
11054 def ExpandNames(self):
11055 self._ExpandAndLockInstance()
11057 # Lock all nodes for local exports
11058 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11059 # FIXME: lock only instance primary and destination node
11061 # Sad but true, for now we have to lock all nodes, as we don't know where
11062 # the previous export might be, and in this LU we search for it and
11063 # remove it from its current node. In the future we could fix this by:
11064 # - making a tasklet to search (share-lock all), then create the
11065 # new one, then one to remove, after
11066 # - removing the removal operation altogether
11067 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11069 def DeclareLocks(self, level):
11070 """Last minute lock declaration."""
11071 # All nodes are locked anyway, so nothing to do here.
11073 def BuildHooksEnv(self):
11074 """Build hooks env.
11076 This will run on the master, primary node and target node.
11080 "EXPORT_MODE": self.op.mode,
11081 "EXPORT_NODE": self.op.target_node,
11082 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11083 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11084 # TODO: Generic function for boolean env variables
11085 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11088 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11092 def BuildHooksNodes(self):
11093 """Build hooks nodes.
11096 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11098 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11099 nl.append(self.op.target_node)
11103 def CheckPrereq(self):
11104 """Check prerequisites.
11106 This checks that the instance and node names are valid.
11109 instance_name = self.op.instance_name
11111 self.instance = self.cfg.GetInstanceInfo(instance_name)
11112 assert self.instance is not None, \
11113 "Cannot retrieve locked instance %s" % self.op.instance_name
11114 _CheckNodeOnline(self, self.instance.primary_node)
11116 if (self.op.remove_instance and self.instance.admin_up and
11117 not self.op.shutdown):
11118 raise errors.OpPrereqError("Can not remove instance without shutting it"
11121 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11122 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11123 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11124 assert self.dst_node is not None
11126 _CheckNodeOnline(self, self.dst_node.name)
11127 _CheckNodeNotDrained(self, self.dst_node.name)
11130 self.dest_disk_info = None
11131 self.dest_x509_ca = None
11133 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11134 self.dst_node = None
11136 if len(self.op.target_node) != len(self.instance.disks):
11137 raise errors.OpPrereqError(("Received destination information for %s"
11138 " disks, but instance %s has %s disks") %
11139 (len(self.op.target_node), instance_name,
11140 len(self.instance.disks)),
11141 errors.ECODE_INVAL)
11143 cds = _GetClusterDomainSecret()
11145 # Check X509 key name
11147 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11148 except (TypeError, ValueError), err:
11149 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11151 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11152 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11153 errors.ECODE_INVAL)
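# Note: the (key_name, hmac_digest, hmac_salt) triple verified above is the
# same structure produced by LUBackupPrepare earlier in this module, which
# signs the generated X509 key name with the cluster domain secret via
# utils.Sha1Hmac; verifying it here with the same secret shows the export was
# prepared on this cluster.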
11155 # Load and verify CA
11157 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11158 except OpenSSL.crypto.Error, err:
11159 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11160 (err, ), errors.ECODE_INVAL)
11162 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11163 if errcode is not None:
11164 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11165 (msg, ), errors.ECODE_INVAL)
11167 self.dest_x509_ca = cert
11169 # Verify target information
11171 for idx, disk_data in enumerate(self.op.target_node):
11173 (host, port, magic) = \
11174 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11175 except errors.GenericError, err:
11176 raise errors.OpPrereqError("Target info for disk %s: %s" %
11177 (idx, err), errors.ECODE_INVAL)
11179 disk_info.append((host, port, magic))
11181 assert len(disk_info) == len(self.op.target_node)
11182 self.dest_disk_info = disk_info
11185 raise errors.ProgrammerError("Unhandled export mode %r" %
11188 # instance disk type verification
11189 # TODO: Implement export support for file-based disks
11190 for disk in self.instance.disks:
11191 if disk.dev_type == constants.LD_FILE:
11192 raise errors.OpPrereqError("Export not supported for instances with"
11193 " file-based disks", errors.ECODE_INVAL)
11195 def _CleanupExports(self, feedback_fn):
11196 """Removes exports of current instance from all other nodes.
11198 If an instance in a cluster with nodes A..D was exported to node C, its
11199 exports will be removed from the nodes A, B and D.
11202 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11204 nodelist = self.cfg.GetNodeList()
11205 nodelist.remove(self.dst_node.name)
11207 # on one-node clusters nodelist will be empty after the removal
11208 # if we proceed, the backup would be removed because OpBackupQuery
11209 # substitutes an empty list with the full cluster node list.
11210 iname = self.instance.name
11212 feedback_fn("Removing old exports for instance %s" % iname)
11213 exportlist = self.rpc.call_export_list(nodelist)
11214 for node in exportlist:
11215 if exportlist[node].fail_msg:
11217 if iname in exportlist[node].payload:
11218 msg = self.rpc.call_export_remove(node, iname).fail_msg
11220 self.LogWarning("Could not remove older export for instance %s"
11221 " on node %s: %s", iname, node, msg)
11223 def Exec(self, feedback_fn):
11224 """Export an instance to an image in the cluster.
11227 assert self.op.mode in constants.EXPORT_MODES
11229 instance = self.instance
11230 src_node = instance.primary_node
11232 if self.op.shutdown:
11233 # shutdown the instance, but not the disks
11234 feedback_fn("Shutting down instance %s" % instance.name)
11235 result = self.rpc.call_instance_shutdown(src_node, instance,
11236 self.op.shutdown_timeout)
11237 # TODO: Maybe ignore failures if ignore_remove_failures is set
11238 result.Raise("Could not shutdown instance %s on"
11239 " node %s" % (instance.name, src_node))
11241 # set the disks ID correctly since call_instance_start needs the
11242 # correct drbd minor to create the symlinks
11243 for disk in instance.disks:
11244 self.cfg.SetDiskID(disk, src_node)
11246 activate_disks = (not instance.admin_up)
11249 # Activate the instance disks if we're exporting a stopped instance
11250 feedback_fn("Activating disks for %s" % instance.name)
11251 _StartInstanceDisks(self, instance, None)
11254 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11257 helper.CreateSnapshots()
11259 if (self.op.shutdown and instance.admin_up and
11260 not self.op.remove_instance):
11261 assert not activate_disks
11262 feedback_fn("Starting instance %s" % instance.name)
11263 result = self.rpc.call_instance_start(src_node, instance,
11265 msg = result.fail_msg
11267 feedback_fn("Failed to start instance: %s" % msg)
11268 _ShutdownInstanceDisks(self, instance)
11269 raise errors.OpExecError("Could not start instance: %s" % msg)
11271 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11272 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11273 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11274 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11275 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11277 (key_name, _, _) = self.x509_key_name
11280 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11283 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11284 key_name, dest_ca_pem,
11289 # Check for backwards compatibility
11290 assert len(dresults) == len(instance.disks)
11291 assert compat.all(isinstance(i, bool) for i in dresults), \
11292 "Not all results are boolean: %r" % dresults
11296 feedback_fn("Deactivating disks for %s" % instance.name)
11297 _ShutdownInstanceDisks(self, instance)
11299 if not (compat.all(dresults) and fin_resu):
11302 failures.append("export finalization")
11303 if not compat.all(dresults):
11304 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11306 failures.append("disk export: disk(s) %s" % fdsk)
11308 raise errors.OpExecError("Export failed, errors in %s" %
11309 utils.CommaJoin(failures))
11311 # At this point, the export was successful, we can cleanup/finish
11313 # Remove instance if requested
11314 if self.op.remove_instance:
11315 feedback_fn("Removing instance %s" % instance.name)
11316 _RemoveInstance(self, feedback_fn, instance,
11317 self.op.ignore_remove_failures)
11319 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11320 self._CleanupExports(feedback_fn)
11322 return fin_resu, dresults
11325 class LUBackupRemove(NoHooksLU):
11326 """Remove exports related to the named instance.
11331 def ExpandNames(self):
11332 self.needed_locks = {}
11333 # We need all nodes to be locked in order for RemoveExport to work, but we
11334 # don't need to lock the instance itself, as nothing will happen to it (and
11335 # we can also remove exports for a removed instance)
11336 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11338 def Exec(self, feedback_fn):
11339 """Remove any export.
11342 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11343 # If the instance was not found we'll try with the name that was passed in.
11344 # This will only work if it was an FQDN, though.
11346 if not instance_name:
11348 instance_name = self.op.instance_name
11350 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11351 exportlist = self.rpc.call_export_list(locked_nodes)
11353 for node in exportlist:
11354 msg = exportlist[node].fail_msg
11356 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11358 if instance_name in exportlist[node].payload:
11360 result = self.rpc.call_export_remove(node, instance_name)
11361 msg = result.fail_msg
11363 logging.error("Could not remove export for instance %s"
11364 " on node %s: %s", instance_name, node, msg)
11366 if fqdn_warn and not found:
11367 feedback_fn("Export not found. If trying to remove an export belonging"
11368 " to a deleted instance please use its Fully Qualified"
11372 class LUGroupAdd(LogicalUnit):
11373 """Logical unit for creating node groups.
11376 HPATH = "group-add"
11377 HTYPE = constants.HTYPE_GROUP
11380 def ExpandNames(self):
11381 # We need the new group's UUID here so that we can create and acquire the
11382 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11383 # that it should not check whether the UUID exists in the configuration.
11384 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11385 self.needed_locks = {}
11386 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11388 def CheckPrereq(self):
11389 """Check prerequisites.
11391 This checks that the given group name is not an existing node group
11396 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11397 except errors.OpPrereqError:
11400 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11401 " node group (UUID: %s)" %
11402 (self.op.group_name, existing_uuid),
11403 errors.ECODE_EXISTS)
11405 if self.op.ndparams:
11406 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11408 def BuildHooksEnv(self):
11409 """Build hooks env.
11413 "GROUP_NAME": self.op.group_name,
11416 def BuildHooksNodes(self):
11417 """Build hooks nodes.
11420 mn = self.cfg.GetMasterNode()
11421 return ([mn], [mn])
11423 def Exec(self, feedback_fn):
11424 """Add the node group to the cluster.
11427 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11428 uuid=self.group_uuid,
11429 alloc_policy=self.op.alloc_policy,
11430 ndparams=self.op.ndparams)
11432 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11433 del self.remove_locks[locking.LEVEL_NODEGROUP]
11436 class LUGroupAssignNodes(NoHooksLU):
11437 """Logical unit for assigning nodes to groups.
11442 def ExpandNames(self):
11443 # These raise errors.OpPrereqError on their own:
11444 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11445 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11447 # We want to lock all the affected nodes and groups. We have readily
11448 # available the list of nodes, and the *destination* group. To gather the
11449 # list of "source" groups, we need to fetch node information later on.
11450 self.needed_locks = {
11451 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11452 locking.LEVEL_NODE: self.op.nodes,
11455 def DeclareLocks(self, level):
11456 if level == locking.LEVEL_NODEGROUP:
11457 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11459 # Try to get all affected nodes' groups without having the group or node
11460 # lock yet. Needs verification later in the code flow.
11461 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11463 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11465 def CheckPrereq(self):
11466 """Check prerequisites.
11469 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11470 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11471 frozenset(self.op.nodes))
11473 expected_locks = (set([self.group_uuid]) |
11474 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11475 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11476 if actual_locks != expected_locks:
11477 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11478 " current groups are '%s', used to be '%s'" %
11479 (utils.CommaJoin(expected_locks),
11480 utils.CommaJoin(actual_locks)))
11482 self.node_data = self.cfg.GetAllNodesInfo()
11483 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11484 instance_data = self.cfg.GetAllInstancesInfo()
11486 if self.group is None:
11487 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11488 (self.op.group_name, self.group_uuid))
11490 (new_splits, previous_splits) = \
11491 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11492 for node in self.op.nodes],
11493 self.node_data, instance_data)
11496 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11498 if not self.op.force:
11499 raise errors.OpExecError("The following instances get split by this"
11500 " change and --force was not given: %s" %
11503 self.LogWarning("This operation will split the following instances: %s",
11506 if previous_splits:
11507 self.LogWarning("In addition, these already-split instances continue"
11508 " to be split across groups: %s",
11509 utils.CommaJoin(utils.NiceSort(previous_splits)))
11511 def Exec(self, feedback_fn):
11512 """Assign nodes to a new group.
11515 for node in self.op.nodes:
11516 self.node_data[node].group = self.group_uuid
11518 # FIXME: Depends on side-effects of modifying the result of
11519 # C{cfg.GetAllNodesInfo}
11521 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11524 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11525 """Check for split instances after a node assignment.
11527 This method considers a series of node assignments as an atomic operation,
11528 and returns information about split instances after applying the set of
11531 In particular, it returns information about newly split instances, and
11532 instances that were already split, and remain so after the change.
11534 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
11537 @type changes: list of (node_name, new_group_uuid) pairs.
11538 @param changes: list of node assignments to consider.
11539 @param node_data: a dict with data for all nodes
11540 @param instance_data: a dict with all instances to consider
11541 @rtype: a two-tuple
11542 @return: a list of instances that were previously okay and become split as a
11543 consequence of this change, and a list of instances that were previously
11544 split and that this change does not fix.
11547 changed_nodes = dict((node, group) for node, group in changes
11548 if node_data[node].group != group)
11550 all_split_instances = set()
11551 previously_split_instances = set()
11553 def InstanceNodes(instance):
11554 return [instance.primary_node] + list(instance.secondary_nodes)
11556 for inst in instance_data.values():
11557 if inst.disk_template not in constants.DTS_INT_MIRROR:
11560 instance_nodes = InstanceNodes(inst)
11562 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11563 previously_split_instances.add(inst.name)
11565 if len(set(changed_nodes.get(node, node_data[node].group)
11566 for node in instance_nodes)) > 1:
11567 all_split_instances.add(inst.name)
11569 return (list(all_split_instances - previously_split_instances),
11570 list(previously_split_instances & all_split_instances))
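# A small, hypothetical example of the split computation above (node and
# instance names are invented for illustration): assume instance "inst1" uses
# DRBD with primary "node1" (group A) and secondary "node2" (group A). The
# assignment [("node2", "B")] would leave its nodes in two different groups,
# so "inst1" appears in the first returned list (newly split). Had it already
# spanned groups A and B before the change and remained so afterwards, it
# would instead appear in the second list (previously split and still split).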
11573 class _GroupQuery(_QueryBase):
11574 FIELDS = query.GROUP_FIELDS
11576 def ExpandNames(self, lu):
11577 lu.needed_locks = {}
11579 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11580 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11583 self.wanted = [name_to_uuid[name]
11584 for name in utils.NiceSort(name_to_uuid.keys())]
11586 # Accept names to be either names or UUIDs.
11589 all_uuid = frozenset(self._all_groups.keys())
11591 for name in self.names:
11592 if name in all_uuid:
11593 self.wanted.append(name)
11594 elif name in name_to_uuid:
11595 self.wanted.append(name_to_uuid[name])
11597 missing.append(name)
11600 raise errors.OpPrereqError("Some groups do not exist: %s" %
11601 utils.CommaJoin(missing),
11602 errors.ECODE_NOENT)
11604 def DeclareLocks(self, lu, level):
11607 def _GetQueryData(self, lu):
11608 """Computes the list of node groups and their attributes.
11611 do_nodes = query.GQ_NODE in self.requested_data
11612 do_instances = query.GQ_INST in self.requested_data
11614 group_to_nodes = None
11615 group_to_instances = None
11617 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11618 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11619 # latter GetAllInstancesInfo() is not enough, for we have to go through
11620 # instance->node. Hence, we will need to process nodes even if we only need
11621 # instance information.
11622 if do_nodes or do_instances:
11623 all_nodes = lu.cfg.GetAllNodesInfo()
11624 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11627 for node in all_nodes.values():
11628 if node.group in group_to_nodes:
11629 group_to_nodes[node.group].append(node.name)
11630 node_to_group[node.name] = node.group
11633 all_instances = lu.cfg.GetAllInstancesInfo()
11634 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11636 for instance in all_instances.values():
11637 node = instance.primary_node
11638 if node in node_to_group:
11639 group_to_instances[node_to_group[node]].append(instance.name)
11642 # Do not pass on node information if it was not requested.
11643 group_to_nodes = None
11645 return query.GroupQueryData([self._all_groups[uuid]
11646 for uuid in self.wanted],
11647 group_to_nodes, group_to_instances)
11650 class LUGroupQuery(NoHooksLU):
11651 """Logical unit for querying node groups.
11656 def CheckArguments(self):
11657 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11658 self.op.output_fields, False)
11660 def ExpandNames(self):
11661 self.gq.ExpandNames(self)
11663 def Exec(self, feedback_fn):
11664 return self.gq.OldStyleQuery(self)
11667 class LUGroupSetParams(LogicalUnit):
11668 """Modifies the parameters of a node group.
11671 HPATH = "group-modify"
11672 HTYPE = constants.HTYPE_GROUP
11675 def CheckArguments(self):
11678 self.op.alloc_policy,
11681 if all_changes.count(None) == len(all_changes):
11682 raise errors.OpPrereqError("Please pass at least one modification",
11683 errors.ECODE_INVAL)
11685 def ExpandNames(self):
11686 # This raises errors.OpPrereqError on its own:
11687 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11689 self.needed_locks = {
11690 locking.LEVEL_NODEGROUP: [self.group_uuid],
11693 def CheckPrereq(self):
11694 """Check prerequisites.
11697 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11699 if self.group is None:
11700 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11701 (self.op.group_name, self.group_uuid))
11703 if self.op.ndparams:
11704 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11705 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11706 self.new_ndparams = new_ndparams
11708 def BuildHooksEnv(self):
11709 """Build hooks env.
11713 "GROUP_NAME": self.op.group_name,
11714 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11717 def BuildHooksNodes(self):
11718 """Build hooks nodes.
11721 mn = self.cfg.GetMasterNode()
11722 return ([mn], [mn])
11724 def Exec(self, feedback_fn):
11725 """Modifies the node group.
11730 if self.op.ndparams:
11731 self.group.ndparams = self.new_ndparams
11732 result.append(("ndparams", str(self.group.ndparams)))
11734 if self.op.alloc_policy:
11735 self.group.alloc_policy = self.op.alloc_policy
11737 self.cfg.Update(self.group, feedback_fn)
11742 class LUGroupRemove(LogicalUnit):
11743 HPATH = "group-remove"
11744 HTYPE = constants.HTYPE_GROUP
11747 def ExpandNames(self):
11748 # This raises errors.OpPrereqError on its own:
11749 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11750 self.needed_locks = {
11751 locking.LEVEL_NODEGROUP: [self.group_uuid],
11754 def CheckPrereq(self):
11755 """Check prerequisites.
11757 This checks that the given group name exists as a node group, that it is
11758 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
11762 # Verify that the group is empty.
11763 group_nodes = [node.name
11764 for node in self.cfg.GetAllNodesInfo().values()
11765 if node.group == self.group_uuid]
11768 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11770 (self.op.group_name,
11771 utils.CommaJoin(utils.NiceSort(group_nodes))),
11772 errors.ECODE_STATE)
11774 # Verify the cluster would not be left group-less.
11775 if len(self.cfg.GetNodeGroupList()) == 1:
11776 raise errors.OpPrereqError("Group '%s' is the only group,"
11777 " cannot be removed" %
11778 self.op.group_name,
11779 errors.ECODE_STATE)
11781 def BuildHooksEnv(self):
11782 """Build hooks env.
11786 "GROUP_NAME": self.op.group_name,
11789 def BuildHooksNodes(self):
11790 """Build hooks nodes.
11793 mn = self.cfg.GetMasterNode()
11794 return ([mn], [mn])
11796 def Exec(self, feedback_fn):
11797 """Remove the node group.
11801 self.cfg.RemoveNodeGroup(self.group_uuid)
11802 except errors.ConfigurationError:
11803 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11804 (self.op.group_name, self.group_uuid))
11806 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11809 class LUGroupRename(LogicalUnit):
11810 HPATH = "group-rename"
11811 HTYPE = constants.HTYPE_GROUP
11814 def ExpandNames(self):
11815 # This raises errors.OpPrereqError on its own:
11816 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11818 self.needed_locks = {
11819 locking.LEVEL_NODEGROUP: [self.group_uuid],
11822 def CheckPrereq(self):
11823 """Check prerequisites.
11825 Ensures requested new name is not yet used.
11829 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11830 except errors.OpPrereqError:
11833 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11834 " node group (UUID: %s)" %
11835 (self.op.new_name, new_name_uuid),
11836 errors.ECODE_EXISTS)
11838 def BuildHooksEnv(self):
11839 """Build hooks env.
11843 "OLD_NAME": self.op.group_name,
11844 "NEW_NAME": self.op.new_name,
11847 def BuildHooksNodes(self):
11848 """Build hooks nodes.
11851 mn = self.cfg.GetMasterNode()
11853 all_nodes = self.cfg.GetAllNodesInfo()
11854 all_nodes.pop(mn, None)
11857 run_nodes.extend(node.name for node in all_nodes.values()
11858 if node.group == self.group_uuid)
11860 return (run_nodes, run_nodes)
11862 def Exec(self, feedback_fn):
11863 """Rename the node group.
11866 group = self.cfg.GetNodeGroup(self.group_uuid)
11869 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11870 (self.op.group_name, self.group_uuid))
11872 group.name = self.op.new_name
11873 self.cfg.Update(group, feedback_fn)
11875 return self.op.new_name
11878 class LUGroupEvacuate(LogicalUnit):
11879 HPATH = "group-evacuate"
11880 HTYPE = constants.HTYPE_GROUP
11883 def ExpandNames(self):
11884 # This raises errors.OpPrereqError on its own:
11885 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11887 if self.op.target_groups:
11888 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11889 self.op.target_groups)
11891 self.req_target_uuids = []
11893 if self.group_uuid in self.req_target_uuids:
11894 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11895 " as a target group (targets are %s)" %
11897 utils.CommaJoin(self.req_target_uuids)),
11898 errors.ECODE_INVAL)
11900 if not self.op.iallocator:
11901 # Use default iallocator
11902 self.op.iallocator = self.cfg.GetDefaultIAllocator()
11904 if not self.op.iallocator:
11905 raise errors.OpPrereqError("No iallocator was specified, neither in the"
11906 " opcode nor as a cluster-wide default",
11907 errors.ECODE_INVAL)
11909 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11910 self.needed_locks = {
11911 locking.LEVEL_INSTANCE: [],
11912 locking.LEVEL_NODEGROUP: [],
11913 locking.LEVEL_NODE: [],
11916 def DeclareLocks(self, level):
11917 if level == locking.LEVEL_INSTANCE:
11918 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11920 # Lock instances optimistically, needs verification once node and group
11921 # locks have been acquired
11922 self.needed_locks[locking.LEVEL_INSTANCE] = \
11923 self.cfg.GetNodeGroupInstances(self.group_uuid)
11925 elif level == locking.LEVEL_NODEGROUP:
11926 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11928 if self.req_target_uuids:
11929 lock_groups = set([self.group_uuid] + self.req_target_uuids)
11931 # Lock all groups used by instances optimistically; this requires going
11932 # via the node before it's locked, requiring verification later on
11933 lock_groups.update(group_uuid
11934 for instance_name in
11935 self.glm.list_owned(locking.LEVEL_INSTANCE)
11937 self.cfg.GetInstanceNodeGroups(instance_name))
11939 # No target groups, need to lock all of them
11940 lock_groups = locking.ALL_SET
11942 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11944 elif level == locking.LEVEL_NODE:
11945 # This will only lock the nodes in the group to be evacuated which
11946 # contain actual instances
11947 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11948 self._LockInstancesNodes()
11950 # Lock all nodes in group to be evacuated
11951 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
11952 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
11953 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11955 def CheckPrereq(self):
11956 owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
11957 owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
11958 owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
11960 assert owned_groups.issuperset(self.req_target_uuids)
11961 assert self.group_uuid in owned_groups
11963 # Check if locked instances are still correct
11964 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
11965 if owned_instances != wanted_instances:
11966 raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
11967 " changed since locks were acquired, wanted"
11968 " %s, have %s; retry the operation" %
11970 utils.CommaJoin(wanted_instances),
11971 utils.CommaJoin(owned_instances)),
11972 errors.ECODE_STATE)
11974 # Get instance information
11975 self.instances = dict((name, self.cfg.GetInstanceInfo(name))
11976 for name in owned_instances)
11978 # Check if node groups for locked instances are still correct
11979 for instance_name in owned_instances:
11980 inst = self.instances[instance_name]
11981 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
11982 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
11983 assert owned_nodes.issuperset(inst.all_nodes), \
11984 "Instance %s's nodes changed while we kept the lock" % instance_name
11986 inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
11987 if not owned_groups.issuperset(inst_groups):
11988 raise errors.OpPrereqError("Instance's node groups changed since locks"
11989 " were acquired, current groups are '%s',"
11990 " owning groups '%s'; retry the operation" %
11991 (utils.CommaJoin(inst_groups),
11992 utils.CommaJoin(owned_groups)),
11993 errors.ECODE_STATE)
11995 if self.req_target_uuids:
11996 # User requested specific target groups
11997 self.target_uuids = self.req_target_uuids
11999 # All groups except the one to be evacuated are potential targets
12000 self.target_uuids = [group_uuid for group_uuid in owned_groups
12001 if group_uuid != self.group_uuid]
12003 if not self.target_uuids:
12004 raise errors.OpExecError("There are no possible target groups")
12006 def BuildHooksEnv(self):
12007 """Build hooks env.
12011 "GROUP_NAME": self.op.group_name,
12012 "TARGET_GROUPS": " ".join(self.target_uuids),
12015 def BuildHooksNodes(self):
12016 """Build hooks nodes.
12019 mn = self.cfg.GetMasterNode()
12021 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12023 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12025 return (run_nodes, run_nodes)
12027 def Exec(self, feedback_fn):
12028 instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12030 assert self.group_uuid not in self.target_uuids
12032 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12033 instances=instances, target_groups=self.target_uuids)
12035 ial.Run(self.op.iallocator)
12037 if not ial.success:
12038 raise errors.OpPrereqError("Can't compute group evacuation using"
12039 " iallocator '%s': %s" %
12040 (self.op.iallocator, ial.info),
12041 errors.ECODE_NORES)
12043 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12045 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12046 len(jobs), self.op.group_name)
12048 return ResultWithJobs(jobs)
12051 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12052 """Generic tags LU.
12054 This is an abstract class which is the parent of all the other tags LUs.
12057 def ExpandNames(self):
12058 self.group_uuid = None
12059 self.needed_locks = {}
12060 if self.op.kind == constants.TAG_NODE:
12061 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12062 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12063 elif self.op.kind == constants.TAG_INSTANCE:
12064 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12065 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12066 elif self.op.kind == constants.TAG_NODEGROUP:
12067 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12069 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12070 # not possible to acquire the BGL based on opcode parameters)
12072 def CheckPrereq(self):
12073 """Check prerequisites.
12076 if self.op.kind == constants.TAG_CLUSTER:
12077 self.target = self.cfg.GetClusterInfo()
12078 elif self.op.kind == constants.TAG_NODE:
12079 self.target = self.cfg.GetNodeInfo(self.op.name)
12080 elif self.op.kind == constants.TAG_INSTANCE:
12081 self.target = self.cfg.GetInstanceInfo(self.op.name)
12082 elif self.op.kind == constants.TAG_NODEGROUP:
12083 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12085 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12086 str(self.op.kind), errors.ECODE_INVAL)
12089 class LUTagsGet(TagsLU):
12090 """Returns the tags of a given object.
12095 def ExpandNames(self):
12096 TagsLU.ExpandNames(self)
12098 # Share locks as this is only a read operation
12099 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
12101 def Exec(self, feedback_fn):
12102 """Returns the tag list.
12105 return list(self.target.GetTags())
12108 class LUTagsSearch(NoHooksLU):
12109 """Searches the tags for a given pattern.
12114 def ExpandNames(self):
12115 self.needed_locks = {}
12117 def CheckPrereq(self):
12118 """Check prerequisites.
12120 This checks the pattern passed for validity by compiling it.
12124 self.re = re.compile(self.op.pattern)
12125 except re.error, err:
12126 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12127 (self.op.pattern, err), errors.ECODE_INVAL)
12129 def Exec(self, feedback_fn):
12130 """Returns the tag list.
12134 tgts = [("/cluster", cfg.GetClusterInfo())]
12135 ilist = cfg.GetAllInstancesInfo().values()
12136 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12137 nlist = cfg.GetAllNodesInfo().values()
12138 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12139 tgts.extend(("/nodegroup/%s" % n.name, n)
12140 for n in cfg.GetAllNodeGroupsInfo().values())
12142 for path, target in tgts:
12143 for tag in target.GetTags():
12144 if self.re.search(tag):
12145 results.append((path, tag))
12149 class LUTagsSet(TagsLU):
12150 """Sets a tag on a given object.
12155 def CheckPrereq(self):
12156 """Check prerequisites.
12158 This checks the type and length of the tag name and value.
12161 TagsLU.CheckPrereq(self)
12162 for tag in self.op.tags:
12163 objects.TaggableObject.ValidateTag(tag)
12165 def Exec(self, feedback_fn):
12170 for tag in self.op.tags:
12171 self.target.AddTag(tag)
12172 except errors.TagError, err:
12173 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12174 self.cfg.Update(self.target, feedback_fn)
12177 class LUTagsDel(TagsLU):
12178 """Delete a list of tags from a given object.
12183 def CheckPrereq(self):
12184 """Check prerequisites.
12186 This checks that we have the given tag.
12189 TagsLU.CheckPrereq(self)
12190 for tag in self.op.tags:
12191 objects.TaggableObject.ValidateTag(tag)
12192 del_tags = frozenset(self.op.tags)
12193 cur_tags = self.target.GetTags()
12195 diff_tags = del_tags - cur_tags
12197 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12198 raise errors.OpPrereqError("Tag(s) %s not found" %
12199 (utils.CommaJoin(diff_names), ),
12200 errors.ECODE_NOENT)
12202 def Exec(self, feedback_fn):
12203 """Remove the tag from the object.
12206 for tag in self.op.tags:
12207 self.target.RemoveTag(tag)
12208 self.cfg.Update(self.target, feedback_fn)
12211 class LUTestDelay(NoHooksLU):
12212 """Sleep for a specified amount of time.
12214 This LU sleeps on the master and/or nodes for a specified amount of time.
12220 def ExpandNames(self):
12221 """Expand names and set required locks.
12223 This expands the node list, if any.
12226 self.needed_locks = {}
12227 if self.op.on_nodes:
12228 # _GetWantedNodes can be used here, but is not always appropriate to use
12229 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12230 # more information.
12231 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12232 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12234 def _TestDelay(self):
12235 """Do the actual sleep.
12238 if self.op.on_master:
12239 if not utils.TestDelay(self.op.duration):
12240 raise errors.OpExecError("Error during master delay test")
12241 if self.op.on_nodes:
12242 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12243 for node, node_result in result.items():
12244 node_result.Raise("Failure during rpc call to node %s" % node)
12246 def Exec(self, feedback_fn):
12247 """Execute the test delay opcode, with the wanted repetitions.
12250 if self.op.repeat == 0:
12253 top_value = self.op.repeat - 1
12254 for i in range(self.op.repeat):
12255 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
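# A minimal sketch of how this LU is typically driven (assuming the opcode
# class is opcodes.OpTestDelay; the fields below are the ones referenced by
# the code above):
#
#   op = opcodes.OpTestDelay(duration=5.0, on_master=True,
#                            on_nodes=[], repeat=0)
#   # submitted through the job queue, this sleeps five seconds on the master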
12259 class LUTestJqueue(NoHooksLU):
12260 """Utility LU to test some aspects of the job queue.
12265 # Must be lower than default timeout for WaitForJobChange to see whether it
12266 # notices changed jobs
12267 _CLIENT_CONNECT_TIMEOUT = 20.0
12268 _CLIENT_CONFIRM_TIMEOUT = 60.0
12271 def _NotifyUsingSocket(cls, cb, errcls):
12272 """Opens a Unix socket and waits for another program to connect.
12275 @param cb: Callback to send socket name to client
12276 @type errcls: class
12277 @param errcls: Exception class to use for errors
12280 # Using a temporary directory as there's no easy way to create temporary
12281 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
12283 tmpdir = tempfile.mkdtemp()
12285 tmpsock = utils.PathJoin(tmpdir, "sock")
12287 logging.debug("Creating temporary socket at %s", tmpsock)
12288 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12293 # Send details to client
12296 # Wait for client to connect before continuing
12297 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12299 (conn, _) = sock.accept()
12300 except socket.error, err:
12301 raise errcls("Client didn't connect in time (%s)" % err)
12305 # Remove as soon as client is connected
12306 shutil.rmtree(tmpdir)
12308 # Wait for client to close
12311 # pylint: disable-msg=E1101
12312 # Instance of '_socketobject' has no ... member
12313 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12315 except socket.error, err:
12316 raise errcls("Client failed to confirm notification (%s)" % err)
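# Rough outline of the client side of this handshake (a sketch, not the
# actual test client): the callback above delivers the socket path to the
# client through the job's log messages; the client then connects to that
# Unix socket within _CLIENT_CONNECT_TIMEOUT, and finally closes the
# connection (observed via the timeout set on "conn") to confirm it has seen
# the notification.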
12320 def _SendNotification(self, test, arg, sockname):
12321 """Sends a notification to the client.
12324 @param test: Test name
12325 @param arg: Test argument (depends on test)
12326 @type sockname: string
12327 @param sockname: Socket path
12330 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12332 def _Notify(self, prereq, test, arg):
12333 """Notifies the client of a test.
12336 @param prereq: Whether this is a prereq-phase test
12338 @param test: Test name
12339 @param arg: Test argument (depends on test)
12343 errcls = errors.OpPrereqError
12345 errcls = errors.OpExecError
12347 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12351 def CheckArguments(self):
12352 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12353 self.expandnames_calls = 0
12355 def ExpandNames(self):
12356 checkargs_calls = getattr(self, "checkargs_calls", 0)
12357 if checkargs_calls < 1:
12358 raise errors.ProgrammerError("CheckArguments was not called")
12360 self.expandnames_calls += 1
12362 if self.op.notify_waitlock:
12363 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12365 self.LogInfo("Expanding names")
12367 # Get lock on master node (just to get a lock, not for a particular reason)
12368 self.needed_locks = {
12369 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12372 def Exec(self, feedback_fn):
12373 if self.expandnames_calls < 1:
12374 raise errors.ProgrammerError("ExpandNames was not called")
12376 if self.op.notify_exec:
12377 self._Notify(False, constants.JQT_EXEC, None)
12379 self.LogInfo("Executing")
12381 if self.op.log_messages:
12382 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12383 for idx, msg in enumerate(self.op.log_messages):
12384 self.LogInfo("Sending log message %s", idx + 1)
12385 feedback_fn(constants.JQT_MSGPREFIX + msg)
12386 # Report how many test messages have been sent
12387 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12390 raise errors.OpExecError("Opcode failure was requested")
12395 class IAllocator(object):
12396 """IAllocator framework.
12398 An IAllocator instance has four sets of attributes:
12399 - cfg that is needed to query the cluster
12400 - input data (all members of the _KEYS class attribute are required)
12401 - four buffer attributes (in|out_data|text), that represent the
12402 input (to the external script) in text and data structure format,
12403 and the output from it, again in two formats
12404 - the result variables from the script (success, info, nodes) for easy usage
12408 # pylint: disable-msg=R0902
12409 # lots of instance attributes
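# A condensed usage sketch, mirroring how the LUs in this module drive the
# class (see for instance LUGroupEvacuate.Exec above):
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instances, target_groups=target_uuids)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError(...)
#   # ial.result then holds the validated allocator output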
12411 def __init__(self, cfg, rpc, mode, **kwargs):
12414 # init buffer variables
12415 self.in_text = self.out_text = self.in_data = self.out_data = None
12416 # init all input fields so that pylint is happy
12418 self.memory = self.disks = self.disk_template = None
12419 self.os = self.tags = self.nics = self.vcpus = None
12420 self.hypervisor = None
12421 self.relocate_from = None
12423 self.evac_nodes = None
12424 self.instances = None
12425 self.evac_mode = None
12426 self.target_groups = []
12428 self.required_nodes = None
12429 # init result fields
12430 self.success = self.info = self.result = None
12433 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12435 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12436 " IAllocator" % self.mode)
12438 keyset = [n for (n, _) in keydata]
12441 if key not in keyset:
12442 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12443 " IAllocator" % key)
12444 setattr(self, key, kwargs[key])
12447 if key not in kwargs:
12448 raise errors.ProgrammerError("Missing input parameter '%s' to"
12449 " IAllocator" % key)
12450 self._BuildInputData(compat.partial(fn, self), keydata)
12452 def _ComputeClusterData(self):
12453 """Compute the generic allocator input data.
12455 This is the data that is independent of the actual operation.
12459 cluster_info = cfg.GetClusterInfo()
12462 "version": constants.IALLOCATOR_VERSION,
12463 "cluster_name": cfg.GetClusterName(),
12464 "cluster_tags": list(cluster_info.GetTags()),
12465 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12466 # we don't have job IDs
12468 ninfo = cfg.GetAllNodesInfo()
12469 iinfo = cfg.GetAllInstancesInfo().values()
12470 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12473 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12475 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12476 hypervisor_name = self.hypervisor
12477 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12478 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12480 hypervisor_name = cluster_info.enabled_hypervisors[0]
12482 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12485 self.rpc.call_all_instances_info(node_list,
12486 cluster_info.enabled_hypervisors)
12488 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12490 config_ndata = self._ComputeBasicNodeData(ninfo)
12491 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12492 i_list, config_ndata)
12493 assert len(data["nodes"]) == len(ninfo), \
12494 "Incomplete node data computed"
12496 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12498 self.in_data = data
12501 def _ComputeNodeGroupData(cfg):
12502 """Compute node groups data.
12505 ng = dict((guuid, {
12506 "name": gdata.name,
12507 "alloc_policy": gdata.alloc_policy,
12509 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12514 def _ComputeBasicNodeData(node_cfg):
12515 """Compute global node data.
12518 @returns: a dict of name: (node dict, node config)
12521 # fill in static (config-based) values
12522 node_results = dict((ninfo.name, {
12523 "tags": list(ninfo.GetTags()),
12524 "primary_ip": ninfo.primary_ip,
12525 "secondary_ip": ninfo.secondary_ip,
12526 "offline": ninfo.offline,
12527 "drained": ninfo.drained,
12528 "master_candidate": ninfo.master_candidate,
12529 "group": ninfo.group,
12530 "master_capable": ninfo.master_capable,
12531 "vm_capable": ninfo.vm_capable,
12533 for ninfo in node_cfg.values())
12535 return node_results
12538 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12540 """Compute global node data.
12542 @param node_results: the basic node structures as filled from the config
12545 # make a copy of the current dict
12546 node_results = dict(node_results)
12547 for nname, nresult in node_data.items():
12548 assert nname in node_results, "Missing basic data for node %s" % nname
12549 ninfo = node_cfg[nname]
12551 if not (ninfo.offline or ninfo.drained):
12552 nresult.Raise("Can't get data for node %s" % nname)
12553 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12555 remote_info = nresult.payload
12557 for attr in ['memory_total', 'memory_free', 'memory_dom0',
12558 'vg_size', 'vg_free', 'cpu_total']:
12559 if attr not in remote_info:
12560 raise errors.OpExecError("Node '%s' didn't return attribute"
12561 " '%s'" % (nname, attr))
12562 if not isinstance(remote_info[attr], int):
12563 raise errors.OpExecError("Node '%s' returned invalid value"
12565 (nname, attr, remote_info[attr]))
12566 # compute memory used by primary instances
12567 i_p_mem = i_p_up_mem = 0
12568 for iinfo, beinfo in i_list:
12569 if iinfo.primary_node == nname:
12570 i_p_mem += beinfo[constants.BE_MEMORY]
12571 if iinfo.name not in node_iinfo[nname].payload:
12574 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12575 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12576 remote_info['memory_free'] -= max(0, i_mem_diff)
12579 i_p_up_mem += beinfo[constants.BE_MEMORY]
12581 # compute memory used by instances
12583 "total_memory": remote_info['memory_total'],
12584 "reserved_memory": remote_info['memory_dom0'],
12585 "free_memory": remote_info['memory_free'],
12586 "total_disk": remote_info['vg_size'],
12587 "free_disk": remote_info['vg_free'],
12588 "total_cpus": remote_info['cpu_total'],
12589 "i_pri_memory": i_p_mem,
12590 "i_pri_up_memory": i_p_up_mem,
12592 pnr_dyn.update(node_results[nname])
12593 node_results[nname] = pnr_dyn
12595 return node_results
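# Worked example of the free-memory adjustment above (numbers invented for
# illustration): if a primary instance has BE_MEMORY = 1024 but the
# hypervisor reports it currently using 768, the node's reported
# "memory_free" is reduced by max(0, 1024 - 768) = 256, so the allocator sees
# the memory as if the instance were consuming its full configured amount.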
12598 def _ComputeInstanceData(cluster_info, i_list):
12599 """Compute global instance data.
12603 for iinfo, beinfo in i_list:
12605 for nic in iinfo.nics:
12606 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12610 "mode": filled_params[constants.NIC_MODE],
12611 "link": filled_params[constants.NIC_LINK],
12613 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12614 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12615 nic_data.append(nic_dict)
12617 "tags": list(iinfo.GetTags()),
12618 "admin_up": iinfo.admin_up,
12619 "vcpus": beinfo[constants.BE_VCPUS],
12620 "memory": beinfo[constants.BE_MEMORY],
12622 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12624 "disks": [{constants.IDISK_SIZE: dsk.size,
12625 constants.IDISK_MODE: dsk.mode}
12626 for dsk in iinfo.disks],
12627 "disk_template": iinfo.disk_template,
12628 "hypervisor": iinfo.hypervisor,
12630 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12632 instance_data[iinfo.name] = pir
12634 return instance_data
12636 def _AddNewInstance(self):
12637 """Add new instance data to allocator structure.
12639 This, in combination with _ComputeClusterData, will create the
12640 correct structure needed as input for the allocator.
12642 The checks for the completeness of the opcode must have already been done.
12646 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12648 if self.disk_template in constants.DTS_INT_MIRROR:
12649 self.required_nodes = 2
12651 self.required_nodes = 1
12655 "disk_template": self.disk_template,
12658 "vcpus": self.vcpus,
12659 "memory": self.memory,
12660 "disks": self.disks,
12661 "disk_space_total": disk_space,
12663 "required_nodes": self.required_nodes,
12664 "hypervisor": self.hypervisor,
12669 def _AddRelocateInstance(self):
12670 """Add relocate instance data to allocator structure.
12672 This, in combination with _ComputeClusterData, will create the
12673 correct structure needed as input for the allocator.
12675 The checks for the completeness of the opcode must have already been done.
12679 instance = self.cfg.GetInstanceInfo(self.name)
12680 if instance is None:
12681 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12682 " IAllocator" % self.name)
12684 if instance.disk_template not in constants.DTS_MIRRORED:
12685 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12686 errors.ECODE_INVAL)
12688 if instance.disk_template in constants.DTS_INT_MIRROR and \
12689 len(instance.secondary_nodes) != 1:
12690 raise errors.OpPrereqError("Instance has not exactly one secondary node",
12691 errors.ECODE_STATE)
12693 self.required_nodes = 1
12694 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12695 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12699 "disk_space_total": disk_space,
12700 "required_nodes": self.required_nodes,
12701 "relocate_from": self.relocate_from,
12705 def _AddEvacuateNodes(self):
12706 """Add evacuate nodes data to allocator structure.
12710 "evac_nodes": self.evac_nodes
12714 def _AddNodeEvacuate(self):
12715 """Get data for node-evacuate requests.
12719 "instances": self.instances,
12720 "evac_mode": self.evac_mode,
12723 def _AddChangeGroup(self):
12724 """Get data for node-evacuate requests.
12728 "instances": self.instances,
12729 "target_groups": self.target_groups,
12732 def _BuildInputData(self, fn, keydata):
12733 """Build input data structures.
12736 self._ComputeClusterData()
12739 request["type"] = self.mode
12740 for keyname, keytype in keydata:
12741 if keyname not in request:
12742 raise errors.ProgrammerError("Request parameter %s is missing" %
12744 val = request[keyname]
12745 if not keytype(val):
12746 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12747 " validation, value %s, expected"
12748 " type %s" % (keyname, val, keytype))
12749 self.in_data["request"] = request
12751 self.in_text = serializer.Dump(self.in_data)
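# Illustrative shape of the serialized input (a sketch; the exact layout is
# defined by the iallocator protocol, not reproduced here): self.in_data
# contains the cluster-wide sections ("nodes", "instances", "nodegroups",
# ...) computed by _ComputeClusterData, plus a "request" dict holding "type"
# (the mode) and the mode-specific keys validated against keydata above.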
12753 _STRING_LIST = ht.TListOf(ht.TString)
12754 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12755 # pylint: disable-msg=E1101
12756 # Class '...' has no 'OP_ID' member
12757 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12758 opcodes.OpInstanceMigrate.OP_ID,
12759 opcodes.OpInstanceReplaceDisks.OP_ID])
12763 ht.TListOf(ht.TAnd(ht.TIsLength(3),
12764 ht.TItems([ht.TNonEmptyString,
12765 ht.TNonEmptyString,
12766 ht.TListOf(ht.TNonEmptyString),
12769 ht.TListOf(ht.TAnd(ht.TIsLength(2),
12770 ht.TItems([ht.TNonEmptyString,
12773 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
12774 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
12777 constants.IALLOCATOR_MODE_ALLOC:
12780 ("name", ht.TString),
12781 ("memory", ht.TInt),
12782 ("disks", ht.TListOf(ht.TDict)),
12783 ("disk_template", ht.TString),
12784 ("os", ht.TString),
12785 ("tags", _STRING_LIST),
12786 ("nics", ht.TListOf(ht.TDict)),
12787 ("vcpus", ht.TInt),
12788 ("hypervisor", ht.TString),
12790 constants.IALLOCATOR_MODE_RELOC:
12791 (_AddRelocateInstance,
12792 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12794 constants.IALLOCATOR_MODE_MEVAC:
12795 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12796 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12797 constants.IALLOCATOR_MODE_NODE_EVAC:
12798 (_AddNodeEvacuate, [
12799 ("instances", _STRING_LIST),
12800 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12802 constants.IALLOCATOR_MODE_CHG_GROUP:
12803 (_AddChangeGroup, [
12804 ("instances", _STRING_LIST),
12805 ("target_groups", _STRING_LIST),
12809 def Run(self, name, validate=True, call_fn=None):
12810 """Run an instance allocator and return the results.
12813 if call_fn is None:
12814 call_fn = self.rpc.call_iallocator_runner
12816 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12817 result.Raise("Failure while running the iallocator script")
12819 self.out_text = result.payload
12821 self._ValidateResult()
12823 def _ValidateResult(self):
12824 """Process the allocator results.
12826 This will process the allocator output and, if successful, save it in
12827 self.out_data and the other result attributes.
12831 rdict = serializer.Load(self.out_text)
12832 except Exception, err:
12833 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12835 if not isinstance(rdict, dict):
12836 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12838 # TODO: remove backwards compatibility in later versions
12839 if "nodes" in rdict and "result" not in rdict:
12840 rdict["result"] = rdict["nodes"]
12843 for key in "success", "info", "result":
12844 if key not in rdict:
12845 raise errors.OpExecError("Can't parse iallocator results:"
12846 " missing key '%s'" % key)
12847 setattr(self, key, rdict[key])
12849 if not self._result_check(self.result):
12850 raise errors.OpExecError("Iallocator returned invalid result,"
12851 " expected %s, got %s" %
12852 (self._result_check, self.result),
12853 errors.ECODE_INVAL)
12855 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12856 constants.IALLOCATOR_MODE_MEVAC):
12857 node2group = dict((name, ndata["group"])
12858 for (name, ndata) in self.in_data["nodes"].items())
12860 fn = compat.partial(self._NodesToGroups, node2group,
12861 self.in_data["nodegroups"])
12863 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12864 assert self.relocate_from is not None
12865 assert self.required_nodes == 1
12867 request_groups = fn(self.relocate_from)
12868 result_groups = fn(rdict["result"])
12870 if result_groups != request_groups:
12871 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12872 " differ from original groups (%s)" %
12873 (utils.CommaJoin(result_groups),
12874 utils.CommaJoin(request_groups)))
12875 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12876 request_groups = fn(self.evac_nodes)
12877 for (instance_name, secnode) in self.result:
12878 result_groups = fn([secnode])
12879 if result_groups != request_groups:
12880 raise errors.OpExecError("Iallocator returned new secondary node"
12881 " '%s' (group '%s') for instance '%s'"
12882 " which is not in original group '%s'" %
12883 (secnode, utils.CommaJoin(result_groups),
12885 utils.CommaJoin(request_groups)))
12887 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12889 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
12890 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
12892 self.out_data = rdict
12895 def _NodesToGroups(node2group, groups, nodes):
12896 """Returns a list of unique group names for a list of nodes.
12898 @type node2group: dict
12899 @param node2group: Map from node name to group UUID
12901 @param groups: Group information
12903 @param nodes: Node names
12910 group_uuid = node2group[node]
12912 # Ignore unknown node
12916 group = groups[group_uuid]
12918 # Can't find group, let's use UUID
12919 group_name = group_uuid
12921 group_name = group["name"]
12923 result.add(group_name)
12925 return sorted(result)
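# Hypothetical example of the mapping above: with
# node2group = {"n1": "uuid-a", "n2": "uuid-b"} and
# groups = {"uuid-a": {"name": "default"}}, the call
# _NodesToGroups(node2group, groups, ["n1", "n2", "unknown"]) returns
# ["default", "uuid-b"]: unknown nodes are skipped and a group UUID is kept
# as-is when its name cannot be resolved.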
12928 class LUTestAllocator(NoHooksLU):
12929 """Run allocator tests.
12931 This LU runs the allocator tests
12934 def CheckPrereq(self):
12935 """Check prerequisites.
12937 This checks the opcode parameters depending on the direction and mode of the test.
12940 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12941 for attr in ["memory", "disks", "disk_template",
12942 "os", "tags", "nics", "vcpus"]:
12943 if not hasattr(self.op, attr):
12944 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12945 attr, errors.ECODE_INVAL)
12946 iname = self.cfg.ExpandInstanceName(self.op.name)
12947 if iname is not None:
12948 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12949 iname, errors.ECODE_EXISTS)
12950 if not isinstance(self.op.nics, list):
12951 raise errors.OpPrereqError("Invalid parameter 'nics'",
12952 errors.ECODE_INVAL)
12953 if not isinstance(self.op.disks, list):
12954 raise errors.OpPrereqError("Invalid parameter 'disks'",
12955 errors.ECODE_INVAL)
12956 for row in self.op.disks:
12957 if (not isinstance(row, dict) or
12958 constants.IDISK_SIZE not in row or
12959 not isinstance(row[constants.IDISK_SIZE], int) or
12960 constants.IDISK_MODE not in row or
12961 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12962 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12963 " parameter", errors.ECODE_INVAL)
12964 if self.op.hypervisor is None:
12965 self.op.hypervisor = self.cfg.GetHypervisorType()
12966 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12967 fname = _ExpandInstanceName(self.cfg, self.op.name)
12968 self.op.name = fname
12969 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12970 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12971 if not hasattr(self.op, "evac_nodes"):
12972 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12973 " opcode input", errors.ECODE_INVAL)
12974 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
12975 constants.IALLOCATOR_MODE_NODE_EVAC):
12976 if not self.op.instances:
12977 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
12978 self.op.instances = _GetWantedInstances(self, self.op.instances)
12980 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12981 self.op.mode, errors.ECODE_INVAL)
12983 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12984 if self.op.allocator is None:
12985 raise errors.OpPrereqError("Missing allocator name",
12986 errors.ECODE_INVAL)
12987 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12988 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12989 self.op.direction, errors.ECODE_INVAL)
12991 def Exec(self, feedback_fn):
12992 """Run the allocator test.
12995 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12996 ial = IAllocator(self.cfg, self.rpc,
12999 memory=self.op.memory,
13000 disks=self.op.disks,
13001 disk_template=self.op.disk_template,
13005 vcpus=self.op.vcpus,
13006 hypervisor=self.op.hypervisor,
13008 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13009 ial = IAllocator(self.cfg, self.rpc,
13012 relocate_from=list(self.relocate_from),
13014 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13015 ial = IAllocator(self.cfg, self.rpc,
13017 evac_nodes=self.op.evac_nodes)
13018 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13019 ial = IAllocator(self.cfg, self.rpc,
13021 instances=self.op.instances,
13022 target_groups=self.op.target_groups)
13023 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13024 ial = IAllocator(self.cfg, self.rpc,
13026 instances=self.op.instances,
13027 evac_mode=self.op.evac_mode)
13029 raise errors.ProgrammerError("Unhandled mode %s in"
13030 " LUTestAllocator.Exec", self.op.mode)
13032 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13033 result = ial.in_text
13035 ial.Run(self.op.allocator, validate=False)
13036 result = ial.out_text
13040 #: Query type implementations
13042 constants.QR_INSTANCE: _InstanceQuery,
13043 constants.QR_NODE: _NodeQuery,
13044 constants.QR_GROUP: _GroupQuery,
13045 constants.QR_OS: _OsQuery,
13048 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13051 def _GetQueryImplementation(name):
13052 """Returns the implemtnation for a query type.
13054 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13058 return _QUERY_IMPL[name]
13060 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13061 errors.ECODE_INVAL)