# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module


import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
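
# Illustrative usage (not part of the original module): the helper returns the
# configured OOB program path or an empty string, so callers typically check
# it before attempting any out-of-band operation, e.g.:
#   oob_program = _SupportsOob(self.cfg, node)
#   if not oob_program:
#     ...  # node has no out-of-band support configured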
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
429 """Tasklet base class.
431 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432 they can mix legacy code with tasklets. Locking needs to be done in the LU,
433 tasklets know nothing about locks.
435 Subclasses must follow these rules:
436 - Implement CheckPrereq
440 def __init__(self, lu):
447 def CheckPrereq(self):
448 """Check prerequisites for this tasklets.
450 This method should check whether the prerequisites for the execution of
451 this tasklet are fulfilled. It can do internode communication, but it
452 should be idempotent - no cluster or system changes are allowed.
454 The method should raise errors.OpPrereqError in case something is not
455 fulfilled. Its return value is ignored.
457 This method should also update all parameters to their canonical form if it
458 hasn't been done before.
463 def Exec(self, feedback_fn):
464 """Execute the tasklet.
466 This method should implement the actual work. It should raise
467 errors.OpExecError for failures that are somewhat dealt with in code, or
471 raise NotImplementedError
475 """Base for query utility classes.
478 #: Attribute holding field definitions
481 def __init__(self, filter_, fields, use_locking):
482 """Initializes this class.
485 self.use_locking = use_locking
487 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
489 self.requested_data = self.query.RequestedData()
490 self.names = self.query.RequestedNames()
492 # Sort only if no names were requested
493 self.sort_by_name = not self.names
495 self.do_locking = None
498 def _GetNames(self, lu, all_names, lock_level):
499 """Helper function to determine names asked for in the query.
503 names = lu.glm.list_owned(lock_level)
507 if self.wanted == locking.ALL_SET:
508 assert not self.names
509 # caller didn't specify names, so ordering is not important
510 return utils.NiceSort(names)
512 # caller specified names and we must keep the same order
514 assert not self.do_locking or lu.glm.is_owned(lock_level)
516 missing = set(self.wanted).difference(names)
518 raise errors.OpExecError("Some items were removed before retrieving"
519 " their data: %s" % missing)
521 # Return expanded names
524 def ExpandNames(self, lu):
525 """Expand names for this query.
527 See L{LogicalUnit.ExpandNames}.
530 raise NotImplementedError()
532 def DeclareLocks(self, lu, level):
533 """Declare locks for this query.
535 See L{LogicalUnit.DeclareLocks}.
538 raise NotImplementedError()
540 def _GetQueryData(self, lu):
541 """Collects all data for this query.
543 @return: Query data object
546 raise NotImplementedError()
548 def NewStyleQuery(self, lu):
549 """Collect data and execute query.
552 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553 sort_by_name=self.sort_by_name)
555 def OldStyleQuery(self, lu):
556 """Collect data and execute query.
559 return self.query.OldStyleQuery(self._GetQueryData(lu),
560 sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found
      in the cluster

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
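
# Illustrative example (not part of the original module): with the default
# flags,
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
# deletes "mem" (reset to its default) and returns {"vcpus": 4}.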


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.glm.list_owned(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
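
# Illustrative usage (sketch, not from the original module): an LU that no
# longer needs most of its node locks can keep just the primary node with
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])
# or release an explicit subset with names=[...]; passing both parameters
# trips the assertion at the top of the function.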


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
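
# Worked example (illustrative only): with candidate_pool_size = 10, three
# current master candidates and GetMasterCandidateStats() reporting that three
# are desired, mc_should becomes min(3 + 1, 10) = 4, so 3 < 4 and the new node
# promotes itself.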


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
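
# Illustrative note (assumption, not from the original module): OS variants
# are expressed as "name+variant", e.g. "debootstrap+default"; for such a name
# objects.OS.GetVariant() yields "default", while a bare "debootstrap" yields
# an empty variant and triggers the "OS name must include a variant" error.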


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerifyConfig.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
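
  # Illustrative (not from the original module): with the opcode's error_codes
  # flag set, the line reported through feedback_fn looks like
  #   ERROR:ENODESSH:node:node1.example.com:ssh communication with node ...
  # while the plain format is
  #   ERROR: node node1.example.com: ssh communication with node ...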

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return (not self.bad, [g.name for g in self.all_group_info.values()])


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    all_node_info = self.cfg.GetAllNodesInfo()
    all_inst_info = self.cfg.GetAllInstancesInfo()

    node_names = set(node.name
                     for node in all_node_info.values()
                     if node.group == self.group_uuid)

    inst_names = [inst.name
                  for inst in all_inst_info.values()
                  if inst.primary_node in node_names]

    # In Exec(), we warn about mirrored instances that have primary and
    # secondary living in separate node groups. To fully verify that
    # volumes for these instances are healthy, we will need to do an
    # extra call to their secondaries. We ensure here those nodes will
    # be locked.
    for inst in inst_names:
      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
        node_names.update(all_inst_info[inst].secondary_nodes)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: list(node_names),
      locking.LEVEL_INSTANCE: inst_names,
      }

    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    group_nodes = set(node.name
                      for node in self.all_node_info.values()
                      if node.group == self.group_uuid)

    group_instances = set(inst.name
                          for inst in self.all_inst_info.values()
                          if inst.primary_node in group_nodes)

    unlocked_nodes = \
      group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1913 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1914 """Verify N+1 Memory Resilience.
1916 Check that if one single node dies we can still start all the
1917 instances it was primary for.
1920 cluster_info = self.cfg.GetClusterInfo()
1921 for node, n_img in node_image.items():
1922 # This code checks that every node which is now listed as a
1923 # secondary has enough memory to host all the instances it is
1924 # supposed to take over, should a single other node in the cluster fail.
1925 # FIXME: not ready for failover to an arbitrary node
1926 # FIXME: does not support file-backed instances
1927 # WARNING: we currently take into account down instances as well
1928 # as up ones, considering that even if they're down someone
1929 # might want to start them even in the event of a node failure.
1931 # we're skipping offline nodes from the N+1 warning, since
1932 # most likely we don't have good memory information from them;
1933 # we already list instances living on such nodes, and that's
1936 for prinode, instances in n_img.sbp.items():
1938 for instance in instances:
1939 bep = cluster_info.FillBE(instance_cfg[instance])
1940 if bep[constants.BE_AUTO_BALANCE]:
1941 needed_mem += bep[constants.BE_MEMORY]
1942 test = n_img.mfree < needed_mem
1943 self._ErrorIf(test, self.ENODEN1, node,
1944 "not enough memory to accomodate instance failovers"
1945 " should node %s fail (%dMiB needed, %dMiB available)",
1946 prinode, needed_mem, n_img.mfree)
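# Illustrative sketch with made-up numbers: the N+1 check above adds up, per
# prospective failed primary node, the memory of all auto-balanced instances
# this node would have to absorb and compares it to the node's free memory:
#
#   sbp = {"nodeA": ["inst1", "inst2"]}   # primaries backed by this node
#   be = {"inst1": {"auto_balance": True, "memory": 1024},
#         "inst2": {"auto_balance": False, "memory": 2048}}
#   mfree = 512
#   for prinode, instances in sbp.items():
#     needed_mem = sum(be[i]["memory"] for i in instances
#                      if be[i]["auto_balance"])
#     if mfree < needed_mem:              # 512 < 1024 -> not N+1 safe
#       print("N+1 failure should %s fail" % prinode)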
1949 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1950 (files_all, files_all_opt, files_mc, files_vm)):
1951 """Verifies file checksums collected from all nodes.
1953 @param errorif: Callback for reporting errors
1954 @param nodeinfo: List of L{objects.Node} objects
1955 @param master_node: Name of master node
1956 @param all_nvinfo: RPC results
1959 node_names = frozenset(node.name for node in nodeinfo)
1961 assert master_node in node_names
1962 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1963 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1964 "Found file listed in more than one file list"
1966 # Define functions determining which nodes to consider for a file
1967 file2nodefn = dict([(filename, fn)
1968 for (files, fn) in [(files_all, None),
1969 (files_all_opt, None),
1970 (files_mc, lambda node: (node.master_candidate or
1971 node.name == master_node)),
1972 (files_vm, lambda node: node.vm_capable)]
1973 for filename in files])
1975 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1977 for node in nodeinfo:
1978 nresult = all_nvinfo[node.name]
1980 if nresult.fail_msg or not nresult.payload:
1983 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1985 test = not (node_files and isinstance(node_files, dict))
1986 errorif(test, cls.ENODEFILECHECK, node.name,
1987 "Node did not return file checksum data")
1991 for (filename, checksum) in node_files.items():
1992 # Check if the file should be considered for a node
1993 fn = file2nodefn[filename]
1994 if fn is None or fn(node):
1995 fileinfo[filename].setdefault(checksum, set()).add(node.name)
1997 for (filename, checksums) in fileinfo.items():
1998 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2000 # Nodes having the file
2001 with_file = frozenset(node_name
2002 for nodes in fileinfo[filename].values()
2003 for node_name in nodes)
2005 # Nodes missing file
2006 missing_file = node_names - with_file
2008 if filename in files_all_opt:
2010 errorif(missing_file and missing_file != node_names,
2011 cls.ECLUSTERFILECHECK, None,
2012 "File %s is optional, but it must exist on all or no nodes (not"
2014 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2016 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2017 "File %s is missing from node(s) %s", filename,
2018 utils.CommaJoin(utils.NiceSort(missing_file)))
2020 # See if there are multiple versions of the file
2021 test = len(checksums) > 1
2023 variants = ["variant %s on %s" %
2024 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2025 for (idx, (checksum, nodes)) in
2026 enumerate(sorted(checksums.items()))]
2030 errorif(test, cls.ECLUSTERFILECHECK, None,
2031 "File %s found with %s different checksums (%s)",
2032 filename, len(checksums), "; ".join(variants))
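# Illustrative sketch with hypothetical data: fileinfo maps each file name to
# a {checksum: set(nodes)} dict, from which both "missing on some nodes" and
# "multiple variants" are derived:
#
#   fileinfo = {"/etc/x": {"c1": set(["n1", "n2"]), "c2": set(["n3"])}}
#   node_names = frozenset(["n1", "n2", "n3", "n4"])
#   with_file = frozenset(name
#                         for nodes in fileinfo["/etc/x"].values()
#                         for name in nodes)
#   missing_file = node_names - with_file    # -> frozenset(["n4"])
#   len(fileinfo["/etc/x"]) > 1              # -> True: two variants exist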
2034 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2036 """Verifies and the node DRBD status.
2038 @type ninfo: L{objects.Node}
2039 @param ninfo: the node to check
2040 @param nresult: the remote results for the node
2041 @param instanceinfo: the dict of instances
2042 @param drbd_helper: the configured DRBD usermode helper
2043 @param drbd_map: the DRBD map as returned by
2044 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2048 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2051 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2052 test = (helper_result is None)
2053 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2054 "no drbd usermode helper returned")
2056 status, payload = helper_result
2057 test = not status
2058 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2059 "drbd usermode helper check unsuccessful: %s", payload)
2060 test = status and (payload != drbd_helper)
2061 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2062 "wrong drbd usermode helper: %s", payload)
2064 # compute the DRBD minors
2065 node_drbd = {}
2066 for minor, instance in drbd_map[node].items():
2067 test = instance not in instanceinfo
2068 _ErrorIf(test, self.ECLUSTERCFG, None,
2069 "ghost instance '%s' in temporary DRBD map", instance)
2070 # ghost instance should not be running, but otherwise we
2071 # don't give double warnings (both ghost instance and
2072 # unallocated minor in use)
2073 if test:
2074 node_drbd[minor] = (instance, False)
2075 else:
2076 instance = instanceinfo[instance]
2077 node_drbd[minor] = (instance.name, instance.admin_up)
2079 # and now check them
2080 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2081 test = not isinstance(used_minors, (tuple, list))
2082 _ErrorIf(test, self.ENODEDRBD, node,
2083 "cannot parse drbd status file: %s", str(used_minors))
2084 if test:
2085 # we cannot check drbd status
2086 return
2088 for minor, (iname, must_exist) in node_drbd.items():
2089 test = minor not in used_minors and must_exist
2090 _ErrorIf(test, self.ENODEDRBD, node,
2091 "drbd minor %d of instance %s is not active", minor, iname)
2092 for minor in used_minors:
2093 test = minor not in node_drbd
2094 _ErrorIf(test, self.ENODEDRBD, node,
2095 "unallocated drbd minor %d is in use", minor)
2097 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2098 """Builds the node OS structures.
2100 @type ninfo: L{objects.Node}
2101 @param ninfo: the node to check
2102 @param nresult: the remote results for the node
2103 @param nimg: the node image object
2107 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2109 remote_os = nresult.get(constants.NV_OSLIST, None)
2110 test = (not isinstance(remote_os, list) or
2111 not compat.all(isinstance(v, list) and len(v) == 7
2112 for v in remote_os))
2114 _ErrorIf(test, self.ENODEOS, node,
2115 "node hasn't returned valid OS data")
2124 for (name, os_path, status, diagnose,
2125 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2127 if name not in os_dict:
2130 # parameters is a list of lists instead of list of tuples due to
2131 # JSON lacking a real tuple type, fix it:
2132 parameters = [tuple(v) for v in parameters]
2133 os_dict[name].append((os_path, status, diagnose,
2134 set(variants), set(parameters), set(api_ver)))
2136 nimg.oslist = os_dict
2138 def _VerifyNodeOS(self, ninfo, nimg, base):
2139 """Verifies the node OS list.
2141 @type ninfo: L{objects.Node}
2142 @param ninfo: the node to check
2143 @param nimg: the node image object
2144 @param base: the 'template' node we match against (e.g. from the master)
2148 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2150 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2152 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2153 for os_name, os_data in nimg.oslist.items():
2154 assert os_data, "Empty OS status for OS %s?!" % os_name
2155 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2156 _ErrorIf(not f_status, self.ENODEOS, node,
2157 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2158 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2159 "OS '%s' has multiple entries (first one shadows the rest): %s",
2160 os_name, utils.CommaJoin([v[0] for v in os_data]))
2161 # this will be caught in the backend too
2162 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2163 and not f_var, self.ENODEOS, node,
2164 "OS %s with API at least %d does not declare any variant",
2165 os_name, constants.OS_API_V15)
2166 # comparisons with the 'base' image
2167 test = os_name not in base.oslist
2168 _ErrorIf(test, self.ENODEOS, node,
2169 "Extra OS %s not present on reference node (%s)",
2173 assert base.oslist[os_name], "Base node has empty OS status?"
2174 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2176 # base OS is invalid, skipping
2178 for kind, a, b in [("API version", f_api, b_api),
2179 ("variants list", f_var, b_var),
2180 ("parameters", beautify_params(f_param),
2181 beautify_params(b_param))]:
2182 _ErrorIf(a != b, self.ENODEOS, node,
2183 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2184 kind, os_name, base.name,
2185 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2187 # check any missing OSes
2188 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2189 _ErrorIf(missing, self.ENODEOS, node,
2190 "OSes present on reference node %s but missing on this node: %s",
2191 base.name, utils.CommaJoin(missing))
2193 def _VerifyOob(self, ninfo, nresult):
2194 """Verifies out of band functionality of a node.
2196 @type ninfo: L{objects.Node}
2197 @param ninfo: the node to check
2198 @param nresult: the remote results for the node
2202 # We just have to verify the paths on master and/or master candidates
2203 # as the oob helper is invoked on the master
2204 if ((ninfo.master_candidate or ninfo.master_capable) and
2205 constants.NV_OOB_PATHS in nresult):
2206 for path_result in nresult[constants.NV_OOB_PATHS]:
2207 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2209 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2210 """Verifies and updates the node volume data.
2212 This function will update a L{NodeImage}'s internal structures
2213 with data from the remote call.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param nimg: the node image object
2219 @param vg_name: the configured VG name
2223 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2225 nimg.lvm_fail = True
2226 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2229 elif isinstance(lvdata, basestring):
2230 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2231 utils.SafeEncode(lvdata))
2232 elif not isinstance(lvdata, dict):
2233 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2235 nimg.volumes = lvdata
2236 nimg.lvm_fail = False
2238 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2239 """Verifies and updates the node instance list.
2241 If the listing was successful, then updates this node's instance
2242 list. Otherwise, it marks the RPC call as failed for the instance
2243 list.
2245 @type ninfo: L{objects.Node}
2246 @param ninfo: the node to check
2247 @param nresult: the remote results for the node
2248 @param nimg: the node image object
2251 idata = nresult.get(constants.NV_INSTANCELIST, None)
2252 test = not isinstance(idata, list)
2253 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2254 " (instancelist): %s", utils.SafeEncode(str(idata)))
2256 nimg.hyp_fail = True
2258 nimg.instances = idata
2260 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2261 """Verifies and computes a node information map
2263 @type ninfo: L{objects.Node}
2264 @param ninfo: the node to check
2265 @param nresult: the remote results for the node
2266 @param nimg: the node image object
2267 @param vg_name: the configured VG name
2271 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2273 # try to read free memory (from the hypervisor)
2274 hv_info = nresult.get(constants.NV_HVINFO, None)
2275 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2276 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2279 nimg.mfree = int(hv_info["memory_free"])
2280 except (ValueError, TypeError):
2281 _ErrorIf(True, self.ENODERPC, node,
2282 "node returned invalid nodeinfo, check hypervisor")
2284 # FIXME: devise a free space model for file based instances as well
2285 if vg_name is not None:
2286 test = (constants.NV_VGLIST not in nresult or
2287 vg_name not in nresult[constants.NV_VGLIST])
2288 _ErrorIf(test, self.ENODELVM, node,
2289 "node didn't return data for the volume group '%s'"
2290 " - it is either missing or broken", vg_name)
2293 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2294 except (ValueError, TypeError):
2295 _ErrorIf(True, self.ENODERPC, node,
2296 "node returned invalid LVM info, check LVM status")
2298 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2299 """Gets per-disk status information for all instances.
2301 @type nodelist: list of strings
2302 @param nodelist: Node names
2303 @type node_image: dict of (name, L{objects.Node})
2304 @param node_image: Node objects
2305 @type instanceinfo: dict of (name, L{objects.Instance})
2306 @param instanceinfo: Instance objects
2307 @rtype: {instance: {node: [(success, payload)]}}
2308 @return: a dictionary of per-instance dictionaries with nodes as
2309 keys and disk information as values; the disk information is a
2310 list of tuples (success, payload)
2313 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2315 node_disks = {}
2316 node_disks_devonly = {}
2317 diskless_instances = set()
2318 diskless = constants.DT_DISKLESS
2320 for nname in nodelist:
2321 node_instances = list(itertools.chain(node_image[nname].pinst,
2322 node_image[nname].sinst))
2323 diskless_instances.update(inst for inst in node_instances
2324 if instanceinfo[inst].disk_template == diskless)
2325 disks = [(inst, disk)
2326 for inst in node_instances
2327 for disk in instanceinfo[inst].disks]
2329 if not disks:
2330 # No need to collect data
2331 continue
2333 node_disks[nname] = disks
2335 # Creating copies as SetDiskID below will modify the objects and that can
2336 # lead to incorrect data returned from nodes
2337 devonly = [dev.Copy() for (_, dev) in disks]
2339 for dev in devonly:
2340 self.cfg.SetDiskID(dev, nname)
2342 node_disks_devonly[nname] = devonly
2344 assert len(node_disks) == len(node_disks_devonly)
2346 # Collect data from all nodes with disks
2347 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2350 assert len(result) == len(node_disks)
2354 for (nname, nres) in result.items():
2355 disks = node_disks[nname]
2358 # No data from this node
2359 data = len(disks) * [(False, "node offline")]
2362 _ErrorIf(msg, self.ENODERPC, nname,
2363 "while getting disk information: %s", msg)
2365 # No data from this node
2366 data = len(disks) * [(False, msg)]
2369 for idx, i in enumerate(nres.payload):
2370 if isinstance(i, (tuple, list)) and len(i) == 2:
2373 logging.warning("Invalid result from node %s, entry %d: %s",
2375 data.append((False, "Invalid result from the remote node"))
2377 for ((inst, _), status) in zip(disks, data):
2378 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2380 # Add empty entries for diskless instances.
2381 for inst in diskless_instances:
2382 assert inst not in instdisk
2385 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2386 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2387 compat.all(isinstance(s, (tuple, list)) and
2388 len(s) == 2 for s in statuses)
2389 for inst, nnames in instdisk.items()
2390 for nname, statuses in nnames.items())
2391 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2393 return instdisk
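# Illustrative shape of the instdisk structure checked above (hypothetical
# data): one (success, payload) entry per disk and per node of an instance,
# with diskless instances mapping to an empty inner dict:
#
#   instdisk = {
#     "inst1": {"node1": [(True, bdev_status), (False, "node offline")]},
#   }
#
# where bdev_status stands for whatever payload the block device RPC
# returned for that disk.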
2395 def BuildHooksEnv(self):
2398 Cluster-Verify hooks run only in the post phase; their failure is
2399 logged in the verify output and makes the verification fail.
2403 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2406 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2407 for node in self.my_node_info.values())
2411 def BuildHooksNodes(self):
2412 """Build hooks nodes.
2415 assert self.my_node_names, ("Node list not gathered,"
2416 " has CheckPrereq been executed?")
2417 return ([], self.my_node_names)
2419 def Exec(self, feedback_fn):
2420 """Verify integrity of the node group, performing various test on nodes.
2423 # This method has too many local variables. pylint: disable-msg=R0914
2425 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2426 verbose = self.op.verbose
2427 self._feedback_fn = feedback_fn
2429 vg_name = self.cfg.GetVGName()
2430 drbd_helper = self.cfg.GetDRBDHelper()
2431 cluster = self.cfg.GetClusterInfo()
2432 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2433 hypervisors = cluster.enabled_hypervisors
2434 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2436 i_non_redundant = [] # Non redundant instances
2437 i_non_a_balanced = [] # Non auto-balanced instances
2438 n_offline = 0 # Count of offline nodes
2439 n_drained = 0 # Count of nodes being drained
2440 node_vol_should = {}
2442 # FIXME: verify OS list
2445 filemap = _ComputeAncillaryFiles(cluster, False)
2447 # do local checksums
2448 master_node = self.master_node = self.cfg.GetMasterNode()
2449 master_ip = self.cfg.GetMasterIP()
2451 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2453 # We will make nodes contact all nodes in their group, and one node from
2454 # every other group.
2455 # TODO: should it be a *random* node, different every time?
2456 online_nodes = [node.name for node in node_data_list if not node.offline]
2457 other_group_nodes = {}
2459 for name in sorted(self.all_node_info):
2460 node = self.all_node_info[name]
2461 if (node.group not in other_group_nodes
2462 and node.group != self.group_uuid
2463 and not node.offline):
2464 other_group_nodes[node.group] = node.name
2466 node_verify_param = {
2467 constants.NV_FILELIST:
2468 utils.UniqueSequence(filename
2469 for files in filemap
2470 for filename in files),
2471 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2472 constants.NV_HYPERVISOR: hypervisors,
2473 constants.NV_HVPARAMS:
2474 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2475 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2476 for node in node_data_list
2477 if not node.offline],
2478 constants.NV_INSTANCELIST: hypervisors,
2479 constants.NV_VERSION: None,
2480 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2481 constants.NV_NODESETUP: None,
2482 constants.NV_TIME: None,
2483 constants.NV_MASTERIP: (master_node, master_ip),
2484 constants.NV_OSLIST: None,
2485 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2488 if vg_name is not None:
2489 node_verify_param[constants.NV_VGLIST] = None
2490 node_verify_param[constants.NV_LVLIST] = vg_name
2491 node_verify_param[constants.NV_PVLIST] = [vg_name]
2492 node_verify_param[constants.NV_DRBDLIST] = None
2495 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2498 # FIXME: this needs to be changed per node-group, not cluster-wide
2499 bridges = set()
2500 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2501 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2502 bridges.add(default_nicpp[constants.NIC_LINK])
2503 for instance in self.my_inst_info.values():
2504 for nic in instance.nics:
2505 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2506 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2507 bridges.add(full_nic[constants.NIC_LINK])
2510 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2512 # Build our expected cluster state
2513 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2515 vm_capable=node.vm_capable))
2516 for node in node_data_list)
2519 oob_paths = []
2520 for node in self.all_node_info.values():
2521 path = _SupportsOob(self.cfg, node)
2522 if path and path not in oob_paths:
2523 oob_paths.append(path)
2526 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2528 for instance in self.my_inst_names:
2529 inst_config = self.my_inst_info[instance]
2531 for nname in inst_config.all_nodes:
2532 if nname not in node_image:
2533 gnode = self.NodeImage(name=nname)
2534 gnode.ghost = (nname not in self.all_node_info)
2535 node_image[nname] = gnode
2537 inst_config.MapLVsByNode(node_vol_should)
2539 pnode = inst_config.primary_node
2540 node_image[pnode].pinst.append(instance)
2542 for snode in inst_config.secondary_nodes:
2543 nimg = node_image[snode]
2544 nimg.sinst.append(instance)
2545 if pnode not in nimg.sbp:
2546 nimg.sbp[pnode] = []
2547 nimg.sbp[pnode].append(instance)
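# Illustrative shape of the expected-state maps built above (hypothetical
# node and instance names):
#
#   node_image["nodeA"].pinst   # instances whose primary node is nodeA
#   node_image["nodeB"].sinst   # instances having nodeB as a secondary
#   node_image["nodeB"].sbp     # {"nodeA": [instances whose primary is nodeA
#                               #  and which keep a secondary on nodeB]}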
2549 # At this point, we have the in-memory data structures complete,
2550 # except for the runtime information, which we'll gather next
2552 # Due to the way our RPC system works, exact response times cannot be
2553 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2554 # time before and after executing the request, we can at least have a time
2555 # window.
2556 nvinfo_starttime = time.time()
2557 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2559 self.cfg.GetClusterName())
2560 if self.extra_lv_nodes and vg_name is not None:
2562 self.rpc.call_node_verify(self.extra_lv_nodes,
2563 {constants.NV_LVLIST: vg_name},
2564 self.cfg.GetClusterName())
2566 extra_lv_nvinfo = {}
2567 nvinfo_endtime = time.time()
2569 all_drbd_map = self.cfg.ComputeDRBDMap()
2571 feedback_fn("* Gathering disk information (%s nodes)" %
2572 len(self.my_node_names))
2573 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2576 feedback_fn("* Verifying configuration file consistency")
2578 # If not all nodes are being checked, we need to make sure the master node
2579 # and a non-checked vm_capable node are in the list.
2580 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2582 vf_nvinfo = all_nvinfo.copy()
2583 vf_node_info = list(self.my_node_info.values())
2584 additional_nodes = []
2585 if master_node not in self.my_node_info:
2586 additional_nodes.append(master_node)
2587 vf_node_info.append(self.all_node_info[master_node])
2588 # Add the first vm_capable node we find which is not included
2589 for node in absent_nodes:
2590 nodeinfo = self.all_node_info[node]
2591 if nodeinfo.vm_capable and not nodeinfo.offline:
2592 additional_nodes.append(node)
2593 vf_node_info.append(self.all_node_info[node])
2595 key = constants.NV_FILELIST
2596 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2597 {key: node_verify_param[key]},
2598 self.cfg.GetClusterName()))
2600 vf_nvinfo = all_nvinfo
2601 vf_node_info = self.my_node_info.values()
2603 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2605 feedback_fn("* Verifying node status")
2609 for node_i in node_data_list:
2611 nimg = node_image[node]
2615 feedback_fn("* Skipping offline node %s" % (node,))
2619 if node == master_node:
2621 elif node_i.master_candidate:
2622 ntype = "master candidate"
2623 elif node_i.drained:
2629 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2631 msg = all_nvinfo[node].fail_msg
2632 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2634 nimg.rpc_fail = True
2637 nresult = all_nvinfo[node].payload
2639 nimg.call_ok = self._VerifyNode(node_i, nresult)
2640 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2641 self._VerifyNodeNetwork(node_i, nresult)
2642 self._VerifyOob(node_i, nresult)
2645 self._VerifyNodeLVM(node_i, nresult, vg_name)
2646 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2649 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2650 self._UpdateNodeInstances(node_i, nresult, nimg)
2651 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2652 self._UpdateNodeOS(node_i, nresult, nimg)
2654 if not nimg.os_fail:
2655 if refos_img is None:
2656 refos_img = nimg
2657 self._VerifyNodeOS(node_i, nimg, refos_img)
2658 self._VerifyNodeBridges(node_i, nresult, bridges)
2660 # Check whether all running instances are primary for the node. (This
2661 # can no longer be done from _VerifyInstance below, since some of the
2662 # wrong instances could be from other node groups.)
2663 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2665 for inst in non_primary_inst:
2666 test = inst in self.all_inst_info
2667 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2668 "instance should not run on node %s", node_i.name)
2669 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2670 "node is running unknown instance %s", inst)
2672 for node, result in extra_lv_nvinfo.items():
2673 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2674 node_image[node], vg_name)
2676 feedback_fn("* Verifying instance status")
2677 for instance in self.my_inst_names:
2679 feedback_fn("* Verifying instance %s" % instance)
2680 inst_config = self.my_inst_info[instance]
2681 self._VerifyInstance(instance, inst_config, node_image,
2683 inst_nodes_offline = []
2685 pnode = inst_config.primary_node
2686 pnode_img = node_image[pnode]
2687 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2688 self.ENODERPC, pnode, "instance %s, connection to"
2689 " primary node failed", instance)
2691 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2692 self.EINSTANCEBADNODE, instance,
2693 "instance is marked as running and lives on offline node %s",
2694 inst_config.primary_node)
2696 # If the instance is non-redundant we cannot survive losing its primary
2697 # node, so we are not N+1 compliant. On the other hand we have no disk
2698 # templates with more than one secondary so that situation is not well
2699 # supported either.
2700 # FIXME: does not support file-backed instances
2701 if not inst_config.secondary_nodes:
2702 i_non_redundant.append(instance)
2704 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2705 instance, "instance has multiple secondary nodes: %s",
2706 utils.CommaJoin(inst_config.secondary_nodes),
2707 code=self.ETYPE_WARNING)
2709 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2710 pnode = inst_config.primary_node
2711 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2712 instance_groups = {}
2714 for node in instance_nodes:
2715 instance_groups.setdefault(self.all_node_info[node].group,
2719 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2720 # Sort so that we always list the primary node first.
2721 for group, nodes in sorted(instance_groups.items(),
2722 key=lambda (_, nodes): pnode in nodes,
2725 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2726 instance, "instance has primary and secondary nodes in"
2727 " different groups: %s", utils.CommaJoin(pretty_list),
2728 code=self.ETYPE_WARNING)
2730 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2731 i_non_a_balanced.append(instance)
2733 for snode in inst_config.secondary_nodes:
2734 s_img = node_image[snode]
2735 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2736 "instance %s, connection to secondary node failed", instance)
2739 inst_nodes_offline.append(snode)
2741 # warn that the instance lives on offline nodes
2742 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2743 "instance has offline secondary node(s) %s",
2744 utils.CommaJoin(inst_nodes_offline))
2745 # ... or ghost/non-vm_capable nodes
2746 for node in inst_config.all_nodes:
2747 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2748 "instance lives on ghost node %s", node)
2749 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2750 instance, "instance lives on non-vm_capable node %s", node)
2752 feedback_fn("* Verifying orphan volumes")
2753 reserved = utils.FieldSet(*cluster.reserved_lvs)
2755 # We will get spurious "unknown volume" warnings if any node of this group
2756 # is secondary for an instance whose primary is in another group. To avoid
2757 # them, we find these instances and add their volumes to node_vol_should.
2758 for inst in self.all_inst_info.values():
2759 for secondary in inst.secondary_nodes:
2760 if (secondary in self.my_node_info
2761 and inst.name not in self.my_inst_info):
2762 inst.MapLVsByNode(node_vol_should)
2765 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2767 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2768 feedback_fn("* Verifying N+1 Memory redundancy")
2769 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2771 feedback_fn("* Other Notes")
2773 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2774 % len(i_non_redundant))
2776 if i_non_a_balanced:
2777 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2778 % len(i_non_a_balanced))
2781 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2784 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2788 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2789 """Analyze the post-hooks' result
2791 This method analyses the hook result, handles it, and sends some
2792 nicely-formatted feedback back to the user.
2794 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2795 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2796 @param hooks_results: the results of the multi-node hooks rpc call
2797 @param feedback_fn: function used to send feedback back to the caller
2798 @param lu_result: previous Exec result
2799 @return: the new Exec result, based on the previous result
2803 # We only really run POST phase hooks, and are only interested in
2804 # their results
2805 if phase == constants.HOOKS_PHASE_POST:
2806 # Used to change hooks' output to proper indentation
2807 feedback_fn("* Hooks Results")
2808 assert hooks_results, "invalid result from hooks"
2810 for node_name in hooks_results:
2811 res = hooks_results[node_name]
2812 msg = res.fail_msg
2813 test = msg and not res.offline
2814 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2815 "Communication failure in hooks execution: %s", msg)
2816 if res.offline or msg:
2817 # No need to investigate payload if node is offline or gave an error.
2818 # override manually lu_result here as _ErrorIf only
2819 # overrides self.bad
2822 for script, hkr, output in res.payload:
2823 test = hkr == constants.HKR_FAIL
2824 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2825 "Script %s failed, output:", script)
2827 output = self._HOOKS_INDENT_RE.sub(' ', output)
2828 feedback_fn("%s" % output)
2834 class LUClusterVerifyDisks(NoHooksLU):
2835 """Verifies the cluster disks status.
2840 def ExpandNames(self):
2841 self.needed_locks = {
2842 locking.LEVEL_NODE: locking.ALL_SET,
2843 locking.LEVEL_INSTANCE: locking.ALL_SET,
2845 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2847 def Exec(self, feedback_fn):
2848 """Verify integrity of cluster disks.
2850 @rtype: tuple of three items
2851 @return: a tuple of (dict of node-to-node_error, list of instances
2852 which need activate-disks, dict of instance: (node, volume) for
2856 result = res_nodes, res_instances, res_missing = {}, [], {}
2858 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2859 instances = self.cfg.GetAllInstancesInfo().values()
2862 for inst in instances:
2864 if not inst.admin_up:
2866 inst.MapLVsByNode(inst_lvs)
2867 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2868 for node, vol_list in inst_lvs.iteritems():
2869 for vol in vol_list:
2870 nv_dict[(node, vol)] = inst
2875 node_lvs = self.rpc.call_lv_list(nodes, [])
2876 for node, node_res in node_lvs.items():
2877 if node_res.offline:
2879 msg = node_res.fail_msg
2881 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2882 res_nodes[node] = msg
2885 lvs = node_res.payload
2886 for lv_name, (_, _, lv_online) in lvs.items():
2887 inst = nv_dict.pop((node, lv_name), None)
2888 if (not lv_online and inst is not None
2889 and inst.name not in res_instances):
2890 res_instances.append(inst.name)
2892 # any leftover items in nv_dict are missing LVs, let's arrange the
2893 # data better
2894 for key, inst in nv_dict.iteritems():
2895 if inst.name not in res_missing:
2896 res_missing[inst.name] = []
2897 res_missing[inst.name].append(key)
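# Illustrative sketch with hypothetical data: nv_dict inverts the
# per-instance LV map into {(node, volume): instance}, so every LV actually
# reported by a node can be popped off and whatever remains is missing:
#
#   inst_lvs = {"node1": ["xenvg/disk0", "xenvg/disk1"]}
#   nv_dict = {}
#   for node, vol_list in inst_lvs.items():
#     for vol in vol_list:
#       nv_dict[(node, vol)] = "inst1"
#   nv_dict.pop(("node1", "xenvg/disk0"), None)   # reported -> removed
#   nv_dict   # -> {("node1", "xenvg/disk1"): "inst1"}, i.e. a missing LV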
2902 class LUClusterRepairDiskSizes(NoHooksLU):
2903 """Verifies the cluster disks sizes.
2908 def ExpandNames(self):
2909 if self.op.instances:
2910 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2911 self.needed_locks = {
2912 locking.LEVEL_NODE: [],
2913 locking.LEVEL_INSTANCE: self.wanted_names,
2915 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2917 self.wanted_names = None
2918 self.needed_locks = {
2919 locking.LEVEL_NODE: locking.ALL_SET,
2920 locking.LEVEL_INSTANCE: locking.ALL_SET,
2922 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2924 def DeclareLocks(self, level):
2925 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2926 self._LockInstancesNodes(primary_only=True)
2928 def CheckPrereq(self):
2929 """Check prerequisites.
2931 This only checks the optional instance list against the existing names.
2934 if self.wanted_names is None:
2935 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2937 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2938 in self.wanted_names]
2940 def _EnsureChildSizes(self, disk):
2941 """Ensure children of the disk have the needed disk size.
2943 This is valid mainly for DRBD8 and fixes an issue where the
2944 children have smaller disk size.
2946 @param disk: an L{ganeti.objects.Disk} object
2949 if disk.dev_type == constants.LD_DRBD8:
2950 assert disk.children, "Empty children for DRBD8?"
2951 fchild = disk.children[0]
2952 mismatch = fchild.size < disk.size
2954 self.LogInfo("Child disk has size %d, parent %d, fixing",
2955 fchild.size, disk.size)
2956 fchild.size = disk.size
2958 # and we recurse on this child only, not on the metadev
2959 return self._EnsureChildSizes(fchild) or mismatch
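# Illustrative example with hypothetical sizes: for a DRBD8 disk whose data
# child is smaller than the parent, the method above grows the child and
# reports that a change was made, recursing only into that first child:
#
#   parent.size = 10240, parent.children[0].size = 10236
#   -> "Child disk has size 10236, parent 10240, fixing" is logged,
#      the child size becomes 10240 and the method returns True.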
2963 def Exec(self, feedback_fn):
2964 """Verify the size of cluster disks.
2967 # TODO: check child disks too
2968 # TODO: check differences in size between primary/secondary nodes
2970 for instance in self.wanted_instances:
2971 pnode = instance.primary_node
2972 if pnode not in per_node_disks:
2973 per_node_disks[pnode] = []
2974 for idx, disk in enumerate(instance.disks):
2975 per_node_disks[pnode].append((instance, idx, disk))
2978 for node, dskl in per_node_disks.items():
2979 newl = [v[2].Copy() for v in dskl]
2981 self.cfg.SetDiskID(dsk, node)
2982 result = self.rpc.call_blockdev_getsize(node, newl)
2984 self.LogWarning("Failure in blockdev_getsize call to node"
2985 " %s, ignoring", node)
2987 if len(result.payload) != len(dskl):
2988 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2989 " result.payload=%s", node, len(dskl), result.payload)
2990 self.LogWarning("Invalid result from node %s, ignoring node results",
2993 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2995 self.LogWarning("Disk %d of instance %s did not return size"
2996 " information, ignoring", idx, instance.name)
2998 if not isinstance(size, (int, long)):
2999 self.LogWarning("Disk %d of instance %s did not return valid"
3000 " size information, ignoring", idx, instance.name)
3003 if size != disk.size:
3004 self.LogInfo("Disk %d of instance %s has mismatched size,"
3005 " correcting: recorded %d, actual %d", idx,
3006 instance.name, disk.size, size)
3008 self.cfg.Update(instance, feedback_fn)
3009 changed.append((instance.name, idx, size))
3010 if self._EnsureChildSizes(disk):
3011 self.cfg.Update(instance, feedback_fn)
3012 changed.append((instance.name, idx, disk.size))
3016 class LUClusterRename(LogicalUnit):
3017 """Rename the cluster.
3020 HPATH = "cluster-rename"
3021 HTYPE = constants.HTYPE_CLUSTER
3023 def BuildHooksEnv(self):
3028 "OP_TARGET": self.cfg.GetClusterName(),
3029 "NEW_NAME": self.op.name,
3032 def BuildHooksNodes(self):
3033 """Build hooks nodes.
3036 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3038 def CheckPrereq(self):
3039 """Verify that the passed name is a valid one.
3042 hostname = netutils.GetHostname(name=self.op.name,
3043 family=self.cfg.GetPrimaryIPFamily())
3045 new_name = hostname.name
3046 self.ip = new_ip = hostname.ip
3047 old_name = self.cfg.GetClusterName()
3048 old_ip = self.cfg.GetMasterIP()
3049 if new_name == old_name and new_ip == old_ip:
3050 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3051 " cluster has changed",
3053 if new_ip != old_ip:
3054 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3055 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3056 " reachable on the network" %
3057 new_ip, errors.ECODE_NOTUNIQUE)
3059 self.op.name = new_name
3061 def Exec(self, feedback_fn):
3062 """Rename the cluster.
3065 clustername = self.op.name
3068 # shutdown the master IP
3069 master = self.cfg.GetMasterNode()
3070 result = self.rpc.call_node_stop_master(master, False)
3071 result.Raise("Could not disable the master role")
3074 cluster = self.cfg.GetClusterInfo()
3075 cluster.cluster_name = clustername
3076 cluster.master_ip = ip
3077 self.cfg.Update(cluster, feedback_fn)
3079 # update the known hosts file
3080 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3081 node_list = self.cfg.GetOnlineNodeList()
3083 node_list.remove(master)
3086 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3088 result = self.rpc.call_node_start_master(master, False, False)
3089 msg = result.fail_msg
3091 self.LogWarning("Could not re-enable the master role on"
3092 " the master, please restart manually: %s", msg)
3097 class LUClusterSetParams(LogicalUnit):
3098 """Change the parameters of the cluster.
3101 HPATH = "cluster-modify"
3102 HTYPE = constants.HTYPE_CLUSTER
3105 def CheckArguments(self):
3109 if self.op.uid_pool:
3110 uidpool.CheckUidPool(self.op.uid_pool)
3112 if self.op.add_uids:
3113 uidpool.CheckUidPool(self.op.add_uids)
3115 if self.op.remove_uids:
3116 uidpool.CheckUidPool(self.op.remove_uids)
3118 def ExpandNames(self):
3119 # FIXME: in the future maybe other cluster params won't require checking on
3120 # all nodes to be modified.
3121 self.needed_locks = {
3122 locking.LEVEL_NODE: locking.ALL_SET,
3124 self.share_locks[locking.LEVEL_NODE] = 1
3126 def BuildHooksEnv(self):
3131 "OP_TARGET": self.cfg.GetClusterName(),
3132 "NEW_VG_NAME": self.op.vg_name,
3135 def BuildHooksNodes(self):
3136 """Build hooks nodes.
3139 mn = self.cfg.GetMasterNode()
3142 def CheckPrereq(self):
3143 """Check prerequisites.
3145 This checks whether the given params don't conflict and
3146 if the given volume group is valid.
3149 if self.op.vg_name is not None and not self.op.vg_name:
3150 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3151 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3152 " instances exist", errors.ECODE_INVAL)
3154 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3155 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3156 raise errors.OpPrereqError("Cannot disable drbd helper while"
3157 " drbd-based instances exist",
3160 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3162 # if vg_name not None, checks given volume group on all nodes
3164 vglist = self.rpc.call_vg_list(node_list)
3165 for node in node_list:
3166 msg = vglist[node].fail_msg
3168 # ignoring down node
3169 self.LogWarning("Error while gathering data on node %s"
3170 " (ignoring node): %s", node, msg)
3172 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3174 constants.MIN_VG_SIZE)
3176 raise errors.OpPrereqError("Error on node '%s': %s" %
3177 (node, vgstatus), errors.ECODE_ENVIRON)
3179 if self.op.drbd_helper:
3180 # checks given drbd helper on all nodes
3181 helpers = self.rpc.call_drbd_helper(node_list)
3182 for node in node_list:
3183 ninfo = self.cfg.GetNodeInfo(node)
3185 self.LogInfo("Not checking drbd helper on offline node %s", node)
3187 msg = helpers[node].fail_msg
3189 raise errors.OpPrereqError("Error checking drbd helper on node"
3190 " '%s': %s" % (node, msg),
3191 errors.ECODE_ENVIRON)
3192 node_helper = helpers[node].payload
3193 if node_helper != self.op.drbd_helper:
3194 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3195 (node, node_helper), errors.ECODE_ENVIRON)
3197 self.cluster = cluster = self.cfg.GetClusterInfo()
3198 # validate params changes
3199 if self.op.beparams:
3200 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3201 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3203 if self.op.ndparams:
3204 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3205 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3207 # TODO: we need a more general way to handle resetting
3208 # cluster-level parameters to default values
3209 if self.new_ndparams["oob_program"] == "":
3210 self.new_ndparams["oob_program"] = \
3211 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3213 if self.op.nicparams:
3214 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3215 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3216 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3219 # check all instances for consistency
3220 for instance in self.cfg.GetAllInstancesInfo().values():
3221 for nic_idx, nic in enumerate(instance.nics):
3222 params_copy = copy.deepcopy(nic.nicparams)
3223 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3225 # check parameter syntax
3227 objects.NIC.CheckParameterSyntax(params_filled)
3228 except errors.ConfigurationError, err:
3229 nic_errors.append("Instance %s, nic/%d: %s" %
3230 (instance.name, nic_idx, err))
3232 # if we're moving instances to routed, check that they have an ip
3233 target_mode = params_filled[constants.NIC_MODE]
3234 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3235 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3236 " address" % (instance.name, nic_idx))
3238 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3239 "\n".join(nic_errors))
3241 # hypervisor list/parameters
3242 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3243 if self.op.hvparams:
3244 for hv_name, hv_dict in self.op.hvparams.items():
3245 if hv_name not in self.new_hvparams:
3246 self.new_hvparams[hv_name] = hv_dict
3248 self.new_hvparams[hv_name].update(hv_dict)
3250 # os hypervisor parameters
3251 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3253 for os_name, hvs in self.op.os_hvp.items():
3254 if os_name not in self.new_os_hvp:
3255 self.new_os_hvp[os_name] = hvs
3257 for hv_name, hv_dict in hvs.items():
3258 if hv_name not in self.new_os_hvp[os_name]:
3259 self.new_os_hvp[os_name][hv_name] = hv_dict
3261 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3264 self.new_osp = objects.FillDict(cluster.osparams, {})
3265 if self.op.osparams:
3266 for os_name, osp in self.op.osparams.items():
3267 if os_name not in self.new_osp:
3268 self.new_osp[os_name] = {}
3270 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3273 if not self.new_osp[os_name]:
3274 # we removed all parameters
3275 del self.new_osp[os_name]
3277 # check the parameter validity (remote check)
3278 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3279 os_name, self.new_osp[os_name])
3281 # changes to the hypervisor list
3282 if self.op.enabled_hypervisors is not None:
3283 self.hv_list = self.op.enabled_hypervisors
3284 for hv in self.hv_list:
3285 # if the hypervisor doesn't already exist in the cluster
3286 # hvparams, we initialize it to empty, and then (in both
3287 # cases) we make sure to fill the defaults, as we might not
3288 # have a complete defaults list if the hypervisor wasn't
3289 # enabled before
3290 if hv not in new_hvp:
3292 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3293 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3294 else:
3295 self.hv_list = cluster.enabled_hypervisors
3297 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3298 # either the enabled list has changed, or the parameters have, validate
3299 for hv_name, hv_params in self.new_hvparams.items():
3300 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3301 (self.op.enabled_hypervisors and
3302 hv_name in self.op.enabled_hypervisors)):
3303 # either this is a new hypervisor, or its parameters have changed
3304 hv_class = hypervisor.GetHypervisor(hv_name)
3305 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3306 hv_class.CheckParameterSyntax(hv_params)
3307 _CheckHVParams(self, node_list, hv_name, hv_params)
3310 # no need to check any newly-enabled hypervisors, since the
3311 # defaults have already been checked in the above code-block
3312 for os_name, os_hvp in self.new_os_hvp.items():
3313 for hv_name, hv_params in os_hvp.items():
3314 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3315 # we need to fill in the new os_hvp on top of the actual hv_p
3316 cluster_defaults = self.new_hvparams.get(hv_name, {})
3317 new_osp = objects.FillDict(cluster_defaults, hv_params)
3318 hv_class = hypervisor.GetHypervisor(hv_name)
3319 hv_class.CheckParameterSyntax(new_osp)
3320 _CheckHVParams(self, node_list, hv_name, new_osp)
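# Illustrative sketch: the parameter handling above relies on
# objects.FillDict layering the explicit values on top of the defaults (this
# is how it is used here; treat the exact helper semantics as an
# assumption). With plain dicts the same layering is:
#
#   defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
#   custom = {"root_path": "/dev/xvda1"}
#   filled = dict(defaults)
#   filled.update(custom)
#   # -> {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}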
3322 if self.op.default_iallocator:
3323 alloc_script = utils.FindFile(self.op.default_iallocator,
3324 constants.IALLOCATOR_SEARCH_PATH,
3326 if alloc_script is None:
3327 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3328 " specified" % self.op.default_iallocator,
3331 def Exec(self, feedback_fn):
3332 """Change the parameters of the cluster.
3335 if self.op.vg_name is not None:
3336 new_volume = self.op.vg_name
3339 if new_volume != self.cfg.GetVGName():
3340 self.cfg.SetVGName(new_volume)
3342 feedback_fn("Cluster LVM configuration already in desired"
3343 " state, not changing")
3344 if self.op.drbd_helper is not None:
3345 new_helper = self.op.drbd_helper
3348 if new_helper != self.cfg.GetDRBDHelper():
3349 self.cfg.SetDRBDHelper(new_helper)
3351 feedback_fn("Cluster DRBD helper already in desired state,"
3353 if self.op.hvparams:
3354 self.cluster.hvparams = self.new_hvparams
3356 self.cluster.os_hvp = self.new_os_hvp
3357 if self.op.enabled_hypervisors is not None:
3358 self.cluster.hvparams = self.new_hvparams
3359 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3360 if self.op.beparams:
3361 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3362 if self.op.nicparams:
3363 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3364 if self.op.osparams:
3365 self.cluster.osparams = self.new_osp
3366 if self.op.ndparams:
3367 self.cluster.ndparams = self.new_ndparams
3369 if self.op.candidate_pool_size is not None:
3370 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3371 # we need to update the pool size here, otherwise the save will fail
3372 _AdjustCandidatePool(self, [])
3374 if self.op.maintain_node_health is not None:
3375 self.cluster.maintain_node_health = self.op.maintain_node_health
3377 if self.op.prealloc_wipe_disks is not None:
3378 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3380 if self.op.add_uids is not None:
3381 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3383 if self.op.remove_uids is not None:
3384 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3386 if self.op.uid_pool is not None:
3387 self.cluster.uid_pool = self.op.uid_pool
3389 if self.op.default_iallocator is not None:
3390 self.cluster.default_iallocator = self.op.default_iallocator
3392 if self.op.reserved_lvs is not None:
3393 self.cluster.reserved_lvs = self.op.reserved_lvs
3395 def helper_os(aname, mods, desc):
3397 lst = getattr(self.cluster, aname)
3398 for key, val in mods:
3399 if key == constants.DDM_ADD:
3401 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3404 elif key == constants.DDM_REMOVE:
3408 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3410 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3412 if self.op.hidden_os:
3413 helper_os("hidden_os", self.op.hidden_os, "hidden")
3415 if self.op.blacklisted_os:
3416 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3418 if self.op.master_netdev:
3419 master = self.cfg.GetMasterNode()
3420 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3421 self.cluster.master_netdev)
3422 result = self.rpc.call_node_stop_master(master, False)
3423 result.Raise("Could not disable the master ip")
3424 feedback_fn("Changing master_netdev from %s to %s" %
3425 (self.cluster.master_netdev, self.op.master_netdev))
3426 self.cluster.master_netdev = self.op.master_netdev
3428 self.cfg.Update(self.cluster, feedback_fn)
3430 if self.op.master_netdev:
3431 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3432 self.op.master_netdev)
3433 result = self.rpc.call_node_start_master(master, False, False)
3435 self.LogWarning("Could not re-enable the master ip on"
3436 " the master, please restart manually: %s",
3440 def _UploadHelper(lu, nodes, fname):
3441 """Helper for uploading a file and showing warnings.
3444 if os.path.exists(fname):
3445 result = lu.rpc.call_upload_file(nodes, fname)
3446 for to_node, to_result in result.items():
3447 msg = to_result.fail_msg
3449 msg = ("Copy of file %s to node %s failed: %s" %
3450 (fname, to_node, msg))
3451 lu.proc.LogWarning(msg)
3454 def _ComputeAncillaryFiles(cluster, redist):
3455 """Compute files external to Ganeti which need to be consistent.
3457 @type redist: boolean
3458 @param redist: Whether to include files which need to be redistributed
3461 # Compute files for all nodes
3463 constants.SSH_KNOWN_HOSTS_FILE,
3464 constants.CONFD_HMAC_KEY,
3465 constants.CLUSTER_DOMAIN_SECRET_FILE,
3469 files_all.update(constants.ALL_CERT_FILES)
3470 files_all.update(ssconf.SimpleStore().GetFileList())
3472 if cluster.modify_etc_hosts:
3473 files_all.add(constants.ETC_HOSTS)
3475 # Files which must either exist on all nodes or on none
3476 files_all_opt = set([
3477 constants.RAPI_USERS_FILE,
3480 # Files which should only be on master candidates
3483 files_mc.add(constants.CLUSTER_CONF_FILE)
3485 # Files which should only be on VM-capable nodes
3486 files_vm = set(filename
3487 for hv_name in cluster.enabled_hypervisors
3488 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3490 # Filenames must be unique
3491 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3492 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3493 "Found file listed in more than one file list"
3495 return (files_all, files_all_opt, files_mc, files_vm)
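# The four returned sets partition the ancillary files by where they must
# live: files_all on every node, files_all_opt on all nodes or none,
# files_mc only on master candidates (and the master), files_vm only on
# VM-capable nodes. A hypothetical consistency check, mirroring the assert
# above:
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, False)
#   assert not (files_mc & files_vm)   # every file is in exactly one set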
3498 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3499 """Distribute additional files which are part of the cluster configuration.
3501 ConfigWriter takes care of distributing the config and ssconf files, but
3502 there are more files which should be distributed to all nodes. This function
3503 makes sure those are copied.
3505 @param lu: calling logical unit
3506 @param additional_nodes: list of nodes not in the config to distribute to
3507 @type additional_vm: boolean
3508 @param additional_vm: whether the additional nodes are vm-capable or not
3511 # Gather target nodes
3512 cluster = lu.cfg.GetClusterInfo()
3513 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3515 online_nodes = lu.cfg.GetOnlineNodeList()
3516 vm_nodes = lu.cfg.GetVmCapableNodeList()
3518 if additional_nodes is not None:
3519 online_nodes.extend(additional_nodes)
3521 vm_nodes.extend(additional_nodes)
3523 # Never distribute to master node
3524 for nodelist in [online_nodes, vm_nodes]:
3525 if master_info.name in nodelist:
3526 nodelist.remove(master_info.name)
3529 (files_all, files_all_opt, files_mc, files_vm) = \
3530 _ComputeAncillaryFiles(cluster, True)
3532 # Never re-distribute configuration file from here
3533 assert not (constants.CLUSTER_CONF_FILE in files_all or
3534 constants.CLUSTER_CONF_FILE in files_vm)
3535 assert not files_mc, "Master candidates not handled in this function"
3538 (online_nodes, files_all),
3539 (online_nodes, files_all_opt),
3540 (vm_nodes, files_vm),
3544 for (node_list, files) in filemap:
3546 _UploadHelper(lu, node_list, fname)
3549 class LUClusterRedistConf(NoHooksLU):
3550 """Force the redistribution of cluster configuration.
3552 This is a very simple LU.
3557 def ExpandNames(self):
3558 self.needed_locks = {
3559 locking.LEVEL_NODE: locking.ALL_SET,
3561 self.share_locks[locking.LEVEL_NODE] = 1
3563 def Exec(self, feedback_fn):
3564 """Redistribute the configuration.
3567 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3568 _RedistributeAncillaryFiles(self)
3571 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3572 """Sleep and poll for an instance's disk to sync.
3575 if not instance.disks or disks is not None and not disks:
3578 disks = _ExpandCheckDisks(instance, disks)
3581 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3583 node = instance.primary_node
3586 lu.cfg.SetDiskID(dev, node)
3588 # TODO: Convert to utils.Retry
3591 degr_retries = 10 # in seconds, as we sleep 1 second each time
3595 cumul_degraded = False
3596 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3597 msg = rstats.fail_msg
3599 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3602 raise errors.RemoteError("Can't contact node %s for mirror data,"
3603 " aborting." % node)
3606 rstats = rstats.payload
3608 for i, mstat in enumerate(rstats):
3610 lu.LogWarning("Can't compute data for node %s/%s",
3611 node, disks[i].iv_name)
3614 cumul_degraded = (cumul_degraded or
3615 (mstat.is_degraded and mstat.sync_percent is None))
3616 if mstat.sync_percent is not None:
3618 if mstat.estimated_time is not None:
3619 rem_time = ("%s remaining (estimated)" %
3620 utils.FormatSeconds(mstat.estimated_time))
3621 max_time = mstat.estimated_time
3623 rem_time = "no time estimate"
3624 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3625 (disks[i].iv_name, mstat.sync_percent, rem_time))
3627 # if we're done but degraded, let's do a few small retries, to
3628 # make sure we see a stable and not transient situation; therefore
3629 # we force restart of the loop
3630 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3631 logging.info("Degraded disks found, %d retries left", degr_retries)
3639 time.sleep(min(60, max_time))
3642 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3643 return not cumul_degraded
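# The "done but degraded" case is retried a bounded number of times
# (degr_retries, one second apart) before being reported, so a transient
# degradation seen right at the end of a resync does not make the wait fail.
# Hypothetical timeline:
#
#   polls 1-3: done=True, cumul_degraded=True  -> sleep 1s and poll again
#   poll  4:   done=True, cumul_degraded=False -> disks reported in sync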
3646 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3647 """Check that mirrors are not degraded.
3649 The ldisk parameter, if True, will change the test from the
3650 is_degraded attribute (which represents overall non-ok status for
3651 the device(s)) to the ldisk (representing the local storage status).
3654 lu.cfg.SetDiskID(dev, node)
3658 if on_primary or dev.AssembleOnSecondary():
3659 rstats = lu.rpc.call_blockdev_find(node, dev)
3660 msg = rstats.fail_msg
3662 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3664 elif not rstats.payload:
3665 lu.LogWarning("Can't find disk on node %s", node)
3669 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3671 result = result and not rstats.payload.is_degraded
3674 for child in dev.children:
3675 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
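# ldisk=False tests overall degradation (is_degraded) while ldisk=True only
# looks at the local storage status (LDS_OKAY); a mirrored device is healthy
# only if the device itself and all of its children pass the chosen test.
# Hypothetical call, assuming lu, instance and node are in scope:
#
#   ok = _CheckDiskConsistency(lu, instance.disks[0], node, True, ldisk=True)
#   # -> False as soon as the disk or any child fails the check on that node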
3680 class LUOobCommand(NoHooksLU):
3681 """Logical unit for OOB handling.
3685 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3687 def ExpandNames(self):
3688 """Gather locks we need.
3691 if self.op.node_names:
3692 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3693 lock_names = self.op.node_names
3695 lock_names = locking.ALL_SET
3697 self.needed_locks = {
3698 locking.LEVEL_NODE: lock_names,
3701 def CheckPrereq(self):
3702 """Check prerequisites.
3705 - the node exists in the configuration
3708 Any errors are signaled by raising errors.OpPrereqError.
3712 self.master_node = self.cfg.GetMasterNode()
3714 assert self.op.power_delay >= 0.0
3716 if self.op.node_names:
3717 if (self.op.command in self._SKIP_MASTER and
3718 self.master_node in self.op.node_names):
3719 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3720 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3722 if master_oob_handler:
3723 additional_text = ("run '%s %s %s' if you want to operate on the"
3724 " master regardless") % (master_oob_handler,
3728 additional_text = "it does not support out-of-band operations"
3730 raise errors.OpPrereqError(("Operating on the master node %s is not"
3731 " allowed for %s; %s") %
3732 (self.master_node, self.op.command,
3733 additional_text), errors.ECODE_INVAL)
3735 self.op.node_names = self.cfg.GetNodeList()
3736 if self.op.command in self._SKIP_MASTER:
3737 self.op.node_names.remove(self.master_node)
3739 if self.op.command in self._SKIP_MASTER:
3740 assert self.master_node not in self.op.node_names
3742 for node_name in self.op.node_names:
3743 node = self.cfg.GetNodeInfo(node_name)
3746 raise errors.OpPrereqError("Node %s not found" % node_name,
3749 self.nodes.append(node)
3751 if (not self.op.ignore_status and
3752 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3753 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3754 " not marked offline") % node_name,
3757 def Exec(self, feedback_fn):
3758 """Execute OOB and return result if we expect any.
3761 master_node = self.master_node
3764 for idx, node in enumerate(utils.NiceSort(self.nodes,
3765 key=lambda node: node.name)):
3766 node_entry = [(constants.RS_NORMAL, node.name)]
3767 ret.append(node_entry)
3769 oob_program = _SupportsOob(self.cfg, node)
3772 node_entry.append((constants.RS_UNAVAIL, None))
3775 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3776 self.op.command, oob_program, node.name)
3777 result = self.rpc.call_run_oob(master_node, oob_program,
3778 self.op.command, node.name,
3782 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3783 node.name, result.fail_msg)
3784 node_entry.append((constants.RS_NODATA, None))
3787 self._CheckPayload(result)
3788 except errors.OpExecError, err:
3789 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3791 node_entry.append((constants.RS_NODATA, None))
3793 if self.op.command == constants.OOB_HEALTH:
3794 # For health we should log important events
3795 for item, status in result.payload:
3796 if status in [constants.OOB_STATUS_WARNING,
3797 constants.OOB_STATUS_CRITICAL]:
3798 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3799 item, node.name, status)
3801 if self.op.command == constants.OOB_POWER_ON:
3803 elif self.op.command == constants.OOB_POWER_OFF:
3804 node.powered = False
3805 elif self.op.command == constants.OOB_POWER_STATUS:
3806 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3807 if powered != node.powered:
3808 logging.warning(("Recorded power state (%s) of node '%s' does not"
3809 " match actual power state (%s)"), node.powered,
3812       # For configuration-changing commands we should update the node
3813 if self.op.command in (constants.OOB_POWER_ON,
3814 constants.OOB_POWER_OFF):
3815 self.cfg.Update(node, feedback_fn)
3817 node_entry.append((constants.RS_NORMAL, result.payload))
3819 if (self.op.command == constants.OOB_POWER_ON and
3820 idx < len(self.nodes) - 1):
3821 time.sleep(self.op.power_delay)
3825 def _CheckPayload(self, result):
3826 """Checks if the payload is valid.
3828 @param result: RPC result
3829 @raises errors.OpExecError: If payload is not valid
3833 if self.op.command == constants.OOB_HEALTH:
3834 if not isinstance(result.payload, list):
3835 errs.append("command 'health' is expected to return a list but got %s" %
3836 type(result.payload))
3838 for item, status in result.payload:
3839 if status not in constants.OOB_STATUSES:
3840 errs.append("health item '%s' has invalid status '%s'" %
3843 if self.op.command == constants.OOB_POWER_STATUS:
3844 if not isinstance(result.payload, dict):
3845 errs.append("power-status is expected to return a dict but got %s" %
3846 type(result.payload))
3848 if self.op.command in [
3849 constants.OOB_POWER_ON,
3850 constants.OOB_POWER_OFF,
3851 constants.OOB_POWER_CYCLE,
3853 if result.payload is not None:
3854 errs.append("%s is expected to not return payload but got '%s'" %
3855 (self.op.command, result.payload))
3858 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3859 utils.CommaJoin(errs))
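# Summary of the payload shapes checked above (illustrative only, not part
# of the original code):
#
#   OOB_HEALTH        -> list of (item, status) pairs, each status being
#                        one of constants.OOB_STATUSES
#   OOB_POWER_STATUS  -> dict containing the key
#                        constants.OOB_POWER_STATUS_POWERED (typically a
#                        boolean)
#   OOB_POWER_ON,
#   OOB_POWER_OFF,
#   OOB_POWER_CYCLE   -> no payload (None)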
3861 class _OsQuery(_QueryBase):
3862 FIELDS = query.OS_FIELDS
3864 def ExpandNames(self, lu):
3865 # Lock all nodes in shared mode
3866 # Temporary removal of locks, should be reverted later
3867 # TODO: reintroduce locks when they are lighter-weight
3868 lu.needed_locks = {}
3869 #self.share_locks[locking.LEVEL_NODE] = 1
3870 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3872 # The following variables interact with _QueryBase._GetNames
3874 self.wanted = self.names
3876 self.wanted = locking.ALL_SET
3878 self.do_locking = self.use_locking
3880 def DeclareLocks(self, lu, level):
3884 def _DiagnoseByOS(rlist):
3885 """Remaps a per-node return list into an a per-os per-node dictionary
3887 @param rlist: a map with node names as keys and OS objects as values
3890 @return: a dictionary with osnames as keys and as value another
3891 map, with nodes as keys and tuples of (path, status, diagnose,
3892 variants, parameters, api_versions) as values, eg::
3894 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3895 (/srv/..., False, "invalid api")],
3896 "node2": [(/srv/..., True, "", [], [])]}
3901 # we build here the list of nodes that didn't fail the RPC (at RPC
3902 # level), so that nodes with a non-responding node daemon don't
3903 # make all OSes invalid
3904 good_nodes = [node_name for node_name in rlist
3905 if not rlist[node_name].fail_msg]
3906 for node_name, nr in rlist.items():
3907 if nr.fail_msg or not nr.payload:
3909 for (name, path, status, diagnose, variants,
3910 params, api_versions) in nr.payload:
3911 if name not in all_os:
3912 # build a list of nodes for this os containing empty lists
3913 # for each node in node_list
3915 for nname in good_nodes:
3916 all_os[name][nname] = []
3917 # convert params from [name, help] to (name, help)
3918 params = [tuple(v) for v in params]
3919 all_os[name][node_name].append((path, status, diagnose,
3920 variants, params, api_versions))
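# Note (illustrative, not part of the original code): every node that
# answered the RPC gets an entry for every OS name, so an OS that is
# missing on one of the good nodes shows up there with an empty list, e.g.
#
#   # {"debian-etch": {"node1": [(path, status, diagnose, variants,
#   #                             params, api_versions)],
#   #                  "node2": []}}
#
# which is what the per-OS validity check in _GetQueryData relies on.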
3923 def _GetQueryData(self, lu):
3924 """Computes the list of nodes and their attributes.
3927 # Locking is not used
3928 assert not (compat.any(lu.glm.is_owned(level)
3929 for level in locking.LEVELS
3930 if level != locking.LEVEL_CLUSTER) or
3931 self.do_locking or self.use_locking)
3933 valid_nodes = [node.name
3934 for node in lu.cfg.GetAllNodesInfo().values()
3935 if not node.offline and node.vm_capable]
3936 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3937 cluster = lu.cfg.GetClusterInfo()
3941 for (os_name, os_data) in pol.items():
3942 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3943 hidden=(os_name in cluster.hidden_os),
3944 blacklisted=(os_name in cluster.blacklisted_os))
3948 api_versions = set()
3950 for idx, osl in enumerate(os_data.values()):
3951 info.valid = bool(info.valid and osl and osl[0][1])
3955 (node_variants, node_params, node_api) = osl[0][3:6]
3958 variants.update(node_variants)
3959 parameters.update(node_params)
3960 api_versions.update(node_api)
3962 # Filter out inconsistent values
3963 variants.intersection_update(node_variants)
3964 parameters.intersection_update(node_params)
3965 api_versions.intersection_update(node_api)
3967 info.variants = list(variants)
3968 info.parameters = list(parameters)
3969 info.api_versions = list(api_versions)
3971 data[os_name] = info
3973 # Prepare data in requested order
3974 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3978 class LUOsDiagnose(NoHooksLU):
3979 """Logical unit for OS diagnose/query.
3985 def _BuildFilter(fields, names):
3986 """Builds a filter for querying OSes.
3989 name_filter = qlang.MakeSimpleFilter("name", names)
3991 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3992 # respective field is not requested
3993 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3994 for fname in ["hidden", "blacklisted"]
3995 if fname not in fields]
3996 if "valid" not in fields:
3997 status_filter.append([qlang.OP_TRUE, "valid"])
4000 status_filter.insert(0, qlang.OP_AND)
4002 status_filter = None
4004 if name_filter and status_filter:
4005 return [qlang.OP_AND, name_filter, status_filter]
4009 return status_filter
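# Illustrative sketch (not part of the original code): with no names and
# none of "hidden"/"blacklisted"/"valid" in the requested fields, the
# filter built above is just the status part, roughly
#
#   # [qlang.OP_AND,
#   #  [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#   #  [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#   #  [qlang.OP_TRUE, "valid"]]
#
# and when names are given it is combined with the name filter inside an
# outer [qlang.OP_AND, name_filter, status_filter].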
4011 def CheckArguments(self):
4012 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4013 self.op.output_fields, False)
4015 def ExpandNames(self):
4016 self.oq.ExpandNames(self)
4018 def Exec(self, feedback_fn):
4019 return self.oq.OldStyleQuery(self)
4022 class LUNodeRemove(LogicalUnit):
4023 """Logical unit for removing a node.
4026 HPATH = "node-remove"
4027 HTYPE = constants.HTYPE_NODE
4029 def BuildHooksEnv(self):
4032 This doesn't run on the target node in the pre phase as a failed
4033 node would then be impossible to remove.
4037 "OP_TARGET": self.op.node_name,
4038 "NODE_NAME": self.op.node_name,
4041 def BuildHooksNodes(self):
4042 """Build hooks nodes.
4045 all_nodes = self.cfg.GetNodeList()
4047 all_nodes.remove(self.op.node_name)
4049 logging.warning("Node '%s', which is about to be removed, was not found"
4050 " in the list of all nodes", self.op.node_name)
4051 return (all_nodes, all_nodes)
4053 def CheckPrereq(self):
4054 """Check prerequisites.
4057 - the node exists in the configuration
4058 - it does not have primary or secondary instances
4059 - it's not the master
4061 Any errors are signaled by raising errors.OpPrereqError.
4064 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4065 node = self.cfg.GetNodeInfo(self.op.node_name)
4066 assert node is not None
4068 instance_list = self.cfg.GetInstanceList()
4070 masternode = self.cfg.GetMasterNode()
4071 if node.name == masternode:
4072 raise errors.OpPrereqError("Node is the master node, failover to another"
4073 " node is required", errors.ECODE_INVAL)
4075 for instance_name in instance_list:
4076 instance = self.cfg.GetInstanceInfo(instance_name)
4077 if node.name in instance.all_nodes:
4078 raise errors.OpPrereqError("Instance %s is still running on the node,"
4079 " please remove first" % instance_name,
4081 self.op.node_name = node.name
4084 def Exec(self, feedback_fn):
4085 """Removes the node from the cluster.
4089 logging.info("Stopping the node daemon and removing configs from node %s",
4092 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4094 # Promote nodes to master candidate as needed
4095 _AdjustCandidatePool(self, exceptions=[node.name])
4096 self.context.RemoveNode(node.name)
4098 # Run post hooks on the node before it's removed
4099 _RunPostHook(self, node.name)
4101 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4102 msg = result.fail_msg
4104 self.LogWarning("Errors encountered on the remote node while leaving"
4105 " the cluster: %s", msg)
4107 # Remove node from our /etc/hosts
4108 if self.cfg.GetClusterInfo().modify_etc_hosts:
4109 master_node = self.cfg.GetMasterNode()
4110 result = self.rpc.call_etc_hosts_modify(master_node,
4111 constants.ETC_HOSTS_REMOVE,
4113 result.Raise("Can't update hosts file with new host data")
4114 _RedistributeAncillaryFiles(self)
4117 class _NodeQuery(_QueryBase):
4118 FIELDS = query.NODE_FIELDS
4120 def ExpandNames(self, lu):
4121 lu.needed_locks = {}
4122 lu.share_locks[locking.LEVEL_NODE] = 1
4125 self.wanted = _GetWantedNodes(lu, self.names)
4127 self.wanted = locking.ALL_SET
4129 self.do_locking = (self.use_locking and
4130 query.NQ_LIVE in self.requested_data)
4133 # if we don't request only static fields, we need to lock the nodes
4134 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4136 def DeclareLocks(self, lu, level):
4139 def _GetQueryData(self, lu):
4140 """Computes the list of nodes and their attributes.
4143 all_info = lu.cfg.GetAllNodesInfo()
4145 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4147 # Gather data as requested
4148 if query.NQ_LIVE in self.requested_data:
4149 # filter out non-vm_capable nodes
4150 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4152 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4153 lu.cfg.GetHypervisorType())
4154 live_data = dict((name, nresult.payload)
4155 for (name, nresult) in node_data.items()
4156 if not nresult.fail_msg and nresult.payload)
4160 if query.NQ_INST in self.requested_data:
4161 node_to_primary = dict([(name, set()) for name in nodenames])
4162 node_to_secondary = dict([(name, set()) for name in nodenames])
4164 inst_data = lu.cfg.GetAllInstancesInfo()
4166 for inst in inst_data.values():
4167 if inst.primary_node in node_to_primary:
4168 node_to_primary[inst.primary_node].add(inst.name)
4169 for secnode in inst.secondary_nodes:
4170 if secnode in node_to_secondary:
4171 node_to_secondary[secnode].add(inst.name)
4173 node_to_primary = None
4174 node_to_secondary = None
4176 if query.NQ_OOB in self.requested_data:
4177 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4178 for name, node in all_info.iteritems())
4182 if query.NQ_GROUP in self.requested_data:
4183 groups = lu.cfg.GetAllNodeGroupsInfo()
4187 return query.NodeQueryData([all_info[name] for name in nodenames],
4188 live_data, lu.cfg.GetMasterNode(),
4189 node_to_primary, node_to_secondary, groups,
4190 oob_support, lu.cfg.GetClusterInfo())
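# Illustrative sketch (hypothetical values, not part of the original code):
# each live_data entry is the raw node_info payload for one vm_capable
# node that answered, e.g.
#
#   # live_data["node1.example.com"] = {"memory_free": 8192,
#   #                                   "vg_free": 51200, ...}
#
# nodes that failed the RPC or returned no payload are simply absent
# from live_data.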
4193 class LUNodeQuery(NoHooksLU):
4194 """Logical unit for querying nodes.
4197 # pylint: disable-msg=W0142
4200 def CheckArguments(self):
4201 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4202 self.op.output_fields, self.op.use_locking)
4204 def ExpandNames(self):
4205 self.nq.ExpandNames(self)
4207 def Exec(self, feedback_fn):
4208 return self.nq.OldStyleQuery(self)
4211 class LUNodeQueryvols(NoHooksLU):
4212 """Logical unit for getting volumes on node(s).
4216 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4217 _FIELDS_STATIC = utils.FieldSet("node")
4219 def CheckArguments(self):
4220 _CheckOutputFields(static=self._FIELDS_STATIC,
4221 dynamic=self._FIELDS_DYNAMIC,
4222 selected=self.op.output_fields)
4224 def ExpandNames(self):
4225 self.needed_locks = {}
4226 self.share_locks[locking.LEVEL_NODE] = 1
4227 if not self.op.nodes:
4228 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4230 self.needed_locks[locking.LEVEL_NODE] = \
4231 _GetWantedNodes(self, self.op.nodes)
4233 def Exec(self, feedback_fn):
4234 """Computes the list of nodes and their attributes.
4237 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4238 volumes = self.rpc.call_node_volumes(nodenames)
4240 ilist = [self.cfg.GetInstanceInfo(iname) for iname
4241 in self.cfg.GetInstanceList()]
4243 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
4246 for node in nodenames:
4247 nresult = volumes[node]
4250 msg = nresult.fail_msg
4252 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4255 node_vols = nresult.payload[:]
4256 node_vols.sort(key=lambda vol: vol['dev'])
4258 for vol in node_vols:
4260 for field in self.op.output_fields:
4263 elif field == "phys":
4267 elif field == "name":
4269 elif field == "size":
4270 val = int(float(vol['size']))
4271 elif field == "instance":
4273 if node not in lv_by_node[inst]:
4275 if vol['name'] in lv_by_node[inst][node]:
4281 raise errors.ParameterError(field)
4282 node_output.append(str(val))
4284 output.append(node_output)
4289 class LUNodeQueryStorage(NoHooksLU):
4290 """Logical unit for getting information on storage units on node(s).
4293 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4296 def CheckArguments(self):
4297 _CheckOutputFields(static=self._FIELDS_STATIC,
4298 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4299 selected=self.op.output_fields)
4301 def ExpandNames(self):
4302 self.needed_locks = {}
4303 self.share_locks[locking.LEVEL_NODE] = 1
4306 self.needed_locks[locking.LEVEL_NODE] = \
4307 _GetWantedNodes(self, self.op.nodes)
4309 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4311 def Exec(self, feedback_fn):
4312 """Computes the list of nodes and their attributes.
4315 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4317 # Always get name to sort by
4318 if constants.SF_NAME in self.op.output_fields:
4319 fields = self.op.output_fields[:]
4321 fields = [constants.SF_NAME] + self.op.output_fields
4323 # Never ask for node or type as it's only known to the LU
4324 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4325 while extra in fields:
4326 fields.remove(extra)
4328 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4329 name_idx = field_idx[constants.SF_NAME]
4331 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4332 data = self.rpc.call_storage_list(self.nodes,
4333 self.op.storage_type, st_args,
4334 self.op.name, fields)
4338 for node in utils.NiceSort(self.nodes):
4339 nresult = data[node]
4343 msg = nresult.fail_msg
4345 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4348 rows = dict([(row[name_idx], row) for row in nresult.payload])
4350 for name in utils.NiceSort(rows.keys()):
4355 for field in self.op.output_fields:
4356 if field == constants.SF_NODE:
4358 elif field == constants.SF_TYPE:
4359 val = self.op.storage_type
4360 elif field in field_idx:
4361 val = row[field_idx[field]]
4363 raise errors.ParameterError(field)
4372 class _InstanceQuery(_QueryBase):
4373 FIELDS = query.INSTANCE_FIELDS
4375 def ExpandNames(self, lu):
4376 lu.needed_locks = {}
4377 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4378 lu.share_locks[locking.LEVEL_NODE] = 1
4381 self.wanted = _GetWantedInstances(lu, self.names)
4383 self.wanted = locking.ALL_SET
4385 self.do_locking = (self.use_locking and
4386 query.IQ_LIVE in self.requested_data)
4388 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4389 lu.needed_locks[locking.LEVEL_NODE] = []
4390 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4392 def DeclareLocks(self, lu, level):
4393 if level == locking.LEVEL_NODE and self.do_locking:
4394 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4396 def _GetQueryData(self, lu):
4397 """Computes the list of instances and their attributes.
4400 cluster = lu.cfg.GetClusterInfo()
4401 all_info = lu.cfg.GetAllInstancesInfo()
4403 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4405 instance_list = [all_info[name] for name in instance_names]
4406 nodes = frozenset(itertools.chain(*(inst.all_nodes
4407 for inst in instance_list)))
4408 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4411 wrongnode_inst = set()
4413 # Gather data as requested
4414 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4416 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4418 result = node_data[name]
4420 # offline nodes will be in both lists
4421 assert result.fail_msg
4422 offline_nodes.append(name)
4424 bad_nodes.append(name)
4425 elif result.payload:
4426 for inst in result.payload:
4427 if inst in all_info:
4428 if all_info[inst].primary_node == name:
4429 live_data.update(result.payload)
4431 wrongnode_inst.add(inst)
4433 # orphan instance; we don't list it here as we don't
4434 # handle this case yet in the output of instance listing
4435 logging.warning("Orphan instance '%s' found on node %s",
4437 # else no instance is alive
4441 if query.IQ_DISKUSAGE in self.requested_data:
4442 disk_usage = dict((inst.name,
4443 _ComputeDiskSize(inst.disk_template,
4444 [{constants.IDISK_SIZE: disk.size}
4445 for disk in inst.disks]))
4446 for inst in instance_list)
4450 if query.IQ_CONSOLE in self.requested_data:
4452 for inst in instance_list:
4453 if inst.name in live_data:
4454 # Instance is running
4455 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4457 consinfo[inst.name] = None
4458 assert set(consinfo.keys()) == set(instance_names)
4462 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4463 disk_usage, offline_nodes, bad_nodes,
4464 live_data, wrongnode_inst, consinfo)
4467 class LUQuery(NoHooksLU):
4468 """Query for resources/items of a certain kind.
4471 # pylint: disable-msg=W0142
4474 def CheckArguments(self):
4475 qcls = _GetQueryImplementation(self.op.what)
4477 self.impl = qcls(self.op.filter, self.op.fields, False)
4479 def ExpandNames(self):
4480 self.impl.ExpandNames(self)
4482 def DeclareLocks(self, level):
4483 self.impl.DeclareLocks(self, level)
4485 def Exec(self, feedback_fn):
4486 return self.impl.NewStyleQuery(self)
4489 class LUQueryFields(NoHooksLU):
4490 """Query for resources/items of a certain kind.
4493 # pylint: disable-msg=W0142
4496 def CheckArguments(self):
4497 self.qcls = _GetQueryImplementation(self.op.what)
4499 def ExpandNames(self):
4500 self.needed_locks = {}
4502 def Exec(self, feedback_fn):
4503 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4506 class LUNodeModifyStorage(NoHooksLU):
4507 """Logical unit for modifying a storage volume on a node.
4512 def CheckArguments(self):
4513 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4515 storage_type = self.op.storage_type
4518 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4520 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4521 " modified" % storage_type,
4524 diff = set(self.op.changes.keys()) - modifiable
4526 raise errors.OpPrereqError("The following fields can not be modified for"
4527 " storage units of type '%s': %r" %
4528 (storage_type, list(diff)),
4531 def ExpandNames(self):
4532 self.needed_locks = {
4533 locking.LEVEL_NODE: self.op.node_name,
4536 def Exec(self, feedback_fn):
4537 """Computes the list of nodes and their attributes.
4540 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4541 result = self.rpc.call_storage_modify(self.op.node_name,
4542 self.op.storage_type, st_args,
4543 self.op.name, self.op.changes)
4544 result.Raise("Failed to modify storage unit '%s' on %s" %
4545 (self.op.name, self.op.node_name))
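# Illustrative usage sketch (hypothetical opcode values, not part of the
# original code): for an LVM physical volume the only field that is
# typically modifiable is its allocatable flag, so a request would look
# roughly like
#
#   # op.storage_type = constants.ST_LVM_PV
#   # op.changes = {constants.SF_ALLOCATABLE: False}
#
# anything outside constants.MODIFIABLE_STORAGE_FIELDS[storage_type] is
# rejected in CheckArguments above.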
4548 class LUNodeAdd(LogicalUnit):
4549 """Logical unit for adding node to the cluster.
4553 HTYPE = constants.HTYPE_NODE
4554 _NFLAGS = ["master_capable", "vm_capable"]
4556 def CheckArguments(self):
4557 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4558 # validate/normalize the node name
4559 self.hostname = netutils.GetHostname(name=self.op.node_name,
4560 family=self.primary_ip_family)
4561 self.op.node_name = self.hostname.name
4563 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4564 raise errors.OpPrereqError("Cannot readd the master node",
4567 if self.op.readd and self.op.group:
4568 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4569 " being readded", errors.ECODE_INVAL)
4571 def BuildHooksEnv(self):
4574 This will run on all nodes before, and on all nodes + the new node after.
4578 "OP_TARGET": self.op.node_name,
4579 "NODE_NAME": self.op.node_name,
4580 "NODE_PIP": self.op.primary_ip,
4581 "NODE_SIP": self.op.secondary_ip,
4582 "MASTER_CAPABLE": str(self.op.master_capable),
4583 "VM_CAPABLE": str(self.op.vm_capable),
4586 def BuildHooksNodes(self):
4587 """Build hooks nodes.
4590 # Exclude added node
4591 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4592 post_nodes = pre_nodes + [self.op.node_name, ]
4594 return (pre_nodes, post_nodes)
4596 def CheckPrereq(self):
4597 """Check prerequisites.
4600 - the new node is not already in the config
4602 - its parameters (single/dual homed) matches the cluster
4604 Any errors are signaled by raising errors.OpPrereqError.
4608 hostname = self.hostname
4609 node = hostname.name
4610 primary_ip = self.op.primary_ip = hostname.ip
4611 if self.op.secondary_ip is None:
4612 if self.primary_ip_family == netutils.IP6Address.family:
4613 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4614 " IPv4 address must be given as secondary",
4616 self.op.secondary_ip = primary_ip
4618 secondary_ip = self.op.secondary_ip
4619 if not netutils.IP4Address.IsValid(secondary_ip):
4620 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4621 " address" % secondary_ip, errors.ECODE_INVAL)
4623 node_list = cfg.GetNodeList()
4624 if not self.op.readd and node in node_list:
4625 raise errors.OpPrereqError("Node %s is already in the configuration" %
4626 node, errors.ECODE_EXISTS)
4627 elif self.op.readd and node not in node_list:
4628 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4631 self.changed_primary_ip = False
4633 for existing_node_name in node_list:
4634 existing_node = cfg.GetNodeInfo(existing_node_name)
4636 if self.op.readd and node == existing_node_name:
4637 if existing_node.secondary_ip != secondary_ip:
4638 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4639 " address configuration as before",
4641 if existing_node.primary_ip != primary_ip:
4642 self.changed_primary_ip = True
4646 if (existing_node.primary_ip == primary_ip or
4647 existing_node.secondary_ip == primary_ip or
4648 existing_node.primary_ip == secondary_ip or
4649 existing_node.secondary_ip == secondary_ip):
4650 raise errors.OpPrereqError("New node ip address(es) conflict with"
4651 " existing node %s" % existing_node.name,
4652 errors.ECODE_NOTUNIQUE)
4654 # After this 'if' block, None is no longer a valid value for the
4655 # _capable op attributes
4657 old_node = self.cfg.GetNodeInfo(node)
4658 assert old_node is not None, "Can't retrieve locked node %s" % node
4659 for attr in self._NFLAGS:
4660 if getattr(self.op, attr) is None:
4661 setattr(self.op, attr, getattr(old_node, attr))
4663 for attr in self._NFLAGS:
4664 if getattr(self.op, attr) is None:
4665 setattr(self.op, attr, True)
4667 if self.op.readd and not self.op.vm_capable:
4668 pri, sec = cfg.GetNodeInstances(node)
4670 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4671 " flag set to false, but it already holds"
4672 " instances" % node,
4675 # check that the type of the node (single versus dual homed) is the
4676 # same as for the master
4677 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4678 master_singlehomed = myself.secondary_ip == myself.primary_ip
4679 newbie_singlehomed = secondary_ip == primary_ip
4680 if master_singlehomed != newbie_singlehomed:
4681 if master_singlehomed:
4682 raise errors.OpPrereqError("The master has no secondary ip but the"
4683 " new node has one",
4686 raise errors.OpPrereqError("The master has a secondary ip but the"
4687 " new node doesn't have one",
4690 # checks reachability
4691 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4692 raise errors.OpPrereqError("Node not reachable by ping",
4693 errors.ECODE_ENVIRON)
4695 if not newbie_singlehomed:
4696 # check reachability from my secondary ip to newbie's secondary ip
4697 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4698 source=myself.secondary_ip):
4699 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4700 " based ping to node daemon port",
4701 errors.ECODE_ENVIRON)
4708 if self.op.master_capable:
4709 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4711 self.master_candidate = False
4714 self.new_node = old_node
4716 node_group = cfg.LookupNodeGroup(self.op.group)
4717 self.new_node = objects.Node(name=node,
4718 primary_ip=primary_ip,
4719 secondary_ip=secondary_ip,
4720 master_candidate=self.master_candidate,
4721 offline=False, drained=False,
4724 if self.op.ndparams:
4725 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4727 def Exec(self, feedback_fn):
4728 """Adds the new node to the cluster.
4731 new_node = self.new_node
4732 node = new_node.name
4734     # We are adding a new node, so we assume it's powered
4735 new_node.powered = True
4737 # for re-adds, reset the offline/drained/master-candidate flags;
4738 # we need to reset here, otherwise offline would prevent RPC calls
4739 # later in the procedure; this also means that if the re-add
4740 # fails, we are left with a non-offlined, broken node
4742 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4743 self.LogInfo("Readding a node, the offline/drained flags were reset")
4744 # if we demote the node, we do cleanup later in the procedure
4745 new_node.master_candidate = self.master_candidate
4746 if self.changed_primary_ip:
4747 new_node.primary_ip = self.op.primary_ip
4749 # copy the master/vm_capable flags
4750 for attr in self._NFLAGS:
4751 setattr(new_node, attr, getattr(self.op, attr))
4753 # notify the user about any possible mc promotion
4754 if new_node.master_candidate:
4755 self.LogInfo("Node will be a master candidate")
4757 if self.op.ndparams:
4758 new_node.ndparams = self.op.ndparams
4760 new_node.ndparams = {}
4762 # check connectivity
4763 result = self.rpc.call_version([node])[node]
4764 result.Raise("Can't get version information from node %s" % node)
4765 if constants.PROTOCOL_VERSION == result.payload:
4766 logging.info("Communication to node %s fine, sw version %s match",
4767 node, result.payload)
4769 raise errors.OpExecError("Version mismatch master version %s,"
4770 " node version %s" %
4771 (constants.PROTOCOL_VERSION, result.payload))
4773 # Add node to our /etc/hosts, and add key to known_hosts
4774 if self.cfg.GetClusterInfo().modify_etc_hosts:
4775 master_node = self.cfg.GetMasterNode()
4776 result = self.rpc.call_etc_hosts_modify(master_node,
4777 constants.ETC_HOSTS_ADD,
4780 result.Raise("Can't update hosts file with new host data")
4782 if new_node.secondary_ip != new_node.primary_ip:
4783 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4786 node_verify_list = [self.cfg.GetMasterNode()]
4787 node_verify_param = {
4788 constants.NV_NODELIST: [node],
4789 # TODO: do a node-net-test as well?
4792 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4793 self.cfg.GetClusterName())
4794 for verifier in node_verify_list:
4795 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4796 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4798 for failed in nl_payload:
4799 feedback_fn("ssh/hostname verification failed"
4800 " (checking from %s): %s" %
4801 (verifier, nl_payload[failed]))
4802 raise errors.OpExecError("ssh/hostname verification failed")
4805 _RedistributeAncillaryFiles(self)
4806 self.context.ReaddNode(new_node)
4807 # make sure we redistribute the config
4808 self.cfg.Update(new_node, feedback_fn)
4809 # and make sure the new node will not have old files around
4810 if not new_node.master_candidate:
4811 result = self.rpc.call_node_demote_from_mc(new_node.name)
4812 msg = result.fail_msg
4814 self.LogWarning("Node failed to demote itself from master"
4815 " candidate status: %s" % msg)
4817 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4818 additional_vm=self.op.vm_capable)
4819 self.context.AddNode(new_node, self.proc.GetECId())
4822 class LUNodeSetParams(LogicalUnit):
4823 """Modifies the parameters of a node.
4825 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4826 to the node role (as _ROLE_*)
4827 @cvar _R2F: a dictionary from node role to tuples of flags
4828 @cvar _FLAGS: a list of attribute names corresponding to the flags
4831 HPATH = "node-modify"
4832 HTYPE = constants.HTYPE_NODE
4834 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4836 (True, False, False): _ROLE_CANDIDATE,
4837 (False, True, False): _ROLE_DRAINED,
4838 (False, False, True): _ROLE_OFFLINE,
4839 (False, False, False): _ROLE_REGULAR,
4841 _R2F = dict((v, k) for k, v in _F2R.items())
4842 _FLAGS = ["master_candidate", "drained", "offline"]
4844 def CheckArguments(self):
4845 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4846 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4847 self.op.master_capable, self.op.vm_capable,
4848 self.op.secondary_ip, self.op.ndparams]
4849 if all_mods.count(None) == len(all_mods):
4850 raise errors.OpPrereqError("Please pass at least one modification",
4852 if all_mods.count(True) > 1:
4853 raise errors.OpPrereqError("Can't set the node into more than one"
4854 " state at the same time",
4857 # Boolean value that tells us whether we might be demoting from MC
4858 self.might_demote = (self.op.master_candidate == False or
4859 self.op.offline == True or
4860 self.op.drained == True or
4861 self.op.master_capable == False)
4863 if self.op.secondary_ip:
4864 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4865 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4866 " address" % self.op.secondary_ip,
4869 self.lock_all = self.op.auto_promote and self.might_demote
4870 self.lock_instances = self.op.secondary_ip is not None
4872 def ExpandNames(self):
4874 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4876 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4878 if self.lock_instances:
4879 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4881 def DeclareLocks(self, level):
4882 # If we have locked all instances, before waiting to lock nodes, release
4883 # all the ones living on nodes unrelated to the current operation.
4884 if level == locking.LEVEL_NODE and self.lock_instances:
4885 self.affected_instances = []
4886 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4889 # Build list of instances to release
4890 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4891 instance = self.context.cfg.GetInstanceInfo(instance_name)
4892 if (instance.disk_template in constants.DTS_INT_MIRROR and
4893 self.op.node_name in instance.all_nodes):
4894 instances_keep.append(instance_name)
4895 self.affected_instances.append(instance)
4897 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4899 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4900 set(instances_keep))
4902 def BuildHooksEnv(self):
4905 This runs on the master node.
4909 "OP_TARGET": self.op.node_name,
4910 "MASTER_CANDIDATE": str(self.op.master_candidate),
4911 "OFFLINE": str(self.op.offline),
4912 "DRAINED": str(self.op.drained),
4913 "MASTER_CAPABLE": str(self.op.master_capable),
4914 "VM_CAPABLE": str(self.op.vm_capable),
4917 def BuildHooksNodes(self):
4918 """Build hooks nodes.
4921 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4924 def CheckPrereq(self):
4925 """Check prerequisites.
4927 This only checks the instance list against the existing names.
4930 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4932 if (self.op.master_candidate is not None or
4933 self.op.drained is not None or
4934 self.op.offline is not None):
4935 # we can't change the master's node flags
4936 if self.op.node_name == self.cfg.GetMasterNode():
4937 raise errors.OpPrereqError("The master role can be changed"
4938 " only via master-failover",
4941 if self.op.master_candidate and not node.master_capable:
4942 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4943 " it a master candidate" % node.name,
4946 if self.op.vm_capable == False:
4947 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4949 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4950 " the vm_capable flag" % node.name,
4953 if node.master_candidate and self.might_demote and not self.lock_all:
4954 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4955 # check if after removing the current node, we're missing master
4957 (mc_remaining, mc_should, _) = \
4958 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4959 if mc_remaining < mc_should:
4960 raise errors.OpPrereqError("Not enough master candidates, please"
4961 " pass auto promote option to allow"
4962 " promotion", errors.ECODE_STATE)
4964 self.old_flags = old_flags = (node.master_candidate,
4965 node.drained, node.offline)
4966 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4967 self.old_role = old_role = self._F2R[old_flags]
4969 # Check for ineffective changes
4970 for attr in self._FLAGS:
4971 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4972 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4973 setattr(self.op, attr, None)
4975 # Past this point, any flag change to False means a transition
4976 # away from the respective state, as only real changes are kept
4978 # TODO: We might query the real power state if it supports OOB
4979 if _SupportsOob(self.cfg, node):
4980 if self.op.offline is False and not (node.powered or
4981 self.op.powered == True):
4982 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4983 " offline status can be reset") %
4985 elif self.op.powered is not None:
4986 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4987 " as it does not support out-of-band"
4988 " handling") % self.op.node_name)
4990     # If we're being deofflined/drained, we'll MC ourselves if needed
4991 if (self.op.drained == False or self.op.offline == False or
4992 (self.op.master_capable and not node.master_capable)):
4993 if _DecideSelfPromotion(self):
4994 self.op.master_candidate = True
4995 self.LogInfo("Auto-promoting node to master candidate")
4997 # If we're no longer master capable, we'll demote ourselves from MC
4998 if self.op.master_capable == False and node.master_candidate:
4999 self.LogInfo("Demoting from master candidate")
5000 self.op.master_candidate = False
5003 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5004 if self.op.master_candidate:
5005 new_role = self._ROLE_CANDIDATE
5006 elif self.op.drained:
5007 new_role = self._ROLE_DRAINED
5008 elif self.op.offline:
5009 new_role = self._ROLE_OFFLINE
5010 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5011 # False is still in new flags, which means we're un-setting (the
5013 new_role = self._ROLE_REGULAR
5014 else: # no new flags, nothing, keep old role
5017 self.new_role = new_role
5019 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5020 # Trying to transition out of offline status
5021 result = self.rpc.call_version([node.name])[node.name]
5023 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5024 " to report its version: %s" %
5025 (node.name, result.fail_msg),
5028 self.LogWarning("Transitioning node from offline to online state"
5029 " without using re-add. Please make sure the node"
5032 if self.op.secondary_ip:
5033 # Ok even without locking, because this can't be changed by any LU
5034 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5035 master_singlehomed = master.secondary_ip == master.primary_ip
5036 if master_singlehomed and self.op.secondary_ip:
5037 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5038 " homed cluster", errors.ECODE_INVAL)
5041 if self.affected_instances:
5042 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5043 " node has instances (%s) configured"
5044 " to use it" % self.affected_instances)
5046 # On online nodes, check that no instances are running, and that
5047 # the node has the new ip and we can reach it.
5048 for instance in self.affected_instances:
5049 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5051 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5052 if master.name != node.name:
5053 # check reachability from master secondary ip to new secondary ip
5054 if not netutils.TcpPing(self.op.secondary_ip,
5055 constants.DEFAULT_NODED_PORT,
5056 source=master.secondary_ip):
5057 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5058 " based ping to node daemon port",
5059 errors.ECODE_ENVIRON)
5061 if self.op.ndparams:
5062 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5063 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5064 self.new_ndparams = new_ndparams
5066 def Exec(self, feedback_fn):
5071 old_role = self.old_role
5072 new_role = self.new_role
5076 if self.op.ndparams:
5077 node.ndparams = self.new_ndparams
5079 if self.op.powered is not None:
5080 node.powered = self.op.powered
5082 for attr in ["master_capable", "vm_capable"]:
5083 val = getattr(self.op, attr)
5085 setattr(node, attr, val)
5086 result.append((attr, str(val)))
5088 if new_role != old_role:
5089 # Tell the node to demote itself, if no longer MC and not offline
5090 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5091 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5093 self.LogWarning("Node failed to demote itself: %s", msg)
5095 new_flags = self._R2F[new_role]
5096 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5098 result.append((desc, str(nf)))
5099 (node.master_candidate, node.drained, node.offline) = new_flags
5101 # we locked all nodes, we adjust the CP before updating this node
5103 _AdjustCandidatePool(self, [node.name])
5105 if self.op.secondary_ip:
5106 node.secondary_ip = self.op.secondary_ip
5107 result.append(("secondary_ip", self.op.secondary_ip))
5109 # this will trigger configuration file update, if needed
5110 self.cfg.Update(node, feedback_fn)
5112 # this will trigger job queue propagation or cleanup if the mc
5114 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5115 self.context.ReaddNode(node)
5120 class LUNodePowercycle(NoHooksLU):
5121 """Powercycles a node.
5126 def CheckArguments(self):
5127 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5128 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5129 raise errors.OpPrereqError("The node is the master and the force"
5130 " parameter was not set",
5133 def ExpandNames(self):
5134 """Locking for PowercycleNode.
5136 This is a last-resort option and shouldn't block on other
5137 jobs. Therefore, we grab no locks.
5140 self.needed_locks = {}
5142 def Exec(self, feedback_fn):
5146 result = self.rpc.call_node_powercycle(self.op.node_name,
5147 self.cfg.GetHypervisorType())
5148 result.Raise("Failed to schedule the reboot")
5149 return result.payload
5152 class LUClusterQuery(NoHooksLU):
5153 """Query cluster configuration.
5158 def ExpandNames(self):
5159 self.needed_locks = {}
5161 def Exec(self, feedback_fn):
5162 """Return cluster config.
5165 cluster = self.cfg.GetClusterInfo()
5168 # Filter just for enabled hypervisors
5169 for os_name, hv_dict in cluster.os_hvp.items():
5170 os_hvp[os_name] = {}
5171 for hv_name, hv_params in hv_dict.items():
5172 if hv_name in cluster.enabled_hypervisors:
5173 os_hvp[os_name][hv_name] = hv_params
5175 # Convert ip_family to ip_version
5176 primary_ip_version = constants.IP4_VERSION
5177 if cluster.primary_ip_family == netutils.IP6Address.family:
5178 primary_ip_version = constants.IP6_VERSION
5181 "software_version": constants.RELEASE_VERSION,
5182 "protocol_version": constants.PROTOCOL_VERSION,
5183 "config_version": constants.CONFIG_VERSION,
5184 "os_api_version": max(constants.OS_API_VERSIONS),
5185 "export_version": constants.EXPORT_VERSION,
5186 "architecture": (platform.architecture()[0], platform.machine()),
5187 "name": cluster.cluster_name,
5188 "master": cluster.master_node,
5189 "default_hypervisor": cluster.enabled_hypervisors[0],
5190 "enabled_hypervisors": cluster.enabled_hypervisors,
5191 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5192 for hypervisor_name in cluster.enabled_hypervisors]),
5194 "beparams": cluster.beparams,
5195 "osparams": cluster.osparams,
5196 "nicparams": cluster.nicparams,
5197 "ndparams": cluster.ndparams,
5198 "candidate_pool_size": cluster.candidate_pool_size,
5199 "master_netdev": cluster.master_netdev,
5200 "volume_group_name": cluster.volume_group_name,
5201 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5202 "file_storage_dir": cluster.file_storage_dir,
5203 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5204 "maintain_node_health": cluster.maintain_node_health,
5205 "ctime": cluster.ctime,
5206 "mtime": cluster.mtime,
5207 "uuid": cluster.uuid,
5208 "tags": list(cluster.GetTags()),
5209 "uid_pool": cluster.uid_pool,
5210 "default_iallocator": cluster.default_iallocator,
5211 "reserved_lvs": cluster.reserved_lvs,
5212 "primary_ip_version": primary_ip_version,
5213 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5214 "hidden_os": cluster.hidden_os,
5215 "blacklisted_os": cluster.blacklisted_os,
5221 class LUClusterConfigQuery(NoHooksLU):
5222 """Return configuration values.
5226 _FIELDS_DYNAMIC = utils.FieldSet()
5227 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5228 "watcher_pause", "volume_group_name")
5230 def CheckArguments(self):
5231 _CheckOutputFields(static=self._FIELDS_STATIC,
5232 dynamic=self._FIELDS_DYNAMIC,
5233 selected=self.op.output_fields)
5235 def ExpandNames(self):
5236 self.needed_locks = {}
5238 def Exec(self, feedback_fn):
5239 """Dump a representation of the cluster config to the standard output.
5243 for field in self.op.output_fields:
5244 if field == "cluster_name":
5245 entry = self.cfg.GetClusterName()
5246 elif field == "master_node":
5247 entry = self.cfg.GetMasterNode()
5248 elif field == "drain_flag":
5249 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5250 elif field == "watcher_pause":
5251 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5252 elif field == "volume_group_name":
5253 entry = self.cfg.GetVGName()
5255 raise errors.ParameterError(field)
5256 values.append(entry)
5260 class LUInstanceActivateDisks(NoHooksLU):
5261 """Bring up an instance's disks.
5266 def ExpandNames(self):
5267 self._ExpandAndLockInstance()
5268 self.needed_locks[locking.LEVEL_NODE] = []
5269 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5271 def DeclareLocks(self, level):
5272 if level == locking.LEVEL_NODE:
5273 self._LockInstancesNodes()
5275 def CheckPrereq(self):
5276 """Check prerequisites.
5278 This checks that the instance is in the cluster.
5281 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5282 assert self.instance is not None, \
5283 "Cannot retrieve locked instance %s" % self.op.instance_name
5284 _CheckNodeOnline(self, self.instance.primary_node)
5286 def Exec(self, feedback_fn):
5287 """Activate the disks.
5290 disks_ok, disks_info = \
5291 _AssembleInstanceDisks(self, self.instance,
5292 ignore_size=self.op.ignore_size)
5294 raise errors.OpExecError("Cannot activate block devices")
5299 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5301 """Prepare the block devices for an instance.
5303 This sets up the block devices on all nodes.
5305 @type lu: L{LogicalUnit}
5306 @param lu: the logical unit on whose behalf we execute
5307 @type instance: L{objects.Instance}
5308 @param instance: the instance for whose disks we assemble
5309 @type disks: list of L{objects.Disk} or None
5310 @param disks: which disks to assemble (or all, if None)
5311 @type ignore_secondaries: boolean
5312 @param ignore_secondaries: if true, errors on secondary nodes
5313 won't result in an error return from the function
5314 @type ignore_size: boolean
5315 @param ignore_size: if true, the current known size of the disk
5316 will not be used during the disk activation, useful for cases
5317 when the size is wrong
5318   @return: a tuple (disks_ok, device_info); disks_ok is False on failure,
5319       device_info is a list of (host, instance_visible_name,
5320       node_visible_name) tuples mapping node devices to instance devices
5325 iname = instance.name
5326 disks = _ExpandCheckDisks(instance, disks)
5328   # With the two-pass mechanism we try to reduce the window of
5329   # opportunity for the race condition of switching DRBD to primary
5330   # before handshaking occurred, but we do not eliminate it
5332 # The proper fix would be to wait (with some limits) until the
5333 # connection has been made and drbd transitions from WFConnection
5334 # into any other network-connected state (Connected, SyncTarget,
5337 # 1st pass, assemble on all nodes in secondary mode
5338 for idx, inst_disk in enumerate(disks):
5339 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5341 node_disk = node_disk.Copy()
5342 node_disk.UnsetSize()
5343 lu.cfg.SetDiskID(node_disk, node)
5344 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5345 msg = result.fail_msg
5347 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5348 " (is_primary=False, pass=1): %s",
5349 inst_disk.iv_name, node, msg)
5350 if not ignore_secondaries:
5353 # FIXME: race condition on drbd migration to primary
5355 # 2nd pass, do only the primary node
5356 for idx, inst_disk in enumerate(disks):
5359 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5360 if node != instance.primary_node:
5363 node_disk = node_disk.Copy()
5364 node_disk.UnsetSize()
5365 lu.cfg.SetDiskID(node_disk, node)
5366 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5367 msg = result.fail_msg
5369 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5370 " (is_primary=True, pass=2): %s",
5371 inst_disk.iv_name, node, msg)
5374 dev_path = result.payload
5376 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5378 # leave the disks configured for the primary node
5379 # this is a workaround that would be fixed better by
5380 # improving the logical/physical id handling
5382 lu.cfg.SetDiskID(disk, instance.primary_node)
5384 return disks_ok, device_info
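# Illustrative sketch (hypothetical values, not part of the original code):
# on success the function above returns a (disks_ok, device_info) pair,
# for example
#
#   # (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#   #         ("node1.example.com", "disk/1", "/dev/drbd1")])
#
# where device_info lists, for the primary node, each disk's iv_name and
# the device path reported by the assemble RPC.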
5387 def _StartInstanceDisks(lu, instance, force):
5388 """Start the disks of an instance.
5391 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5392 ignore_secondaries=force)
5394 _ShutdownInstanceDisks(lu, instance)
5395 if force is not None and not force:
5396 lu.proc.LogWarning("", hint="If the message above refers to a"
5398 " you can retry the operation using '--force'.")
5399 raise errors.OpExecError("Disk consistency error")
5402 class LUInstanceDeactivateDisks(NoHooksLU):
5403 """Shutdown an instance's disks.
5408 def ExpandNames(self):
5409 self._ExpandAndLockInstance()
5410 self.needed_locks[locking.LEVEL_NODE] = []
5411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5413 def DeclareLocks(self, level):
5414 if level == locking.LEVEL_NODE:
5415 self._LockInstancesNodes()
5417 def CheckPrereq(self):
5418 """Check prerequisites.
5420 This checks that the instance is in the cluster.
5423 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5424 assert self.instance is not None, \
5425 "Cannot retrieve locked instance %s" % self.op.instance_name
5427 def Exec(self, feedback_fn):
5428 """Deactivate the disks
5431 instance = self.instance
5433 _ShutdownInstanceDisks(self, instance)
5435 _SafeShutdownInstanceDisks(self, instance)
5438 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5439 """Shutdown block devices of an instance.
5441 This function checks if an instance is running, before calling
5442 _ShutdownInstanceDisks.
5445 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5446 _ShutdownInstanceDisks(lu, instance, disks=disks)
5449 def _ExpandCheckDisks(instance, disks):
5450 """Return the instance disks selected by the disks list
5452 @type disks: list of L{objects.Disk} or None
5453 @param disks: selected disks
5454 @rtype: list of L{objects.Disk}
5455 @return: selected instance disks to act on
5459 return instance.disks
5461 if not set(disks).issubset(instance.disks):
5462 raise errors.ProgrammerError("Can only act on disks belonging to the"
5467 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5468 """Shutdown block devices of an instance.
5470 This does the shutdown on all nodes of the instance.
5472 If the ignore_primary is false, errors on the primary node are
5477 disks = _ExpandCheckDisks(instance, disks)
5480 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5481 lu.cfg.SetDiskID(top_disk, node)
5482 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5483 msg = result.fail_msg
5485 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5486 disk.iv_name, node, msg)
5487 if ((node == instance.primary_node and not ignore_primary) or
5488 (node != instance.primary_node and not result.offline)):
5493 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5494 """Checks if a node has enough free memory.
5496   This function checks if a given node has the needed amount of free
5497   memory. In case the node has less memory or we cannot get the
5498   information from the node, this function raises an OpPrereqError
5501 @type lu: C{LogicalUnit}
5502 @param lu: a logical unit from which we get configuration data
5504 @param node: the node to check
5505 @type reason: C{str}
5506 @param reason: string to use in the error message
5507 @type requested: C{int}
5508 @param requested: the amount of memory in MiB to check for
5509 @type hypervisor_name: C{str}
5510 @param hypervisor_name: the hypervisor to ask for memory stats
5511 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5512 we cannot check the node
5515 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5516 nodeinfo[node].Raise("Can't get data from node %s" % node,
5517 prereq=True, ecode=errors.ECODE_ENVIRON)
5518 free_mem = nodeinfo[node].payload.get('memory_free', None)
5519 if not isinstance(free_mem, int):
5520 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5521 " was '%s'" % (node, free_mem),
5522 errors.ECODE_ENVIRON)
5523 if requested > free_mem:
5524 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5525 " needed %s MiB, available %s MiB" %
5526 (node, reason, requested, free_mem),
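# Illustrative usage sketch (not part of the original code): instance start
# uses this check to make sure the primary node can hold the instance's
# memory, e.g. (as in LUInstanceStartup.CheckPrereq further below)
#
#   # _CheckNodeFreeMemory(self, instance.primary_node,
#   #                      "starting instance %s" % instance.name,
#   #                      bep[constants.BE_MEMORY], instance.hypervisor)
#
# and it raises OpPrereqError if fewer MiB are free than requested.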
5530 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5531 """Checks if nodes have enough free disk space in the all VGs.
5533   This function checks if all given nodes have the needed amount of
5534   free disk. In case any node has less disk or we cannot get the
5535   information from the node, this function raises an OpPrereqError
5538 @type lu: C{LogicalUnit}
5539 @param lu: a logical unit from which we get configuration data
5540 @type nodenames: C{list}
5541 @param nodenames: the list of node names to check
5542 @type req_sizes: C{dict}
5543 @param req_sizes: the hash of vg and corresponding amount of disk in
5545 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5546 or we cannot check the node
5549 for vg, req_size in req_sizes.items():
5550 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
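# Illustrative sketch (hypothetical values, not part of the original code):
# req_sizes maps a volume group name to the total MiB needed in it, e.g.
#
#   # req_sizes = {"xenvg": 10240}
#   # _CheckNodesFreeDiskPerVG(self, [pnode, snode], req_sizes)
#
# each (vg, size) pair is then checked on every node via
# _CheckNodesFreeDiskOnVG below.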
5553 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5554 """Checks if nodes have enough free disk space in the specified VG.
5556   This function checks if all given nodes have the needed amount of
5557   free disk. In case any node has less disk or we cannot get the
5558   information from the node, this function raises an OpPrereqError
5561 @type lu: C{LogicalUnit}
5562 @param lu: a logical unit from which we get configuration data
5563 @type nodenames: C{list}
5564 @param nodenames: the list of node names to check
5566 @param vg: the volume group to check
5567 @type requested: C{int}
5568 @param requested: the amount of disk in MiB to check for
5569 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5570 or we cannot check the node
5573 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5574 for node in nodenames:
5575 info = nodeinfo[node]
5576 info.Raise("Cannot get current information from node %s" % node,
5577 prereq=True, ecode=errors.ECODE_ENVIRON)
5578 vg_free = info.payload.get("vg_free", None)
5579 if not isinstance(vg_free, int):
5580 raise errors.OpPrereqError("Can't compute free disk space on node"
5581 " %s for vg %s, result was '%s'" %
5582 (node, vg, vg_free), errors.ECODE_ENVIRON)
5583 if requested > vg_free:
5584 raise errors.OpPrereqError("Not enough disk space on target node %s"
5585 " vg %s: required %d MiB, available %d MiB" %
5586 (node, vg, requested, vg_free),
5590 class LUInstanceStartup(LogicalUnit):
5591 """Starts an instance.
5594 HPATH = "instance-start"
5595 HTYPE = constants.HTYPE_INSTANCE
5598 def CheckArguments(self):
5600 if self.op.beparams:
5601 # fill the beparams dict
5602 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5604 def ExpandNames(self):
5605 self._ExpandAndLockInstance()
5607 def BuildHooksEnv(self):
5610 This runs on master, primary and secondary nodes of the instance.
5614 "FORCE": self.op.force,
5617 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5621 def BuildHooksNodes(self):
5622 """Build hooks nodes.
5625 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5628 def CheckPrereq(self):
5629 """Check prerequisites.
5631 This checks that the instance is in the cluster.
5634 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5635 assert self.instance is not None, \
5636 "Cannot retrieve locked instance %s" % self.op.instance_name
5639 if self.op.hvparams:
5640 # check hypervisor parameter syntax (locally)
5641 cluster = self.cfg.GetClusterInfo()
5642 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5643 filled_hvp = cluster.FillHV(instance)
5644 filled_hvp.update(self.op.hvparams)
5645 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5646 hv_type.CheckParameterSyntax(filled_hvp)
5647 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5649 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5651 if self.primary_offline and self.op.ignore_offline_nodes:
5652 self.proc.LogWarning("Ignoring offline primary node")
5654 if self.op.hvparams or self.op.beparams:
5655 self.proc.LogWarning("Overridden parameters are ignored")
5657 _CheckNodeOnline(self, instance.primary_node)
5659 bep = self.cfg.GetClusterInfo().FillBE(instance)
5661 # check bridges existence
5662 _CheckInstanceBridgesExist(self, instance)
5664 remote_info = self.rpc.call_instance_info(instance.primary_node,
5666 instance.hypervisor)
5667 remote_info.Raise("Error checking node %s" % instance.primary_node,
5668 prereq=True, ecode=errors.ECODE_ENVIRON)
5669 if not remote_info.payload: # not running already
5670 _CheckNodeFreeMemory(self, instance.primary_node,
5671 "starting instance %s" % instance.name,
5672 bep[constants.BE_MEMORY], instance.hypervisor)
5674 def Exec(self, feedback_fn):
5675 """Start the instance.
5678 instance = self.instance
5679 force = self.op.force
5681 if not self.op.no_remember:
5682 self.cfg.MarkInstanceUp(instance.name)
5684 if self.primary_offline:
5685 assert self.op.ignore_offline_nodes
5686 self.proc.LogInfo("Primary node offline, marked instance as started")
5688 node_current = instance.primary_node
5690 _StartInstanceDisks(self, instance, force)
5692 result = self.rpc.call_instance_start(node_current, instance,
5693 self.op.hvparams, self.op.beparams)
5694 msg = result.fail_msg
5696 _ShutdownInstanceDisks(self, instance)
5697 raise errors.OpExecError("Could not start instance: %s" % msg)
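# Illustrative example (hypothetical values): the startup opcode may carry
# one-off parameter overrides such as beparams={constants.BE_MEMORY: 4096} or
# an hvparams dict; CheckArguments/CheckPrereq validate them against the
# filled cluster parameters and Exec passes them unchanged to
# rpc.call_instance_start on the primary node.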
5700 class LUInstanceReboot(LogicalUnit):
5701 """Reboot an instance.
5704 HPATH = "instance-reboot"
5705 HTYPE = constants.HTYPE_INSTANCE
5708 def ExpandNames(self):
5709 self._ExpandAndLockInstance()
5711 def BuildHooksEnv(self):
5714 This runs on master, primary and secondary nodes of the instance.
5718 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5719 "REBOOT_TYPE": self.op.reboot_type,
5720 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5723 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5727 def BuildHooksNodes(self):
5728 """Build hooks nodes.
5731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5734 def CheckPrereq(self):
5735 """Check prerequisites.
5737 This checks that the instance is in the cluster.
5740 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5741 assert self.instance is not None, \
5742 "Cannot retrieve locked instance %s" % self.op.instance_name
5744 _CheckNodeOnline(self, instance.primary_node)
5746 # check bridges existence
5747 _CheckInstanceBridgesExist(self, instance)
5749 def Exec(self, feedback_fn):
5750 """Reboot the instance.
5753 instance = self.instance
5754 ignore_secondaries = self.op.ignore_secondaries
5755 reboot_type = self.op.reboot_type
5757 remote_info = self.rpc.call_instance_info(instance.primary_node,
5759 instance.hypervisor)
5760 remote_info.Raise("Error checking node %s" % instance.primary_node)
5761 instance_running = bool(remote_info.payload)
5763 node_current = instance.primary_node
5765 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5766 constants.INSTANCE_REBOOT_HARD]:
5767 for disk in instance.disks:
5768 self.cfg.SetDiskID(disk, node_current)
5769 result = self.rpc.call_instance_reboot(node_current, instance,
5771 self.op.shutdown_timeout)
5772 result.Raise("Could not reboot instance")
5774 if instance_running:
5775 result = self.rpc.call_instance_shutdown(node_current, instance,
5776 self.op.shutdown_timeout)
5777 result.Raise("Could not shutdown instance for full reboot")
5778 _ShutdownInstanceDisks(self, instance)
5780 self.LogInfo("Instance %s was already stopped, starting now",
5782 _StartInstanceDisks(self, instance, ignore_secondaries)
5783 result = self.rpc.call_instance_start(node_current, instance, None, None)
5784 msg = result.fail_msg
5786 _ShutdownInstanceDisks(self, instance)
5787 raise errors.OpExecError("Could not start instance for"
5788 " full reboot: %s" % msg)
5790 self.cfg.MarkInstanceUp(instance.name)
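# Summary of the reboot paths above: INSTANCE_REBOOT_SOFT and
# INSTANCE_REBOOT_HARD are handled with a single call_instance_reboot RPC on
# the current primary node, while any other (full) reboot shuts the instance
# down, deactivates its disks, reactivates them and starts it again; in every
# case the instance ends up marked as up in the configuration.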
5793 class LUInstanceShutdown(LogicalUnit):
5794 """Shutdown an instance.
5797 HPATH = "instance-stop"
5798 HTYPE = constants.HTYPE_INSTANCE
5801 def ExpandNames(self):
5802 self._ExpandAndLockInstance()
5804 def BuildHooksEnv(self):
5807 This runs on master, primary and secondary nodes of the instance.
5810 env = _BuildInstanceHookEnvByObject(self, self.instance)
5811 env["TIMEOUT"] = self.op.timeout
5814 def BuildHooksNodes(self):
5815 """Build hooks nodes.
5818 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5821 def CheckPrereq(self):
5822 """Check prerequisites.
5824 This checks that the instance is in the cluster.
5827 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5828 assert self.instance is not None, \
5829 "Cannot retrieve locked instance %s" % self.op.instance_name
5831 self.primary_offline = \
5832 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5834 if self.primary_offline and self.op.ignore_offline_nodes:
5835 self.proc.LogWarning("Ignoring offline primary node")
5837 _CheckNodeOnline(self, self.instance.primary_node)
5839 def Exec(self, feedback_fn):
5840 """Shutdown the instance.
5843 instance = self.instance
5844 node_current = instance.primary_node
5845 timeout = self.op.timeout
5847 if not self.op.no_remember:
5848 self.cfg.MarkInstanceDown(instance.name)
5850 if self.primary_offline:
5851 assert self.op.ignore_offline_nodes
5852 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5854 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5855 msg = result.fail_msg
5857 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5859 _ShutdownInstanceDisks(self, instance)
5862 class LUInstanceReinstall(LogicalUnit):
5863 """Reinstall an instance.
5866 HPATH = "instance-reinstall"
5867 HTYPE = constants.HTYPE_INSTANCE
5870 def ExpandNames(self):
5871 self._ExpandAndLockInstance()
5873 def BuildHooksEnv(self):
5876 This runs on master, primary and secondary nodes of the instance.
5879 return _BuildInstanceHookEnvByObject(self, self.instance)
5881 def BuildHooksNodes(self):
5882 """Build hooks nodes.
5885 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5888 def CheckPrereq(self):
5889 """Check prerequisites.
5891 This checks that the instance is in the cluster and is not running.
5894 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5895 assert instance is not None, \
5896 "Cannot retrieve locked instance %s" % self.op.instance_name
5897 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5898 " offline, cannot reinstall")
5899 for node in instance.secondary_nodes:
5900 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5901 " cannot reinstall")
5903 if instance.disk_template == constants.DT_DISKLESS:
5904 raise errors.OpPrereqError("Instance '%s' has no disks" %
5905 self.op.instance_name,
5907 _CheckInstanceDown(self, instance, "cannot reinstall")
5909 if self.op.os_type is not None:
5911 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5912 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5913 instance_os = self.op.os_type
5915 instance_os = instance.os
5917 nodelist = list(instance.all_nodes)
5919 if self.op.osparams:
5920 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5921 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5922 self.os_inst = i_osdict # the new dict (without defaults)
5926 self.instance = instance
5928 def Exec(self, feedback_fn):
5929 """Reinstall the instance.
5932 inst = self.instance
5934 if self.op.os_type is not None:
5935 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5936 inst.os = self.op.os_type
5937 # Write to configuration
5938 self.cfg.Update(inst, feedback_fn)
5940 _StartInstanceDisks(self, inst, None)
5942 feedback_fn("Running the instance OS create scripts...")
5943 # FIXME: pass debug option from opcode to backend
5944 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5945 self.op.debug_level,
5946 osparams=self.os_inst)
5947 result.Raise("Could not install OS for instance %s on node %s" %
5948 (inst.name, inst.primary_node))
5950 _ShutdownInstanceDisks(self, inst)
5953 class LUInstanceRecreateDisks(LogicalUnit):
5954 """Recreate an instance's missing disks.
5957 HPATH = "instance-recreate-disks"
5958 HTYPE = constants.HTYPE_INSTANCE
5961 def CheckArguments(self):
5962 # normalise the disk list
5963 self.op.disks = sorted(frozenset(self.op.disks))
5965 def ExpandNames(self):
5966 self._ExpandAndLockInstance()
5967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5969 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5970 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5972 self.needed_locks[locking.LEVEL_NODE] = []
5974 def DeclareLocks(self, level):
5975 if level == locking.LEVEL_NODE:
5976 # if we replace the nodes, we only need to lock the old primary,
5977 # otherwise we need to lock all nodes for disk re-creation
5978 primary_only = bool(self.op.nodes)
5979 self._LockInstancesNodes(primary_only=primary_only)
5981 def BuildHooksEnv(self):
5984 This runs on master, primary and secondary nodes of the instance.
5987 return _BuildInstanceHookEnvByObject(self, self.instance)
5989 def BuildHooksNodes(self):
5990 """Build hooks nodes.
5993 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5996 def CheckPrereq(self):
5997 """Check prerequisites.
5999 This checks that the instance is in the cluster and is not running.
6002 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6003 assert instance is not None, \
6004 "Cannot retrieve locked instance %s" % self.op.instance_name
6006 if len(self.op.nodes) != len(instance.all_nodes):
6007 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6008 " %d replacement nodes were specified" %
6009 (instance.name, len(instance.all_nodes),
6010 len(self.op.nodes)),
6012 assert instance.disk_template != constants.DT_DRBD8 or \
6013 len(self.op.nodes) == 2
6014 assert instance.disk_template != constants.DT_PLAIN or \
6015 len(self.op.nodes) == 1
6016 primary_node = self.op.nodes[0]
6018 primary_node = instance.primary_node
6019 _CheckNodeOnline(self, primary_node)
6021 if instance.disk_template == constants.DT_DISKLESS:
6022 raise errors.OpPrereqError("Instance '%s' has no disks" %
6023 self.op.instance_name, errors.ECODE_INVAL)
6024 # if we replace nodes *and* the old primary is offline, we don't
6026 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6027 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6028 if not (self.op.nodes and old_pnode.offline):
6029 _CheckInstanceDown(self, instance, "cannot recreate disks")
6031 if not self.op.disks:
6032 self.op.disks = range(len(instance.disks))
6034 for idx in self.op.disks:
6035 if idx >= len(instance.disks):
6036 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6038 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6039 raise errors.OpPrereqError("Can't recreate disks partially and"
6040 " change the nodes at the same time",
6042 self.instance = instance
6044 def Exec(self, feedback_fn):
6045 """Recreate the disks.
6048 # change primary node, if needed
6050 self.instance.primary_node = self.op.nodes[0]
6051 self.LogWarning("Changing the instance's nodes, you will have to"
6052 " remove any disks left on the older nodes manually")
6055 for idx, disk in enumerate(self.instance.disks):
6056 if idx not in self.op.disks: # disk idx has not been passed in
6059 # update secondaries for disks, if needed
6061 if disk.dev_type == constants.LD_DRBD8:
6062 # need to update the nodes
6063 assert len(self.op.nodes) == 2
6064 logical_id = list(disk.logical_id)
6065 logical_id[0] = self.op.nodes[0]
6066 logical_id[1] = self.op.nodes[1]
6067 disk.logical_id = tuple(logical_id)
6070 self.cfg.Update(self.instance, feedback_fn)
6072 _CreateDisks(self, self.instance, to_skip=to_skip)
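# Illustrative note: for DRBD8 disks the logical_id tuple starts with the
# (primary, secondary) node pair (see _GenerateDRBD8Branch below), which is
# why only logical_id[0] and logical_id[1] are rewritten above when the
# instance is moved to a new node pair.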
6075 class LUInstanceRename(LogicalUnit):
6076 """Rename an instance.
6079 HPATH = "instance-rename"
6080 HTYPE = constants.HTYPE_INSTANCE
6082 def CheckArguments(self):
6086 if self.op.ip_check and not self.op.name_check:
6087 # TODO: make the ip check more flexible and not depend on the name check
6088 raise errors.OpPrereqError("IP address check requires a name check",
6091 def BuildHooksEnv(self):
6094 This runs on master, primary and secondary nodes of the instance.
6097 env = _BuildInstanceHookEnvByObject(self, self.instance)
6098 env["INSTANCE_NEW_NAME"] = self.op.new_name
6101 def BuildHooksNodes(self):
6102 """Build hooks nodes.
6105 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6108 def CheckPrereq(self):
6109 """Check prerequisites.
6111 This checks that the instance is in the cluster and is not running.
6114 self.op.instance_name = _ExpandInstanceName(self.cfg,
6115 self.op.instance_name)
6116 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6117 assert instance is not None
6118 _CheckNodeOnline(self, instance.primary_node)
6119 _CheckInstanceDown(self, instance, "cannot rename")
6120 self.instance = instance
6122 new_name = self.op.new_name
6123 if self.op.name_check:
6124 hostname = netutils.GetHostname(name=new_name)
6125 if hostname != new_name:
6126 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6128 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6129 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6130 " same as given hostname '%s'") %
6131 (hostname.name, self.op.new_name),
6133 new_name = self.op.new_name = hostname.name
6134 if (self.op.ip_check and
6135 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6136 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6137 (hostname.ip, new_name),
6138 errors.ECODE_NOTUNIQUE)
6140 instance_list = self.cfg.GetInstanceList()
6141 if new_name in instance_list and new_name != instance.name:
6142 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6143 new_name, errors.ECODE_EXISTS)
6145 def Exec(self, feedback_fn):
6146 """Rename the instance.
6149 inst = self.instance
6150 old_name = inst.name
6152 rename_file_storage = False
6153 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
6154 self.op.new_name != inst.name):
6155 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6156 rename_file_storage = True
6158 self.cfg.RenameInstance(inst.name, self.op.new_name)
6159 # Change the instance lock. This is definitely safe while we hold the BGL.
6160 # Otherwise the new lock would have to be added in acquired mode.
6162 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6163 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6165 # re-read the instance from the configuration after rename
6166 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6168 if rename_file_storage:
6169 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6170 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6171 old_file_storage_dir,
6172 new_file_storage_dir)
6173 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6174 " (but the instance has been renamed in Ganeti)" %
6175 (inst.primary_node, old_file_storage_dir,
6176 new_file_storage_dir))
6178 _StartInstanceDisks(self, inst, None)
6180 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6181 old_name, self.op.debug_level)
6182 msg = result.fail_msg
6184 msg = ("Could not run OS rename script for instance %s on node %s"
6185 " (but the instance has been renamed in Ganeti): %s" %
6186 (inst.name, inst.primary_node, msg))
6187 self.proc.LogWarning(msg)
6189 _ShutdownInstanceDisks(self, inst)
6194 class LUInstanceRemove(LogicalUnit):
6195 """Remove an instance.
6198 HPATH = "instance-remove"
6199 HTYPE = constants.HTYPE_INSTANCE
6202 def ExpandNames(self):
6203 self._ExpandAndLockInstance()
6204 self.needed_locks[locking.LEVEL_NODE] = []
6205 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6207 def DeclareLocks(self, level):
6208 if level == locking.LEVEL_NODE:
6209 self._LockInstancesNodes()
6211 def BuildHooksEnv(self):
6214 This runs on master, primary and secondary nodes of the instance.
6217 env = _BuildInstanceHookEnvByObject(self, self.instance)
6218 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6221 def BuildHooksNodes(self):
6222 """Build hooks nodes.
6225 nl = [self.cfg.GetMasterNode()]
6226 nl_post = list(self.instance.all_nodes) + nl
6227 return (nl, nl_post)
6229 def CheckPrereq(self):
6230 """Check prerequisites.
6232 This checks that the instance is in the cluster.
6235 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6236 assert self.instance is not None, \
6237 "Cannot retrieve locked instance %s" % self.op.instance_name
6239 def Exec(self, feedback_fn):
6240 """Remove the instance.
6243 instance = self.instance
6244 logging.info("Shutting down instance %s on node %s",
6245 instance.name, instance.primary_node)
6247 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6248 self.op.shutdown_timeout)
6249 msg = result.fail_msg
6251 if self.op.ignore_failures:
6252 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6254 raise errors.OpExecError("Could not shutdown instance %s on"
6256 (instance.name, instance.primary_node, msg))
6258 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6261 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6262 """Utility function to remove an instance.
6265 logging.info("Removing block devices for instance %s", instance.name)
6267 if not _RemoveDisks(lu, instance):
6268 if not ignore_failures:
6269 raise errors.OpExecError("Can't remove instance's disks")
6270 feedback_fn("Warning: can't remove instance's disks")
6272 logging.info("Removing instance %s out of cluster config", instance.name)
6274 lu.cfg.RemoveInstance(instance.name)
6276 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6277 "Instance lock removal conflict"
6279 # Remove lock for the instance
6280 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6283 class LUInstanceQuery(NoHooksLU):
6284 """Logical unit for querying instances.
6287 # pylint: disable-msg=W0142
6290 def CheckArguments(self):
6291 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6292 self.op.output_fields, self.op.use_locking)
6294 def ExpandNames(self):
6295 self.iq.ExpandNames(self)
6297 def DeclareLocks(self, level):
6298 self.iq.DeclareLocks(self, level)
6300 def Exec(self, feedback_fn):
6301 return self.iq.OldStyleQuery(self)
6304 class LUInstanceFailover(LogicalUnit):
6305 """Failover an instance.
6308 HPATH = "instance-failover"
6309 HTYPE = constants.HTYPE_INSTANCE
6312 def CheckArguments(self):
6313 """Check the arguments.
6316 self.iallocator = getattr(self.op, "iallocator", None)
6317 self.target_node = getattr(self.op, "target_node", None)
6319 def ExpandNames(self):
6320 self._ExpandAndLockInstance()
6322 if self.op.target_node is not None:
6323 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6325 self.needed_locks[locking.LEVEL_NODE] = []
6326 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6328 ignore_consistency = self.op.ignore_consistency
6329 shutdown_timeout = self.op.shutdown_timeout
6330 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6333 ignore_consistency=ignore_consistency,
6334 shutdown_timeout=shutdown_timeout)
6335 self.tasklets = [self._migrater]
6337 def DeclareLocks(self, level):
6338 if level == locking.LEVEL_NODE:
6339 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6340 if instance.disk_template in constants.DTS_EXT_MIRROR:
6341 if self.op.target_node is None:
6342 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6344 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6345 self.op.target_node]
6346 del self.recalculate_locks[locking.LEVEL_NODE]
6348 self._LockInstancesNodes()
6350 def BuildHooksEnv(self):
6353 This runs on master, primary and secondary nodes of the instance.
6356 instance = self._migrater.instance
6357 source_node = instance.primary_node
6358 target_node = self.op.target_node
6360 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6361 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6362 "OLD_PRIMARY": source_node,
6363 "NEW_PRIMARY": target_node,
6366 if instance.disk_template in constants.DTS_INT_MIRROR:
6367 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6368 env["NEW_SECONDARY"] = source_node
6370 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6372 env.update(_BuildInstanceHookEnvByObject(self, instance))
6376 def BuildHooksNodes(self):
6377 """Build hooks nodes.
6380 instance = self._migrater.instance
6381 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6382 return (nl, nl + [instance.primary_node])
6385 class LUInstanceMigrate(LogicalUnit):
6386 """Migrate an instance.
6388 This is migration without shutting down the instance, as opposed to
6389 failover, which is done with a shutdown.
6392 HPATH = "instance-migrate"
6393 HTYPE = constants.HTYPE_INSTANCE
6396 def ExpandNames(self):
6397 self._ExpandAndLockInstance()
6399 if self.op.target_node is not None:
6400 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6402 self.needed_locks[locking.LEVEL_NODE] = []
6403 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6405 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6406 cleanup=self.op.cleanup,
6408 fallback=self.op.allow_failover)
6409 self.tasklets = [self._migrater]
6411 def DeclareLocks(self, level):
6412 if level == locking.LEVEL_NODE:
6413 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6414 if instance.disk_template in constants.DTS_EXT_MIRROR:
6415 if self.op.target_node is None:
6416 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6418 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6419 self.op.target_node]
6420 del self.recalculate_locks[locking.LEVEL_NODE]
6422 self._LockInstancesNodes()
6424 def BuildHooksEnv(self):
6427 This runs on master, primary and secondary nodes of the instance.
6430 instance = self._migrater.instance
6431 source_node = instance.primary_node
6432 target_node = self.op.target_node
6433 env = _BuildInstanceHookEnvByObject(self, instance)
6435 "MIGRATE_LIVE": self._migrater.live,
6436 "MIGRATE_CLEANUP": self.op.cleanup,
6437 "OLD_PRIMARY": source_node,
6438 "NEW_PRIMARY": target_node,
6441 if instance.disk_template in constants.DTS_INT_MIRROR:
6442 env["OLD_SECONDARY"] = target_node
6443 env["NEW_SECONDARY"] = source_node
6445 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6449 def BuildHooksNodes(self):
6450 """Build hooks nodes.
6453 instance = self._migrater.instance
6454 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6455 return (nl, nl + [instance.primary_node])
6458 class LUInstanceMove(LogicalUnit):
6459 """Move an instance by data-copying.
6462 HPATH = "instance-move"
6463 HTYPE = constants.HTYPE_INSTANCE
6466 def ExpandNames(self):
6467 self._ExpandAndLockInstance()
6468 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6469 self.op.target_node = target_node
6470 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6473 def DeclareLocks(self, level):
6474 if level == locking.LEVEL_NODE:
6475 self._LockInstancesNodes(primary_only=True)
6477 def BuildHooksEnv(self):
6480 This runs on master, primary and secondary nodes of the instance.
6484 "TARGET_NODE": self.op.target_node,
6485 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6487 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6490 def BuildHooksNodes(self):
6491 """Build hooks nodes.
6495 self.cfg.GetMasterNode(),
6496 self.instance.primary_node,
6497 self.op.target_node,
6501 def CheckPrereq(self):
6502 """Check prerequisites.
6504 This checks that the instance is in the cluster.
6507 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6508 assert self.instance is not None, \
6509 "Cannot retrieve locked instance %s" % self.op.instance_name
6511 node = self.cfg.GetNodeInfo(self.op.target_node)
6512 assert node is not None, \
6513 "Cannot retrieve locked node %s" % self.op.target_node
6515 self.target_node = target_node = node.name
6517 if target_node == instance.primary_node:
6518 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6519 (instance.name, target_node),
6522 bep = self.cfg.GetClusterInfo().FillBE(instance)
6524 for idx, dsk in enumerate(instance.disks):
6525 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6526 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6527 " cannot copy" % idx, errors.ECODE_STATE)
6529 _CheckNodeOnline(self, target_node)
6530 _CheckNodeNotDrained(self, target_node)
6531 _CheckNodeVmCapable(self, target_node)
6533 if instance.admin_up:
6534 # check memory requirements on the target node
6535 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6536 instance.name, bep[constants.BE_MEMORY],
6537 instance.hypervisor)
6539 self.LogInfo("Not checking memory on the secondary node as"
6540 " instance will not be started")
6542 # check bridge existence
6543 _CheckInstanceBridgesExist(self, instance, node=target_node)
6545 def Exec(self, feedback_fn):
6546 """Move an instance.
6548 The move is done by shutting it down on its present node, copying
6549 the data over (slow) and starting it on the new node.
6552 instance = self.instance
6554 source_node = instance.primary_node
6555 target_node = self.target_node
6557 self.LogInfo("Shutting down instance %s on source node %s",
6558 instance.name, source_node)
6560 result = self.rpc.call_instance_shutdown(source_node, instance,
6561 self.op.shutdown_timeout)
6562 msg = result.fail_msg
6564 if self.op.ignore_consistency:
6565 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6566 " Proceeding anyway. Please make sure node"
6567 " %s is down. Error details: %s",
6568 instance.name, source_node, source_node, msg)
6570 raise errors.OpExecError("Could not shutdown instance %s on"
6572 (instance.name, source_node, msg))
6574 # create the target disks
6576 _CreateDisks(self, instance, target_node=target_node)
6577 except errors.OpExecError:
6578 self.LogWarning("Device creation failed, reverting...")
6580 _RemoveDisks(self, instance, target_node=target_node)
6582 self.cfg.ReleaseDRBDMinors(instance.name)
6585 cluster_name = self.cfg.GetClusterInfo().cluster_name
6588 # activate, get path, copy the data over
6589 for idx, disk in enumerate(instance.disks):
6590 self.LogInfo("Copying data for disk %d", idx)
6591 result = self.rpc.call_blockdev_assemble(target_node, disk,
6592 instance.name, True, idx)
6594 self.LogWarning("Can't assemble newly created disk %d: %s",
6595 idx, result.fail_msg)
6596 errs.append(result.fail_msg)
6598 dev_path = result.payload
6599 result = self.rpc.call_blockdev_export(source_node, disk,
6600 target_node, dev_path,
6603 self.LogWarning("Can't copy data over for disk %d: %s",
6604 idx, result.fail_msg)
6605 errs.append(result.fail_msg)
6609 self.LogWarning("Some disks failed to copy, aborting")
6611 _RemoveDisks(self, instance, target_node=target_node)
6613 self.cfg.ReleaseDRBDMinors(instance.name)
6614 raise errors.OpExecError("Errors during disk copy: %s" %
6617 instance.primary_node = target_node
6618 self.cfg.Update(instance, feedback_fn)
6620 self.LogInfo("Removing the disks on the original node")
6621 _RemoveDisks(self, instance, target_node=source_node)
6623 # Only start the instance if it's marked as up
6624 if instance.admin_up:
6625 self.LogInfo("Starting instance %s on node %s",
6626 instance.name, target_node)
6628 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6629 ignore_secondaries=True)
6631 _ShutdownInstanceDisks(self, instance)
6632 raise errors.OpExecError("Can't activate the instance's disks")
6634 result = self.rpc.call_instance_start(target_node, instance, None, None)
6635 msg = result.fail_msg
6637 _ShutdownInstanceDisks(self, instance)
6638 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6639 (instance.name, target_node, msg))
6642 class LUNodeMigrate(LogicalUnit):
6643 """Migrate all instances from a node.
6646 HPATH = "node-migrate"
6647 HTYPE = constants.HTYPE_NODE
6650 def CheckArguments(self):
6653 def ExpandNames(self):
6654 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6656 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6657 self.needed_locks = {
6658 locking.LEVEL_NODE: [self.op.node_name],
6661 def BuildHooksEnv(self):
6664 This runs on the master, the primary and all the secondaries.
6668 "NODE_NAME": self.op.node_name,
6671 def BuildHooksNodes(self):
6672 """Build hooks nodes.
6675 nl = [self.cfg.GetMasterNode()]
6678 def CheckPrereq(self):
6681 def Exec(self, feedback_fn):
6682 # Prepare jobs for migrating instances
6684 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6687 iallocator=self.op.iallocator,
6688 target_node=self.op.target_node)]
6689 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6692 # TODO: Run iallocator in this opcode and pass correct placement options to
6693 # OpInstanceMigrate. Since other jobs can modify the cluster between
6694 # running the iallocator and the actual migration, a good consistency model
6695 # will have to be found.
6697 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6698 frozenset([self.op.node_name]))
6700 return ResultWithJobs(jobs)
6703 class TLMigrateInstance(Tasklet):
6704 """Tasklet class for instance migration.
6707 @ivar live: whether the migration will be done live or non-live;
6708 this variable is initialized only after CheckPrereq has run
6709 @type cleanup: boolean
6710 @ivar cleanup: Whether we are cleaning up after a failed migration
6711 @type iallocator: string
6712 @ivar iallocator: The iallocator used to determine target_node
6713 @type target_node: string
6714 @ivar target_node: If given, the target_node to reallocate the instance to
6715 @type failover: boolean
6716 @ivar failover: Whether operation results in failover or migration
6717 @type fallback: boolean
6718 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
6720 @type ignore_consistency: boolean
6721 @ivar ignore_consistency: Whether we should ignore consistency between the source and target node
6723 @type shutdown_timeout: int
6724 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
6727 def __init__(self, lu, instance_name, cleanup=False,
6728 failover=False, fallback=False,
6729 ignore_consistency=False,
6730 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6731 """Initializes this class.
6734 Tasklet.__init__(self, lu)
6737 self.instance_name = instance_name
6738 self.cleanup = cleanup
6739 self.live = False # will be overridden later
6740 self.failover = failover
6741 self.fallback = fallback
6742 self.ignore_consistency = ignore_consistency
6743 self.shutdown_timeout = shutdown_timeout
6745 def CheckPrereq(self):
6746 """Check prerequisites.
6748 This checks that the instance is in the cluster.
6751 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6752 instance = self.cfg.GetInstanceInfo(instance_name)
6753 assert instance is not None
6754 self.instance = instance
6756 if (not self.cleanup and not instance.admin_up and not self.failover and
6758 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6760 self.failover = True
6762 if instance.disk_template not in constants.DTS_MIRRORED:
6767 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6768 " %s" % (instance.disk_template, text),
6771 if instance.disk_template in constants.DTS_EXT_MIRROR:
6772 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6774 if self.lu.op.iallocator:
6775 self._RunAllocator()
6777 # We set self.target_node as it is required by
6779 self.target_node = self.lu.op.target_node
6781 # self.target_node is already populated, either directly or by the iallocator run
6783 target_node = self.target_node
6784 if self.target_node == instance.primary_node:
6785 raise errors.OpPrereqError("Cannot migrate instance %s"
6786 " to its primary (%s)" %
6787 (instance.name, instance.primary_node))
6789 if len(self.lu.tasklets) == 1:
6790 # It is safe to release locks only when we're the only tasklet
6792 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6793 keep=[instance.primary_node, self.target_node])
6796 secondary_nodes = instance.secondary_nodes
6797 if not secondary_nodes:
6798 raise errors.ConfigurationError("No secondary node but using"
6799 " %s disk template" %
6800 instance.disk_template)
6801 target_node = secondary_nodes[0]
6802 if self.lu.op.iallocator or (self.lu.op.target_node and
6803 self.lu.op.target_node != target_node):
6805 text = "failed over"
6808 raise errors.OpPrereqError("Instances with disk template %s cannot"
6809 " be %s to arbitrary nodes"
6810 " (neither an iallocator nor a target"
6811 " node can be passed)" %
6812 (instance.disk_template, text),
6815 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6817 # check memory requirements on the target node
6818 if not self.failover or instance.admin_up:
6819 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6820 instance.name, i_be[constants.BE_MEMORY],
6821 instance.hypervisor)
6823 self.lu.LogInfo("Not checking memory on the secondary node as"
6824 " instance will not be started")
6826 # check bridge existence
6827 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6829 if not self.cleanup:
6830 _CheckNodeNotDrained(self.lu, target_node)
6831 if not self.failover:
6832 result = self.rpc.call_instance_migratable(instance.primary_node,
6834 if result.fail_msg and self.fallback:
6835 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6837 self.failover = True
6839 result.Raise("Can't migrate, please use failover",
6840 prereq=True, ecode=errors.ECODE_STATE)
6842 assert not (self.failover and self.cleanup)
6844 if not self.failover:
6845 if self.lu.op.live is not None and self.lu.op.mode is not None:
6846 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6847 " parameters are accepted",
6849 if self.lu.op.live is not None:
6851 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6853 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6854 # reset the 'live' parameter to None so that repeated
6855 # invocations of CheckPrereq do not raise an exception
6856 self.lu.op.live = None
6857 elif self.lu.op.mode is None:
6858 # read the default value from the hypervisor
6859 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6861 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6863 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6865 # Failover is never live
6868 def _RunAllocator(self):
6869 """Run the allocator based on input opcode.
6872 ial = IAllocator(self.cfg, self.rpc,
6873 mode=constants.IALLOCATOR_MODE_RELOC,
6874 name=self.instance_name,
6875 # TODO See why hail breaks with a single node below
6876 relocate_from=[self.instance.primary_node,
6877 self.instance.primary_node],
6880 ial.Run(self.lu.op.iallocator)
6883 raise errors.OpPrereqError("Can't compute nodes using"
6884 " iallocator '%s': %s" %
6885 (self.lu.op.iallocator, ial.info),
6887 if len(ial.result) != ial.required_nodes:
6888 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6889 " of nodes (%s), required %s" %
6890 (self.lu.op.iallocator, len(ial.result),
6891 ial.required_nodes), errors.ECODE_FAULT)
6892 self.target_node = ial.result[0]
6893 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6894 self.instance_name, self.lu.op.iallocator,
6895 utils.CommaJoin(ial.result))
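# Illustrative note: ial.result is the list of node names chosen by the
# allocator; its length must match ial.required_nodes, and the first entry is
# used as the migration target node.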
6897 def _WaitUntilSync(self):
6898 """Poll with custom rpc for disk sync.
6900 This uses our own step-based rpc call.
6903 self.feedback_fn("* wait until resync is done")
6907 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6909 self.instance.disks)
6911 for node, nres in result.items():
6912 nres.Raise("Cannot resync disks on node %s" % node)
6913 node_done, node_percent = nres.payload
6914 all_done = all_done and node_done
6915 if node_percent is not None:
6916 min_percent = min(min_percent, node_percent)
6918 if min_percent < 100:
6919 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6922 def _EnsureSecondary(self, node):
6923 """Demote a node to secondary.
6926 self.feedback_fn("* switching node %s to secondary mode" % node)
6928 for dev in self.instance.disks:
6929 self.cfg.SetDiskID(dev, node)
6931 result = self.rpc.call_blockdev_close(node, self.instance.name,
6932 self.instance.disks)
6933 result.Raise("Cannot change disk to secondary on node %s" % node)
6935 def _GoStandalone(self):
6936 """Disconnect from the network.
6939 self.feedback_fn("* changing into standalone mode")
6940 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6941 self.instance.disks)
6942 for node, nres in result.items():
6943 nres.Raise("Cannot disconnect disks node %s" % node)
6945 def _GoReconnect(self, multimaster):
6946 """Reconnect to the network.
6952 msg = "single-master"
6953 self.feedback_fn("* changing disks into %s mode" % msg)
6954 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6955 self.instance.disks,
6956 self.instance.name, multimaster)
6957 for node, nres in result.items():
6958 nres.Raise("Cannot change disks config on node %s" % node)
6960 def _ExecCleanup(self):
6961 """Try to cleanup after a failed migration.
6963 The cleanup is done by:
6964 - check that the instance is running only on one node
6965 (and update the config if needed)
6966 - change disks on its secondary node to secondary
6967 - wait until disks are fully synchronized
6968 - disconnect from the network
6969 - change disks into single-master mode
6970 - wait again until disks are fully synchronized
6973 instance = self.instance
6974 target_node = self.target_node
6975 source_node = self.source_node
6977 # check running on only one node
6978 self.feedback_fn("* checking where the instance actually runs"
6979 " (if this hangs, the hypervisor might be in"
6981 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6982 for node, result in ins_l.items():
6983 result.Raise("Can't contact node %s" % node)
6985 runningon_source = instance.name in ins_l[source_node].payload
6986 runningon_target = instance.name in ins_l[target_node].payload
6988 if runningon_source and runningon_target:
6989 raise errors.OpExecError("Instance seems to be running on two nodes,"
6990 " or the hypervisor is confused; you will have"
6991 " to ensure manually that it runs only on one"
6992 " and restart this operation")
6994 if not (runningon_source or runningon_target):
6995 raise errors.OpExecError("Instance does not seem to be running at all;"
6996 " in this case it's safer to repair by"
6997 " running 'gnt-instance stop' to ensure disk"
6998 " shutdown, and then restarting it")
7000 if runningon_target:
7001 # the migration has actually succeeded, we need to update the config
7002 self.feedback_fn("* instance running on secondary node (%s),"
7003 " updating config" % target_node)
7004 instance.primary_node = target_node
7005 self.cfg.Update(instance, self.feedback_fn)
7006 demoted_node = source_node
7008 self.feedback_fn("* instance confirmed to be running on its"
7009 " primary node (%s)" % source_node)
7010 demoted_node = target_node
7012 if instance.disk_template in constants.DTS_INT_MIRROR:
7013 self._EnsureSecondary(demoted_node)
7015 self._WaitUntilSync()
7016 except errors.OpExecError:
7017 # we ignore errors here, since if the device is standalone, it
7018 # won't be able to sync
7020 self._GoStandalone()
7021 self._GoReconnect(False)
7022 self._WaitUntilSync()
7024 self.feedback_fn("* done")
7026 def _RevertDiskStatus(self):
7027 """Try to revert the disk status after a failed migration.
7030 target_node = self.target_node
7031 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7035 self._EnsureSecondary(target_node)
7036 self._GoStandalone()
7037 self._GoReconnect(False)
7038 self._WaitUntilSync()
7039 except errors.OpExecError, err:
7040 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7041 " please try to recover the instance manually;"
7042 " error '%s'" % str(err))
7044 def _AbortMigration(self):
7045 """Call the hypervisor code to abort a started migration.
7048 instance = self.instance
7049 target_node = self.target_node
7050 migration_info = self.migration_info
7052 abort_result = self.rpc.call_finalize_migration(target_node,
7056 abort_msg = abort_result.fail_msg
7058 logging.error("Aborting migration failed on target node %s: %s",
7059 target_node, abort_msg)
7060 # Don't raise an exception here, as we still have to try to revert the
7061 # disk status, even if this step failed.
7063 def _ExecMigration(self):
7064 """Migrate an instance.
7066 The migrate is done by:
7067 - change the disks into dual-master mode
7068 - wait until disks are fully synchronized again
7069 - migrate the instance
7070 - change disks on the new secondary node (the old primary) to secondary
7071 - wait until disks are fully synchronized
7072 - change disks into single-master mode
7075 instance = self.instance
7076 target_node = self.target_node
7077 source_node = self.source_node
7079 self.feedback_fn("* checking disk consistency between source and target")
7080 for dev in instance.disks:
7081 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7082 raise errors.OpExecError("Disk %s is degraded or not fully"
7083 " synchronized on target node,"
7084 " aborting migration" % dev.iv_name)
7086 # First get the migration information from the remote node
7087 result = self.rpc.call_migration_info(source_node, instance)
7088 msg = result.fail_msg
7090 log_err = ("Failed fetching source migration information from %s: %s" %
7092 logging.error(log_err)
7093 raise errors.OpExecError(log_err)
7095 self.migration_info = migration_info = result.payload
7097 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7098 # Then switch the disks to master/master mode
7099 self._EnsureSecondary(target_node)
7100 self._GoStandalone()
7101 self._GoReconnect(True)
7102 self._WaitUntilSync()
7104 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7105 result = self.rpc.call_accept_instance(target_node,
7108 self.nodes_ip[target_node])
7110 msg = result.fail_msg
7112 logging.error("Instance pre-migration failed, trying to revert"
7113 " disk status: %s", msg)
7114 self.feedback_fn("Pre-migration failed, aborting")
7115 self._AbortMigration()
7116 self._RevertDiskStatus()
7117 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7118 (instance.name, msg))
7120 self.feedback_fn("* migrating instance to %s" % target_node)
7121 result = self.rpc.call_instance_migrate(source_node, instance,
7122 self.nodes_ip[target_node],
7124 msg = result.fail_msg
7126 logging.error("Instance migration failed, trying to revert"
7127 " disk status: %s", msg)
7128 self.feedback_fn("Migration failed, aborting")
7129 self._AbortMigration()
7130 self._RevertDiskStatus()
7131 raise errors.OpExecError("Could not migrate instance %s: %s" %
7132 (instance.name, msg))
7134 instance.primary_node = target_node
7135 # distribute new instance config to the other nodes
7136 self.cfg.Update(instance, self.feedback_fn)
7138 result = self.rpc.call_finalize_migration(target_node,
7142 msg = result.fail_msg
7144 logging.error("Instance migration succeeded, but finalization failed:"
7146 raise errors.OpExecError("Could not finalize instance migration: %s" %
7149 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7150 self._EnsureSecondary(source_node)
7151 self._WaitUntilSync()
7152 self._GoStandalone()
7153 self._GoReconnect(False)
7154 self._WaitUntilSync()
7156 self.feedback_fn("* done")
7158 def _ExecFailover(self):
7159 """Failover an instance.
7161 The failover is done by shutting it down on its present node and
7162 starting it on the secondary.
7165 instance = self.instance
7166 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7168 source_node = instance.primary_node
7169 target_node = self.target_node
7171 if instance.admin_up:
7172 self.feedback_fn("* checking disk consistency between source and target")
7173 for dev in instance.disks:
7174 # for drbd, these are drbd over lvm
7175 if not _CheckDiskConsistency(self, dev, target_node, False):
7176 if not self.ignore_consistency:
7177 raise errors.OpExecError("Disk %s is degraded on target node,"
7178 " aborting failover" % dev.iv_name)
7180 self.feedback_fn("* not checking disk consistency as instance is not"
7183 self.feedback_fn("* shutting down instance on source node")
7184 logging.info("Shutting down instance %s on node %s",
7185 instance.name, source_node)
7187 result = self.rpc.call_instance_shutdown(source_node, instance,
7188 self.shutdown_timeout)
7189 msg = result.fail_msg
7191 if self.ignore_consistency or primary_node.offline:
7192 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7193 " proceeding anyway; please make sure node"
7194 " %s is down; error details: %s",
7195 instance.name, source_node, source_node, msg)
7197 raise errors.OpExecError("Could not shutdown instance %s on"
7199 (instance.name, source_node, msg))
7201 self.feedback_fn("* deactivating the instance's disks on source node")
7202 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7203 raise errors.OpExecError("Can't shut down the instance's disks.")
7205 instance.primary_node = target_node
7206 # distribute new instance config to the other nodes
7207 self.cfg.Update(instance, self.feedback_fn)
7209 # Only start the instance if it's marked as up
7210 if instance.admin_up:
7211 self.feedback_fn("* activating the instance's disks on target node")
7212 logging.info("Starting instance %s on node %s",
7213 instance.name, target_node)
7215 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7216 ignore_secondaries=True)
7218 _ShutdownInstanceDisks(self, instance)
7219 raise errors.OpExecError("Can't activate the instance's disks")
7221 self.feedback_fn("* starting the instance on the target node")
7222 result = self.rpc.call_instance_start(target_node, instance, None, None)
7223 msg = result.fail_msg
7225 _ShutdownInstanceDisks(self, instance)
7226 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7227 (instance.name, target_node, msg))
7229 def Exec(self, feedback_fn):
7230 """Perform the migration.
7233 self.feedback_fn = feedback_fn
7234 self.source_node = self.instance.primary_node
7236 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7237 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7238 self.target_node = self.instance.secondary_nodes[0]
7239 # Otherwise self.target_node has been populated either
7240 # directly, or through an iallocator.
7242 self.all_nodes = [self.source_node, self.target_node]
7244 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7245 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7249 feedback_fn("Failover instance %s" % self.instance.name)
7250 self._ExecFailover()
7252 feedback_fn("Migrating instance %s" % self.instance.name)
7255 return self._ExecCleanup()
7257 return self._ExecMigration()
7260 def _CreateBlockDev(lu, node, instance, device, force_create,
7262 """Create a tree of block devices on a given node.
7264 If this device type has to be created on secondaries, create it and all its children.
7267 If not, just recurse to children keeping the same 'force' value.
7269 @param lu: the lu on whose behalf we execute
7270 @param node: the node on which to create the device
7271 @type instance: L{objects.Instance}
7272 @param instance: the instance which owns the device
7273 @type device: L{objects.Disk}
7274 @param device: the device to create
7275 @type force_create: boolean
7276 @param force_create: whether to force creation of this device; this
7277 will be changed to True whenever we find a device which has the
7278 CreateOnSecondary() attribute
7279 @param info: the extra 'metadata' we should attach to the device
7280 (this will be represented as a LVM tag)
7281 @type force_open: boolean
7282 @param force_open: this parameter will be passed to the
7283 L{backend.BlockdevCreate} function where it specifies
7284 whether we run on primary or not, and it affects both
7285 the child assembly and the device's own Open() execution
7288 if device.CreateOnSecondary():
7292 for child in device.children:
7293 _CreateBlockDev(lu, node, instance, child, force_create,
7296 if not force_create:
7299 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7302 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7303 """Create a single block device on a given node.
7305 This will not recurse over children of the device, so they must be created in advance.
7308 @param lu: the lu on whose behalf we execute
7309 @param node: the node on which to create the device
7310 @type instance: L{objects.Instance}
7311 @param instance: the instance which owns the device
7312 @type device: L{objects.Disk}
7313 @param device: the device to create
7314 @param info: the extra 'metadata' we should attach to the device
7315 (this will be represented as a LVM tag)
7316 @type force_open: boolean
7317 @param force_open: this parameter will be passed to the
7318 L{backend.BlockdevCreate} function where it specifies
7319 whether we run on primary or not, and it affects both
7320 the child assembly and the device's own Open() execution
7323 lu.cfg.SetDiskID(device, node)
7324 result = lu.rpc.call_blockdev_create(node, device, device.size,
7325 instance.name, force_open, info)
7326 result.Raise("Can't create block device %s on"
7327 " node %s for instance %s" % (device, node, instance.name))
7328 if device.physical_id is None:
7329 device.physical_id = result.payload
7332 def _GenerateUniqueNames(lu, exts):
7333 """Generate a suitable LV name.
7335 This will generate a logical volume name for the given instance.
7340 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7341 results.append("%s%s" % (new_id, val))
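# Illustrative example: called with exts such as [".disk0", ".disk1"], this
# returns names of the form "<unique-id>.disk0", "<unique-id>.disk1", each
# entry prefixed with its own freshly generated unique id.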
7345 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7346 iv_name, p_minor, s_minor):
7347 """Generate a drbd8 device complete with its children.
7350 assert len(vgnames) == len(names) == 2
7351 port = lu.cfg.AllocatePort()
7352 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7353 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7354 logical_id=(vgnames[0], names[0]))
7355 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7356 logical_id=(vgnames[1], names[1]))
7357 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7358 logical_id=(primary, secondary, port,
7361 children=[dev_data, dev_meta],
7366 def _GenerateDiskTemplate(lu, template_name,
7367 instance_name, primary_node,
7368 secondary_nodes, disk_info,
7369 file_storage_dir, file_driver,
7370 base_index, feedback_fn):
7371 """Generate the entire disk layout for a given template type.
7374 #TODO: compute space requirements
7376 vgname = lu.cfg.GetVGName()
7377 disk_count = len(disk_info)
7379 if template_name == constants.DT_DISKLESS:
7381 elif template_name == constants.DT_PLAIN:
7382 if len(secondary_nodes) != 0:
7383 raise errors.ProgrammerError("Wrong template configuration")
7385 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7386 for i in range(disk_count)])
7387 for idx, disk in enumerate(disk_info):
7388 disk_index = idx + base_index
7389 vg = disk.get(constants.IDISK_VG, vgname)
7390 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7391 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7392 size=disk[constants.IDISK_SIZE],
7393 logical_id=(vg, names[idx]),
7394 iv_name="disk/%d" % disk_index,
7395 mode=disk[constants.IDISK_MODE])
7396 disks.append(disk_dev)
7397 elif template_name == constants.DT_DRBD8:
7398 if len(secondary_nodes) != 1:
7399 raise errors.ProgrammerError("Wrong template configuration")
7400 remote_node = secondary_nodes[0]
7401 minors = lu.cfg.AllocateDRBDMinor(
7402 [primary_node, remote_node] * len(disk_info), instance_name)
7405 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7406 for i in range(disk_count)]):
7407 names.append(lv_prefix + "_data")
7408 names.append(lv_prefix + "_meta")
7409 for idx, disk in enumerate(disk_info):
7410 disk_index = idx + base_index
7411 data_vg = disk.get(constants.IDISK_VG, vgname)
7412 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7413 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7414 disk[constants.IDISK_SIZE],
7416 names[idx * 2:idx * 2 + 2],
7417 "disk/%d" % disk_index,
7418 minors[idx * 2], minors[idx * 2 + 1])
7419 disk_dev.mode = disk[constants.IDISK_MODE]
7420 disks.append(disk_dev)
7421 elif template_name == constants.DT_FILE:
7422 if len(secondary_nodes) != 0:
7423 raise errors.ProgrammerError("Wrong template configuration")
7425 opcodes.RequireFileStorage()
7427 for idx, disk in enumerate(disk_info):
7428 disk_index = idx + base_index
7429 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7430 size=disk[constants.IDISK_SIZE],
7431 iv_name="disk/%d" % disk_index,
7432 logical_id=(file_driver,
7433 "%s/disk%d" % (file_storage_dir,
7435 mode=disk[constants.IDISK_MODE])
7436 disks.append(disk_dev)
7437 elif template_name == constants.DT_SHARED_FILE:
7438 if len(secondary_nodes) != 0:
7439 raise errors.ProgrammerError("Wrong template configuration")
7441 opcodes.RequireSharedFileStorage()
7443 for idx, disk in enumerate(disk_info):
7444 disk_index = idx + base_index
7445 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7446 size=disk[constants.IDISK_SIZE],
7447 iv_name="disk/%d" % disk_index,
7448 logical_id=(file_driver,
7449 "%s/disk%d" % (file_storage_dir,
7451 mode=disk[constants.IDISK_MODE])
7452 disks.append(disk_dev)
7453 elif template_name == constants.DT_BLOCK:
7454 if len(secondary_nodes) != 0:
7455 raise errors.ProgrammerError("Wrong template configuration")
7457 for idx, disk in enumerate(disk_info):
7458 disk_index = idx + base_index
7459 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7460 size=disk[constants.IDISK_SIZE],
7461 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7462 disk[constants.IDISK_ADOPT]),
7463 iv_name="disk/%d" % disk_index,
7464 mode=disk[constants.IDISK_MODE])
7465 disks.append(disk_dev)
7468 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
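# Illustrative example (hypothetical values): disk_info is a list of dicts,
# one per disk, along the lines of
#   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]
# Only the keys used above (IDISK_SIZE, IDISK_MODE, IDISK_VG, IDISK_METAVG
# and IDISK_ADOPT) are consulted here.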
7472 def _GetInstanceInfoText(instance):
7473 """Compute that text that should be added to the disk's metadata.
7476 return "originstname+%s" % instance.name
7479 def _CalcEta(time_taken, written, total_size):
7480 """Calculates the ETA based on size written and total size.
7482 @param time_taken: The time taken so far
7483 @param written: amount written so far
7484 @param total_size: The total size of data to be written
7485 @return: The remaining time in seconds
7488 avg_time = time_taken / float(written)
7489 return (total_size - written) * avg_time
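# Illustrative example: if 512 MiB of a 2048 MiB disk were written in 30
# seconds, _CalcEta(30.0, 512, 2048) returns (2048 - 512) * (30.0 / 512)
# = 90.0, i.e. roughly 90 seconds remaining.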
7492 def _WipeDisks(lu, instance):
7493 """Wipes instance disks.
7495 @type lu: L{LogicalUnit}
7496 @param lu: the logical unit on whose behalf we execute
7497 @type instance: L{objects.Instance}
7498 @param instance: the instance whose disks we should wipe
7499 @return: the success of the wipe
7502 node = instance.primary_node
7504 for device in instance.disks:
7505 lu.cfg.SetDiskID(device, node)
7507 logging.info("Pause sync of instance %s disks", instance.name)
7508 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7510 for idx, success in enumerate(result.payload):
7512 logging.warn("pause-sync of instance %s for disks %d failed",
7516 for idx, device in enumerate(instance.disks):
7517 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7518 # MAX_WIPE_CHUNK at max
7519 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7520 constants.MIN_WIPE_CHUNK_PERCENT)
7521 # we _must_ make this an int, otherwise rounding errors will
7523 wipe_chunk_size = int(wipe_chunk_size)
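# Illustrative example (assuming MIN_WIPE_CHUNK_PERCENT were 10 and
# MAX_WIPE_CHUNK were 1024 MiB): a 20 GiB (20480 MiB) disk would be wiped in
# min(1024, 20480 / 100.0 * 10) = 1024 MiB chunks, while a 5 GiB disk would
# use 512 MiB chunks.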
7525 lu.LogInfo("* Wiping disk %d", idx)
7526 logging.info("Wiping disk %d for instance %s, node %s using"
7527 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7532 start_time = time.time()
7534 while offset < size:
7535 wipe_size = min(wipe_chunk_size, size - offset)
7536 logging.debug("Wiping disk %d, offset %s, chunk %s",
7537 idx, offset, wipe_size)
7538 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7539 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7540 (idx, offset, wipe_size))
7541       now = time.time()
7542       offset += wipe_size
7543       if now - last_output >= 60:
7544         eta = _CalcEta(now - start_time, offset, size)
7545         lu.LogInfo(" - done: %.1f%% ETA: %s" %
7546                    (offset / float(size) * 100, utils.FormatSeconds(eta)))
7547         last_output = now
7549 logging.info("Resume sync of instance %s disks", instance.name)
7551 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7553 for idx, success in enumerate(result.payload):
7554     if not success:
7555       lu.LogWarning("Resume sync of disk %d failed, please have a"
7556                     " look at the status and troubleshoot the issue", idx)
7557       logging.warn("resume-sync of instance %s for disk %d failed",
7558                    instance.name, idx)
7561 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7562 """Create all disks for an instance.
7564 This abstracts away some work from AddInstance.
7566 @type lu: L{LogicalUnit}
7567 @param lu: the logical unit on whose behalf we execute
7568 @type instance: L{objects.Instance}
7569 @param instance: the instance whose disks we should create
7571 @param to_skip: list of indices to skip
7572 @type target_node: string
7573 @param target_node: if passed, overrides the target node for creation
7575 @return: the success of the creation
7578 info = _GetInstanceInfoText(instance)
7579 if target_node is None:
7580 pnode = instance.primary_node
7581     all_nodes = instance.all_nodes
7582   else:
7583     pnode = target_node
7584     all_nodes = [pnode]
7586 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7587 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7588 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7590 result.Raise("Failed to create directory '%s' on"
7591 " node %s" % (file_storage_dir, pnode))
7593 # Note: this needs to be kept in sync with adding of disks in
7594 # LUInstanceSetParams
7595 for idx, device in enumerate(instance.disks):
7596     if to_skip and idx in to_skip:
7597       continue
7598 logging.info("Creating volume %s for instance %s",
7599 device.iv_name, instance.name)
7601 for node in all_nodes:
7602 f_create = node == pnode
7603 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7606 def _RemoveDisks(lu, instance, target_node=None):
7607 """Remove all disks for an instance.
7609 This abstracts away some work from `AddInstance()` and
7610 `RemoveInstance()`. Note that in case some of the devices couldn't
7611 be removed, the removal will continue with the other ones (compare
7612 with `_CreateDisks()`).
7614 @type lu: L{LogicalUnit}
7615 @param lu: the logical unit on whose behalf we execute
7616 @type instance: L{objects.Instance}
7617 @param instance: the instance whose disks we should remove
7618 @type target_node: string
7619 @param target_node: used to override the node on which to remove the disks
7621 @return: the success of the removal
7624 logging.info("Removing block devices for instance %s", instance.name)
7627   for device in instance.disks:
7628     if target_node:
7629       edata = [(target_node, device)]
7630     else:
7631       edata = device.ComputeNodeTree(instance.primary_node)
7632 for node, disk in edata:
7633 lu.cfg.SetDiskID(disk, node)
7634 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7635       if msg:
7636         lu.LogWarning("Could not remove block device %s on node %s,"
7637 " continuing anyway: %s", device.iv_name, node, msg)
7640 if instance.disk_template == constants.DT_FILE:
7641 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7642     if target_node:
7643       tgt = target_node
7644     else:
7645       tgt = instance.primary_node
7646 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7647     if result.fail_msg:
7648       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7649 file_storage_dir, instance.primary_node, result.fail_msg)
7655 def _ComputeDiskSizePerVG(disk_template, disks):
7656 """Compute disk size requirements in the volume group
7659 def _compute(disks, payload):
7660     """Universal algorithm.
7662     """
7663     vgs = {}
7664     for disk in disks:
7665       vgs[disk[constants.IDISK_VG]] = \
7666         vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7668     return vgs
7670 # Required free disk space as a function of disk and swap space
7671   req_size_dict = {
7672     constants.DT_DISKLESS: {},
7673 constants.DT_PLAIN: _compute(disks, 0),
7674 # 128 MB are added for drbd metadata for each disk
7675 constants.DT_DRBD8: _compute(disks, 128),
7676 constants.DT_FILE: {},
7677     constants.DT_SHARED_FILE: {},
7678     constants.DT_BLOCK: {},
7679   }
7680 if disk_template not in req_size_dict:
7681 raise errors.ProgrammerError("Disk template '%s' size requirement"
7682 " is unknown" % disk_template)
7684 return req_size_dict[disk_template]
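# Illustrative example (added annotation): for two 10240 MiB DRBD disks in VG
# "xenvg" and one 2048 MiB DRBD disk in VG "data",
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) would return
# {"xenvg": (10240 + 128) * 2, "data": 2048 + 128}, i.e.
# {"xenvg": 20736, "data": 2176}.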
7687 def _ComputeDiskSize(disk_template, disks):
7688   """Compute the total disk size requirement for the given disk template.
7691 # Required free disk space as a function of disk and swap space
7692   req_size_dict = {
7693     constants.DT_DISKLESS: None,
7694 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7695 # 128 MB are added for drbd metadata for each disk
7696 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7697 constants.DT_FILE: None,
7698 constants.DT_SHARED_FILE: 0,
7699     constants.DT_BLOCK: 0,
7700   }
7702 if disk_template not in req_size_dict:
7703 raise errors.ProgrammerError("Disk template '%s' size requirement"
7704 " is unknown" % disk_template)
7706 return req_size_dict[disk_template]
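# Illustrative example (added annotation): for the same two 10240 MiB DRBD
# disks, _ComputeDiskSize(constants.DT_DRBD8, disks) returns
# (10240 + 128) + (10240 + 128) = 20736 MiB, while for DT_PLAIN it is simply
# 10240 + 10240 = 20480 MiB.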
7709 def _FilterVmNodes(lu, nodenames):
7710 """Filters out non-vm_capable nodes from a list.
7712 @type lu: L{LogicalUnit}
7713 @param lu: the logical unit for which we check
7714 @type nodenames: list
7715 @param nodenames: the list of nodes on which we should check
7717 @return: the list of vm-capable nodes
7720 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7721 return [name for name in nodenames if name not in vm_nodes]
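# Illustrative example (added annotation): if the cluster has nodes "node1",
# "node2" and "node3" and only "node3" is marked non-vm_capable, then
# _FilterVmNodes(lu, ["node1", "node3"]) returns ["node1"].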
7724 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7725 """Hypervisor parameter validation.
7727 This function abstract the hypervisor parameter validation to be
7728 used in both instance create and instance modify.
7730 @type lu: L{LogicalUnit}
7731 @param lu: the logical unit for which we check
7732 @type nodenames: list
7733 @param nodenames: the list of nodes on which we should check
7734 @type hvname: string
7735 @param hvname: the name of the hypervisor we should use
7736 @type hvparams: dict
7737 @param hvparams: the parameters which we need to check
7738 @raise errors.OpPrereqError: if the parameters are not valid
7741 nodenames = _FilterVmNodes(lu, nodenames)
7742 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7745 for node in nodenames:
7749 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7752 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7753 """OS parameters validation.
7755 @type lu: L{LogicalUnit}
7756 @param lu: the logical unit for which we check
7757 @type required: boolean
7758   @param required: whether the validation should fail if the OS is not
7759       found
7760 @type nodenames: list
7761 @param nodenames: the list of nodes on which we should check
7762 @type osname: string
7763   @param osname: the name of the OS we should use
7764 @type osparams: dict
7765 @param osparams: the parameters which we need to check
7766 @raise errors.OpPrereqError: if the parameters are not valid
7769 nodenames = _FilterVmNodes(lu, nodenames)
7770 result = lu.rpc.call_os_validate(required, nodenames, osname,
7771                                    [constants.OS_VALIDATE_PARAMETERS],
7772                                    osparams)
7773 for node, nres in result.items():
7774 # we don't check for offline cases since this should be run only
7775 # against the master node and/or an instance's nodes
7776 nres.Raise("OS Parameters validation failed on node %s" % node)
7777 if not nres.payload:
7778       lu.LogInfo("OS %s not found on node %s, validation skipped",
7779                  osname, node)
7782 class LUInstanceCreate(LogicalUnit):
7783 """Create an instance.
7786 HPATH = "instance-add"
7787 HTYPE = constants.HTYPE_INSTANCE
7790 def CheckArguments(self):
7794 # do not require name_check to ease forward/backward compatibility
7796 if self.op.no_install and self.op.start:
7797 self.LogInfo("No-installation mode selected, disabling startup")
7798 self.op.start = False
7799 # validate/normalize the instance name
7800 self.op.instance_name = \
7801 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7803 if self.op.ip_check and not self.op.name_check:
7804 # TODO: make the ip check more flexible and not depend on the name check
7805 raise errors.OpPrereqError("Cannot do IP address check without a name"
7806 " check", errors.ECODE_INVAL)
7808 # check nics' parameter names
7809 for nic in self.op.nics:
7810 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7812 # check disks. parameter names and consistent adopt/no-adopt strategy
7813 has_adopt = has_no_adopt = False
7814 for disk in self.op.disks:
7815 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7816       if constants.IDISK_ADOPT in disk:
7817         has_adopt = True
7818       else:
7819         has_no_adopt = True
7820 if has_adopt and has_no_adopt:
7821       raise errors.OpPrereqError("Either all disks are adopted or none is",
7822                                  errors.ECODE_INVAL)
7823     if has_adopt:
7824 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7825 raise errors.OpPrereqError("Disk adoption is not supported for the"
7826 " '%s' disk template" %
7827 self.op.disk_template,
7829 if self.op.iallocator is not None:
7830 raise errors.OpPrereqError("Disk adoption not allowed with an"
7831 " iallocator script", errors.ECODE_INVAL)
7832 if self.op.mode == constants.INSTANCE_IMPORT:
7833 raise errors.OpPrereqError("Disk adoption not allowed for"
7834 " instance import", errors.ECODE_INVAL)
7836 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7837 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7838 " but no 'adopt' parameter given" %
7839 self.op.disk_template,
7842 self.adopt_disks = has_adopt
7844 # instance name verification
7845 if self.op.name_check:
7846 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7847 self.op.instance_name = self.hostname1.name
7848 # used in CheckPrereq for ip ping check
7849 self.check_ip = self.hostname1.ip
7850     else:
7851       self.check_ip = None
7853 # file storage checks
7854 if (self.op.file_driver and
7855 not self.op.file_driver in constants.FILE_DRIVER):
7856 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7857 self.op.file_driver, errors.ECODE_INVAL)
7859 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7860 raise errors.OpPrereqError("File storage directory path not absolute",
7863 ### Node/iallocator related checks
7864 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7866 if self.op.pnode is not None:
7867 if self.op.disk_template in constants.DTS_INT_MIRROR:
7868 if self.op.snode is None:
7869 raise errors.OpPrereqError("The networked disk templates need"
7870 " a mirror node", errors.ECODE_INVAL)
7871       elif self.op.snode:
7872         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7873                         " template")
7874         self.op.snode = None
7876 self._cds = _GetClusterDomainSecret()
7878 if self.op.mode == constants.INSTANCE_IMPORT:
7879 # On import force_variant must be True, because if we forced it at
7880       # initial install, our only chance when importing it back is that it
7881       # works again!
7882 self.op.force_variant = True
7884 if self.op.no_install:
7885 self.LogInfo("No-installation mode has no effect during import")
7887 elif self.op.mode == constants.INSTANCE_CREATE:
7888 if self.op.os_type is None:
7889 raise errors.OpPrereqError("No guest OS specified",
7891 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7892 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7893 " installation" % self.op.os_type,
7895 if self.op.disk_template is None:
7896 raise errors.OpPrereqError("No disk template specified",
7899 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7900 # Check handshake to ensure both clusters have the same domain secret
7901 src_handshake = self.op.source_handshake
7902 if not src_handshake:
7903 raise errors.OpPrereqError("Missing source handshake",
7906       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7907                                                            src_handshake)
7908       if errmsg:
7909         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7912 # Load and check source CA
7913 self.source_x509_ca_pem = self.op.source_x509_ca
7914 if not self.source_x509_ca_pem:
7915 raise errors.OpPrereqError("Missing source X509 CA",
7918       try:
7919         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7920                                                     self._cds)
7921 except OpenSSL.crypto.Error, err:
7922 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7923 (err, ), errors.ECODE_INVAL)
7925 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7926 if errcode is not None:
7927 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7930 self.source_x509_ca = cert
7932 src_instance_name = self.op.source_instance_name
7933 if not src_instance_name:
7934 raise errors.OpPrereqError("Missing source instance name",
7937 self.source_instance_name = \
7938 netutils.GetHostname(name=src_instance_name).name
7940     else:
7941       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7942 self.op.mode, errors.ECODE_INVAL)
7944 def ExpandNames(self):
7945 """ExpandNames for CreateInstance.
7947 Figure out the right locks for instance creation.
7950 self.needed_locks = {}
7952 instance_name = self.op.instance_name
7953 # this is just a preventive check, but someone might still add this
7954 # instance in the meantime, and creation will fail at lock-add time
7955 if instance_name in self.cfg.GetInstanceList():
7956 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7957 instance_name, errors.ECODE_EXISTS)
7959 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7961 if self.op.iallocator:
7962 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7963     else:
7964       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7965 nodelist = [self.op.pnode]
7966 if self.op.snode is not None:
7967 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7968 nodelist.append(self.op.snode)
7969 self.needed_locks[locking.LEVEL_NODE] = nodelist
7971 # in case of import lock the source node too
7972 if self.op.mode == constants.INSTANCE_IMPORT:
7973 src_node = self.op.src_node
7974 src_path = self.op.src_path
7976 if src_path is None:
7977 self.op.src_path = src_path = self.op.instance_name
7979 if src_node is None:
7980 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7981 self.op.src_node = None
7982 if os.path.isabs(src_path):
7983 raise errors.OpPrereqError("Importing an instance from an absolute"
7984 " path requires a source node option",
7986       else:
7987         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7988 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7989 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7990 if not os.path.isabs(src_path):
7991 self.op.src_path = src_path = \
7992 utils.PathJoin(constants.EXPORT_DIR, src_path)
7994 def _RunAllocator(self):
7995 """Run the allocator based on input opcode.
7998 nics = [n.ToDict() for n in self.nics]
7999 ial = IAllocator(self.cfg, self.rpc,
8000 mode=constants.IALLOCATOR_MODE_ALLOC,
8001 name=self.op.instance_name,
8002 disk_template=self.op.disk_template,
8005 vcpus=self.be_full[constants.BE_VCPUS],
8006                      memory=self.be_full[constants.BE_MEMORY],
8007                      disks=self.disks,
8008                      nics=nics,
8009                      hypervisor=self.op.hypervisor,
8010                      )
8012 ial.Run(self.op.iallocator)
8014     if not ial.success:
8015       raise errors.OpPrereqError("Can't compute nodes using"
8016 " iallocator '%s': %s" %
8017 (self.op.iallocator, ial.info),
8019 if len(ial.result) != ial.required_nodes:
8020 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8021 " of nodes (%s), required %s" %
8022 (self.op.iallocator, len(ial.result),
8023 ial.required_nodes), errors.ECODE_FAULT)
8024 self.op.pnode = ial.result[0]
8025 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8026 self.op.instance_name, self.op.iallocator,
8027 utils.CommaJoin(ial.result))
8028 if ial.required_nodes == 2:
8029 self.op.snode = ial.result[1]
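    # Illustrative note (added annotation): for a mirrored template such as
    # DRBD8 the allocator must return two node names, e.g.
    # ["node1.example.com", "node2.example.com"]; the first becomes the
    # primary node and the second the secondary node.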
8031 def BuildHooksEnv(self):
8034 This runs on master, primary and secondary nodes of the instance.
8037     env = {
8038       "ADD_MODE": self.op.mode,
8039       }
8040 if self.op.mode == constants.INSTANCE_IMPORT:
8041 env["SRC_NODE"] = self.op.src_node
8042 env["SRC_PATH"] = self.op.src_path
8043 env["SRC_IMAGES"] = self.src_images
8045 env.update(_BuildInstanceHookEnv(
8046 name=self.op.instance_name,
8047 primary_node=self.op.pnode,
8048 secondary_nodes=self.secondaries,
8049 status=self.op.start,
8050 os_type=self.op.os_type,
8051 memory=self.be_full[constants.BE_MEMORY],
8052 vcpus=self.be_full[constants.BE_VCPUS],
8053 nics=_NICListToTuple(self, self.nics),
8054 disk_template=self.op.disk_template,
8055 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8056 for d in self.disks],
8059       hypervisor_name=self.op.hypervisor,
8060     ))
8062     return env
8064 def BuildHooksNodes(self):
8065 """Build hooks nodes.
8068     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8069     return (nl, nl)
8071 def _ReadExportInfo(self):
8072 """Reads the export information from disk.
8074 It will override the opcode source node and path with the actual
8075 information, if these two were not specified before.
8077 @return: the export information
8080 assert self.op.mode == constants.INSTANCE_IMPORT
8082 src_node = self.op.src_node
8083 src_path = self.op.src_path
8085 if src_node is None:
8086 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8087 exp_list = self.rpc.call_export_list(locked_nodes)
8088       found = False
8089       for node in exp_list:
8090         if exp_list[node].fail_msg:
8091           continue
8092         if src_path in exp_list[node].payload:
8093           found = True
8094           self.op.src_node = src_node = node
8095           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8096                                                        src_path)
8097           break
8098       if not found:
8099         raise errors.OpPrereqError("No export found for relative path %s" %
8100                                    src_path, errors.ECODE_INVAL)
8102 _CheckNodeOnline(self, src_node)
8103 result = self.rpc.call_export_info(src_node, src_path)
8104 result.Raise("No export or invalid export found in dir %s" % src_path)
8106 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8107 if not export_info.has_section(constants.INISECT_EXP):
8108 raise errors.ProgrammerError("Corrupted export config",
8109 errors.ECODE_ENVIRON)
8111 ei_version = export_info.get(constants.INISECT_EXP, "version")
8112 if (int(ei_version) != constants.EXPORT_VERSION):
8113 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8114 (ei_version, constants.EXPORT_VERSION),
8115                                  errors.ECODE_ENVIRON)
8117     return export_info
8118 def _ReadExportParams(self, einfo):
8119 """Use export parameters as defaults.
8121 In case the opcode doesn't specify (as in override) some instance
8122     parameters, then try to use them from the export information, if
8123     the export declares them.
8125     if self.op.os_type is None:
8126       self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8128 if self.op.disk_template is None:
8129 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8130         self.op.disk_template = einfo.get(constants.INISECT_INS,
8131                                           "disk_template")
8132       else:
8133 raise errors.OpPrereqError("No disk template specified and the export"
8134 " is missing the disk_template information",
8137 if not self.op.disks:
8138       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8139         disks = []
8140 # TODO: import the disk iv_name too
8141 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8142 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8143 disks.append({constants.IDISK_SIZE: disk_sz})
8144 self.op.disks = disks
8145       else:
8146         raise errors.OpPrereqError("No disk info specified and the export"
8147 " is missing the disk information",
8150 if (not self.op.nics and
8151 einfo.has_option(constants.INISECT_INS, "nic_count")):
8153 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8155 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8156 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8161 if (self.op.hypervisor is None and
8162 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8163 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8164 if einfo.has_section(constants.INISECT_HYP):
8165 # use the export parameters but do not override the ones
8166 # specified by the user
8167 for name, value in einfo.items(constants.INISECT_HYP):
8168 if name not in self.op.hvparams:
8169 self.op.hvparams[name] = value
8171 if einfo.has_section(constants.INISECT_BEP):
8172 # use the parameters, without overriding
8173 for name, value in einfo.items(constants.INISECT_BEP):
8174 if name not in self.op.beparams:
8175 self.op.beparams[name] = value
8176     else:
8177       # try to read the parameters in the old style, from the main section
8178 for name in constants.BES_PARAMETERS:
8179 if (name not in self.op.beparams and
8180 einfo.has_option(constants.INISECT_INS, name)):
8181 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8183 if einfo.has_section(constants.INISECT_OSP):
8184 # use the parameters, without overriding
8185 for name, value in einfo.items(constants.INISECT_OSP):
8186 if name not in self.op.osparams:
8187 self.op.osparams[name] = value
8189 def _RevertToDefaults(self, cluster):
8190 """Revert the instance parameters to the default values.
8194 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8195 for name in self.op.hvparams.keys():
8196 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8197 del self.op.hvparams[name]
8199 be_defs = cluster.SimpleFillBE({})
8200 for name in self.op.beparams.keys():
8201 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8202 del self.op.beparams[name]
8204 nic_defs = cluster.SimpleFillNIC({})
8205 for nic in self.op.nics:
8206 for name in constants.NICS_PARAMETERS:
8207         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8208           del nic[name]
8210 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8211 for name in self.op.osparams.keys():
8212 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8213 del self.op.osparams[name]
8215 def CheckPrereq(self):
8216 """Check prerequisites.
8219 if self.op.mode == constants.INSTANCE_IMPORT:
8220 export_info = self._ReadExportInfo()
8221 self._ReadExportParams(export_info)
8223 if (not self.cfg.GetVGName() and
8224 self.op.disk_template not in constants.DTS_NOT_LVM):
8225 raise errors.OpPrereqError("Cluster does not support lvm-based"
8226 " instances", errors.ECODE_STATE)
8228 if self.op.hypervisor is None:
8229 self.op.hypervisor = self.cfg.GetHypervisorType()
8231 cluster = self.cfg.GetClusterInfo()
8232 enabled_hvs = cluster.enabled_hypervisors
8233 if self.op.hypervisor not in enabled_hvs:
8234 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8235 " cluster (%s)" % (self.op.hypervisor,
8236 ",".join(enabled_hvs)),
8239 # check hypervisor parameter syntax (locally)
8240 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8241 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8243 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8244 hv_type.CheckParameterSyntax(filled_hvp)
8245 self.hv_full = filled_hvp
8246 # check that we don't specify global parameters on an instance
8247 _CheckGlobalHvParams(self.op.hvparams)
8249 # fill and remember the beparams dict
8250 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8251 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8253 # build os parameters
8254 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8256     # now that hvp/bep are in final format, let's reset to defaults,
8257     # if told to do so (identify_defaults)
8258 if self.op.identify_defaults:
8259 self._RevertToDefaults(cluster)
8262     self.nics = []
8263     for idx, nic in enumerate(self.op.nics):
8264 nic_mode_req = nic.get(constants.INIC_MODE, None)
8265 nic_mode = nic_mode_req
8266 if nic_mode is None:
8267 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8269 # in routed mode, for the first nic, the default ip is 'auto'
8270 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8271 default_ip_mode = constants.VALUE_AUTO
8273 default_ip_mode = constants.VALUE_NONE
8275 # ip validity checks
8276 ip = nic.get(constants.INIC_IP, default_ip_mode)
8277       if ip is None or ip.lower() == constants.VALUE_NONE:
8278         nic_ip = None
8279 elif ip.lower() == constants.VALUE_AUTO:
8280 if not self.op.name_check:
8281 raise errors.OpPrereqError("IP address set to auto but name checks"
8282 " have been skipped",
8284 nic_ip = self.hostname1.ip
8285       else:
8286         if not netutils.IPAddress.IsValid(ip):
8287           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8288                                      errors.ECODE_INVAL)
8289         nic_ip = ip
8291 # TODO: check the ip address for uniqueness
8292 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8293 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8296 # MAC address verification
8297 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8298 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8299 mac = utils.NormalizeAndValidateMac(mac)
8301       try:
8302         self.cfg.ReserveMAC(mac, self.proc.GetECId())
8303 except errors.ReservationError:
8304 raise errors.OpPrereqError("MAC address %s already in use"
8305 " in cluster" % mac,
8306 errors.ECODE_NOTUNIQUE)
8308 # Build nic parameters
8309 link = nic.get(constants.INIC_LINK, None)
8310       nicparams = {}
8311       if nic_mode_req:
8312         nicparams[constants.NIC_MODE] = nic_mode_req
8313       if link:
8314         nicparams[constants.NIC_LINK] = link
8316 check_params = cluster.SimpleFillNIC(nicparams)
8317 objects.NIC.CheckParameterSyntax(check_params)
8318 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8320 # disk checks/pre-build
8321 default_vg = self.cfg.GetVGName()
8322     self.disks = []
8323     for disk in self.op.disks:
8324 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8325 if mode not in constants.DISK_ACCESS_SET:
8326 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8327 mode, errors.ECODE_INVAL)
8328 size = disk.get(constants.IDISK_SIZE, None)
8329       if size is None:
8330         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8331       try:
8332         size = int(size)
8333       except (TypeError, ValueError):
8334 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8337 data_vg = disk.get(constants.IDISK_VG, default_vg)
8338       new_disk = {
8339         constants.IDISK_SIZE: size,
8340         constants.IDISK_MODE: mode,
8341         constants.IDISK_VG: data_vg,
8342         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8343         }
8344 if constants.IDISK_ADOPT in disk:
8345 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8346 self.disks.append(new_disk)
8348 if self.op.mode == constants.INSTANCE_IMPORT:
8350 # Check that the new instance doesn't have less disks than the export
8351 instance_disks = len(self.disks)
8352 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8353 if instance_disks < export_disks:
8354 raise errors.OpPrereqError("Not enough disks to import."
8355 " (instance: %d, export: %d)" %
8356 (instance_disks, export_disks),
8359       disk_images = []
8360       for idx in range(export_disks):
8361 option = 'disk%d_dump' % idx
8362 if export_info.has_option(constants.INISECT_INS, option):
8363 # FIXME: are the old os-es, disk sizes, etc. useful?
8364 export_name = export_info.get(constants.INISECT_INS, option)
8365 image = utils.PathJoin(self.op.src_path, export_name)
8366 disk_images.append(image)
8367         else:
8368           disk_images.append(False)
8370 self.src_images = disk_images
8372 old_name = export_info.get(constants.INISECT_INS, 'name')
8373       try:
8374         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8375 except (TypeError, ValueError), err:
8376 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8377 " an integer: %s" % str(err),
8379 if self.op.instance_name == old_name:
8380 for idx, nic in enumerate(self.nics):
8381 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8382 nic_mac_ini = 'nic%d_mac' % idx
8383 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8385 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8387 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8388 if self.op.ip_check:
8389 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8390 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8391 (self.check_ip, self.op.instance_name),
8392 errors.ECODE_NOTUNIQUE)
8394 #### mac address generation
8395 # By generating here the mac address both the allocator and the hooks get
8396 # the real final mac address rather than the 'auto' or 'generate' value.
8397 # There is a race condition between the generation and the instance object
8398 # creation, which means that we know the mac is valid now, but we're not
8399 # sure it will be when we actually add the instance. If things go bad
8400 # adding the instance will abort because of a duplicate mac, and the
8401 # creation job will fail.
8402 for nic in self.nics:
8403 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8404 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8408 if self.op.iallocator is not None:
8409 self._RunAllocator()
8411 #### node related checks
8413 # check primary node
8414 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8415 assert self.pnode is not None, \
8416 "Cannot retrieve locked node %s" % self.op.pnode
8417     if pnode.offline:
8418       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8419 pnode.name, errors.ECODE_STATE)
8420     if pnode.drained:
8421       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8422 pnode.name, errors.ECODE_STATE)
8423 if not pnode.vm_capable:
8424 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8425 " '%s'" % pnode.name, errors.ECODE_STATE)
8427 self.secondaries = []
8429 # mirror node verification
8430 if self.op.disk_template in constants.DTS_INT_MIRROR:
8431 if self.op.snode == pnode.name:
8432 raise errors.OpPrereqError("The secondary node cannot be the"
8433 " primary node", errors.ECODE_INVAL)
8434 _CheckNodeOnline(self, self.op.snode)
8435 _CheckNodeNotDrained(self, self.op.snode)
8436 _CheckNodeVmCapable(self, self.op.snode)
8437 self.secondaries.append(self.op.snode)
8439 nodenames = [pnode.name] + self.secondaries
8441 if not self.adopt_disks:
8442 # Check lv size requirements, if not adopting
8443 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8444 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8446 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8447 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8448 disk[constants.IDISK_ADOPT])
8449 for disk in self.disks])
8450 if len(all_lvs) != len(self.disks):
8451 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8453 for lv_name in all_lvs:
8454         try:
8455           # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8456           # to ReserveLV use the same syntax
8457 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8458 except errors.ReservationError:
8459 raise errors.OpPrereqError("LV named %s used by another instance" %
8460 lv_name, errors.ECODE_NOTUNIQUE)
8462 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8463 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8465 node_lvs = self.rpc.call_lv_list([pnode.name],
8466 vg_names.payload.keys())[pnode.name]
8467 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8468 node_lvs = node_lvs.payload
8470 delta = all_lvs.difference(node_lvs.keys())
8471       if delta:
8472         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8473 utils.CommaJoin(delta),
8475 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8476       if online_lvs:
8477         raise errors.OpPrereqError("Online logical volumes found, cannot"
8478 " adopt: %s" % utils.CommaJoin(online_lvs),
8480 # update the size of disk based on what is found
8481 for dsk in self.disks:
8482 dsk[constants.IDISK_SIZE] = \
8483 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8484 dsk[constants.IDISK_ADOPT])][0]))
8486 elif self.op.disk_template == constants.DT_BLOCK:
8487 # Normalize and de-duplicate device paths
8488 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8489 for disk in self.disks])
8490 if len(all_disks) != len(self.disks):
8491 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8493 baddisks = [d for d in all_disks
8494 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8495       if baddisks:
8496         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8497 " cannot be adopted" %
8498 (", ".join(baddisks),
8499 constants.ADOPTABLE_BLOCKDEV_ROOT),
8502 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8503 list(all_disks))[pnode.name]
8504       node_disks.Raise("Cannot get block device information from node %s" %
8505                        pnode.name)
8506 node_disks = node_disks.payload
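      # Note (added annotation): node_disks maps each adopted device path to
      # the size reported by the node; these values are used below to fill in
      # IDISK_SIZE (in MiB) for each adopted disk.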
8507 delta = all_disks.difference(node_disks.keys())
8508       if delta:
8509         raise errors.OpPrereqError("Missing block device(s): %s" %
8510 utils.CommaJoin(delta),
8512 for dsk in self.disks:
8513 dsk[constants.IDISK_SIZE] = \
8514 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8516 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8518 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8519 # check OS parameters (remotely)
8520 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8522 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8524 # memory check on primary node
8525     if self.op.start:
8526       _CheckNodeFreeMemory(self, self.pnode.name,
8527                            "creating instance %s" % self.op.instance_name,
8528                            self.be_full[constants.BE_MEMORY],
8529                            self.op.hypervisor)
8531 self.dry_run_result = list(nodenames)
8533 def Exec(self, feedback_fn):
8534 """Create and add the instance to the cluster.
8537 instance = self.op.instance_name
8538 pnode_name = self.pnode.name
8540 ht_kind = self.op.hypervisor
8541 if ht_kind in constants.HTS_REQ_PORT:
8542       network_port = self.cfg.AllocatePort()
8543     else:
8544       network_port = None
8546 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8547 # this is needed because os.path.join does not accept None arguments
8548 if self.op.file_storage_dir is None:
8549 string_file_storage_dir = ""
8551 string_file_storage_dir = self.op.file_storage_dir
8553 # build the full file storage dir path
8554 if self.op.disk_template == constants.DT_SHARED_FILE:
8555 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8557 get_fsd_fn = self.cfg.GetFileStorageDir
8559 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8560 string_file_storage_dir, instance)
8561     else:
8562       file_storage_dir = ""
8564 disks = _GenerateDiskTemplate(self,
8565 self.op.disk_template,
8566 instance, pnode_name,
8570 self.op.file_driver,
8574 iobj = objects.Instance(name=instance, os=self.op.os_type,
8575 primary_node=pnode_name,
8576 nics=self.nics, disks=disks,
8577 disk_template=self.op.disk_template,
8579 network_port=network_port,
8580 beparams=self.op.beparams,
8581 hvparams=self.op.hvparams,
8582 hypervisor=self.op.hypervisor,
8583                             osparams=self.op.osparams,
8584                             )
8586 if self.adopt_disks:
8587 if self.op.disk_template == constants.DT_PLAIN:
8588 # rename LVs to the newly-generated names; we need to construct
8589 # 'fake' LV disks with the old data, plus the new unique_id
8590 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8591         rename_to = []
8592         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8593 rename_to.append(t_dsk.logical_id)
8594 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8595 self.cfg.SetDiskID(t_dsk, pnode_name)
8596 result = self.rpc.call_blockdev_rename(pnode_name,
8597 zip(tmp_disks, rename_to))
8598         result.Raise("Failed to rename adopted LVs")
8600 feedback_fn("* creating instance disks...")
8602 _CreateDisks(self, iobj)
8603 except errors.OpExecError:
8604 self.LogWarning("Device creation failed, reverting...")
8606 _RemoveDisks(self, iobj)
8608 self.cfg.ReleaseDRBDMinors(instance)
8611 feedback_fn("adding instance %s to cluster config" % instance)
8613 self.cfg.AddInstance(iobj, self.proc.GetECId())
8615 # Declare that we don't want to remove the instance lock anymore, as we've
8616 # added the instance to the config
8617 del self.remove_locks[locking.LEVEL_INSTANCE]
8619 if self.op.mode == constants.INSTANCE_IMPORT:
8620 # Release unused nodes
8621 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8623     else:
8624       _ReleaseLocks(self, locking.LEVEL_NODE)
8627 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8628 feedback_fn("* wiping instance disks...")
8630 _WipeDisks(self, iobj)
8631 except errors.OpExecError, err:
8632 logging.exception("Wiping disks failed")
8633 self.LogWarning("Wiping instance disks failed (%s)", err)
8637 # Something is already wrong with the disks, don't do anything else
8639 elif self.op.wait_for_sync:
8640 disk_abort = not _WaitForSync(self, iobj)
8641 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8642 # make sure the disks are not degraded (still sync-ing is ok)
8644 feedback_fn("* checking mirrors status")
8645 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8650 _RemoveDisks(self, iobj)
8651 self.cfg.RemoveInstance(iobj.name)
8652 # Make sure the instance lock gets removed
8653 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8654 raise errors.OpExecError("There are some degraded disks for"
8657 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8658 if self.op.mode == constants.INSTANCE_CREATE:
8659 if not self.op.no_install:
8660 feedback_fn("* running the instance OS create scripts...")
8661 # FIXME: pass debug option from opcode to backend
8662 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8663 self.op.debug_level)
8664 result.Raise("Could not add os for instance %s"
8665 " on node %s" % (instance, pnode_name))
8667 elif self.op.mode == constants.INSTANCE_IMPORT:
8668 feedback_fn("* running the instance OS import scripts...")
8672 for idx, image in enumerate(self.src_images):
8676 # FIXME: pass debug option from opcode to backend
8677 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8678 constants.IEIO_FILE, (image, ),
8679 constants.IEIO_SCRIPT,
8680 (iobj.disks[idx], idx),
8682 transfers.append(dt)
8685 masterd.instance.TransferInstanceData(self, feedback_fn,
8686 self.op.src_node, pnode_name,
8687 self.pnode.secondary_ip,
8689 if not compat.all(import_result):
8690 self.LogWarning("Some disks for instance %s on node %s were not"
8691 " imported successfully" % (instance, pnode_name))
8693 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8694 feedback_fn("* preparing remote import...")
8695 # The source cluster will stop the instance before attempting to make a
8696 # connection. In some cases stopping an instance can take a long time,
8697 # hence the shutdown timeout is added to the connection timeout.
8698 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8699 self.op.source_shutdown_timeout)
8700 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8702 assert iobj.primary_node == self.pnode.name
8703         disk_results = \
8704           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8705 self.source_x509_ca,
8706 self._cds, timeouts)
8707 if not compat.all(disk_results):
8708 # TODO: Should the instance still be started, even if some disks
8709 # failed to import (valid for local imports, too)?
8710 self.LogWarning("Some disks for instance %s on node %s were not"
8711 " imported successfully" % (instance, pnode_name))
8713 # Run rename script on newly imported instance
8714 assert iobj.name == instance
8715 feedback_fn("Running rename script for %s" % instance)
8716 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8717 self.source_instance_name,
8718 self.op.debug_level)
8719         if result.fail_msg:
8720           self.LogWarning("Failed to run rename script for %s on node"
8721 " %s: %s" % (instance, pnode_name, result.fail_msg))
8724 # also checked in the prereq part
8725 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8729 iobj.admin_up = True
8730 self.cfg.Update(iobj, feedback_fn)
8731 logging.info("Starting instance %s on node %s", instance, pnode_name)
8732 feedback_fn("* starting instance...")
8733 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8734 result.Raise("Could not start instance")
8736 return list(iobj.all_nodes)
8739 class LUInstanceConsole(NoHooksLU):
8740 """Connect to an instance's console.
8742 This is somewhat special in that it returns the command line that
8743   you need to run on the master node in order to connect to the
8744   console.
8749 def ExpandNames(self):
8750 self._ExpandAndLockInstance()
8752 def CheckPrereq(self):
8753 """Check prerequisites.
8755 This checks that the instance is in the cluster.
8758 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8759 assert self.instance is not None, \
8760 "Cannot retrieve locked instance %s" % self.op.instance_name
8761 _CheckNodeOnline(self, self.instance.primary_node)
8763 def Exec(self, feedback_fn):
8764 """Connect to the console of an instance
8767 instance = self.instance
8768 node = instance.primary_node
8770 node_insts = self.rpc.call_instance_list([node],
8771 [instance.hypervisor])[node]
8772 node_insts.Raise("Can't get node information from %s" % node)
8774 if instance.name not in node_insts.payload:
8775 if instance.admin_up:
8776 state = constants.INSTST_ERRORDOWN
8777       else:
8778         state = constants.INSTST_ADMINDOWN
8779 raise errors.OpExecError("Instance %s is not running (state %s)" %
8780 (instance.name, state))
8782 logging.debug("Connecting to console of %s on %s", instance.name, node)
8784 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8787 def _GetInstanceConsole(cluster, instance):
8788 """Returns console information for an instance.
8790 @type cluster: L{objects.Cluster}
8791 @type instance: L{objects.Instance}
8795 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8796 # beparams and hvparams are passed separately, to avoid editing the
8797 # instance and then saving the defaults in the instance itself.
8798 hvparams = cluster.FillHV(instance)
8799 beparams = cluster.FillBE(instance)
8800 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8802 assert console.instance == instance.name
8803 assert console.Validate()
8805 return console.ToDict()
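# Note (added annotation): the returned dictionary is the serialized form of an
# objects.InstanceConsole; depending on the hypervisor it may describe, for
# example, an SSH command to run on a node or a graphical (e.g. VNC) endpoint
# to connect to, so the exact keys depend on the console kind.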
8808 class LUInstanceReplaceDisks(LogicalUnit):
8809 """Replace the disks of an instance.
8812 HPATH = "mirrors-replace"
8813 HTYPE = constants.HTYPE_INSTANCE
8816 def CheckArguments(self):
8817     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8818                                   self.op.iallocator)
8820 def ExpandNames(self):
8821 self._ExpandAndLockInstance()
8823 assert locking.LEVEL_NODE not in self.needed_locks
8824 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8826 assert self.op.iallocator is None or self.op.remote_node is None, \
8827 "Conflicting options"
8829 if self.op.remote_node is not None:
8830 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8832 # Warning: do not remove the locking of the new secondary here
8833 # unless DRBD8.AddChildren is changed to work in parallel;
8834 # currently it doesn't since parallel invocations of
8835 # FindUnusedMinor will conflict
8836 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8837 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8838     else:
8839       self.needed_locks[locking.LEVEL_NODE] = []
8840 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8842 if self.op.iallocator is not None:
8843 # iallocator will select a new node in the same group
8844 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8846 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8847 self.op.iallocator, self.op.remote_node,
8848 self.op.disks, False, self.op.early_release)
8850 self.tasklets = [self.replacer]
8852 def DeclareLocks(self, level):
8853 if level == locking.LEVEL_NODEGROUP:
8854 assert self.op.remote_node is None
8855 assert self.op.iallocator is not None
8856 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8858 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8859 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8860 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8862 elif level == locking.LEVEL_NODE:
8863 if self.op.iallocator is not None:
8864 assert self.op.remote_node is None
8865 assert not self.needed_locks[locking.LEVEL_NODE]
8867 # Lock member nodes of all locked groups
8868 self.needed_locks[locking.LEVEL_NODE] = [node_name
8869 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8870 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8871       else:
8872         self._LockInstancesNodes()
8874 def BuildHooksEnv(self):
8877 This runs on the master, the primary and all the secondaries.
8880 instance = self.replacer.instance
8881     env = {
8882       "MODE": self.op.mode,
8883 "NEW_SECONDARY": self.op.remote_node,
8884       "OLD_SECONDARY": instance.secondary_nodes[0],
8885       }
8886     env.update(_BuildInstanceHookEnvByObject(self, instance))
8887     return env
8889 def BuildHooksNodes(self):
8890 """Build hooks nodes.
8893 instance = self.replacer.instance
8894     nl = [
8895       self.cfg.GetMasterNode(),
8896       instance.primary_node,
8897       ]
8898     if self.op.remote_node is not None:
8899       nl.append(self.op.remote_node)
8900     return nl, nl
8902 def CheckPrereq(self):
8903 """Check prerequisites.
8906 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8907 self.op.iallocator is None)
8909 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8911 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8912 if owned_groups != groups:
8913 raise errors.OpExecError("Node groups used by instance '%s' changed"
8914 " since lock was acquired, current list is %r,"
8915 " used to be '%s'" %
8916 (self.op.instance_name,
8917 utils.CommaJoin(groups),
8918 utils.CommaJoin(owned_groups)))
8920 return LogicalUnit.CheckPrereq(self)
8923 class TLReplaceDisks(Tasklet):
8924 """Replaces disks for an instance.
8926 Note: Locking is not within the scope of this class.
8929 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8930 disks, delay_iallocator, early_release):
8931 """Initializes this class.
8934 Tasklet.__init__(self, lu)
8937     self.instance_name = instance_name
8938     self.mode = mode
8939 self.iallocator_name = iallocator_name
8940 self.remote_node = remote_node
8941     self.disks = disks
8942     self.delay_iallocator = delay_iallocator
8943 self.early_release = early_release
8946 self.instance = None
8947 self.new_node = None
8948 self.target_node = None
8949 self.other_node = None
8950 self.remote_node_info = None
8951 self.node_secondary_ip = None
8953   @staticmethod
8954   def CheckArguments(mode, remote_node, iallocator):
8955 """Helper function for users of this class.
8958 # check for valid parameter combination
8959 if mode == constants.REPLACE_DISK_CHG:
8960 if remote_node is None and iallocator is None:
8961 raise errors.OpPrereqError("When changing the secondary either an"
8962 " iallocator script must be used or the"
8963 " new node given", errors.ECODE_INVAL)
8965 if remote_node is not None and iallocator is not None:
8966 raise errors.OpPrereqError("Give either the iallocator or the new"
8967 " secondary, not both", errors.ECODE_INVAL)
8969 elif remote_node is not None or iallocator is not None:
8970 # Not replacing the secondary
8971 raise errors.OpPrereqError("The iallocator and new node options can"
8972 " only be used when changing the"
8973 " secondary node", errors.ECODE_INVAL)
8975   @staticmethod
8976   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8977 """Compute a new secondary node using an IAllocator.
8980 ial = IAllocator(lu.cfg, lu.rpc,
8981 mode=constants.IALLOCATOR_MODE_RELOC,
8982                      name=instance_name,
8983                      relocate_from=relocate_from)
8985 ial.Run(iallocator_name)
8987     if not ial.success:
8988       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8989 " %s" % (iallocator_name, ial.info),
8992 if len(ial.result) != ial.required_nodes:
8993 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8994 " of nodes (%s), required %s" %
8996 len(ial.result), ial.required_nodes),
8999 remote_node_name = ial.result[0]
9001 lu.LogInfo("Selected new secondary for instance '%s': %s",
9002 instance_name, remote_node_name)
9004 return remote_node_name
9006 def _FindFaultyDisks(self, node_name):
9007     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9008                                     node_name)
9010 def _CheckDisksActivated(self, instance):
9011 """Checks if the instance disks are activated.
9013 @param instance: The instance to check disks
9014 @return: True if they are activated, False otherwise
9017 nodes = instance.all_nodes
9019 for idx, dev in enumerate(instance.disks):
9021 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9022 self.cfg.SetDiskID(dev, node)
9024 result = self.rpc.call_blockdev_find(node, dev)
9028 elif result.fail_msg or not result.payload:
9033 def CheckPrereq(self):
9034 """Check prerequisites.
9036 This checks that the instance is in the cluster.
9039 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9040 assert instance is not None, \
9041 "Cannot retrieve locked instance %s" % self.instance_name
9043 if instance.disk_template != constants.DT_DRBD8:
9044 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9045 " instances", errors.ECODE_INVAL)
9047 if len(instance.secondary_nodes) != 1:
9048 raise errors.OpPrereqError("The instance has a strange layout,"
9049 " expected one secondary but found %d" %
9050 len(instance.secondary_nodes),
9053 if not self.delay_iallocator:
9054 self._CheckPrereq2()
9056 def _CheckPrereq2(self):
9057 """Check prerequisites, second part.
9059 This function should always be part of CheckPrereq. It was separated and is
9060 now called from Exec because during node evacuation iallocator was only
9061     called with an unmodified cluster model, not taking planned changes into
9062     account.
9065 instance = self.instance
9066 secondary_node = instance.secondary_nodes[0]
9068 if self.iallocator_name is None:
9069 remote_node = self.remote_node
9070     else:
9071       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9072 instance.name, instance.secondary_nodes)
9074 if remote_node is None:
9075 self.remote_node_info = None
9076     else:
9077       assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9078 "Remote node '%s' is not locked" % remote_node
9080 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9081 assert self.remote_node_info is not None, \
9082 "Cannot retrieve locked node %s" % remote_node
9084 if remote_node == self.instance.primary_node:
9085 raise errors.OpPrereqError("The specified node is the primary node of"
9086 " the instance", errors.ECODE_INVAL)
9088 if remote_node == secondary_node:
9089 raise errors.OpPrereqError("The specified node is already the"
9090 " secondary node of the instance",
9093 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9094 constants.REPLACE_DISK_CHG):
9095 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9098 if self.mode == constants.REPLACE_DISK_AUTO:
9099 if not self._CheckDisksActivated(instance):
9100 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9101 " first" % self.instance_name,
9103 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9104 faulty_secondary = self._FindFaultyDisks(secondary_node)
9106 if faulty_primary and faulty_secondary:
9107 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9108 " one node and can not be repaired"
9109 " automatically" % self.instance_name,
9112       if faulty_primary:
9113         self.disks = faulty_primary
9114 self.target_node = instance.primary_node
9115 self.other_node = secondary_node
9116 check_nodes = [self.target_node, self.other_node]
9117 elif faulty_secondary:
9118 self.disks = faulty_secondary
9119 self.target_node = secondary_node
9120 self.other_node = instance.primary_node
9121 check_nodes = [self.target_node, self.other_node]
9127 # Non-automatic modes
9128 if self.mode == constants.REPLACE_DISK_PRI:
9129 self.target_node = instance.primary_node
9130 self.other_node = secondary_node
9131 check_nodes = [self.target_node, self.other_node]
9133 elif self.mode == constants.REPLACE_DISK_SEC:
9134 self.target_node = secondary_node
9135 self.other_node = instance.primary_node
9136 check_nodes = [self.target_node, self.other_node]
9138 elif self.mode == constants.REPLACE_DISK_CHG:
9139 self.new_node = remote_node
9140 self.other_node = instance.primary_node
9141 self.target_node = secondary_node
9142 check_nodes = [self.new_node, self.other_node]
9144 _CheckNodeNotDrained(self.lu, remote_node)
9145 _CheckNodeVmCapable(self.lu, remote_node)
9147 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9148 assert old_node_info is not None
9149 if old_node_info.offline and not self.early_release:
9150 # doesn't make sense to delay the release
9151 self.early_release = True
9152 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9153 " early-release mode", secondary_node)
9156 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9159 # If not specified all disks should be replaced
9160     if not self.disks:
9161       self.disks = range(len(self.instance.disks))
9163 for node in check_nodes:
9164 _CheckNodeOnline(self.lu, node)
9166 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9169 if node_name is not None)
9171 # Release unneeded node locks
9172 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9174 # Release any owned node group
9175 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9176 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9178 # Check whether disks are valid
9179 for disk_idx in self.disks:
9180 instance.FindDisk(disk_idx)
9182 # Get secondary node IP addresses
9183 self.node_secondary_ip = \
9184 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9185 for node_name in touched_nodes)
9187 def Exec(self, feedback_fn):
9188 """Execute disk replacement.
9190 This dispatches the disk replacement to the appropriate handler.
9193 if self.delay_iallocator:
9194 self._CheckPrereq2()
9197 # Verify owned locks before starting operation
9198 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9199 assert set(owned_locks) == set(self.node_secondary_ip), \
9200 ("Incorrect node locks, owning %s, expected %s" %
9201 (owned_locks, self.node_secondary_ip.keys()))
9203 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9204 assert list(owned_locks) == [self.instance_name], \
9205 "Instance '%s' not locked" % self.instance_name
9207 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9208 "Should not own any node group lock at this point"
9210     if not self.disks:
9211       feedback_fn("No disks need replacement")
9212       return
9214 feedback_fn("Replacing disk(s) %s for %s" %
9215 (utils.CommaJoin(self.disks), self.instance.name))
9217 activate_disks = (not self.instance.admin_up)
9219 # Activate the instance disks if we're replacing them on a down instance
9220     if activate_disks:
9221       _StartInstanceDisks(self.lu, self.instance, True)
9224 # Should we replace the secondary node?
9225 if self.new_node is not None:
9226 fn = self._ExecDrbd8Secondary
9227       else:
9228         fn = self._ExecDrbd8DiskOnly
9230 result = fn(feedback_fn)
9232 # Deactivate the instance disks if we're replacing them on a
9235 _SafeShutdownInstanceDisks(self.lu, self.instance)
9238 # Verify owned locks
9239 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9240 nodes = frozenset(self.node_secondary_ip)
9241 assert ((self.early_release and not owned_locks) or
9242 (not self.early_release and not (set(owned_locks) - nodes))), \
9243 ("Not owning the correct locks, early_release=%s, owned=%r,"
9244 " nodes=%r" % (self.early_release, owned_locks, nodes))
9248 def _CheckVolumeGroup(self, nodes):
9249 self.lu.LogInfo("Checking volume groups")
9251 vgname = self.cfg.GetVGName()
9253 # Make sure volume group exists on all involved nodes
9254 results = self.rpc.call_vg_list(nodes)
9256 raise errors.OpExecError("Can't list volume groups on the nodes")
9260 res.Raise("Error checking node %s" % node)
9261 if vgname not in res.payload:
9262 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9265 def _CheckDisksExistence(self, nodes):
9266 # Check disk existence
9267 for idx, dev in enumerate(self.instance.disks):
9268 if idx not in self.disks:
9272 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9273 self.cfg.SetDiskID(dev, node)
9275 result = self.rpc.call_blockdev_find(node, dev)
9277 msg = result.fail_msg
9278 if msg or not result.payload:
9279           if not msg:
9280             msg = "disk not found"
9281 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9284 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9285 for idx, dev in enumerate(self.instance.disks):
9286       if idx not in self.disks:
9287         continue
9289       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9292 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9294 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9295 " replace disks for instance %s" %
9296 (node_name, self.instance.name))
9298 def _CreateNewStorage(self, node_name):
9299     iv_names = {}
9301     for idx, dev in enumerate(self.instance.disks):
9302       if idx not in self.disks:
9303         continue
9305 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9307 self.cfg.SetDiskID(dev, node_name)
9309 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9310 names = _GenerateUniqueNames(self.lu, lv_names)
9312 vg_data = dev.children[0].logical_id[0]
9313 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9314 logical_id=(vg_data, names[0]))
9315 vg_meta = dev.children[1].logical_id[0]
9316 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9317 logical_id=(vg_meta, names[1]))
9319 new_lvs = [lv_data, lv_meta]
9320 old_lvs = dev.children
9321 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9323 # we pass force_create=True to force the LVM creation
9324 for new_lv in new_lvs:
9325 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9326 _GetInstanceInfoText(self.instance), False)
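      # Illustrative note (names are assumptions, not taken from a real
      # cluster): for disk index 0 the suffix list above yields
      # ".disk0_data" and ".disk0_meta"; _GenerateUniqueNames is then
      # expected to prepend a generated unique prefix, so the replacement
      # LVs can coexist with the LVs still attached to the DRBD device.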
9330 def _CheckDevices(self, node_name, iv_names):
9331 for name, (dev, _, _) in iv_names.iteritems():
9332 self.cfg.SetDiskID(dev, node_name)
9334 result = self.rpc.call_blockdev_find(node_name, dev)
9336 msg = result.fail_msg
9337 if msg or not result.payload:
9339 msg = "disk not found"
9340 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9343 if result.payload.is_degraded:
9344 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9346 def _RemoveOldStorage(self, node_name, iv_names):
9347 for name, (_, old_lvs, _) in iv_names.iteritems():
9348 self.lu.LogInfo("Remove logical volumes for %s" % name)
9351 self.cfg.SetDiskID(lv, node_name)
9353 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9355 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9356 hint="remove unused LVs manually")
9358 def _ExecDrbd8DiskOnly(self, feedback_fn):
9359 """Replace a disk on the primary or secondary for DRBD 8.
9361 The algorithm for replace is quite complicated:
9363 1. for each disk to be replaced:
9365 1. create new LVs on the target node with unique names
9366 1. detach old LVs from the drbd device
9367 1. rename old LVs to name_replaced.<time_t>
9368 1. rename new LVs to old LVs
9369 1. attach the new LVs (with the old names now) to the drbd device
9371 1. wait for sync across all devices
9373 1. for each modified disk:
9375 1. remove old LVs (which have the name name_replaced.<time_t>)
9377 Failures are not very well handled.
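    A minimal illustrative sketch of the rename dance (LV names are
    assumptions, not taken from a real cluster): given the old data LV
    "xenvg/abc.disk0_data" and a freshly created "xenvg/def.disk0_data",
    the old LV is first renamed to "abc.disk0_data_replaced-<time_t>",
    the new LV is then renamed to "abc.disk0_data", and that volume is
    attached back to the DRBD device, which thus keeps its original
    logical_id throughout.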
9382 # Step: check device activation
9383 self.lu.LogStep(1, steps_total, "Check device existence")
9384 self._CheckDisksExistence([self.other_node, self.target_node])
9385 self._CheckVolumeGroup([self.target_node, self.other_node])
9387 # Step: check other node consistency
9388 self.lu.LogStep(2, steps_total, "Check peer consistency")
9389 self._CheckDisksConsistency(self.other_node,
9390 self.other_node == self.instance.primary_node,
9393 # Step: create new storage
9394 self.lu.LogStep(3, steps_total, "Allocate new storage")
9395 iv_names = self._CreateNewStorage(self.target_node)
9397 # Step: for each lv, detach+rename*2+attach
9398 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9399 for dev, old_lvs, new_lvs in iv_names.itervalues():
9400 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9402 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9404 result.Raise("Can't detach drbd from local storage on node"
9405 " %s for device %s" % (self.target_node, dev.iv_name))
9407 #cfg.Update(instance)
9409 # ok, we created the new LVs, so now we know we have the needed
9410 # storage; as such, we proceed on the target node to rename
9411 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9412 # using the assumption that logical_id == physical_id (which in
9413 # turn is the unique_id on that node)
9415 # FIXME(iustin): use a better name for the replaced LVs
9416 temp_suffix = int(time.time())
9417 ren_fn = lambda d, suff: (d.physical_id[0],
9418 d.physical_id[1] + "_replaced-%s" % suff)
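      # Example (assumed values): for physical_id ("xenvg", "abc.disk0_data")
      # and temp_suffix 1400000000, ren_fn returns
      # ("xenvg", "abc.disk0_data_replaced-1400000000").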
9420 # Build the rename list based on what LVs exist on the node
9421 rename_old_to_new = []
9422 for to_ren in old_lvs:
9423 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9424 if not result.fail_msg and result.payload:
9426 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9428 self.lu.LogInfo("Renaming the old LVs on the target node")
9429 result = self.rpc.call_blockdev_rename(self.target_node,
9431 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9433 # Now we rename the new LVs to the old LVs
9434 self.lu.LogInfo("Renaming the new LVs on the target node")
9435 rename_new_to_old = [(new, old.physical_id)
9436 for old, new in zip(old_lvs, new_lvs)]
9437 result = self.rpc.call_blockdev_rename(self.target_node,
9439 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9441 for old, new in zip(old_lvs, new_lvs):
9442 new.logical_id = old.logical_id
9443 self.cfg.SetDiskID(new, self.target_node)
9445 for disk in old_lvs:
9446 disk.logical_id = ren_fn(disk, temp_suffix)
9447 self.cfg.SetDiskID(disk, self.target_node)
9449 # Now that the new lvs have the old name, we can add them to the device
9450 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9451 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9453 msg = result.fail_msg
9455 for new_lv in new_lvs:
9456 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9459 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9460 hint=("cleanup manually the unused logical"
9462 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9464 dev.children = new_lvs
9466 self.cfg.Update(self.instance, feedback_fn)
9469 if self.early_release:
9470 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9472 self._RemoveOldStorage(self.target_node, iv_names)
9473 # WARNING: we release both node locks here, do not do other RPCs
9474 # than WaitForSync to the primary node
9475 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9476 names=[self.target_node, self.other_node])
9479 # This can fail as the old devices are degraded and _WaitForSync
9480 # does a combined result over all disks, so we don't check its return value
9481 self.lu.LogStep(cstep, steps_total, "Sync devices")
9483 _WaitForSync(self.lu, self.instance)
9485 # Check all devices manually
9486 self._CheckDevices(self.instance.primary_node, iv_names)
9488 # Step: remove old storage
9489 if not self.early_release:
9490 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9492 self._RemoveOldStorage(self.target_node, iv_names)
9494 def _ExecDrbd8Secondary(self, feedback_fn):
9495 """Replace the secondary node for DRBD 8.
9497 The algorithm for replace is quite complicated:
9498 - for all disks of the instance:
9499 - create new LVs on the new node with same names
9500 - shutdown the drbd device on the old secondary
9501 - disconnect the drbd network on the primary
9502 - create the drbd device on the new secondary
9503 - network attach the drbd on the primary, using an artifice:
9504 the drbd code for Attach() will connect to the network if it
9505 finds a device which is connected to the good local disks but not network enabled
9507 - wait for sync across all devices
9508 - remove all disks from the old secondary
9510 Failures are not very well handled.
9515 # Step: check device activation
9516 self.lu.LogStep(1, steps_total, "Check device existence")
9517 self._CheckDisksExistence([self.instance.primary_node])
9518 self._CheckVolumeGroup([self.instance.primary_node])
9520 # Step: check other node consistency
9521 self.lu.LogStep(2, steps_total, "Check peer consistency")
9522 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9524 # Step: create new storage
9525 self.lu.LogStep(3, steps_total, "Allocate new storage")
9526 for idx, dev in enumerate(self.instance.disks):
9527 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9528 (self.new_node, idx))
9529 # we pass force_create=True to force LVM creation
9530 for new_lv in dev.children:
9531 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9532 _GetInstanceInfoText(self.instance), False)
9534 # Step 4: drbd minors and drbd setup changes
9535 # after this, we must manually remove the drbd minors on both the
9536 # error and the success paths
9537 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9538 minors = self.cfg.AllocateDRBDMinor([self.new_node
9539 for dev in self.instance.disks],
9541 logging.debug("Allocated minors %r", minors)
9544 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9545 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
9546 (self.new_node, idx))
9547 # create new devices on new_node; note that we create two IDs:
9548 # one without port, so the drbd will be activated without
9549 # networking information on the new node at this stage, and one
9550 # with network, for the later activation in step 4
9551 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9552 if self.instance.primary_node == o_node1:
9555 assert self.instance.primary_node == o_node2, "Three-node instance?"
9558 new_alone_id = (self.instance.primary_node, self.new_node, None,
9559 p_minor, new_minor, o_secret)
9560 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9561 p_minor, new_minor, o_secret)
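      # Example (assumed values): with primary node "node1", new secondary
      # "node3", DRBD port 11000, p_minor 0 and new_minor 2 this gives
      #   new_alone_id = ("node1", "node3", None,  0, 2, o_secret)
      #   new_net_id   = ("node1", "node3", 11000, 0, 2, o_secret)
      # i.e. the two IDs differ only in the port, which the standalone
      # variant leaves out.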
9563 iv_names[idx] = (dev, dev.children, new_net_id)
9564 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9566 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9567 logical_id=new_alone_id,
9568 children=dev.children,
9571 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9572 _GetInstanceInfoText(self.instance), False)
9573 except errors.GenericError:
9574 self.cfg.ReleaseDRBDMinors(self.instance.name)
9577 # We have new devices, shutdown the drbd on the old secondary
9578 for idx, dev in enumerate(self.instance.disks):
9579 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9580 self.cfg.SetDiskID(dev, self.target_node)
9581 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9583 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9584 " node: %s" % (idx, msg),
9585 hint=("Please cleanup this device manually as"
9586 " soon as possible"))
9588 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9589 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9590 self.node_secondary_ip,
9591 self.instance.disks)\
9592 [self.instance.primary_node]
9594 msg = result.fail_msg
9596 # detaches didn't succeed (unlikely)
9597 self.cfg.ReleaseDRBDMinors(self.instance.name)
9598 raise errors.OpExecError("Can't detach the disks from the network on"
9599 " old node: %s" % (msg,))
9601 # if we managed to detach at least one, we update all the disks of
9602 # the instance to point to the new secondary
9603 self.lu.LogInfo("Updating instance configuration")
9604 for dev, _, new_logical_id in iv_names.itervalues():
9605 dev.logical_id = new_logical_id
9606 self.cfg.SetDiskID(dev, self.instance.primary_node)
9608 self.cfg.Update(self.instance, feedback_fn)
9610 # and now perform the drbd attach
9611 self.lu.LogInfo("Attaching primary drbds to new secondary"
9612 " (standalone => connected)")
9613 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9615 self.node_secondary_ip,
9616 self.instance.disks,
9619 for to_node, to_result in result.items():
9620 msg = to_result.fail_msg
9622 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9624 hint=("please do a gnt-instance info to see the"
9625 " status of disks"))
9627 if self.early_release:
9628 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9630 self._RemoveOldStorage(self.target_node, iv_names)
9631 # WARNING: we release all node locks here, do not do other RPCs
9632 # than WaitForSync to the primary node
9633 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9634 names=[self.instance.primary_node,
9639 # This can fail as the old devices are degraded and _WaitForSync
9640 # does a combined result over all disks, so we don't check its return value
9641 self.lu.LogStep(cstep, steps_total, "Sync devices")
9643 _WaitForSync(self.lu, self.instance)
9645 # Check all devices manually
9646 self._CheckDevices(self.instance.primary_node, iv_names)
9648 # Step: remove old storage
9649 if not self.early_release:
9650 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9651 self._RemoveOldStorage(self.target_node, iv_names)
9654 class LURepairNodeStorage(NoHooksLU):
9655 """Repairs the volume group on a node.
9660 def CheckArguments(self):
9661 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9663 storage_type = self.op.storage_type
9665 if (constants.SO_FIX_CONSISTENCY not in
9666 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9667 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9668 " repaired" % storage_type,
9671 def ExpandNames(self):
9672 self.needed_locks = {
9673 locking.LEVEL_NODE: [self.op.node_name],
9676 def _CheckFaultyDisks(self, instance, node_name):
9677 """Ensure faulty disks abort the opcode or at least warn."""
9679 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9681 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9682 " node '%s'" % (instance.name, node_name),
9684 except errors.OpPrereqError, err:
9685 if self.op.ignore_consistency:
9686 self.proc.LogWarning(str(err.args[0]))
9690 def CheckPrereq(self):
9691 """Check prerequisites.
9694 # Check whether any instance on this node has faulty disks
9695 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9696 if not inst.admin_up:
9698 check_nodes = set(inst.all_nodes)
9699 check_nodes.discard(self.op.node_name)
9700 for inst_node_name in check_nodes:
9701 self._CheckFaultyDisks(inst, inst_node_name)
9703 def Exec(self, feedback_fn):
9704 feedback_fn("Repairing storage unit '%s' on %s ..." %
9705 (self.op.name, self.op.node_name))
9707 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9708 result = self.rpc.call_storage_execute(self.op.node_name,
9709 self.op.storage_type, st_args,
9711 constants.SO_FIX_CONSISTENCY)
9712 result.Raise("Failed to repair storage unit '%s' on %s" %
9713 (self.op.name, self.op.node_name))
9716 class LUNodeEvacStrategy(NoHooksLU):
9717 """Computes the node evacuation strategy.
9722 def CheckArguments(self):
9723 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9725 def ExpandNames(self):
9726 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9727 self.needed_locks = locks = {}
9728 if self.op.remote_node is None:
9729 locks[locking.LEVEL_NODE] = locking.ALL_SET
9731 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9732 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9734 def Exec(self, feedback_fn):
9736 for node in self.op.nodes:
9737 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9741 if self.op.remote_node is not None:
9744 if i.primary_node == self.op.remote_node:
9745 raise errors.OpPrereqError("Node %s is the primary node of"
9746 " instance %s, cannot use it as"
9748 (self.op.remote_node, i.name),
9750 result.append([i.name, self.op.remote_node])
9752 ial = IAllocator(self.cfg, self.rpc,
9753 mode=constants.IALLOCATOR_MODE_MEVAC,
9754 evac_nodes=self.op.nodes)
9755 ial.Run(self.op.iallocator, validate=True)
9757 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
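    # Illustrative shape of the returned value (an assumption; only the
    # explicit remote_node branch is fully visible above):
    #   [["instance1.example.com", "node3.example.com"], ...]
    # i.e. one [instance_name, evacuation_target] pair per secondary
    # instance of the evacuated nodes; the IAllocator branch is expected to
    # deliver its answer in the same form via ial.result.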
9763 class LUInstanceGrowDisk(LogicalUnit):
9764 """Grow a disk of an instance.
9768 HTYPE = constants.HTYPE_INSTANCE
9771 def ExpandNames(self):
9772 self._ExpandAndLockInstance()
9773 self.needed_locks[locking.LEVEL_NODE] = []
9774 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9776 def DeclareLocks(self, level):
9777 if level == locking.LEVEL_NODE:
9778 self._LockInstancesNodes()
9780 def BuildHooksEnv(self):
9783 This runs on the master, the primary and all the secondaries.
9787 "DISK": self.op.disk,
9788 "AMOUNT": self.op.amount,
9790 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9793 def BuildHooksNodes(self):
9794 """Build hooks nodes.
9797 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9800 def CheckPrereq(self):
9801 """Check prerequisites.
9803 This checks that the instance is in the cluster.
9806 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9807 assert instance is not None, \
9808 "Cannot retrieve locked instance %s" % self.op.instance_name
9809 nodenames = list(instance.all_nodes)
9810 for node in nodenames:
9811 _CheckNodeOnline(self, node)
9813 self.instance = instance
9815 if instance.disk_template not in constants.DTS_GROWABLE:
9816 raise errors.OpPrereqError("Instance's disk layout does not support"
9817 " growing", errors.ECODE_INVAL)
9819 self.disk = instance.FindDisk(self.op.disk)
9821 if instance.disk_template not in (constants.DT_FILE,
9822 constants.DT_SHARED_FILE):
9823 # TODO: check the free disk space for file, when that feature is implemented
9825 _CheckNodesFreeDiskPerVG(self, nodenames,
9826 self.disk.ComputeGrowth(self.op.amount))
9828 def Exec(self, feedback_fn):
9829 """Execute disk grow.
9832 instance = self.instance
9835 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9837 raise errors.OpExecError("Cannot activate block device to grow")
9839 # First run all grow ops in dry-run mode
9840 for node in instance.all_nodes:
9841 self.cfg.SetDiskID(disk, node)
9842 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9843 result.Raise("Grow request failed to node %s" % node)
9845 # We know that (as far as we can test) operations across different
9846 # nodes will succeed; time to run it for real
9847 for node in instance.all_nodes:
9848 self.cfg.SetDiskID(disk, node)
9849 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9850 result.Raise("Grow request failed to node %s" % node)
9852 # TODO: Rewrite code to work properly
9853 # DRBD goes into sync mode for a short amount of time after executing the
9854 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9855 # calling "resize" in sync mode fails. Sleeping for a short amount of
9856 # time is a work-around.
9859 disk.RecordGrow(self.op.amount)
9860 self.cfg.Update(instance, feedback_fn)
9861 if self.op.wait_for_sync:
9862 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9864 self.proc.LogWarning("Disk sync-ing has not returned a good"
9865 " status; please check the instance")
9866 if not instance.admin_up:
9867 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9868 elif not instance.admin_up:
9869 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9870 " not supposed to be running because no wait for"
9871 " sync mode was requested")
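# A hedged usage sketch (the opcode class name and field units are
# assumptions based on this LU's op attributes, not verified against
# opcodes.py): growing disk 0 of an instance by 1024 MB and waiting for
# the resync would look roughly like
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=1024,
#                                   wait_for_sync=True)
# submitted through the normal job queue machinery.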
9874 class LUInstanceQueryData(NoHooksLU):
9875 """Query runtime instance data.
9880 def ExpandNames(self):
9881 self.needed_locks = {}
9883 # Use locking if requested or when non-static information is wanted
9884 if not (self.op.static or self.op.use_locking):
9885 self.LogWarning("Non-static data requested, locks need to be acquired")
9886 self.op.use_locking = True
9888 if self.op.instances or not self.op.use_locking:
9889 # Expand instance names right here
9890 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9892 # Will use acquired locks
9893 self.wanted_names = None
9895 if self.op.use_locking:
9896 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9898 if self.wanted_names is None:
9899 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9901 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9903 self.needed_locks[locking.LEVEL_NODE] = []
9904 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9905 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9907 def DeclareLocks(self, level):
9908 if self.op.use_locking and level == locking.LEVEL_NODE:
9909 self._LockInstancesNodes()
9911 def CheckPrereq(self):
9912 """Check prerequisites.
9914 This only checks the optional instance list against the existing names.
9917 if self.wanted_names is None:
9918 assert self.op.use_locking, "Locking was not used"
9919 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9921 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9922 for name in self.wanted_names]
9924 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9925 """Returns the status of a block device
9928 if self.op.static or not node:
9931 self.cfg.SetDiskID(dev, node)
9933 result = self.rpc.call_blockdev_find(node, dev)
9937 result.Raise("Can't compute disk status for %s" % instance_name)
9939 status = result.payload
9943 return (status.dev_path, status.major, status.minor,
9944 status.sync_percent, status.estimated_time,
9945 status.is_degraded, status.ldisk_status)
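    # Example (assumed values): for a healthy, in-sync DRBD disk the RPC
    # payload would translate to something like
    #   ("/dev/drbd0", 147, 0, None, None, False, None)
    # while the static / no-node case above skips the RPC entirely.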
9947 def _ComputeDiskStatus(self, instance, snode, dev):
9948 """Compute block device status.
9951 if dev.dev_type in constants.LDS_DRBD:
9952 # we change the snode then (otherwise we use the one passed in)
9953 if dev.logical_id[0] == instance.primary_node:
9954 snode = dev.logical_id[1]
9956 snode = dev.logical_id[0]
9958 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9960 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9963 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9964 for child in dev.children]
9969 "iv_name": dev.iv_name,
9970 "dev_type": dev.dev_type,
9971 "logical_id": dev.logical_id,
9972 "physical_id": dev.physical_id,
9973 "pstatus": dev_pstatus,
9974 "sstatus": dev_sstatus,
9975 "children": dev_children,
9980 def Exec(self, feedback_fn):
9981 """Gather and return data"""
9984 cluster = self.cfg.GetClusterInfo()
9986 for instance in self.wanted_instances:
9987 if not self.op.static:
9988 remote_info = self.rpc.call_instance_info(instance.primary_node,
9990 instance.hypervisor)
9991 remote_info.Raise("Error checking node %s" % instance.primary_node)
9992 remote_info = remote_info.payload
9993 if remote_info and "state" in remote_info:
9996 remote_state = "down"
9999 if instance.admin_up:
10000 config_state = "up"
10002 config_state = "down"
10004 disks = [self._ComputeDiskStatus(instance, None, device)
10005 for device in instance.disks]
10007 result[instance.name] = {
10008 "name": instance.name,
10009 "config_state": config_state,
10010 "run_state": remote_state,
10011 "pnode": instance.primary_node,
10012 "snodes": instance.secondary_nodes,
10014 # this happens to be the same format used for hooks
10015 "nics": _NICListToTuple(self, instance.nics),
10016 "disk_template": instance.disk_template,
10018 "hypervisor": instance.hypervisor,
10019 "network_port": instance.network_port,
10020 "hv_instance": instance.hvparams,
10021 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10022 "be_instance": instance.beparams,
10023 "be_actual": cluster.FillBE(instance),
10024 "os_instance": instance.osparams,
10025 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10026 "serial_no": instance.serial_no,
10027 "mtime": instance.mtime,
10028 "ctime": instance.ctime,
10029 "uuid": instance.uuid,
10035 class LUInstanceSetParams(LogicalUnit):
10036 """Modifies an instance's parameters.
10039 HPATH = "instance-modify"
10040 HTYPE = constants.HTYPE_INSTANCE
10043 def CheckArguments(self):
10044 if not (self.op.nics or self.op.disks or self.op.disk_template or
10045 self.op.hvparams or self.op.beparams or self.op.os_name):
10046 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10048 if self.op.hvparams:
10049 _CheckGlobalHvParams(self.op.hvparams)
10053 for disk_op, disk_dict in self.op.disks:
10054 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10055 if disk_op == constants.DDM_REMOVE:
10056 disk_addremove += 1
10058 elif disk_op == constants.DDM_ADD:
10059 disk_addremove += 1
10061 if not isinstance(disk_op, int):
10062 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10063 if not isinstance(disk_dict, dict):
10064 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10065 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10067 if disk_op == constants.DDM_ADD:
10068 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10069 if mode not in constants.DISK_ACCESS_SET:
10070 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10071 errors.ECODE_INVAL)
10072 size = disk_dict.get(constants.IDISK_SIZE, None)
10074 raise errors.OpPrereqError("Required disk parameter size missing",
10075 errors.ECODE_INVAL)
10078 except (TypeError, ValueError), err:
10079 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10080 str(err), errors.ECODE_INVAL)
10081 disk_dict[constants.IDISK_SIZE] = size
10083 # modification of disk
10084 if constants.IDISK_SIZE in disk_dict:
10085 raise errors.OpPrereqError("Disk size change not possible, use"
10086 " grow-disk", errors.ECODE_INVAL)
10088 if disk_addremove > 1:
10089 raise errors.OpPrereqError("Only one disk add or remove operation"
10090 " supported at a time", errors.ECODE_INVAL)
10092 if self.op.disks and self.op.disk_template is not None:
10093 raise errors.OpPrereqError("Disk template conversion and other disk"
10094 " changes not supported at the same time",
10095 errors.ECODE_INVAL)
10097 if (self.op.disk_template and
10098 self.op.disk_template in constants.DTS_INT_MIRROR and
10099 self.op.remote_node is None):
10100 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10101 " one requires specifying a secondary node",
10102 errors.ECODE_INVAL)
10106 for nic_op, nic_dict in self.op.nics:
10107 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10108 if nic_op == constants.DDM_REMOVE:
10111 elif nic_op == constants.DDM_ADD:
10114 if not isinstance(nic_op, int):
10115 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10116 if not isinstance(nic_dict, dict):
10117 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10118 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10120 # nic_dict should be a dict
10121 nic_ip = nic_dict.get(constants.INIC_IP, None)
10122 if nic_ip is not None:
10123 if nic_ip.lower() == constants.VALUE_NONE:
10124 nic_dict[constants.INIC_IP] = None
10126 if not netutils.IPAddress.IsValid(nic_ip):
10127 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10128 errors.ECODE_INVAL)
10130 nic_bridge = nic_dict.get('bridge', None)
10131 nic_link = nic_dict.get(constants.INIC_LINK, None)
10132 if nic_bridge and nic_link:
10133 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10134 " at the same time", errors.ECODE_INVAL)
10135 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10136 nic_dict['bridge'] = None
10137 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10138 nic_dict[constants.INIC_LINK] = None
10140 if nic_op == constants.DDM_ADD:
10141 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10142 if nic_mac is None:
10143 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10145 if constants.INIC_MAC in nic_dict:
10146 nic_mac = nic_dict[constants.INIC_MAC]
10147 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10148 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10150 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10151 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10152 " modifying an existing nic",
10153 errors.ECODE_INVAL)
10155 if nic_addremove > 1:
10156 raise errors.OpPrereqError("Only one NIC add or remove operation"
10157 " supported at a time", errors.ECODE_INVAL)
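  # Illustrative examples of the accepted modification lists (the concrete
  # values are assumptions for the sake of the example):
  #   self.op.disks = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
  #                                          constants.IDISK_MODE: "rw"})]
  #   self.op.nics  = [(0, {constants.INIC_IP: "198.51.100.10"})]
  # i.e. each entry pairs DDM_ADD, DDM_REMOVE or an integer index of an
  # existing device with a dict of the parameters to change.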
10159 def ExpandNames(self):
10160 self._ExpandAndLockInstance()
10161 self.needed_locks[locking.LEVEL_NODE] = []
10162 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10164 def DeclareLocks(self, level):
10165 if level == locking.LEVEL_NODE:
10166 self._LockInstancesNodes()
10167 if self.op.disk_template and self.op.remote_node:
10168 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10169 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10171 def BuildHooksEnv(self):
10172 """Build hooks env.
10174 This runs on the master, primary and secondaries.
10178 if constants.BE_MEMORY in self.be_new:
10179 args['memory'] = self.be_new[constants.BE_MEMORY]
10180 if constants.BE_VCPUS in self.be_new:
10181 args['vcpus'] = self.be_new[constants.BE_VCPUS]
10182 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10183 # information at all.
10186 nic_override = dict(self.op.nics)
10187 for idx, nic in enumerate(self.instance.nics):
10188 if idx in nic_override:
10189 this_nic_override = nic_override[idx]
10191 this_nic_override = {}
10192 if constants.INIC_IP in this_nic_override:
10193 ip = this_nic_override[constants.INIC_IP]
10196 if constants.INIC_MAC in this_nic_override:
10197 mac = this_nic_override[constants.INIC_MAC]
10200 if idx in self.nic_pnew:
10201 nicparams = self.nic_pnew[idx]
10203 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10204 mode = nicparams[constants.NIC_MODE]
10205 link = nicparams[constants.NIC_LINK]
10206 args['nics'].append((ip, mac, mode, link))
10207 if constants.DDM_ADD in nic_override:
10208 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10209 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10210 nicparams = self.nic_pnew[constants.DDM_ADD]
10211 mode = nicparams[constants.NIC_MODE]
10212 link = nicparams[constants.NIC_LINK]
10213 args['nics'].append((ip, mac, mode, link))
10214 elif constants.DDM_REMOVE in nic_override:
10215 del args['nics'][-1]
10217 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10218 if self.op.disk_template:
10219 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10223 def BuildHooksNodes(self):
10224 """Build hooks nodes.
10227 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10230 def CheckPrereq(self):
10231 """Check prerequisites.
10233 This only checks the instance list against the existing names.
10236 # checking the new params on the primary/secondary nodes
10238 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10239 cluster = self.cluster = self.cfg.GetClusterInfo()
10240 assert self.instance is not None, \
10241 "Cannot retrieve locked instance %s" % self.op.instance_name
10242 pnode = instance.primary_node
10243 nodelist = list(instance.all_nodes)
10246 if self.op.os_name and not self.op.force:
10247 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10248 self.op.force_variant)
10249 instance_os = self.op.os_name
10251 instance_os = instance.os
10253 if self.op.disk_template:
10254 if instance.disk_template == self.op.disk_template:
10255 raise errors.OpPrereqError("Instance already has disk template %s" %
10256 instance.disk_template, errors.ECODE_INVAL)
10258 if (instance.disk_template,
10259 self.op.disk_template) not in self._DISK_CONVERSIONS:
10260 raise errors.OpPrereqError("Unsupported disk template conversion from"
10261 " %s to %s" % (instance.disk_template,
10262 self.op.disk_template),
10263 errors.ECODE_INVAL)
10264 _CheckInstanceDown(self, instance, "cannot change disk template")
10265 if self.op.disk_template in constants.DTS_INT_MIRROR:
10266 if self.op.remote_node == pnode:
10267 raise errors.OpPrereqError("Given new secondary node %s is the same"
10268 " as the primary node of the instance" %
10269 self.op.remote_node, errors.ECODE_STATE)
10270 _CheckNodeOnline(self, self.op.remote_node)
10271 _CheckNodeNotDrained(self, self.op.remote_node)
10272 # FIXME: here we assume that the old instance type is DT_PLAIN
10273 assert instance.disk_template == constants.DT_PLAIN
10274 disks = [{constants.IDISK_SIZE: d.size,
10275 constants.IDISK_VG: d.logical_id[0]}
10276 for d in instance.disks]
10277 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10278 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10280 # hvparams processing
10281 if self.op.hvparams:
10282 hv_type = instance.hypervisor
10283 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10284 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10285 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10288 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10289 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10290 self.hv_new = hv_new # the new actual values
10291 self.hv_inst = i_hvdict # the new dict (without defaults)
10293 self.hv_new = self.hv_inst = {}
10295 # beparams processing
10296 if self.op.beparams:
10297 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10299 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10300 be_new = cluster.SimpleFillBE(i_bedict)
10301 self.be_new = be_new # the new actual values
10302 self.be_inst = i_bedict # the new dict (without defaults)
10304 self.be_new = self.be_inst = {}
10305 be_old = cluster.FillBE(instance)
10307 # osparams processing
10308 if self.op.osparams:
10309 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10310 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10311 self.os_inst = i_osdict # the new dict (without defaults)
10317 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10318 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10319 mem_check_list = [pnode]
10320 if be_new[constants.BE_AUTO_BALANCE]:
10321 # either we changed auto_balance to yes or it was from before
10322 mem_check_list.extend(instance.secondary_nodes)
10323 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10324 instance.hypervisor)
10325 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10326 instance.hypervisor)
10327 pninfo = nodeinfo[pnode]
10328 msg = pninfo.fail_msg
10330 # Assume the primary node is unreachable and go ahead
10331 self.warn.append("Can't get info from primary node %s: %s" %
10333 elif not isinstance(pninfo.payload.get('memory_free', None), int):
10334 self.warn.append("Node data from primary node %s doesn't contain"
10335 " free memory information" % pnode)
10336 elif instance_info.fail_msg:
10337 self.warn.append("Can't get instance runtime information: %s" %
10338 instance_info.fail_msg)
10340 if instance_info.payload:
10341 current_mem = int(instance_info.payload['memory'])
10343 # Assume instance not running
10344 # (there is a slight race condition here, but it's not very probable,
10345 # and we have no other way to check)
10347 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10348 pninfo.payload['memory_free'])
10350 raise errors.OpPrereqError("This change will prevent the instance"
10351 " from starting, due to %d MB of memory"
10352 " missing on its primary node" % miss_mem,
10353 errors.ECODE_NORES)
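      # Worked example (numbers are assumptions): raising BE_MEMORY to
      # 4096 MB while the instance currently uses 2048 MB and the primary
      # node reports 1024 MB free gives miss_mem = 4096 - 2048 - 1024 =
      # 1024, which is positive, so the change is refused with ECODE_NORES.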
10355 if be_new[constants.BE_AUTO_BALANCE]:
10356 for node, nres in nodeinfo.items():
10357 if node not in instance.secondary_nodes:
10359 nres.Raise("Can't get info from secondary node %s" % node,
10360 prereq=True, ecode=errors.ECODE_STATE)
10361 if not isinstance(nres.payload.get('memory_free', None), int):
10362 raise errors.OpPrereqError("Secondary node %s didn't return free"
10363 " memory information" % node,
10364 errors.ECODE_STATE)
10365 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10366 raise errors.OpPrereqError("This change will prevent the instance"
10367 " from failover to its secondary node"
10368 " %s, due to not enough memory" % node,
10369 errors.ECODE_STATE)
10373 self.nic_pinst = {}
10374 for nic_op, nic_dict in self.op.nics:
10375 if nic_op == constants.DDM_REMOVE:
10376 if not instance.nics:
10377 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10378 errors.ECODE_INVAL)
10380 if nic_op != constants.DDM_ADD:
10382 if not instance.nics:
10383 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10384 " no NICs" % nic_op,
10385 errors.ECODE_INVAL)
10386 if nic_op < 0 or nic_op >= len(instance.nics):
10387 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10389 (nic_op, len(instance.nics) - 1),
10390 errors.ECODE_INVAL)
10391 old_nic_params = instance.nics[nic_op].nicparams
10392 old_nic_ip = instance.nics[nic_op].ip
10394 old_nic_params = {}
10397 update_params_dict = dict([(key, nic_dict[key])
10398 for key in constants.NICS_PARAMETERS
10399 if key in nic_dict])
10401 if 'bridge' in nic_dict:
10402 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10404 new_nic_params = _GetUpdatedParams(old_nic_params,
10405 update_params_dict)
10406 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10407 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10408 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10409 self.nic_pinst[nic_op] = new_nic_params
10410 self.nic_pnew[nic_op] = new_filled_nic_params
10411 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10413 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10414 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10415 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10417 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10419 self.warn.append(msg)
10421 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10422 if new_nic_mode == constants.NIC_MODE_ROUTED:
10423 if constants.INIC_IP in nic_dict:
10424 nic_ip = nic_dict[constants.INIC_IP]
10426 nic_ip = old_nic_ip
10428 raise errors.OpPrereqError('Cannot set the nic ip to None'
10429 ' on a routed nic', errors.ECODE_INVAL)
10430 if constants.INIC_MAC in nic_dict:
10431 nic_mac = nic_dict[constants.INIC_MAC]
10432 if nic_mac is None:
10433 raise errors.OpPrereqError('Cannot set the nic mac to None',
10434 errors.ECODE_INVAL)
10435 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10436 # otherwise generate the mac
10437 nic_dict[constants.INIC_MAC] = \
10438 self.cfg.GenerateMAC(self.proc.GetECId())
10440 # or validate/reserve the current one
10442 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10443 except errors.ReservationError:
10444 raise errors.OpPrereqError("MAC address %s already in use"
10445 " in cluster" % nic_mac,
10446 errors.ECODE_NOTUNIQUE)
10449 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10450 raise errors.OpPrereqError("Disk operations not supported for"
10451 " diskless instances",
10452 errors.ECODE_INVAL)
10453 for disk_op, _ in self.op.disks:
10454 if disk_op == constants.DDM_REMOVE:
10455 if len(instance.disks) == 1:
10456 raise errors.OpPrereqError("Cannot remove the last disk of"
10457 " an instance", errors.ECODE_INVAL)
10458 _CheckInstanceDown(self, instance, "cannot remove disks")
10460 if (disk_op == constants.DDM_ADD and
10461 len(instance.disks) >= constants.MAX_DISKS):
10462 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10463 " add more" % constants.MAX_DISKS,
10464 errors.ECODE_STATE)
10465 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10467 if disk_op < 0 or disk_op >= len(instance.disks):
10468 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10470 (disk_op, len(instance.disks)),
10471 errors.ECODE_INVAL)
10475 def _ConvertPlainToDrbd(self, feedback_fn):
10476 """Converts an instance from plain to drbd.
10479 feedback_fn("Converting template to drbd")
10480 instance = self.instance
10481 pnode = instance.primary_node
10482 snode = self.op.remote_node
10484 # create a fake disk info for _GenerateDiskTemplate
10485 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10486 constants.IDISK_VG: d.logical_id[0]}
10487 for d in instance.disks]
10488 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10489 instance.name, pnode, [snode],
10490 disk_info, None, None, 0, feedback_fn)
10491 info = _GetInstanceInfoText(instance)
10492 feedback_fn("Creating additional volumes...")
10493 # first, create the missing data and meta devices
10494 for disk in new_disks:
10495 # unfortunately this is... not too nice
10496 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10498 for child in disk.children:
10499 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10500 # at this stage, all new LVs have been created, we can rename the
10502 feedback_fn("Renaming original volumes...")
10503 rename_list = [(o, n.children[0].logical_id)
10504 for (o, n) in zip(instance.disks, new_disks)]
10505 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10506 result.Raise("Failed to rename original LVs")
10508 feedback_fn("Initializing DRBD devices...")
10509 # all child devices are in place, we can now create the DRBD devices
10510 for disk in new_disks:
10511 for node in [pnode, snode]:
10512 f_create = node == pnode
10513 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10515 # at this point, the instance has been modified
10516 instance.disk_template = constants.DT_DRBD8
10517 instance.disks = new_disks
10518 self.cfg.Update(instance, feedback_fn)
10520 # disks are created, waiting for sync
10521 disk_abort = not _WaitForSync(self, instance,
10522 oneshot=not self.op.wait_for_sync)
10524 raise errors.OpExecError("There are some degraded disks for"
10525 " this instance, please cleanup manually")
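    # Sketch of the intended result (an assumption based on the DRBD disk
    # layout used elsewhere in this module): after the conversion each
    # instance disk is an LD_DRBD8 device whose children are the original
    # data LV (renamed into the DRBD naming scheme) plus a small, newly
    # created metadata LV, matching what _GenerateDiskTemplate builds for a
    # natively created DRBD instance.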
10527 def _ConvertDrbdToPlain(self, feedback_fn):
10528 """Converts an instance from drbd to plain.
10531 instance = self.instance
10532 assert len(instance.secondary_nodes) == 1
10533 pnode = instance.primary_node
10534 snode = instance.secondary_nodes[0]
10535 feedback_fn("Converting template to plain")
10537 old_disks = instance.disks
10538 new_disks = [d.children[0] for d in old_disks]
10540 # copy over size and mode
10541 for parent, child in zip(old_disks, new_disks):
10542 child.size = parent.size
10543 child.mode = parent.mode
10545 # update instance structure
10546 instance.disks = new_disks
10547 instance.disk_template = constants.DT_PLAIN
10548 self.cfg.Update(instance, feedback_fn)
10550 feedback_fn("Removing volumes on the secondary node...")
10551 for disk in old_disks:
10552 self.cfg.SetDiskID(disk, snode)
10553 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10555 self.LogWarning("Could not remove block device %s on node %s,"
10556 " continuing anyway: %s", disk.iv_name, snode, msg)
10558 feedback_fn("Removing unneeded volumes on the primary node...")
10559 for idx, disk in enumerate(old_disks):
10560 meta = disk.children[1]
10561 self.cfg.SetDiskID(meta, pnode)
10562 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10564 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10565 " continuing anyway: %s", idx, pnode, msg)
10567 def Exec(self, feedback_fn):
10568 """Modifies an instance.
10570 All parameters take effect only at the next restart of the instance.
10573 # Process here the warnings from CheckPrereq, as we don't have a
10574 # feedback_fn there.
10575 for warn in self.warn:
10576 feedback_fn("WARNING: %s" % warn)
10579 instance = self.instance
10581 for disk_op, disk_dict in self.op.disks:
10582 if disk_op == constants.DDM_REMOVE:
10583 # remove the last disk
10584 device = instance.disks.pop()
10585 device_idx = len(instance.disks)
10586 for node, disk in device.ComputeNodeTree(instance.primary_node):
10587 self.cfg.SetDiskID(disk, node)
10588 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10590 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10591 " continuing anyway", device_idx, node, msg)
10592 result.append(("disk/%d" % device_idx, "remove"))
10593 elif disk_op == constants.DDM_ADD:
10595 if instance.disk_template in (constants.DT_FILE,
10596 constants.DT_SHARED_FILE):
10597 file_driver, file_path = instance.disks[0].logical_id
10598 file_path = os.path.dirname(file_path)
10600 file_driver = file_path = None
10601 disk_idx_base = len(instance.disks)
10602 new_disk = _GenerateDiskTemplate(self,
10603 instance.disk_template,
10604 instance.name, instance.primary_node,
10605 instance.secondary_nodes,
10609 disk_idx_base, feedback_fn)[0]
10610 instance.disks.append(new_disk)
10611 info = _GetInstanceInfoText(instance)
10613 logging.info("Creating volume %s for instance %s",
10614 new_disk.iv_name, instance.name)
10615 # Note: this needs to be kept in sync with _CreateDisks
10617 for node in instance.all_nodes:
10618 f_create = node == instance.primary_node
10620 _CreateBlockDev(self, node, instance, new_disk,
10621 f_create, info, f_create)
10622 except errors.OpExecError, err:
10623 self.LogWarning("Failed to create volume %s (%s) on"
10625 new_disk.iv_name, new_disk, node, err)
10626 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10627 (new_disk.size, new_disk.mode)))
10629 # change a given disk
10630 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10631 result.append(("disk.mode/%d" % disk_op,
10632 disk_dict[constants.IDISK_MODE]))
10634 if self.op.disk_template:
10635 r_shut = _ShutdownInstanceDisks(self, instance)
10637 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10638 " proceed with disk template conversion")
10639 mode = (instance.disk_template, self.op.disk_template)
10641 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10643 self.cfg.ReleaseDRBDMinors(instance.name)
10645 result.append(("disk_template", self.op.disk_template))
10648 for nic_op, nic_dict in self.op.nics:
10649 if nic_op == constants.DDM_REMOVE:
10650 # remove the last nic
10651 del instance.nics[-1]
10652 result.append(("nic.%d" % len(instance.nics), "remove"))
10653 elif nic_op == constants.DDM_ADD:
10654 # mac and bridge should be set by now
10655 mac = nic_dict[constants.INIC_MAC]
10656 ip = nic_dict.get(constants.INIC_IP, None)
10657 nicparams = self.nic_pinst[constants.DDM_ADD]
10658 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10659 instance.nics.append(new_nic)
10660 result.append(("nic.%d" % (len(instance.nics) - 1),
10661 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10662 (new_nic.mac, new_nic.ip,
10663 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10664 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10667 for key in (constants.INIC_MAC, constants.INIC_IP):
10668 if key in nic_dict:
10669 setattr(instance.nics[nic_op], key, nic_dict[key])
10670 if nic_op in self.nic_pinst:
10671 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10672 for key, val in nic_dict.iteritems():
10673 result.append(("nic.%s/%d" % (key, nic_op), val))
10676 if self.op.hvparams:
10677 instance.hvparams = self.hv_inst
10678 for key, val in self.op.hvparams.iteritems():
10679 result.append(("hv/%s" % key, val))
10682 if self.op.beparams:
10683 instance.beparams = self.be_inst
10684 for key, val in self.op.beparams.iteritems():
10685 result.append(("be/%s" % key, val))
10688 if self.op.os_name:
10689 instance.os = self.op.os_name
10692 if self.op.osparams:
10693 instance.osparams = self.os_inst
10694 for key, val in self.op.osparams.iteritems():
10695 result.append(("os/%s" % key, val))
10697 self.cfg.Update(instance, feedback_fn)
10701 _DISK_CONVERSIONS = {
10702 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10703 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10707 class LUBackupQuery(NoHooksLU):
10708 """Query the exports list
10713 def ExpandNames(self):
10714 self.needed_locks = {}
10715 self.share_locks[locking.LEVEL_NODE] = 1
10716 if not self.op.nodes:
10717 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10719 self.needed_locks[locking.LEVEL_NODE] = \
10720 _GetWantedNodes(self, self.op.nodes)
10722 def Exec(self, feedback_fn):
10723 """Compute the list of all the exported system images.
10726 @return: a dictionary with the structure node->(export-list)
10727 where export-list is a list of the instances exported on
10731 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10732 rpcresult = self.rpc.call_export_list(self.nodes)
10734 for node in rpcresult:
10735 if rpcresult[node].fail_msg:
10736 result[node] = False
10738 result[node] = rpcresult[node].payload
10743 class LUBackupPrepare(NoHooksLU):
10744 """Prepares an instance for an export and returns useful information.
10749 def ExpandNames(self):
10750 self._ExpandAndLockInstance()
10752 def CheckPrereq(self):
10753 """Check prerequisites.
10756 instance_name = self.op.instance_name
10758 self.instance = self.cfg.GetInstanceInfo(instance_name)
10759 assert self.instance is not None, \
10760 "Cannot retrieve locked instance %s" % self.op.instance_name
10761 _CheckNodeOnline(self, self.instance.primary_node)
10763 self._cds = _GetClusterDomainSecret()
10765 def Exec(self, feedback_fn):
10766 """Prepares an instance for an export.
10769 instance = self.instance
10771 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10772 salt = utils.GenerateSecret(8)
10774 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10775 result = self.rpc.call_x509_cert_create(instance.primary_node,
10776 constants.RIE_CERT_VALIDITY)
10777 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10779 (name, cert_pem) = result.payload
10781 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10785 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10786 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10788 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10794 class LUBackupExport(LogicalUnit):
10795 """Export an instance to an image in the cluster.
10798 HPATH = "instance-export"
10799 HTYPE = constants.HTYPE_INSTANCE
10802 def CheckArguments(self):
10803 """Check the arguments.
10806 self.x509_key_name = self.op.x509_key_name
10807 self.dest_x509_ca_pem = self.op.destination_x509_ca
10809 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10810 if not self.x509_key_name:
10811 raise errors.OpPrereqError("Missing X509 key name for encryption",
10812 errors.ECODE_INVAL)
10814 if not self.dest_x509_ca_pem:
10815 raise errors.OpPrereqError("Missing destination X509 CA",
10816 errors.ECODE_INVAL)
10818 def ExpandNames(self):
10819 self._ExpandAndLockInstance()
10821 # Lock all nodes for local exports
10822 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10823 # FIXME: lock only instance primary and destination node
10825 # Sad but true, for now we have to lock all nodes, as we don't know where
10826 # the previous export might be, and in this LU we search for it and
10827 # remove it from its current node. In the future we could fix this by:
10828 # - making a tasklet to search (share-lock all), then create the
10829 # new one, then one to remove the old one afterwards
10830 # - removing the removal operation altogether
10831 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10833 def DeclareLocks(self, level):
10834 """Last minute lock declaration."""
10835 # All nodes are locked anyway, so nothing to do here.
10837 def BuildHooksEnv(self):
10838 """Build hooks env.
10840 This will run on the master, primary node and target node.
10844 "EXPORT_MODE": self.op.mode,
10845 "EXPORT_NODE": self.op.target_node,
10846 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10847 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10848 # TODO: Generic function for boolean env variables
10849 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10852 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10856 def BuildHooksNodes(self):
10857 """Build hooks nodes.
10860 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10862 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10863 nl.append(self.op.target_node)
10867 def CheckPrereq(self):
10868 """Check prerequisites.
10870 This checks that the instance and node names are valid.
10873 instance_name = self.op.instance_name
10875 self.instance = self.cfg.GetInstanceInfo(instance_name)
10876 assert self.instance is not None, \
10877 "Cannot retrieve locked instance %s" % self.op.instance_name
10878 _CheckNodeOnline(self, self.instance.primary_node)
10880 if (self.op.remove_instance and self.instance.admin_up and
10881 not self.op.shutdown):
10882 raise errors.OpPrereqError("Can not remove instance without shutting it"
10885 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10886 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10887 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10888 assert self.dst_node is not None
10890 _CheckNodeOnline(self, self.dst_node.name)
10891 _CheckNodeNotDrained(self, self.dst_node.name)
10894 self.dest_disk_info = None
10895 self.dest_x509_ca = None
10897 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10898 self.dst_node = None
10900 if len(self.op.target_node) != len(self.instance.disks):
10901 raise errors.OpPrereqError(("Received destination information for %s"
10902 " disks, but instance %s has %s disks") %
10903 (len(self.op.target_node), instance_name,
10904 len(self.instance.disks)),
10905 errors.ECODE_INVAL)
10907 cds = _GetClusterDomainSecret()
10909 # Check X509 key name
10911 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10912 except (TypeError, ValueError), err:
10913 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10915 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10916 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10917 errors.ECODE_INVAL)
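      # Illustrative note: the (key_name, hmac_digest, hmac_salt) triple is
      # the one handed out by LUBackupPrepare above, roughly
      #   (name, utils.Sha1Hmac(cds, name, salt=salt), salt)
      # so VerifySha1Hmac recomputes the HMAC over key_name with the cluster
      # domain secret and the transmitted salt and compares digests.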
10919 # Load and verify CA
10921 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10922 except OpenSSL.crypto.Error, err:
10923 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10924 (err, ), errors.ECODE_INVAL)
10926 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10927 if errcode is not None:
10928 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10929 (msg, ), errors.ECODE_INVAL)
10931 self.dest_x509_ca = cert
10933 # Verify target information
10935 for idx, disk_data in enumerate(self.op.target_node):
10937 (host, port, magic) = \
10938 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10939 except errors.GenericError, err:
10940 raise errors.OpPrereqError("Target info for disk %s: %s" %
10941 (idx, err), errors.ECODE_INVAL)
10943 disk_info.append((host, port, magic))
10945 assert len(disk_info) == len(self.op.target_node)
10946 self.dest_disk_info = disk_info
10949 raise errors.ProgrammerError("Unhandled export mode %r" %
10952 # instance disk type verification
10953 # TODO: Implement export support for file-based disks
10954 for disk in self.instance.disks:
10955 if disk.dev_type == constants.LD_FILE:
10956 raise errors.OpPrereqError("Export not supported for instances with"
10957 " file-based disks", errors.ECODE_INVAL)
10959 def _CleanupExports(self, feedback_fn):
10960 """Removes exports of current instance from all other nodes.
10962 If an instance in a cluster with nodes A..D was exported to node C, its
10963 exports will be removed from the nodes A, B and D.
10966 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10968 nodelist = self.cfg.GetNodeList()
10969 nodelist.remove(self.dst_node.name)
10971 # on one-node clusters nodelist will be empty after the removal
10972 # if we proceed the backup would be removed because OpBackupQuery
10973 # substitutes an empty list with the full cluster node list.
10974 iname = self.instance.name
10976 feedback_fn("Removing old exports for instance %s" % iname)
10977 exportlist = self.rpc.call_export_list(nodelist)
10978 for node in exportlist:
10979 if exportlist[node].fail_msg:
10981 if iname in exportlist[node].payload:
10982 msg = self.rpc.call_export_remove(node, iname).fail_msg
10984 self.LogWarning("Could not remove older export for instance %s"
10985 " on node %s: %s", iname, node, msg)
10987 def Exec(self, feedback_fn):
10988 """Export an instance to an image in the cluster.
10991 assert self.op.mode in constants.EXPORT_MODES
10993 instance = self.instance
10994 src_node = instance.primary_node
10996 if self.op.shutdown:
10997 # shutdown the instance, but not the disks
10998 feedback_fn("Shutting down instance %s" % instance.name)
10999 result = self.rpc.call_instance_shutdown(src_node, instance,
11000 self.op.shutdown_timeout)
11001 # TODO: Maybe ignore failures if ignore_remove_failures is set
11002 result.Raise("Could not shutdown instance %s on"
11003 " node %s" % (instance.name, src_node))
11005 # set the disk IDs correctly since call_instance_start needs the
11006 # correct drbd minor to create the symlinks
11007 for disk in instance.disks:
11008 self.cfg.SetDiskID(disk, src_node)
11010 activate_disks = (not instance.admin_up)
11013 # Activate the instance disks if we're exporting a stopped instance
11014 feedback_fn("Activating disks for %s" % instance.name)
11015 _StartInstanceDisks(self, instance, None)
11018 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11021 helper.CreateSnapshots()
11023 if (self.op.shutdown and instance.admin_up and
11024 not self.op.remove_instance):
11025 assert not activate_disks
11026 feedback_fn("Starting instance %s" % instance.name)
11027 result = self.rpc.call_instance_start(src_node, instance, None, None)
11028 msg = result.fail_msg
11030 feedback_fn("Failed to start instance: %s" % msg)
11031 _ShutdownInstanceDisks(self, instance)
11032 raise errors.OpExecError("Could not start instance: %s" % msg)
11034 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11035 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11036 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11037 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11038 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11040 (key_name, _, _) = self.x509_key_name
11043 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11046 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11047 key_name, dest_ca_pem,
11052 # Check for backwards compatibility
11053 assert len(dresults) == len(instance.disks)
11054 assert compat.all(isinstance(i, bool) for i in dresults), \
11055 "Not all results are boolean: %r" % dresults
11059 feedback_fn("Deactivating disks for %s" % instance.name)
11060 _ShutdownInstanceDisks(self, instance)
11062 if not (compat.all(dresults) and fin_resu):
11063 failures = []
11064 if not fin_resu:
11065 failures.append("export finalization")
11066 if not compat.all(dresults):
11067 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11068 if not dsk)
11069 failures.append("disk export: disk(s) %s" % fdsk)
11071 raise errors.OpExecError("Export failed, errors in %s" %
11072 utils.CommaJoin(failures))
11074 # At this point, the export was successful, we can cleanup/finish
11076 # Remove instance if requested
11077 if self.op.remove_instance:
11078 feedback_fn("Removing instance %s" % instance.name)
11079 _RemoveInstance(self, feedback_fn, instance,
11080 self.op.ignore_remove_failures)
11082 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11083 self._CleanupExports(feedback_fn)
11085 return fin_resu, dresults
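# For illustration: the two values returned above are the finalization status
# and the per-disk status list, so a fully successful export of a two-disk
# instance would come back as (True, [True, True]), while a failure while
# exporting the second disk would show up as (True, [True, False]).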
11088 class LUBackupRemove(NoHooksLU):
11089 """Remove exports related to the named instance.
11094 def ExpandNames(self):
11095 self.needed_locks = {}
11096 # We need all nodes to be locked in order for RemoveExport to work, but we
11097 # don't need to lock the instance itself, as nothing will happen to it (and
11098 # we can also remove exports for an already-removed instance)
11099 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11101 def Exec(self, feedback_fn):
11102 """Remove any export.
11105 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11106 # If the instance was not found we'll try with the name that was passed in.
11107 # This will only work if it was an FQDN, though.
11108 fqdn_warn = False
11109 if not instance_name:
11110 fqdn_warn = True
11111 instance_name = self.op.instance_name
11113 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11114 exportlist = self.rpc.call_export_list(locked_nodes)
11115 found = False
11116 for node in exportlist:
11117 msg = exportlist[node].fail_msg
11118 if msg:
11119 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11120 continue
11121 if instance_name in exportlist[node].payload:
11122 found = True
11123 result = self.rpc.call_export_remove(node, instance_name)
11124 msg = result.fail_msg
11125 if msg:
11126 logging.error("Could not remove export for instance %s"
11127 " on node %s: %s", instance_name, node, msg)
11129 if fqdn_warn and not found:
11130 feedback_fn("Export not found. If trying to remove an export belonging"
11131 " to a deleted instance please use its Fully Qualified"
11132 " Domain Name.")
11135 class LUGroupAdd(LogicalUnit):
11136 """Logical unit for creating node groups.
11139 HPATH = "group-add"
11140 HTYPE = constants.HTYPE_GROUP
11143 def ExpandNames(self):
11144 # We need the new group's UUID here so that we can create and acquire the
11145 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11146 # that it should not check whether the UUID exists in the configuration.
11147 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11148 self.needed_locks = {}
11149 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11151 def CheckPrereq(self):
11152 """Check prerequisites.
11154 This checks that the given group name is not already an existing node group.
11158 try:
11159 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11160 except errors.OpPrereqError:
11161 pass
11162 else:
11163 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11164 " node group (UUID: %s)" %
11165 (self.op.group_name, existing_uuid),
11166 errors.ECODE_EXISTS)
11168 if self.op.ndparams:
11169 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11171 def BuildHooksEnv(self):
11172 """Build hooks env.
11176 "GROUP_NAME": self.op.group_name,
11179 def BuildHooksNodes(self):
11180 """Build hooks nodes.
11183 mn = self.cfg.GetMasterNode()
11184 return ([mn], [mn])
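# The two lists above are the nodes that run the pre- and post-phase hooks;
# for group creation only the master node is involved in either phase.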
11186 def Exec(self, feedback_fn):
11187 """Add the node group to the cluster.
11190 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11191 uuid=self.group_uuid,
11192 alloc_policy=self.op.alloc_policy,
11193 ndparams=self.op.ndparams)
11195 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11196 del self.remove_locks[locking.LEVEL_NODEGROUP]
11199 class LUGroupAssignNodes(NoHooksLU):
11200 """Logical unit for assigning nodes to groups.
11205 def ExpandNames(self):
11206 # These raise errors.OpPrereqError on their own:
11207 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11208 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11210 # We want to lock all the affected nodes and groups. We have readily
11211 # available the list of nodes, and the *destination* group. To gather the
11212 # list of "source" groups, we need to fetch node information later on.
11213 self.needed_locks = {
11214 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11215 locking.LEVEL_NODE: self.op.nodes,
11218 def DeclareLocks(self, level):
11219 if level == locking.LEVEL_NODEGROUP:
11220 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11222 # Try to get all affected nodes' groups without having the group or node
11223 # lock yet. Needs verification later in the code flow.
11224 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11226 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11228 def CheckPrereq(self):
11229 """Check prerequisites.
11232 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11233 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11234 frozenset(self.op.nodes))
11236 expected_locks = (set([self.group_uuid]) |
11237 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11238 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11239 if actual_locks != expected_locks:
11240 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11241 " current groups are '%s', used to be '%s'" %
11242 (utils.CommaJoin(expected_locks),
11243 utils.CommaJoin(actual_locks)))
11245 self.node_data = self.cfg.GetAllNodesInfo()
11246 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11247 instance_data = self.cfg.GetAllInstancesInfo()
11249 if self.group is None:
11250 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11251 (self.op.group_name, self.group_uuid))
11253 (new_splits, previous_splits) = \
11254 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11255 for node in self.op.nodes],
11256 self.node_data, instance_data)
11258 if new_splits:
11259 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11261 if not self.op.force:
11262 raise errors.OpExecError("The following instances get split by this"
11263 " change and --force was not given: %s" %
11264 fmt_new_splits)
11265 else:
11266 self.LogWarning("This operation will split the following instances: %s",
11267 fmt_new_splits)
11269 if previous_splits:
11270 self.LogWarning("In addition, these already-split instances continue"
11271 " to be split across groups: %s",
11272 utils.CommaJoin(utils.NiceSort(previous_splits)))
11274 def Exec(self, feedback_fn):
11275 """Assign nodes to a new group.
11278 for node in self.op.nodes:
11279 self.node_data[node].group = self.group_uuid
11281 # FIXME: Depends on side-effects of modifying the result of
11282 # C{cfg.GetAllNodesInfo}
11284 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11286 @staticmethod
11287 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11288 """Check for split instances after a node assignment.
11290 This method considers a series of node assignments as an atomic operation,
11291 and returns information about split instances after applying the set of changes.
11294 In particular, it returns information about newly split instances, and about
11295 instances that were already split and remain so after the change.
11297 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
11300 @type changes: list of (node_name, new_group_uuid) pairs.
11301 @param changes: list of node assignments to consider.
11302 @param node_data: a dict with data for all nodes
11303 @param instance_data: a dict with all instances to consider
11304 @rtype: a two-tuple
11305 @return: a list of instances that were previously okay and end up split as a
11306 consequence of this change, and a list of instances that were previously
11307 split and that this change does not fix.
11310 changed_nodes = dict((node, group) for node, group in changes
11311 if node_data[node].group != group)
11313 all_split_instances = set()
11314 previously_split_instances = set()
11316 def InstanceNodes(instance):
11317 return [instance.primary_node] + list(instance.secondary_nodes)
11319 for inst in instance_data.values():
11320 if inst.disk_template not in constants.DTS_INT_MIRROR:
11323 instance_nodes = InstanceNodes(inst)
11325 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11326 previously_split_instances.add(inst.name)
11328 if len(set(changed_nodes.get(node, node_data[node].group)
11329 for node in instance_nodes)) > 1:
11330 all_split_instances.add(inst.name)
11332 return (list(all_split_instances - previously_split_instances),
11333 list(previously_split_instances & all_split_instances))
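# Worked example (hypothetical names): with nodes "n1" and "n2" currently in
# group "uuid-A" and a DRBD instance "inst1" using n1 as primary and n2 as
# secondary, a change list of [("n2", "uuid-B")] leaves the two nodes in
# different groups, so the call returns (["inst1"], []).  Had the nodes
# already been in different groups before the change, "inst1" would instead
# appear in the second (previously split) list.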
11336 class _GroupQuery(_QueryBase):
11337 FIELDS = query.GROUP_FIELDS
11339 def ExpandNames(self, lu):
11340 lu.needed_locks = {}
11342 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11343 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11345 if not self.names:
11346 self.wanted = [name_to_uuid[name]
11347 for name in utils.NiceSort(name_to_uuid.keys())]
11348 else:
11349 # Accept names to be either names or UUIDs.
11350 missing = []
11351 self.wanted = []
11352 all_uuid = frozenset(self._all_groups.keys())
11354 for name in self.names:
11355 if name in all_uuid:
11356 self.wanted.append(name)
11357 elif name in name_to_uuid:
11358 self.wanted.append(name_to_uuid[name])
11359 else:
11360 missing.append(name)
11362 if missing:
11363 raise errors.OpPrereqError("Some groups do not exist: %s" %
11364 utils.CommaJoin(missing),
11365 errors.ECODE_NOENT)
11367 def DeclareLocks(self, lu, level):
11368 pass
11370 def _GetQueryData(self, lu):
11371 """Computes the list of node groups and their attributes.
11374 do_nodes = query.GQ_NODE in self.requested_data
11375 do_instances = query.GQ_INST in self.requested_data
11377 group_to_nodes = None
11378 group_to_instances = None
11380 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11381 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11382 # latter GetAllInstancesInfo() is not enough, for we have to go through
11383 # instance->node. Hence, we will need to process nodes even if we only need
11384 # instance information.
11385 if do_nodes or do_instances:
11386 all_nodes = lu.cfg.GetAllNodesInfo()
11387 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11388 node_to_group = {}
11390 for node in all_nodes.values():
11391 if node.group in group_to_nodes:
11392 group_to_nodes[node.group].append(node.name)
11393 node_to_group[node.name] = node.group
11395 if do_instances:
11396 all_instances = lu.cfg.GetAllInstancesInfo()
11397 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11399 for instance in all_instances.values():
11400 node = instance.primary_node
11401 if node in node_to_group:
11402 group_to_instances[node_to_group[node]].append(instance.name)
11404 if not do_nodes:
11405 # Do not pass on node information if it was not requested.
11406 group_to_nodes = None
11408 return query.GroupQueryData([self._all_groups[uuid]
11409 for uuid in self.wanted],
11410 group_to_nodes, group_to_instances)
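# Shape of the optional mappings built above (values invented for
# illustration): group_to_nodes is e.g. {"uuid-1": ["node1", "node2"]} and
# group_to_instances is e.g. {"uuid-1": ["inst1"]}, keyed by the group UUIDs
# in self.wanted; either may be None when that data was not requested.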
11413 class LUGroupQuery(NoHooksLU):
11414 """Logical unit for querying node groups.
11419 def CheckArguments(self):
11420 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11421 self.op.output_fields, False)
11423 def ExpandNames(self):
11424 self.gq.ExpandNames(self)
11426 def Exec(self, feedback_fn):
11427 return self.gq.OldStyleQuery(self)
11430 class LUGroupSetParams(LogicalUnit):
11431 """Modifies the parameters of a node group.
11434 HPATH = "group-modify"
11435 HTYPE = constants.HTYPE_GROUP
11438 def CheckArguments(self):
11439 all_changes = [
11440 self.op.ndparams,
11441 self.op.alloc_policy,
11442 ]
11444 if all_changes.count(None) == len(all_changes):
11445 raise errors.OpPrereqError("Please pass at least one modification",
11446 errors.ECODE_INVAL)
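# In other words, a modification request that specifies neither ndparams nor
# an allocation policy (every entry of all_changes is None) is rejected here,
# before any locks are taken.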
11448 def ExpandNames(self):
11449 # This raises errors.OpPrereqError on its own:
11450 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11452 self.needed_locks = {
11453 locking.LEVEL_NODEGROUP: [self.group_uuid],
11456 def CheckPrereq(self):
11457 """Check prerequisites.
11460 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11462 if self.group is None:
11463 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11464 (self.op.group_name, self.group_uuid))
11466 if self.op.ndparams:
11467 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11468 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11469 self.new_ndparams = new_ndparams
11471 def BuildHooksEnv(self):
11472 """Build hooks env.
11476 "GROUP_NAME": self.op.group_name,
11477 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11480 def BuildHooksNodes(self):
11481 """Build hooks nodes.
11484 mn = self.cfg.GetMasterNode()
11485 return ([mn], [mn])
11487 def Exec(self, feedback_fn):
11488 """Modifies the node group.
11492 result = []
11493 if self.op.ndparams:
11494 self.group.ndparams = self.new_ndparams
11495 result.append(("ndparams", str(self.group.ndparams)))
11497 if self.op.alloc_policy:
11498 self.group.alloc_policy = self.op.alloc_policy
11500 self.cfg.Update(self.group, feedback_fn)
11501 return result
11505 class LUGroupRemove(LogicalUnit):
11506 HPATH = "group-remove"
11507 HTYPE = constants.HTYPE_GROUP
11510 def ExpandNames(self):
11511 # This will raise errors.OpPrereqError on its own:
11512 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11513 self.needed_locks = {
11514 locking.LEVEL_NODEGROUP: [self.group_uuid],
11517 def CheckPrereq(self):
11518 """Check prerequisites.
11520 This checks that the given group name exists as a node group, that it is
11521 empty (i.e., contains no nodes), and that it is not the last group of the
11522 cluster.
11525 # Verify that the group is empty.
11526 group_nodes = [node.name
11527 for node in self.cfg.GetAllNodesInfo().values()
11528 if node.group == self.group_uuid]
11530 if group_nodes:
11531 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11532 " nodes: %s" %
11533 (self.op.group_name,
11534 utils.CommaJoin(utils.NiceSort(group_nodes))),
11535 errors.ECODE_STATE)
11537 # Verify the cluster would not be left group-less.
11538 if len(self.cfg.GetNodeGroupList()) == 1:
11539 raise errors.OpPrereqError("Group '%s' is the only group,"
11540 " cannot be removed" %
11541 self.op.group_name,
11542 errors.ECODE_STATE)
11544 def BuildHooksEnv(self):
11545 """Build hooks env.
11549 "GROUP_NAME": self.op.group_name,
11552 def BuildHooksNodes(self):
11553 """Build hooks nodes.
11556 mn = self.cfg.GetMasterNode()
11557 return ([mn], [mn])
11559 def Exec(self, feedback_fn):
11560 """Remove the node group.
11564 self.cfg.RemoveNodeGroup(self.group_uuid)
11565 except errors.ConfigurationError:
11566 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11567 (self.op.group_name, self.group_uuid))
11569 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11572 class LUGroupRename(LogicalUnit):
11573 HPATH = "group-rename"
11574 HTYPE = constants.HTYPE_GROUP
11577 def ExpandNames(self):
11578 # This raises errors.OpPrereqError on its own:
11579 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11581 self.needed_locks = {
11582 locking.LEVEL_NODEGROUP: [self.group_uuid],
11585 def CheckPrereq(self):
11586 """Check prerequisites.
11588 Ensures the requested new name is not yet used.
11591 try:
11592 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11593 except errors.OpPrereqError:
11594 pass
11595 else:
11596 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11597 " node group (UUID: %s)" %
11598 (self.op.new_name, new_name_uuid),
11599 errors.ECODE_EXISTS)
11601 def BuildHooksEnv(self):
11602 """Build hooks env.
11606 "OLD_NAME": self.op.group_name,
11607 "NEW_NAME": self.op.new_name,
11610 def BuildHooksNodes(self):
11611 """Build hooks nodes.
11614 mn = self.cfg.GetMasterNode()
11616 all_nodes = self.cfg.GetAllNodesInfo()
11617 all_nodes.pop(mn, None)
11619 run_nodes = [mn]
11620 run_nodes.extend(node.name for node in all_nodes.values()
11621 if node.group == self.group_uuid)
11623 return (run_nodes, run_nodes)
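# Both the pre- and the post-hook therefore run on the master node plus every
# node that belongs to the group being renamed.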
11625 def Exec(self, feedback_fn):
11626 """Rename the node group.
11629 group = self.cfg.GetNodeGroup(self.group_uuid)
11631 if group is None:
11632 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11633 (self.op.group_name, self.group_uuid))
11635 group.name = self.op.new_name
11636 self.cfg.Update(group, feedback_fn)
11638 return self.op.new_name
11641 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11642 """Generic tags LU.
11644 This is an abstract class which is the parent of all the other tags LUs.
11647 def ExpandNames(self):
11648 self.group_uuid = None
11649 self.needed_locks = {}
11650 if self.op.kind == constants.TAG_NODE:
11651 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11652 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11653 elif self.op.kind == constants.TAG_INSTANCE:
11654 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11655 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11656 elif self.op.kind == constants.TAG_NODEGROUP:
11657 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11659 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11660 # not possible to acquire the BGL based on opcode parameters)
11662 def CheckPrereq(self):
11663 """Check prerequisites.
11666 if self.op.kind == constants.TAG_CLUSTER:
11667 self.target = self.cfg.GetClusterInfo()
11668 elif self.op.kind == constants.TAG_NODE:
11669 self.target = self.cfg.GetNodeInfo(self.op.name)
11670 elif self.op.kind == constants.TAG_INSTANCE:
11671 self.target = self.cfg.GetInstanceInfo(self.op.name)
11672 elif self.op.kind == constants.TAG_NODEGROUP:
11673 self.target = self.cfg.GetNodeGroup(self.group_uuid)
11675 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11676 str(self.op.kind), errors.ECODE_INVAL)
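# Summary of the kind -> target mapping resolved above:
#   TAG_CLUSTER   -> the cluster configuration object
#   TAG_NODE      -> the node object named in self.op.name
#   TAG_INSTANCE  -> the instance object named in self.op.name
#   TAG_NODEGROUP -> the node group looked up in ExpandNames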
11679 class LUTagsGet(TagsLU):
11680 """Returns the tags of a given object.
11685 def ExpandNames(self):
11686 TagsLU.ExpandNames(self)
11688 # Share locks as this is only a read operation
11689 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11691 def Exec(self, feedback_fn):
11692 """Returns the tag list.
11695 return list(self.target.GetTags())
11698 class LUTagsSearch(NoHooksLU):
11699 """Searches the tags for a given pattern.
11704 def ExpandNames(self):
11705 self.needed_locks = {}
11707 def CheckPrereq(self):
11708 """Check prerequisites.
11710 This checks the pattern passed for validity by compiling it.
11714 self.re = re.compile(self.op.pattern)
11715 except re.error, err:
11716 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11717 (self.op.pattern, err), errors.ECODE_INVAL)
11719 def Exec(self, feedback_fn):
11720 """Returns the tag list.
11723 cfg = self.cfg
11724 tgts = [("/cluster", cfg.GetClusterInfo())]
11725 ilist = cfg.GetAllInstancesInfo().values()
11726 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11727 nlist = cfg.GetAllNodesInfo().values()
11728 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11729 tgts.extend(("/nodegroup/%s" % n.name, n)
11730 for n in cfg.GetAllNodeGroupsInfo().values())
11731 results = []
11732 for path, target in tgts:
11733 for tag in target.GetTags():
11734 if self.re.search(tag):
11735 results.append((path, tag))
11736 return results
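# The result is a list of (path, tag) pairs, for example (names invented):
#   [("/cluster", "staging"), ("/instances/inst1.example.com", "staging")]
# for a search pattern such as "^staging$".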
11739 class LUTagsSet(TagsLU):
11740 """Sets a tag on a given object.
11745 def CheckPrereq(self):
11746 """Check prerequisites.
11748 This checks the type and length of the tag name and value.
11751 TagsLU.CheckPrereq(self)
11752 for tag in self.op.tags:
11753 objects.TaggableObject.ValidateTag(tag)
11755 def Exec(self, feedback_fn):
11759 try:
11760 for tag in self.op.tags:
11761 self.target.AddTag(tag)
11762 except errors.TagError, err:
11763 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11764 self.cfg.Update(self.target, feedback_fn)
11767 class LUTagsDel(TagsLU):
11768 """Delete a list of tags from a given object.
11773 def CheckPrereq(self):
11774 """Check prerequisites.
11776 This checks that the target object has the given tags.
11779 TagsLU.CheckPrereq(self)
11780 for tag in self.op.tags:
11781 objects.TaggableObject.ValidateTag(tag)
11782 del_tags = frozenset(self.op.tags)
11783 cur_tags = self.target.GetTags()
11785 diff_tags = del_tags - cur_tags
11786 if diff_tags:
11787 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11788 raise errors.OpPrereqError("Tag(s) %s not found" %
11789 (utils.CommaJoin(diff_names), ),
11790 errors.ECODE_NOENT)
11792 def Exec(self, feedback_fn):
11793 """Remove the tag from the object.
11796 for tag in self.op.tags:
11797 self.target.RemoveTag(tag)
11798 self.cfg.Update(self.target, feedback_fn)
11801 class LUTestDelay(NoHooksLU):
11802 """Sleep for a specified amount of time.
11804 This LU sleeps on the master and/or nodes for a specified amount of time.
11810 def ExpandNames(self):
11811 """Expand names and set required locks.
11813 This expands the node list, if any.
11816 self.needed_locks = {}
11817 if self.op.on_nodes:
11818 # _GetWantedNodes can be used here, but is not always appropriate to use
11819 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11820 # more information.
11821 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11822 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11824 def _TestDelay(self):
11825 """Do the actual sleep.
11828 if self.op.on_master:
11829 if not utils.TestDelay(self.op.duration):
11830 raise errors.OpExecError("Error during master delay test")
11831 if self.op.on_nodes:
11832 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11833 for node, node_result in result.items():
11834 node_result.Raise("Failure during rpc call to node %s" % node)
11836 def Exec(self, feedback_fn):
11837 """Execute the test delay opcode, with the wanted repetitions.
11840 if self.op.repeat == 0:
11841 self._TestDelay()
11842 else:
11843 top_value = self.op.repeat - 1
11844 for i in range(self.op.repeat):
11845 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11846 self._TestDelay()
11849 class LUTestJqueue(NoHooksLU):
11850 """Utility LU to test some aspects of the job queue.
11855 # Must be lower than default timeout for WaitForJobChange to see whether it
11856 # notices changed jobs
11857 _CLIENT_CONNECT_TIMEOUT = 20.0
11858 _CLIENT_CONFIRM_TIMEOUT = 60.0
11861 def _NotifyUsingSocket(cls, cb, errcls):
11862 """Opens a Unix socket and waits for another program to connect.
11865 @param cb: Callback to send socket name to client
11866 @type errcls: class
11867 @param errcls: Exception class to use for errors
11870 # Using a temporary directory as there's no easy way to create temporary
11871 # sockets without writing a custom loop around tempfile.mktemp and
11873 tmpdir = tempfile.mkdtemp()
11875 tmpsock = utils.PathJoin(tmpdir, "sock")
11877 logging.debug("Creating temporary socket at %s", tmpsock)
11878 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11883 # Send details to client
11886 # Wait for client to connect before continuing
11887 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11888 try:
11889 (conn, _) = sock.accept()
11890 except socket.error, err:
11891 raise errcls("Client didn't connect in time (%s)" % err)
11895 # Remove as soon as client is connected
11896 shutil.rmtree(tmpdir)
11898 # Wait for client to close
11901 # pylint: disable-msg=E1101
11902 # Instance of '_socketobject' has no ... member
11903 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11905 except socket.error, err:
11906 raise errcls("Client failed to confirm notification (%s)" % err)
11910 def _SendNotification(self, test, arg, sockname):
11911 """Sends a notification to the client.
11914 @param test: Test name
11915 @param arg: Test argument (depends on test)
11916 @type sockname: string
11917 @param sockname: Socket path
11920 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
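# A cooperating test client is expected to pick the socket path out of the
# job's log messages and complete the handshake roughly like this (a sketch
# using only the standard library, not code from this module):
#   import socket
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)   # unblocks the accept() above
#   s.close()             # confirms the notification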
11922 def _Notify(self, prereq, test, arg):
11923 """Notifies the client of a test.
11926 @param prereq: Whether this is a prereq-phase test
11928 @param test: Test name
11929 @param arg: Test argument (depends on test)
11932 if prereq:
11933 errcls = errors.OpPrereqError
11934 else:
11935 errcls = errors.OpExecError
11937 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11941 def CheckArguments(self):
11942 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11943 self.expandnames_calls = 0
11945 def ExpandNames(self):
11946 checkargs_calls = getattr(self, "checkargs_calls", 0)
11947 if checkargs_calls < 1:
11948 raise errors.ProgrammerError("CheckArguments was not called")
11950 self.expandnames_calls += 1
11952 if self.op.notify_waitlock:
11953 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11955 self.LogInfo("Expanding names")
11957 # Get lock on master node (just to get a lock, not for a particular reason)
11958 self.needed_locks = {
11959 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11962 def Exec(self, feedback_fn):
11963 if self.expandnames_calls < 1:
11964 raise errors.ProgrammerError("ExpandNames was not called")
11966 if self.op.notify_exec:
11967 self._Notify(False, constants.JQT_EXEC, None)
11969 self.LogInfo("Executing")
11971 if self.op.log_messages:
11972 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11973 for idx, msg in enumerate(self.op.log_messages):
11974 self.LogInfo("Sending log message %s", idx + 1)
11975 feedback_fn(constants.JQT_MSGPREFIX + msg)
11976 # Report how many test messages have been sent
11977 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11979 if self.op.fail:
11980 raise errors.OpExecError("Opcode failure was requested")
11985 class IAllocator(object):
11986 """IAllocator framework.
11988 An IAllocator instance has four sets of attributes:
11989 - cfg that is needed to query the cluster
11990 - input data (all members of the _KEYS class attribute are required)
11991 - four buffer attributes (in|out_data|text), that represent the
11992 input (to the external script) in text and data structure format,
11993 and the output from it, again in two formats
11994 - the result variables from the script (success, info, nodes) for easy usage
11998 # pylint: disable-msg=R0902
11999 # lots of instance attributes
12001 def __init__(self, cfg, rpc, mode, **kwargs):
12004 # init buffer variables
12005 self.in_text = self.out_text = self.in_data = self.out_data = None
12006 # init all input fields so that pylint is happy
12008 self.memory = self.disks = self.disk_template = None
12009 self.os = self.tags = self.nics = self.vcpus = None
12010 self.hypervisor = None
12011 self.relocate_from = None
12013 self.evac_nodes = None
12014 self.instances = None
12015 self.reloc_mode = None
12016 self.target_groups = []
12018 self.required_nodes = None
12019 # init result fields
12020 self.success = self.info = self.result = None
12022 try:
12023 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12024 except KeyError:
12025 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12026 " IAllocator" % self.mode)
12028 keyset = [n for (n, _) in keydata]
12030 for key in kwargs:
12031 if key not in keyset:
12032 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12033 " IAllocator" % key)
12034 setattr(self, key, kwargs[key])
12036 for key in keyset:
12037 if key not in kwargs:
12038 raise errors.ProgrammerError("Missing input parameter '%s' to"
12039 " IAllocator" % key)
12040 self._BuildInputData(compat.partial(fn, self), keydata)
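# Typical construction, as a sketch only (argument values are invented; the
# accepted keyword names come from the keydata lists in _MODE_DATA below):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name=instance.name,
#                    relocate_from=list(instance.secondary_nodes))
#   ial.Run("hail")
# After Run() the validated response is available in ial.result.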
12042 def _ComputeClusterData(self):
12043 """Compute the generic allocator input data.
12045 This is the data that is independent of the actual operation.
12049 cluster_info = cfg.GetClusterInfo()
12051 data = {
12052 "version": constants.IALLOCATOR_VERSION,
12053 "cluster_name": cfg.GetClusterName(),
12054 "cluster_tags": list(cluster_info.GetTags()),
12055 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12056 # we don't have job IDs
12058 ninfo = cfg.GetAllNodesInfo()
12059 iinfo = cfg.GetAllInstancesInfo().values()
12060 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12063 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12065 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12066 hypervisor_name = self.hypervisor
12067 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12068 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12069 elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
12070 constants.IALLOCATOR_MODE_MRELOC):
12071 hypervisor_name = cluster_info.enabled_hypervisors[0]
12073 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12076 node_iinfo = self.rpc.call_all_instances_info(node_list,
12077 cluster_info.enabled_hypervisors)
12079 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12081 config_ndata = self._ComputeBasicNodeData(ninfo)
12082 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12083 i_list, config_ndata)
12084 assert len(data["nodes"]) == len(ninfo), \
12085 "Incomplete node data computed"
12087 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12089 self.in_data = data
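# The resulting in_data dictionary therefore carries (at least) the keys
# "version", "cluster_name", "cluster_tags", "enabled_hypervisors",
# "nodegroups", "nodes" and "instances"; the request-specific part is added
# later by _BuildInputData under the "request" key.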
12092 def _ComputeNodeGroupData(cfg):
12093 """Compute node groups data.
12096 ng = dict((guuid, {
12097 "name": gdata.name,
12098 "alloc_policy": gdata.alloc_policy,
12100 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12105 def _ComputeBasicNodeData(node_cfg):
12106 """Compute global node data.
12109 @returns: a dict of name: (node dict, node config)
12112 # fill in static (config-based) values
12113 node_results = dict((ninfo.name, {
12114 "tags": list(ninfo.GetTags()),
12115 "primary_ip": ninfo.primary_ip,
12116 "secondary_ip": ninfo.secondary_ip,
12117 "offline": ninfo.offline,
12118 "drained": ninfo.drained,
12119 "master_candidate": ninfo.master_candidate,
12120 "group": ninfo.group,
12121 "master_capable": ninfo.master_capable,
12122 "vm_capable": ninfo.vm_capable,
12124 for ninfo in node_cfg.values())
12126 return node_results
12129 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12131 """Compute global node data.
12133 @param node_results: the basic node structures as filled from the config
12136 # make a copy of the current dict
12137 node_results = dict(node_results)
12138 for nname, nresult in node_data.items():
12139 assert nname in node_results, "Missing basic data for node %s" % nname
12140 ninfo = node_cfg[nname]
12142 if not (ninfo.offline or ninfo.drained):
12143 nresult.Raise("Can't get data for node %s" % nname)
12144 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12146 remote_info = nresult.payload
12148 for attr in ['memory_total', 'memory_free', 'memory_dom0',
12149 'vg_size', 'vg_free', 'cpu_total']:
12150 if attr not in remote_info:
12151 raise errors.OpExecError("Node '%s' didn't return attribute"
12152 " '%s'" % (nname, attr))
12153 if not isinstance(remote_info[attr], int):
12154 raise errors.OpExecError("Node '%s' returned invalid value"
12156 (nname, attr, remote_info[attr]))
12157 # compute memory used by primary instances
12158 i_p_mem = i_p_up_mem = 0
12159 for iinfo, beinfo in i_list:
12160 if iinfo.primary_node == nname:
12161 i_p_mem += beinfo[constants.BE_MEMORY]
12162 if iinfo.name not in node_iinfo[nname].payload:
12163 i_used_mem = 0
12164 else:
12165 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12166 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12167 remote_info['memory_free'] -= max(0, i_mem_diff)
12169 if iinfo.admin_up:
12170 i_p_up_mem += beinfo[constants.BE_MEMORY]
12172 # compute memory used by instances
12174 "total_memory": remote_info['memory_total'],
12175 "reserved_memory": remote_info['memory_dom0'],
12176 "free_memory": remote_info['memory_free'],
12177 "total_disk": remote_info['vg_size'],
12178 "free_disk": remote_info['vg_free'],
12179 "total_cpus": remote_info['cpu_total'],
12180 "i_pri_memory": i_p_mem,
12181 "i_pri_up_memory": i_p_up_mem,
12183 pnr_dyn.update(node_results[nname])
12184 node_results[nname] = pnr_dyn
12186 return node_results
12189 def _ComputeInstanceData(cluster_info, i_list):
12190 """Compute global instance data.
12193 instance_data = {}
12194 for iinfo, beinfo in i_list:
12195 nic_data = []
12196 for nic in iinfo.nics:
12197 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12201 "mode": filled_params[constants.NIC_MODE],
12202 "link": filled_params[constants.NIC_LINK],
12204 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12205 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12206 nic_data.append(nic_dict)
12207 pir = {
12208 "tags": list(iinfo.GetTags()),
12209 "admin_up": iinfo.admin_up,
12210 "vcpus": beinfo[constants.BE_VCPUS],
12211 "memory": beinfo[constants.BE_MEMORY],
12213 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12215 "disks": [{constants.IDISK_SIZE: dsk.size,
12216 constants.IDISK_MODE: dsk.mode}
12217 for dsk in iinfo.disks],
12218 "disk_template": iinfo.disk_template,
12219 "hypervisor": iinfo.hypervisor,
12221 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12223 instance_data[iinfo.name] = pir
12225 return instance_data
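# A single entry of the returned dict looks roughly like this (all values
# invented for illustration):
#   {"tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
#    "disk_template": "plain", "hypervisor": "xen-pvm",
#    "nodes": ["node1.example.com"],
#    "disks": [{"size": 1024, "mode": "rw"}], ...}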
12227 def _AddNewInstance(self):
12228 """Add new instance data to allocator structure.
12230 This in combination with _AllocatorGetClusterData will create the
12231 correct structure needed as input for the allocator.
12233 The checks for the completeness of the opcode must have already been
12237 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12239 if self.disk_template in constants.DTS_INT_MIRROR:
12240 self.required_nodes = 2
12242 self.required_nodes = 1
12246 "disk_template": self.disk_template,
12249 "vcpus": self.vcpus,
12250 "memory": self.memory,
12251 "disks": self.disks,
12252 "disk_space_total": disk_space,
12254 "required_nodes": self.required_nodes,
12255 "hypervisor": self.hypervisor,
12260 def _AddRelocateInstance(self):
12261 """Add relocate instance data to allocator structure.
12263 This in combination with _IAllocatorGetClusterData will create the
12264 correct structure needed as input for the allocator.
12266 The checks for the completeness of the opcode must have already been
12270 instance = self.cfg.GetInstanceInfo(self.name)
12271 if instance is None:
12272 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12273 " IAllocator" % self.name)
12275 if instance.disk_template not in constants.DTS_MIRRORED:
12276 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12277 errors.ECODE_INVAL)
12279 if instance.disk_template in constants.DTS_INT_MIRROR and \
12280 len(instance.secondary_nodes) != 1:
12281 raise errors.OpPrereqError("Instance has not exactly one secondary node",
12282 errors.ECODE_STATE)
12284 self.required_nodes = 1
12285 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12286 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12290 "disk_space_total": disk_space,
12291 "required_nodes": self.required_nodes,
12292 "relocate_from": self.relocate_from,
12296 def _AddEvacuateNodes(self):
12297 """Add evacuate nodes data to allocator structure.
12301 "evac_nodes": self.evac_nodes
12305 def _AddMultiRelocate(self):
12306 """Get data for multi-relocate requests.
12310 "instances": self.instances,
12311 "reloc_mode": self.reloc_mode,
12312 "target_groups": self.target_groups,
12315 def _BuildInputData(self, fn, keydata):
12316 """Build input data structures.
12319 self._ComputeClusterData()
12321 request = fn()
12322 request["type"] = self.mode
12323 for keyname, keytype in keydata:
12324 if keyname not in request:
12325 raise errors.ProgrammerError("Request parameter %s is missing" %
12327 val = request[keyname]
12328 if not keytype(val):
12329 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12330 " validation, value %s, expected"
12331 " type %s" % (keyname, val, keytype))
12332 self.in_data["request"] = request
12334 self.in_text = serializer.Dump(self.in_data)
12336 _STRING_LIST = ht.TListOf(ht.TString)
12337 _MODE_DATA = {
12338 constants.IALLOCATOR_MODE_ALLOC:
12341 ("name", ht.TString),
12342 ("memory", ht.TInt),
12343 ("disks", ht.TListOf(ht.TDict)),
12344 ("disk_template", ht.TString),
12345 ("os", ht.TString),
12346 ("tags", _STRING_LIST),
12347 ("nics", ht.TListOf(ht.TDict)),
12348 ("vcpus", ht.TInt),
12349 ("hypervisor", ht.TString),
12351 constants.IALLOCATOR_MODE_RELOC:
12352 (_AddRelocateInstance,
12353 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12355 constants.IALLOCATOR_MODE_MEVAC:
12356 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12357 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12358 constants.IALLOCATOR_MODE_MRELOC:
12359 (_AddMultiRelocate, [
12360 ("instances", _STRING_LIST),
12361 ("reloc_mode", ht.TElemOf(constants.IALLOCATOR_MRELOC_MODES)),
12362 ("target_groups", _STRING_LIST),
12364 ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12365 # pylint: disable-msg=E1101
12366 # Class '...' has no 'OP_ID' member
12367 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12368 opcodes.OpInstanceMigrate.OP_ID,
12369 opcodes.OpInstanceReplaceDisks.OP_ID])
12373 def Run(self, name, validate=True, call_fn=None):
12374 """Run an instance allocator and return the results.
12377 if call_fn is None:
12378 call_fn = self.rpc.call_iallocator_runner
12380 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12381 result.Raise("Failure while running the iallocator script")
12383 self.out_text = result.payload
12385 self._ValidateResult()
12387 def _ValidateResult(self):
12388 """Process the allocator results.
12390 This will process and if successful save the result in
12391 self.out_data and the other parameters.
12395 rdict = serializer.Load(self.out_text)
12396 except Exception, err:
12397 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12399 if not isinstance(rdict, dict):
12400 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12402 # TODO: remove backwards compatibility in later versions
12403 if "nodes" in rdict and "result" not in rdict:
12404 rdict["result"] = rdict["nodes"]
12407 for key in "success", "info", "result":
12408 if key not in rdict:
12409 raise errors.OpExecError("Can't parse iallocator results:"
12410 " missing key '%s'" % key)
12411 setattr(self, key, rdict[key])
12413 if not self._result_check(self.result):
12414 raise errors.OpExecError("Iallocator returned invalid result,"
12415 " expected %s, got %s" %
12416 (self._result_check, self.result),
12417 errors.ECODE_INVAL)
12419 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12420 constants.IALLOCATOR_MODE_MEVAC):
12421 node2group = dict((name, ndata["group"])
12422 for (name, ndata) in self.in_data["nodes"].items())
12424 fn = compat.partial(self._NodesToGroups, node2group,
12425 self.in_data["nodegroups"])
12427 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12428 assert self.relocate_from is not None
12429 assert self.required_nodes == 1
12431 request_groups = fn(self.relocate_from)
12432 result_groups = fn(rdict["result"])
12434 if result_groups != request_groups:
12435 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12436 " differ from original groups (%s)" %
12437 (utils.CommaJoin(result_groups),
12438 utils.CommaJoin(request_groups)))
12439 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12440 request_groups = fn(self.evac_nodes)
12441 for (instance_name, secnode) in self.result:
12442 result_groups = fn([secnode])
12443 if result_groups != request_groups:
12444 raise errors.OpExecError("Iallocator returned new secondary node"
12445 " '%s' (group '%s') for instance '%s'"
12446 " which is not in original group '%s'" %
12447 (secnode, utils.CommaJoin(result_groups),
12449 utils.CommaJoin(request_groups)))
12451 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12453 self.out_data = rdict
12456 def _NodesToGroups(node2group, groups, nodes):
12457 """Returns a list of unique group names for a list of nodes.
12459 @type node2group: dict
12460 @param node2group: Map from node name to group UUID
12462 @param groups: Group information
12464 @param nodes: Node names
12471 group_uuid = node2group[node]
12473 # Ignore unknown node
12477 group = groups[group_uuid]
12479 # Can't find group, let's use UUID
12480 group_name = group_uuid
12482 group_name = group["name"]
12484 result.add(group_name)
12486 return sorted(result)
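# Worked example (names invented): with
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
# a call with nodes = ["node1", "node2", "node3"] yields ["default", "uuid-b"]:
# node3 is ignored as unknown, and uuid-b is used verbatim because its group
# has no entry in the groups mapping.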
12489 class LUTestAllocator(NoHooksLU):
12490 """Run allocator tests.
12492 This LU runs the allocator tests
12495 def CheckPrereq(self):
12496 """Check prerequisites.
12498 This checks the opcode parameters depending on the director and mode test.
12501 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12502 for attr in ["memory", "disks", "disk_template",
12503 "os", "tags", "nics", "vcpus"]:
12504 if not hasattr(self.op, attr):
12505 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12506 attr, errors.ECODE_INVAL)
12507 iname = self.cfg.ExpandInstanceName(self.op.name)
12508 if iname is not None:
12509 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12510 iname, errors.ECODE_EXISTS)
12511 if not isinstance(self.op.nics, list):
12512 raise errors.OpPrereqError("Invalid parameter 'nics'",
12513 errors.ECODE_INVAL)
12514 if not isinstance(self.op.disks, list):
12515 raise errors.OpPrereqError("Invalid parameter 'disks'",
12516 errors.ECODE_INVAL)
12517 for row in self.op.disks:
12518 if (not isinstance(row, dict) or
12519 constants.IDISK_SIZE not in row or
12520 not isinstance(row[constants.IDISK_SIZE], int) or
12521 constants.IDISK_MODE not in row or
12522 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12523 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12524 " parameter", errors.ECODE_INVAL)
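# For reference, a disks value that passes the checks above would look like
# (sizes invented): [{constants.IDISK_SIZE: 1024,
#                     constants.IDISK_MODE: constants.DISK_RDWR}]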
12525 if self.op.hypervisor is None:
12526 self.op.hypervisor = self.cfg.GetHypervisorType()
12527 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12528 fname = _ExpandInstanceName(self.cfg, self.op.name)
12529 self.op.name = fname
12530 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12531 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12532 if not hasattr(self.op, "evac_nodes"):
12533 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12534 " opcode input", errors.ECODE_INVAL)
12535 elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
12536 if self.op.instances:
12537 self.op.instances = _GetWantedInstances(self, self.op.instances)
12538 else:
12539 raise errors.OpPrereqError("Missing instances to relocate",
12540 errors.ECODE_INVAL)
12541 else:
12542 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12543 self.op.mode, errors.ECODE_INVAL)
12545 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12546 if self.op.allocator is None:
12547 raise errors.OpPrereqError("Missing allocator name",
12548 errors.ECODE_INVAL)
12549 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12550 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12551 self.op.direction, errors.ECODE_INVAL)
12553 def Exec(self, feedback_fn):
12554 """Run the allocator test.
12557 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12558 ial = IAllocator(self.cfg, self.rpc,
12561 memory=self.op.memory,
12562 disks=self.op.disks,
12563 disk_template=self.op.disk_template,
12567 vcpus=self.op.vcpus,
12568 hypervisor=self.op.hypervisor,
12570 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12571 ial = IAllocator(self.cfg, self.rpc,
12574 relocate_from=list(self.relocate_from),
12576 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12577 ial = IAllocator(self.cfg, self.rpc,
12579 evac_nodes=self.op.evac_nodes)
12580 elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
12581 ial = IAllocator(self.cfg, self.rpc,
12583 instances=self.op.instances,
12584 reloc_mode=self.op.reloc_mode,
12585 target_groups=self.op.target_groups)
12587 raise errors.ProgrammerError("Unhandled mode %s in"
12588 " LUTestAllocator.Exec", self.op.mode)
12590 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12591 result = ial.in_text
12593 ial.Run(self.op.allocator, validate=False)
12594 result = ial.out_text
12598 #: Query type implementations
12599 _QUERY_IMPL = {
12600 constants.QR_INSTANCE: _InstanceQuery,
12601 constants.QR_NODE: _NodeQuery,
12602 constants.QR_GROUP: _GroupQuery,
12603 constants.QR_OS: _OsQuery,
12606 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12609 def _GetQueryImplementation(name):
12610 """Returns the implementation for a query type.
12612 @param name: Query type, must be one of L{constants.QR_VIA_OP}
12615 try:
12616 return _QUERY_IMPL[name]
12617 except KeyError:
12618 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12619 errors.ECODE_INVAL)