4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
62 import ganeti.masterd.instance # pylint: disable-msg=W0611
65 def _SupportsOob(cfg, node):
66 """Tells if node supports OOB.
68 @type cfg: L{config.ConfigWriter}
69 @param cfg: The cluster configuration
70 @type node: L{objects.Node}
72 @return: The OOB script if supported or an empty string otherwise
75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
134 self.context = context
136 # Dicts used to declare locking needs to mcpu
137 self.needed_locks = None
138 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
140 self.remove_locks = {}
141 # Used to force good behavior when calling helper functions
142 self.recalculate_locks = {}
144 self.Log = processor.Log # pylint: disable-msg=C0103
145 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148 # support for dry-run
149 self.dry_run_result = None
150 # support for generic debug attribute
151 if (not hasattr(self.op, "debug_level") or
152 not isinstance(self.op.debug_level, int)):
153 self.op.debug_level = 0
158 # Validate opcode parameters and set defaults
159 self.op.Validate(True)
161 self.CheckArguments()
163 def CheckArguments(self):
164 """Check syntactic validity for the opcode arguments.
166 This method is for doing a simple syntactic check and ensure
167 validity of opcode parameters, without any cluster-related
168 checks. While the same can be accomplished in ExpandNames and/or
169 CheckPrereq, doing these separate is better because:
171 - ExpandNames is left as as purely a lock-related function
172 - CheckPrereq is run after we have acquired locks (and possible
175 The function is allowed to change the self.op attribute so that
176 later methods can no longer worry about missing parameters.
181 def ExpandNames(self):
182 """Expand names for this LU.
184 This method is called before starting to execute the opcode, and it should
185 update all the parameters of the opcode to their canonical form (e.g. a
186 short node name must be fully expanded after this method has successfully
187 completed). This way locking, hooks, logging, etc. can work correctly.
189 LUs which implement this method must also populate the self.needed_locks
190 member, as a dict with lock levels as keys, and a list of needed lock names
193 - use an empty dict if you don't need any lock
194 - if you don't need any lock at a particular level omit that level
195 - don't put anything for the BGL level
196 - if you want all locks at a level use locking.ALL_SET as a value
198 If you need to share locks (rather than acquire them exclusively) at one
199 level you can modify self.share_locks, setting a true value (usually 1) for
200 that level. By default locks are not shared.
202 This function can also define a list of tasklets, which then will be
203 executed in order instead of the usual LU-level CheckPrereq and Exec
204 functions, if those are not defined by the LU.
208 # Acquire all nodes and one instance
209 self.needed_locks = {
210 locking.LEVEL_NODE: locking.ALL_SET,
211 locking.LEVEL_INSTANCE: ['instance1.example.com'],
213 # Acquire just two nodes
214 self.needed_locks = {
215 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218 self.needed_locks = {} # No, you can't leave it to the default value None
221 # The implementation of this method is mandatory only if the new LU is
222 # concurrent, so that old LUs don't need to be changed all at the same
225 self.needed_locks = {} # Exclusive LUs don't need locks.
227 raise NotImplementedError
229 def DeclareLocks(self, level):
230 """Declare LU locking needs for a level
232 While most LUs can just declare their locking needs at ExpandNames time,
233 sometimes there's the need to calculate some locks after having acquired
234 the ones before. This function is called just before acquiring locks at a
235 particular level, but after acquiring the ones at lower levels, and permits
236 such calculations. It can be used to modify self.needed_locks, and by
237 default it does nothing.
239 This function is only called if you have something already set in
240 self.needed_locks for the level.
242 @param level: Locking level which is going to be locked
243 @type level: member of ganeti.locking.LEVELS
247 def CheckPrereq(self):
248 """Check prerequisites for this LU.
250 This method should check that the prerequisites for the execution
251 of this LU are fulfilled. It can do internode communication, but
252 it should be idempotent - no cluster or system changes are
255 The method should raise errors.OpPrereqError in case something is
256 not fulfilled. Its return value is ignored.
258 This method should also update all the parameters of the opcode to
259 their canonical form if it hasn't been done by ExpandNames before.
262 if self.tasklets is not None:
263 for (idx, tl) in enumerate(self.tasklets):
264 logging.debug("Checking prerequisites for tasklet %s/%s",
265 idx + 1, len(self.tasklets))
270 def Exec(self, feedback_fn):
273 This method should implement the actual work. It should raise
274 errors.OpExecError for failures that are somewhat dealt with in
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283 raise NotImplementedError
285 def BuildHooksEnv(self):
286 """Build hooks environment for this LU.
289 @return: Dictionary containing the environment that will be used for
290 running the hooks for this LU. The keys of the dict must not be prefixed
291 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292 will extend the environment with additional variables. If no environment
293 should be defined, an empty dictionary should be returned (not C{None}).
294 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
298 raise NotImplementedError
300 def BuildHooksNodes(self):
301 """Build list of nodes to run LU's hooks.
303 @rtype: tuple; (list, list)
304 @return: Tuple containing a list of node names on which the hook
305 should run before the execution and a list of node names on which the
306 hook should run after the execution. No nodes should be returned as an
307 empty list (and not None).
308 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
312 raise NotImplementedError
314 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315 """Notify the LU about the results of its hooks.
317 This method is called every time a hooks phase is executed, and notifies
318 the Logical Unit about the hooks' result. The LU can then use it to alter
319 its result based on the hooks. By default the method does nothing and the
320 previous result is passed back unchanged but any LU can define it if it
321 wants to use the local cluster hook-scripts somehow.
323 @param phase: one of L{constants.HOOKS_PHASE_POST} or
324 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325 @param hook_results: the results of the multi-node hooks rpc call
326 @param feedback_fn: function used send feedback back to the caller
327 @param lu_result: the previous Exec result this LU had, or None
329 @return: the new Exec result, based on the previous result
333 # API must be kept, thus we ignore the unused argument and could
334 # be a function warnings
335 # pylint: disable-msg=W0613,R0201
338 def _ExpandAndLockInstance(self):
339 """Helper function to expand and lock an instance.
341 Many LUs that work on an instance take its name in self.op.instance_name
342 and need to expand it and then declare the expanded name for locking. This
343 function does it, and then updates self.op.instance_name to the expanded
344 name. It also initializes needed_locks as a dict, if this hasn't been done
348 if self.needed_locks is None:
349 self.needed_locks = {}
351 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352 "_ExpandAndLockInstance called with instance-level locks set"
353 self.op.instance_name = _ExpandInstanceName(self.cfg,
354 self.op.instance_name)
355 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
357 def _LockInstancesNodes(self, primary_only=False):
358 """Helper function to declare instances' nodes for locking.
360 This function should be called after locking one or more instances to lock
361 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362 with all primary or secondary nodes for instances already locked and
363 present in self.needed_locks[locking.LEVEL_INSTANCE].
365 It should be called from DeclareLocks, and for safety only works if
366 self.recalculate_locks[locking.LEVEL_NODE] is set.
368 In the future it may grow parameters to just lock some instance's nodes, or
369 to just lock primaries or secondary nodes, if needed.
371 If should be called in DeclareLocks in a way similar to::
373 if level == locking.LEVEL_NODE:
374 self._LockInstancesNodes()
376 @type primary_only: boolean
377 @param primary_only: only lock primary nodes of locked instances
380 assert locking.LEVEL_NODE in self.recalculate_locks, \
381 "_LockInstancesNodes helper function called with no nodes to recalculate"
383 # TODO: check if we're really been called with the instance locks held
385 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386 # future we might want to have different behaviors depending on the value
387 # of self.recalculate_locks[locking.LEVEL_NODE]
389 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390 instance = self.context.cfg.GetInstanceInfo(instance_name)
391 wanted_nodes.append(instance.primary_node)
393 wanted_nodes.extend(instance.secondary_nodes)
395 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
400 del self.recalculate_locks[locking.LEVEL_NODE]
403 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404 """Simple LU which runs no hooks.
406 This LU is intended as a parent for other LogicalUnits which will
407 run no hooks, in order to reduce duplicate code.
413 def BuildHooksEnv(self):
414 """Empty BuildHooksEnv for NoHooksLu.
416 This just raises an error.
419 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
421 def BuildHooksNodes(self):
422 """Empty BuildHooksNodes for NoHooksLU.
425 raise AssertionError("BuildHooksNodes called for NoHooksLU")
429 """Tasklet base class.
431 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432 they can mix legacy code with tasklets. Locking needs to be done in the LU,
433 tasklets know nothing about locks.
435 Subclasses must follow these rules:
436 - Implement CheckPrereq
440 def __init__(self, lu):
447 def CheckPrereq(self):
448 """Check prerequisites for this tasklets.
450 This method should check whether the prerequisites for the execution of
451 this tasklet are fulfilled. It can do internode communication, but it
452 should be idempotent - no cluster or system changes are allowed.
454 The method should raise errors.OpPrereqError in case something is not
455 fulfilled. Its return value is ignored.
457 This method should also update all parameters to their canonical form if it
458 hasn't been done before.
463 def Exec(self, feedback_fn):
464 """Execute the tasklet.
466 This method should implement the actual work. It should raise
467 errors.OpExecError for failures that are somewhat dealt with in code, or
471 raise NotImplementedError
475 """Base for query utility classes.
478 #: Attribute holding field definitions
481 def __init__(self, filter_, fields, use_locking):
482 """Initializes this class.
485 self.use_locking = use_locking
487 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
489 self.requested_data = self.query.RequestedData()
490 self.names = self.query.RequestedNames()
492 # Sort only if no names were requested
493 self.sort_by_name = not self.names
495 self.do_locking = None
498 def _GetNames(self, lu, all_names, lock_level):
499 """Helper function to determine names asked for in the query.
503 names = lu.glm.list_owned(lock_level)
507 if self.wanted == locking.ALL_SET:
508 assert not self.names
509 # caller didn't specify names, so ordering is not important
510 return utils.NiceSort(names)
512 # caller specified names and we must keep the same order
514 assert not self.do_locking or lu.glm.is_owned(lock_level)
516 missing = set(self.wanted).difference(names)
518 raise errors.OpExecError("Some items were removed before retrieving"
519 " their data: %s" % missing)
521 # Return expanded names
524 def ExpandNames(self, lu):
525 """Expand names for this query.
527 See L{LogicalUnit.ExpandNames}.
530 raise NotImplementedError()
532 def DeclareLocks(self, lu, level):
533 """Declare locks for this query.
535 See L{LogicalUnit.DeclareLocks}.
538 raise NotImplementedError()
540 def _GetQueryData(self, lu):
541 """Collects all data for this query.
543 @return: Query data object
546 raise NotImplementedError()
548 def NewStyleQuery(self, lu):
549 """Collect data and execute query.
552 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553 sort_by_name=self.sort_by_name)
555 def OldStyleQuery(self, lu):
556 """Collect data and execute query.
559 return self.query.OldStyleQuery(self._GetQueryData(lu),
560 sort_by_name=self.sort_by_name)
563 def _GetWantedNodes(lu, nodes):
564 """Returns list of checked and expanded node names.
566 @type lu: L{LogicalUnit}
567 @param lu: the logical unit on whose behalf we execute
569 @param nodes: list of node names or None for all nodes
571 @return: the list of nodes, sorted
572 @raise errors.ProgrammerError: if the nodes parameter is wrong type
576 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
578 return utils.NiceSort(lu.cfg.GetNodeList())
581 def _GetWantedInstances(lu, instances):
582 """Returns list of checked and expanded instance names.
584 @type lu: L{LogicalUnit}
585 @param lu: the logical unit on whose behalf we execute
586 @type instances: list
587 @param instances: list of instance names or None for all instances
589 @return: the list of instances, sorted
590 @raise errors.OpPrereqError: if the instances parameter is wrong type
591 @raise errors.OpPrereqError: if any of the passed instances is not found
595 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
597 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
601 def _GetUpdatedParams(old_params, update_dict,
602 use_default=True, use_none=False):
603 """Return the new version of a parameter dictionary.
605 @type old_params: dict
606 @param old_params: old parameters
607 @type update_dict: dict
608 @param update_dict: dict containing new parameter values, or
609 constants.VALUE_DEFAULT to reset the parameter to its default
611 @param use_default: boolean
612 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
613 values as 'to be deleted' values
614 @param use_none: boolean
615 @type use_none: whether to recognise C{None} values as 'to be
618 @return: the new parameter dictionary
621 params_copy = copy.deepcopy(old_params)
622 for key, val in update_dict.iteritems():
623 if ((use_default and val == constants.VALUE_DEFAULT) or
624 (use_none and val is None)):
630 params_copy[key] = val
634 def _ReleaseLocks(lu, level, names=None, keep=None):
635 """Releases locks owned by an LU.
637 @type lu: L{LogicalUnit}
638 @param level: Lock level
639 @type names: list or None
640 @param names: Names of locks to release
641 @type keep: list or None
642 @param keep: Names of locks to retain
645 assert not (keep is not None and names is not None), \
646 "Only one of the 'names' and the 'keep' parameters can be given"
648 if names is not None:
649 should_release = names.__contains__
651 should_release = lambda name: name not in keep
653 should_release = None
659 # Determine which locks to release
660 for name in lu.glm.list_owned(level):
661 if should_release(name):
666 assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
668 # Release just some locks
669 lu.glm.release(level, names=release)
671 assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
674 lu.glm.release(level)
676 assert not lu.glm.is_owned(level), "No locks should be owned"
679 def _RunPostHook(lu, node_name):
680 """Runs the post-hook for an opcode on a single node.
683 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
685 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
687 # pylint: disable-msg=W0702
688 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
691 def _CheckOutputFields(static, dynamic, selected):
692 """Checks whether all selected fields are valid.
694 @type static: L{utils.FieldSet}
695 @param static: static fields set
696 @type dynamic: L{utils.FieldSet}
697 @param dynamic: dynamic fields set
704 delta = f.NonMatching(selected)
706 raise errors.OpPrereqError("Unknown output fields selected: %s"
707 % ",".join(delta), errors.ECODE_INVAL)
710 def _CheckGlobalHvParams(params):
711 """Validates that given hypervisor params are not global ones.
713 This will ensure that instances don't get customised versions of
717 used_globals = constants.HVC_GLOBALS.intersection(params)
719 msg = ("The following hypervisor parameters are global and cannot"
720 " be customized at instance level, please modify them at"
721 " cluster level: %s" % utils.CommaJoin(used_globals))
722 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
725 def _CheckNodeOnline(lu, node, msg=None):
726 """Ensure that a given node is online.
728 @param lu: the LU on behalf of which we make the check
729 @param node: the node to check
730 @param msg: if passed, should be a message to replace the default one
731 @raise errors.OpPrereqError: if the node is offline
735 msg = "Can't use offline node"
736 if lu.cfg.GetNodeInfo(node).offline:
737 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
740 def _CheckNodeNotDrained(lu, node):
741 """Ensure that a given node is not drained.
743 @param lu: the LU on behalf of which we make the check
744 @param node: the node to check
745 @raise errors.OpPrereqError: if the node is drained
748 if lu.cfg.GetNodeInfo(node).drained:
749 raise errors.OpPrereqError("Can't use drained node %s" % node,
753 def _CheckNodeVmCapable(lu, node):
754 """Ensure that a given node is vm capable.
756 @param lu: the LU on behalf of which we make the check
757 @param node: the node to check
758 @raise errors.OpPrereqError: if the node is not vm capable
761 if not lu.cfg.GetNodeInfo(node).vm_capable:
762 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
766 def _CheckNodeHasOS(lu, node, os_name, force_variant):
767 """Ensure that a node supports a given OS.
769 @param lu: the LU on behalf of which we make the check
770 @param node: the node to check
771 @param os_name: the OS to query about
772 @param force_variant: whether to ignore variant errors
773 @raise errors.OpPrereqError: if the node is not supporting the OS
776 result = lu.rpc.call_os_get(node, os_name)
777 result.Raise("OS '%s' not in supported OS list for node %s" %
779 prereq=True, ecode=errors.ECODE_INVAL)
780 if not force_variant:
781 _CheckOSVariant(result.payload, os_name)
784 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785 """Ensure that a node has the given secondary ip.
787 @type lu: L{LogicalUnit}
788 @param lu: the LU on behalf of which we make the check
790 @param node: the node to check
791 @type secondary_ip: string
792 @param secondary_ip: the ip to check
793 @type prereq: boolean
794 @param prereq: whether to throw a prerequisite or an execute error
795 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
799 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800 result.Raise("Failure checking secondary ip on node %s" % node,
801 prereq=prereq, ecode=errors.ECODE_ENVIRON)
802 if not result.payload:
803 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804 " please fix and re-run this command" % secondary_ip)
806 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
808 raise errors.OpExecError(msg)
811 def _GetClusterDomainSecret():
812 """Reads the cluster domain secret.
815 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
819 def _CheckInstanceDown(lu, instance, reason):
820 """Ensure that an instance is not running."""
821 if instance.admin_up:
822 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823 (instance.name, reason), errors.ECODE_STATE)
825 pnode = instance.primary_node
826 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827 ins_l.Raise("Can't contact node %s for instance information" % pnode,
828 prereq=True, ecode=errors.ECODE_ENVIRON)
830 if instance.name in ins_l.payload:
831 raise errors.OpPrereqError("Instance %s is running, %s" %
832 (instance.name, reason), errors.ECODE_STATE)
835 def _ExpandItemName(fn, name, kind):
836 """Expand an item name.
838 @param fn: the function to use for expansion
839 @param name: requested item name
840 @param kind: text description ('Node' or 'Instance')
841 @return: the resolved (full) name
842 @raise errors.OpPrereqError: if the item is not found
846 if full_name is None:
847 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
852 def _ExpandNodeName(cfg, name):
853 """Wrapper over L{_ExpandItemName} for nodes."""
854 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
857 def _ExpandInstanceName(cfg, name):
858 """Wrapper over L{_ExpandItemName} for instance."""
859 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
862 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863 memory, vcpus, nics, disk_template, disks,
864 bep, hvp, hypervisor_name, tags):
865 """Builds instance related env variables for hooks
867 This builds the hook environment from individual variables.
870 @param name: the name of the instance
871 @type primary_node: string
872 @param primary_node: the name of the instance's primary node
873 @type secondary_nodes: list
874 @param secondary_nodes: list of secondary nodes as strings
875 @type os_type: string
876 @param os_type: the name of the instance's OS
877 @type status: boolean
878 @param status: the should_run status of the instance
880 @param memory: the memory size of the instance
882 @param vcpus: the count of VCPUs the instance has
884 @param nics: list of tuples (ip, mac, mode, link) representing
885 the NICs the instance has
886 @type disk_template: string
887 @param disk_template: the disk template of the instance
889 @param disks: the list of (size, mode) pairs
891 @param bep: the backend parameters for the instance
893 @param hvp: the hypervisor parameters for the instance
894 @type hypervisor_name: string
895 @param hypervisor_name: the hypervisor for the instance
897 @param tags: list of instance tags as strings
899 @return: the hook environment for this instance
908 "INSTANCE_NAME": name,
909 "INSTANCE_PRIMARY": primary_node,
910 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
911 "INSTANCE_OS_TYPE": os_type,
912 "INSTANCE_STATUS": str_status,
913 "INSTANCE_MEMORY": memory,
914 "INSTANCE_VCPUS": vcpus,
915 "INSTANCE_DISK_TEMPLATE": disk_template,
916 "INSTANCE_HYPERVISOR": hypervisor_name,
920 nic_count = len(nics)
921 for idx, (ip, mac, mode, link) in enumerate(nics):
924 env["INSTANCE_NIC%d_IP" % idx] = ip
925 env["INSTANCE_NIC%d_MAC" % idx] = mac
926 env["INSTANCE_NIC%d_MODE" % idx] = mode
927 env["INSTANCE_NIC%d_LINK" % idx] = link
928 if mode == constants.NIC_MODE_BRIDGED:
929 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
933 env["INSTANCE_NIC_COUNT"] = nic_count
936 disk_count = len(disks)
937 for idx, (size, mode) in enumerate(disks):
938 env["INSTANCE_DISK%d_SIZE" % idx] = size
939 env["INSTANCE_DISK%d_MODE" % idx] = mode
943 env["INSTANCE_DISK_COUNT"] = disk_count
948 env["INSTANCE_TAGS"] = " ".join(tags)
950 for source, kind in [(bep, "BE"), (hvp, "HV")]:
951 for key, value in source.items():
952 env["INSTANCE_%s_%s" % (kind, key)] = value
957 def _NICListToTuple(lu, nics):
958 """Build a list of nic information tuples.
960 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
961 value in LUInstanceQueryData.
963 @type lu: L{LogicalUnit}
964 @param lu: the logical unit on whose behalf we execute
965 @type nics: list of L{objects.NIC}
966 @param nics: list of nics to convert to hooks tuples
970 cluster = lu.cfg.GetClusterInfo()
974 filled_params = cluster.SimpleFillNIC(nic.nicparams)
975 mode = filled_params[constants.NIC_MODE]
976 link = filled_params[constants.NIC_LINK]
977 hooks_nics.append((ip, mac, mode, link))
981 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
982 """Builds instance related env variables for hooks from an object.
984 @type lu: L{LogicalUnit}
985 @param lu: the logical unit on whose behalf we execute
986 @type instance: L{objects.Instance}
987 @param instance: the instance for which we should build the
990 @param override: dictionary with key/values that will override
993 @return: the hook environment dictionary
996 cluster = lu.cfg.GetClusterInfo()
997 bep = cluster.FillBE(instance)
998 hvp = cluster.FillHV(instance)
1000 'name': instance.name,
1001 'primary_node': instance.primary_node,
1002 'secondary_nodes': instance.secondary_nodes,
1003 'os_type': instance.os,
1004 'status': instance.admin_up,
1005 'memory': bep[constants.BE_MEMORY],
1006 'vcpus': bep[constants.BE_VCPUS],
1007 'nics': _NICListToTuple(lu, instance.nics),
1008 'disk_template': instance.disk_template,
1009 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1012 'hypervisor_name': instance.hypervisor,
1013 'tags': instance.tags,
1016 args.update(override)
1017 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1020 def _AdjustCandidatePool(lu, exceptions):
1021 """Adjust the candidate pool after node operations.
1024 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1026 lu.LogInfo("Promoted nodes to master candidate role: %s",
1027 utils.CommaJoin(node.name for node in mod_list))
1028 for name in mod_list:
1029 lu.context.ReaddNode(name)
1030 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1032 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1036 def _DecideSelfPromotion(lu, exceptions=None):
1037 """Decide whether I should promote myself as a master candidate.
1040 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042 # the new node will increase mc_max with one, so:
1043 mc_should = min(mc_should + 1, cp_size)
1044 return mc_now < mc_should
1047 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1048 """Check that the brigdes needed by a list of nics exist.
1051 cluster = lu.cfg.GetClusterInfo()
1052 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1053 brlist = [params[constants.NIC_LINK] for params in paramslist
1054 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1056 result = lu.rpc.call_bridges_exist(target_node, brlist)
1057 result.Raise("Error checking bridges on destination node '%s'" %
1058 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1061 def _CheckInstanceBridgesExist(lu, instance, node=None):
1062 """Check that the brigdes needed by an instance exist.
1066 node = instance.primary_node
1067 _CheckNicsBridgesExist(lu, instance.nics, node)
1070 def _CheckOSVariant(os_obj, name):
1071 """Check whether an OS name conforms to the os variants specification.
1073 @type os_obj: L{objects.OS}
1074 @param os_obj: OS object to check
1076 @param name: OS name passed by the user, to check for validity
1079 if not os_obj.supported_variants:
1081 variant = objects.OS.GetVariant(name)
1083 raise errors.OpPrereqError("OS name must include a variant",
1086 if variant not in os_obj.supported_variants:
1087 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1090 def _GetNodeInstancesInner(cfg, fn):
1091 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1094 def _GetNodeInstances(cfg, node_name):
1095 """Returns a list of all primary and secondary instances on a node.
1099 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1102 def _GetNodePrimaryInstances(cfg, node_name):
1103 """Returns primary instances on a node.
1106 return _GetNodeInstancesInner(cfg,
1107 lambda inst: node_name == inst.primary_node)
1110 def _GetNodeSecondaryInstances(cfg, node_name):
1111 """Returns secondary instances on a node.
1114 return _GetNodeInstancesInner(cfg,
1115 lambda inst: node_name in inst.secondary_nodes)
1118 def _GetStorageTypeArgs(cfg, storage_type):
1119 """Returns the arguments for a storage type.
1122 # Special case for file storage
1123 if storage_type == constants.ST_FILE:
1124 # storage.FileStorage wants a list of storage directories
1125 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1130 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1133 for dev in instance.disks:
1134 cfg.SetDiskID(dev, node_name)
1136 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1137 result.Raise("Failed to get disk status from node %s" % node_name,
1138 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1140 for idx, bdev_status in enumerate(result.payload):
1141 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1147 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1148 """Check the sanity of iallocator and node arguments and use the
1149 cluster-wide iallocator if appropriate.
1151 Check that at most one of (iallocator, node) is specified. If none is
1152 specified, then the LU's opcode's iallocator slot is filled with the
1153 cluster-wide default iallocator.
1155 @type iallocator_slot: string
1156 @param iallocator_slot: the name of the opcode iallocator slot
1157 @type node_slot: string
1158 @param node_slot: the name of the opcode target node slot
1161 node = getattr(lu.op, node_slot, None)
1162 iallocator = getattr(lu.op, iallocator_slot, None)
1164 if node is not None and iallocator is not None:
1165 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1167 elif node is None and iallocator is None:
1168 default_iallocator = lu.cfg.GetDefaultIAllocator()
1169 if default_iallocator:
1170 setattr(lu.op, iallocator_slot, default_iallocator)
1172 raise errors.OpPrereqError("No iallocator or node given and no"
1173 " cluster-wide default iallocator found;"
1174 " please specify either an iallocator or a"
1175 " node, or set a cluster-wide default"
1179 class LUClusterPostInit(LogicalUnit):
1180 """Logical unit for running hooks after cluster initialization.
1183 HPATH = "cluster-init"
1184 HTYPE = constants.HTYPE_CLUSTER
1186 def BuildHooksEnv(self):
1191 "OP_TARGET": self.cfg.GetClusterName(),
1194 def BuildHooksNodes(self):
1195 """Build hooks nodes.
1198 return ([], [self.cfg.GetMasterNode()])
1200 def Exec(self, feedback_fn):
1207 class LUClusterDestroy(LogicalUnit):
1208 """Logical unit for destroying the cluster.
1211 HPATH = "cluster-destroy"
1212 HTYPE = constants.HTYPE_CLUSTER
1214 def BuildHooksEnv(self):
1219 "OP_TARGET": self.cfg.GetClusterName(),
1222 def BuildHooksNodes(self):
1223 """Build hooks nodes.
1228 def CheckPrereq(self):
1229 """Check prerequisites.
1231 This checks whether the cluster is empty.
1233 Any errors are signaled by raising errors.OpPrereqError.
1236 master = self.cfg.GetMasterNode()
1238 nodelist = self.cfg.GetNodeList()
1239 if len(nodelist) != 1 or nodelist[0] != master:
1240 raise errors.OpPrereqError("There are still %d node(s) in"
1241 " this cluster." % (len(nodelist) - 1),
1243 instancelist = self.cfg.GetInstanceList()
1245 raise errors.OpPrereqError("There are still %d instance(s) in"
1246 " this cluster." % len(instancelist),
1249 def Exec(self, feedback_fn):
1250 """Destroys the cluster.
1253 master = self.cfg.GetMasterNode()
1255 # Run post hooks on master node before it's removed
1256 _RunPostHook(self, master)
1258 result = self.rpc.call_node_stop_master(master, False)
1259 result.Raise("Could not disable the master role")
1264 def _VerifyCertificate(filename):
1265 """Verifies a certificate for L{LUClusterVerifyConfig}.
1267 @type filename: string
1268 @param filename: Path to PEM file
1272 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1273 utils.ReadFile(filename))
1274 except Exception, err: # pylint: disable-msg=W0703
1275 return (LUClusterVerifyConfig.ETYPE_ERROR,
1276 "Failed to load X509 certificate %s: %s" % (filename, err))
1279 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1280 constants.SSL_CERT_EXPIRATION_ERROR)
1283 fnamemsg = "While verifying %s: %s" % (filename, msg)
1288 return (None, fnamemsg)
1289 elif errcode == utils.CERT_WARNING:
1290 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1291 elif errcode == utils.CERT_ERROR:
1292 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1294 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1297 def _GetAllHypervisorParameters(cluster, instances):
1298 """Compute the set of all hypervisor parameters.
1300 @type cluster: L{objects.Cluster}
1301 @param cluster: the cluster object
1302 @param instances: list of L{objects.Instance}
1303 @param instances: additional instances from which to obtain parameters
1304 @rtype: list of (origin, hypervisor, parameters)
1305 @return: a list with all parameters found, indicating the hypervisor they
1306 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1311 for hv_name in cluster.enabled_hypervisors:
1312 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1314 for os_name, os_hvp in cluster.os_hvp.items():
1315 for hv_name, hv_params in os_hvp.items():
1317 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1318 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1320 # TODO: collapse identical parameter values in a single one
1321 for instance in instances:
1322 if instance.hvparams:
1323 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1324 cluster.FillHV(instance)))
1329 class _VerifyErrors(object):
1330 """Mix-in for cluster/group verify LUs.
1332 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1333 self.op and self._feedback_fn to be available.)
1336 TCLUSTER = "cluster"
1338 TINSTANCE = "instance"
1340 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1341 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1342 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1343 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1344 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1345 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1346 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1347 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1348 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1349 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1350 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1351 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1352 ENODEDRBD = (TNODE, "ENODEDRBD")
1353 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1354 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1355 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1356 ENODEHV = (TNODE, "ENODEHV")
1357 ENODELVM = (TNODE, "ENODELVM")
1358 ENODEN1 = (TNODE, "ENODEN1")
1359 ENODENET = (TNODE, "ENODENET")
1360 ENODEOS = (TNODE, "ENODEOS")
1361 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1362 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1363 ENODERPC = (TNODE, "ENODERPC")
1364 ENODESSH = (TNODE, "ENODESSH")
1365 ENODEVERSION = (TNODE, "ENODEVERSION")
1366 ENODESETUP = (TNODE, "ENODESETUP")
1367 ENODETIME = (TNODE, "ENODETIME")
1368 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1370 ETYPE_FIELD = "code"
1371 ETYPE_ERROR = "ERROR"
1372 ETYPE_WARNING = "WARNING"
1374 def _Error(self, ecode, item, msg, *args, **kwargs):
1375 """Format an error message.
1377 Based on the opcode's error_codes parameter, either format a
1378 parseable error code, or a simpler error string.
1380 This must be called only from Exec and functions called from Exec.
1383 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1385 # first complete the msg
1388 # then format the whole message
1389 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1390 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1396 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397 # and finally report it via the feedback_fn
1398 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1400 def _ErrorIf(self, cond, *args, **kwargs):
1401 """Log an error message if the passed condition is True.
1405 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1407 self._Error(*args, **kwargs)
1408 # do not mark the operation as failed for WARN cases only
1409 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410 self.bad = self.bad or cond
1413 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414 """Verifies the cluster config.
1419 def _VerifyHVP(self, hvp_data):
1420 """Verifies locally the syntax of the hypervisor parameters.
1423 for item, hv_name, hv_params in hvp_data:
1424 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1427 hv_class = hypervisor.GetHypervisor(hv_name)
1428 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429 hv_class.CheckParameterSyntax(hv_params)
1430 except errors.GenericError, err:
1431 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1433 def ExpandNames(self):
1434 # Information can be safely retrieved as the BGL is acquired in exclusive
1436 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1437 self.all_node_info = self.cfg.GetAllNodesInfo()
1438 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1439 self.needed_locks = {}
1441 def Exec(self, feedback_fn):
1442 """Verify integrity of cluster, performing various test on nodes.
1446 self._feedback_fn = feedback_fn
1448 feedback_fn("* Verifying cluster config")
1450 for msg in self.cfg.VerifyConfig():
1451 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1453 feedback_fn("* Verifying cluster certificate files")
1455 for cert_filename in constants.ALL_CERT_FILES:
1456 (errcode, msg) = _VerifyCertificate(cert_filename)
1457 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1459 feedback_fn("* Verifying hypervisor parameters")
1461 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1462 self.all_inst_info.values()))
1464 feedback_fn("* Verifying all nodes belong to an existing group")
1466 # We do this verification here because, should this bogus circumstance
1467 # occur, it would never be caught by VerifyGroup, which only acts on
1468 # nodes/instances reachable from existing node groups.
1470 dangling_nodes = set(node.name for node in self.all_node_info.values()
1471 if node.group not in self.all_group_info)
1473 dangling_instances = {}
1474 no_node_instances = []
1476 for inst in self.all_inst_info.values():
1477 if inst.primary_node in dangling_nodes:
1478 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1479 elif inst.primary_node not in self.all_node_info:
1480 no_node_instances.append(inst.name)
1485 utils.CommaJoin(dangling_instances.get(node.name,
1487 for node in dangling_nodes]
1489 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1490 "the following nodes (and their instances) belong to a non"
1491 " existing group: %s", utils.CommaJoin(pretty_dangling))
1493 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1494 "the following instances have a non-existing primary-node:"
1495 " %s", utils.CommaJoin(no_node_instances))
1497 return (not self.bad, [g.name for g in self.all_group_info.values()])
1500 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1501 """Verifies the status of a node group.
1504 HPATH = "cluster-verify"
1505 HTYPE = constants.HTYPE_CLUSTER
1508 _HOOKS_INDENT_RE = re.compile("^", re.M)
1510 class NodeImage(object):
1511 """A class representing the logical and physical status of a node.
1514 @ivar name: the node name to which this object refers
1515 @ivar volumes: a structure as returned from
1516 L{ganeti.backend.GetVolumeList} (runtime)
1517 @ivar instances: a list of running instances (runtime)
1518 @ivar pinst: list of configured primary instances (config)
1519 @ivar sinst: list of configured secondary instances (config)
1520 @ivar sbp: dictionary of {primary-node: list of instances} for all
1521 instances for which this node is secondary (config)
1522 @ivar mfree: free memory, as reported by hypervisor (runtime)
1523 @ivar dfree: free disk, as reported by the node (runtime)
1524 @ivar offline: the offline status (config)
1525 @type rpc_fail: boolean
1526 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1527 not whether the individual keys were correct) (runtime)
1528 @type lvm_fail: boolean
1529 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1530 @type hyp_fail: boolean
1531 @ivar hyp_fail: whether the RPC call didn't return the instance list
1532 @type ghost: boolean
1533 @ivar ghost: whether this is a known node or not (config)
1534 @type os_fail: boolean
1535 @ivar os_fail: whether the RPC call didn't return valid OS data
1537 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1538 @type vm_capable: boolean
1539 @ivar vm_capable: whether the node can host instances
1542 def __init__(self, offline=False, name=None, vm_capable=True):
1551 self.offline = offline
1552 self.vm_capable = vm_capable
1553 self.rpc_fail = False
1554 self.lvm_fail = False
1555 self.hyp_fail = False
1557 self.os_fail = False
1560 def ExpandNames(self):
1561 # This raises errors.OpPrereqError on its own:
1562 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1564 # Get instances in node group; this is unsafe and needs verification later
1565 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1567 self.needed_locks = {
1568 locking.LEVEL_INSTANCE: inst_names,
1569 locking.LEVEL_NODEGROUP: [self.group_uuid],
1570 locking.LEVEL_NODE: [],
1573 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1575 def DeclareLocks(self, level):
1576 if level == locking.LEVEL_NODE:
1577 # Get members of node group; this is unsafe and needs verification later
1578 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1580 all_inst_info = self.cfg.GetAllInstancesInfo()
1582 # In Exec(), we warn about mirrored instances that have primary and
1583 # secondary living in separate node groups. To fully verify that
1584 # volumes for these instances are healthy, we will need to do an
1585 # extra call to their secondaries. We ensure here those nodes will
1587 for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1588 # Important: access only the instances whose lock is owned
1589 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1590 nodes.update(all_inst_info[inst].secondary_nodes)
1592 self.needed_locks[locking.LEVEL_NODE] = nodes
1594 def CheckPrereq(self):
1595 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1596 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1599 group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1601 unlocked_instances = \
1602 group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1605 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1606 utils.CommaJoin(unlocked_nodes))
1608 if unlocked_instances:
1609 raise errors.OpPrereqError("Missing lock for instances: %s" %
1610 utils.CommaJoin(unlocked_instances))
1612 self.all_node_info = self.cfg.GetAllNodesInfo()
1613 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1615 self.my_node_names = utils.NiceSort(group_nodes)
1616 self.my_inst_names = utils.NiceSort(group_instances)
1618 self.my_node_info = dict((name, self.all_node_info[name])
1619 for name in self.my_node_names)
1621 self.my_inst_info = dict((name, self.all_inst_info[name])
1622 for name in self.my_inst_names)
1624 # We detect here the nodes that will need the extra RPC calls for verifying
1625 # split LV volumes; they should be locked.
1626 extra_lv_nodes = set()
1628 for inst in self.my_inst_info.values():
1629 if inst.disk_template in constants.DTS_INT_MIRROR:
1630 group = self.my_node_info[inst.primary_node].group
1631 for nname in inst.secondary_nodes:
1632 if self.all_node_info[nname].group != group:
1633 extra_lv_nodes.add(nname)
1635 unlocked_lv_nodes = \
1636 extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1638 if unlocked_lv_nodes:
1639 raise errors.OpPrereqError("these nodes could be locked: %s" %
1640 utils.CommaJoin(unlocked_lv_nodes))
1641 self.extra_lv_nodes = list(extra_lv_nodes)
1643 def _VerifyNode(self, ninfo, nresult):
1644 """Perform some basic validation on data returned from a node.
1646 - check the result data structure is well formed and has all the
1648 - check ganeti version
1650 @type ninfo: L{objects.Node}
1651 @param ninfo: the node to check
1652 @param nresult: the results from the node
1654 @return: whether overall this call was successful (and we can expect
1655 reasonable values in the respose)
1659 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661 # main result, nresult should be a non-empty dict
1662 test = not nresult or not isinstance(nresult, dict)
1663 _ErrorIf(test, self.ENODERPC, node,
1664 "unable to verify node: no data returned")
1668 # compares ganeti version
1669 local_version = constants.PROTOCOL_VERSION
1670 remote_version = nresult.get("version", None)
1671 test = not (remote_version and
1672 isinstance(remote_version, (list, tuple)) and
1673 len(remote_version) == 2)
1674 _ErrorIf(test, self.ENODERPC, node,
1675 "connection to node returned invalid data")
1679 test = local_version != remote_version[0]
1680 _ErrorIf(test, self.ENODEVERSION, node,
1681 "incompatible protocol versions: master %s,"
1682 " node %s", local_version, remote_version[0])
1686 # node seems compatible, we can actually try to look into its results
1688 # full package version
1689 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1690 self.ENODEVERSION, node,
1691 "software version mismatch: master %s, node %s",
1692 constants.RELEASE_VERSION, remote_version[1],
1693 code=self.ETYPE_WARNING)
1695 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1696 if ninfo.vm_capable and isinstance(hyp_result, dict):
1697 for hv_name, hv_result in hyp_result.iteritems():
1698 test = hv_result is not None
1699 _ErrorIf(test, self.ENODEHV, node,
1700 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1702 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1703 if ninfo.vm_capable and isinstance(hvp_result, list):
1704 for item, hv_name, hv_result in hvp_result:
1705 _ErrorIf(True, self.ENODEHV, node,
1706 "hypervisor %s parameter verify failure (source %s): %s",
1707 hv_name, item, hv_result)
1709 test = nresult.get(constants.NV_NODESETUP,
1710 ["Missing NODESETUP results"])
1711 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1716 def _VerifyNodeTime(self, ninfo, nresult,
1717 nvinfo_starttime, nvinfo_endtime):
1718 """Check the node time.
1720 @type ninfo: L{objects.Node}
1721 @param ninfo: the node to check
1722 @param nresult: the remote results for the node
1723 @param nvinfo_starttime: the start time of the RPC call
1724 @param nvinfo_endtime: the end time of the RPC call
1728 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1730 ntime = nresult.get(constants.NV_TIME, None)
1732 ntime_merged = utils.MergeTime(ntime)
1733 except (ValueError, TypeError):
1734 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1737 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1738 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1739 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1740 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1744 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1745 "Node time diverges by at least %s from master node time",
1748 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1749 """Check the node LVM results.
1751 @type ninfo: L{objects.Node}
1752 @param ninfo: the node to check
1753 @param nresult: the remote results for the node
1754 @param vg_name: the configured VG name
1761 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1763 # checks vg existence and size > 20G
1764 vglist = nresult.get(constants.NV_VGLIST, None)
1766 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1768 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1769 constants.MIN_VG_SIZE)
1770 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1773 pvlist = nresult.get(constants.NV_PVLIST, None)
1774 test = pvlist is None
1775 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1777 # check that ':' is not present in PV names, since it's a
1778 # special character for lvcreate (denotes the range of PEs to
1780 for _, pvname, owner_vg in pvlist:
1781 test = ":" in pvname
1782 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1783 " '%s' of VG '%s'", pvname, owner_vg)
1785 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1786 """Check the node bridges.
1788 @type ninfo: L{objects.Node}
1789 @param ninfo: the node to check
1790 @param nresult: the remote results for the node
1791 @param bridges: the expected list of bridges
1798 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1800 missing = nresult.get(constants.NV_BRIDGES, None)
1801 test = not isinstance(missing, list)
1802 _ErrorIf(test, self.ENODENET, node,
1803 "did not return valid bridge information")
1805 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1806 utils.CommaJoin(sorted(missing)))
1808 def _VerifyNodeNetwork(self, ninfo, nresult):
1809 """Check the node network connectivity results.
1811 @type ninfo: L{objects.Node}
1812 @param ninfo: the node to check
1813 @param nresult: the remote results for the node
1817 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1819 test = constants.NV_NODELIST not in nresult
1820 _ErrorIf(test, self.ENODESSH, node,
1821 "node hasn't returned node ssh connectivity data")
1823 if nresult[constants.NV_NODELIST]:
1824 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1825 _ErrorIf(True, self.ENODESSH, node,
1826 "ssh communication with node '%s': %s", a_node, a_msg)
1828 test = constants.NV_NODENETTEST not in nresult
1829 _ErrorIf(test, self.ENODENET, node,
1830 "node hasn't returned node tcp connectivity data")
1832 if nresult[constants.NV_NODENETTEST]:
1833 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1835 _ErrorIf(True, self.ENODENET, node,
1836 "tcp communication with node '%s': %s",
1837 anode, nresult[constants.NV_NODENETTEST][anode])
1839 test = constants.NV_MASTERIP not in nresult
1840 _ErrorIf(test, self.ENODENET, node,
1841 "node hasn't returned node master IP reachability data")
1843 if not nresult[constants.NV_MASTERIP]:
1844 if node == self.master_node:
1845 msg = "the master node cannot reach the master IP (not configured?)"
1847 msg = "cannot reach the master IP"
1848 _ErrorIf(True, self.ENODENET, node, msg)
1850 def _VerifyInstance(self, instance, instanceconfig, node_image,
1852 """Verify an instance.
1854 This function checks to see if the required block devices are
1855 available on the instance's node.
1858 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1859 node_current = instanceconfig.primary_node
1861 node_vol_should = {}
1862 instanceconfig.MapLVsByNode(node_vol_should)
1864 for node in node_vol_should:
1865 n_img = node_image[node]
1866 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1867 # ignore missing volumes on offline or broken nodes
1869 for volume in node_vol_should[node]:
1870 test = volume not in n_img.volumes
1871 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1872 "volume %s missing on node %s", volume, node)
1874 if instanceconfig.admin_up:
1875 pri_img = node_image[node_current]
1876 test = instance not in pri_img.instances and not pri_img.offline
1877 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1878 "instance not running on its primary node %s",
1881 diskdata = [(nname, success, status, idx)
1882 for (nname, disks) in diskstatus.items()
1883 for idx, (success, status) in enumerate(disks)]
1885 for nname, success, bdev_status, idx in diskdata:
1886 # the 'ghost node' construction in Exec() ensures that we have a
1888 snode = node_image[nname]
1889 bad_snode = snode.ghost or snode.offline
1890 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1891 self.EINSTANCEFAULTYDISK, instance,
1892 "couldn't retrieve status for disk/%s on %s: %s",
1893 idx, nname, bdev_status)
1894 _ErrorIf((instanceconfig.admin_up and success and
1895 bdev_status.ldisk_status == constants.LDS_FAULTY),
1896 self.EINSTANCEFAULTYDISK, instance,
1897 "disk/%s on %s is faulty", idx, nname)
1899 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1900 """Verify if there are any unknown volumes in the cluster.
1902 The .os, .swap and backup volumes are ignored. All other volumes are
1903 reported as unknown.
1905 @type reserved: L{ganeti.utils.FieldSet}
1906 @param reserved: a FieldSet of reserved volume names
1909 for node, n_img in node_image.items():
1910 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1911 # skip non-healthy nodes
1913 for volume in n_img.volumes:
1914 test = ((node not in node_vol_should or
1915 volume not in node_vol_should[node]) and
1916 not reserved.Matches(volume))
1917 self._ErrorIf(test, self.ENODEORPHANLV, node,
1918 "volume %s is unknown", volume)
1920 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1921 """Verify N+1 Memory Resilience.
1923 Check that if one single node dies we can still start all the
1924 instances it was primary for.
1927 cluster_info = self.cfg.GetClusterInfo()
1928 for node, n_img in node_image.items():
1929 # This code checks that every node which is now listed as a
1930 # secondary has enough memory to host all the instances it would
1931 # have to take over, should a single other node in the cluster fail.
1932 # FIXME: not ready for failover to an arbitrary node
1933 # FIXME: does not support file-backed instances
1934 # WARNING: we currently take into account down instances as well
1935 # as up ones, considering that even if they're down someone
1936 # might want to start them even in the event of a node failure.
1938 # we're skipping offline nodes from the N+1 warning, since
1939 # most likely we don't have good memory information from them;
1940 # we already list instances living on such nodes, and that's
1943 for prinode, instances in n_img.sbp.items():
1945 for instance in instances:
1946 bep = cluster_info.FillBE(instance_cfg[instance])
1947 if bep[constants.BE_AUTO_BALANCE]:
1948 needed_mem += bep[constants.BE_MEMORY]
1949 test = n_img.mfree < needed_mem
1950 self._ErrorIf(test, self.ENODEN1, node,
1951 "not enough memory to accomodate instance failovers"
1952 " should node %s fail (%dMiB needed, %dMiB available)",
1953 prinode, needed_mem, n_img.mfree)
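# Worked example (hypothetical numbers): if this node is secondary for two
# auto-balanced instances of primary node "nodeA", with BE_MEMORY of 1024
# and 2048 MiB, then needed_mem for prinode "nodeA" is 3072 MiB and an
# ENODEN1 error is reported whenever the node's reported mfree drops below
# that value.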
1956 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1957 (files_all, files_all_opt, files_mc, files_vm)):
1958 """Verifies file checksums collected from all nodes.
1960 @param errorif: Callback for reporting errors
1961 @param nodeinfo: List of L{objects.Node} objects
1962 @param master_node: Name of master node
1963 @param all_nvinfo: RPC results
1966 node_names = frozenset(node.name for node in nodeinfo)
1968 assert master_node in node_names
1969 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1970 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1971 "Found file listed in more than one file list"
1973 # Define functions determining which nodes to consider for a file
1974 file2nodefn = dict([(filename, fn)
1975 for (files, fn) in [(files_all, None),
1976 (files_all_opt, None),
1977 (files_mc, lambda node: (node.master_candidate or
1978 node.name == master_node)),
1979 (files_vm, lambda node: node.vm_capable)]
1980 for filename in files])
1982 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1984 for node in nodeinfo:
1985 nresult = all_nvinfo[node.name]
1987 if nresult.fail_msg or not nresult.payload:
1990 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1992 test = not (node_files and isinstance(node_files, dict))
1993 errorif(test, cls.ENODEFILECHECK, node.name,
1994 "Node did not return file checksum data")
1998 for (filename, checksum) in node_files.items():
1999 # Check if the file should be considered for a node
2000 fn = file2nodefn[filename]
2001 if fn is None or fn(node):
2002 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2004 for (filename, checksums) in fileinfo.items():
2005 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2007 # Nodes having the file
2008 with_file = frozenset(node_name
2009 for nodes in fileinfo[filename].values()
2010 for node_name in nodes)
2012 # Nodes missing file
2013 missing_file = node_names - with_file
2015 if filename in files_all_opt:
2017 errorif(missing_file and missing_file != node_names,
2018 cls.ECLUSTERFILECHECK, None,
2019 "File %s is optional, but it must exist on all or no nodes (not"
2021 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2023 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2024 "File %s is missing from node(s) %s", filename,
2025 utils.CommaJoin(utils.NiceSort(missing_file)))
2027 # See if there are multiple versions of the file
2028 test = len(checksums) > 1
2030 variants = ["variant %s on %s" %
2031 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2032 for (idx, (checksum, nodes)) in
2033 enumerate(sorted(checksums.items()))]
2037 errorif(test, cls.ECLUSTERFILECHECK, None,
2038 "File %s found with %s different checksums (%s)",
2039 filename, len(checksums), "; ".join(variants))
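# Illustrative sketch of the structure checked above (hypothetical data):
#   fileinfo["/etc/example.conf"] == {"<checksum A>": set(["node1", "node2"]),
#                                     "<checksum B>": set(["node3"])}
# is reported as two variants of the same file, while a file from
# files_all_opt only triggers an error when it exists on a strict, non-empty
# subset of the nodes.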
2041 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2043 """Verifies and the node DRBD status.
2045 @type ninfo: L{objects.Node}
2046 @param ninfo: the node to check
2047 @param nresult: the remote results for the node
2048 @param instanceinfo: the dict of instances
2049 @param drbd_helper: the configured DRBD usermode helper
2050 @param drbd_map: the DRBD map as returned by
2051 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2055 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2058 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2059 test = (helper_result is None)
2060 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2061 "no drbd usermode helper returned")
2063 status, payload = helper_result
2065 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2066 "drbd usermode helper check unsuccessful: %s", payload)
2067 test = status and (payload != drbd_helper)
2068 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2069 "wrong drbd usermode helper: %s", payload)
2071 # compute the DRBD minors
2073 for minor, instance in drbd_map[node].items():
2074 test = instance not in instanceinfo
2075 _ErrorIf(test, self.ECLUSTERCFG, None,
2076 "ghost instance '%s' in temporary DRBD map", instance)
2077 # ghost instance should not be running, but otherwise we
2078 # don't give double warnings (both ghost instance and
2079 # unallocated minor in use)
2081 node_drbd[minor] = (instance, False)
2083 instance = instanceinfo[instance]
2084 node_drbd[minor] = (instance.name, instance.admin_up)
2086 # and now check them
2087 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2088 test = not isinstance(used_minors, (tuple, list))
2089 _ErrorIf(test, self.ENODEDRBD, node,
2090 "cannot parse drbd status file: %s", str(used_minors))
2092 # we cannot check drbd status
2095 for minor, (iname, must_exist) in node_drbd.items():
2096 test = minor not in used_minors and must_exist
2097 _ErrorIf(test, self.ENODEDRBD, node,
2098 "drbd minor %d of instance %s is not active", minor, iname)
2099 for minor in used_minors:
2100 test = minor not in node_drbd
2101 _ErrorIf(test, self.ENODEDRBD, node,
2102 "unallocated drbd minor %d is in use", minor)
2104 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2105 """Builds the node OS structures.
2107 @type ninfo: L{objects.Node}
2108 @param ninfo: the node to check
2109 @param nresult: the remote results for the node
2110 @param nimg: the node image object
2114 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2116 remote_os = nresult.get(constants.NV_OSLIST, None)
2117 test = (not isinstance(remote_os, list) or
2118 not compat.all(isinstance(v, list) and len(v) == 7
2119 for v in remote_os))
2121 _ErrorIf(test, self.ENODEOS, node,
2122 "node hasn't returned valid OS data")
2131 for (name, os_path, status, diagnose,
2132 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2134 if name not in os_dict:
2137 # parameters is a list of lists instead of list of tuples due to
2138 # JSON lacking a real tuple type, fix it:
2139 parameters = [tuple(v) for v in parameters]
2140 os_dict[name].append((os_path, status, diagnose,
2141 set(variants), set(parameters), set(api_ver)))
2143 nimg.oslist = os_dict
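# Illustrative note: after this update nimg.oslist maps each OS name to a
# list of (path, status, diagnose, variants, parameters, api_versions)
# tuples, one per location the OS was found in on the node; a healthy node
# normally has exactly one entry per OS.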
2145 def _VerifyNodeOS(self, ninfo, nimg, base):
2146 """Verifies the node OS list.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nimg: the node image object
2151 @param base: the 'template' node we match against (e.g. from the master)
2155 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2157 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2159 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2160 for os_name, os_data in nimg.oslist.items():
2161 assert os_data, "Empty OS status for OS %s?!" % os_name
2162 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2163 _ErrorIf(not f_status, self.ENODEOS, node,
2164 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2165 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2166 "OS '%s' has multiple entries (first one shadows the rest): %s",
2167 os_name, utils.CommaJoin([v[0] for v in os_data]))
2168 # this will be caught in the backend too
2169 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2170 and not f_var, self.ENODEOS, node,
2171 "OS %s with API at least %d does not declare any variant",
2172 os_name, constants.OS_API_V15)
2173 # comparisons with the 'base' image
2174 test = os_name not in base.oslist
2175 _ErrorIf(test, self.ENODEOS, node,
2176 "Extra OS %s not present on reference node (%s)",
2180 assert base.oslist[os_name], "Base node has empty OS status?"
2181 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2183 # base OS is invalid, skipping
2185 for kind, a, b in [("API version", f_api, b_api),
2186 ("variants list", f_var, b_var),
2187 ("parameters", beautify_params(f_param),
2188 beautify_params(b_param))]:
2189 _ErrorIf(a != b, self.ENODEOS, node,
2190 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2191 kind, os_name, base.name,
2192 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2194 # check any missing OSes
2195 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2196 _ErrorIf(missing, self.ENODEOS, node,
2197 "OSes present on reference node %s but missing on this node: %s",
2198 base.name, utils.CommaJoin(missing))
2200 def _VerifyOob(self, ninfo, nresult):
2201 """Verifies out of band functionality of a node.
2203 @type ninfo: L{objects.Node}
2204 @param ninfo: the node to check
2205 @param nresult: the remote results for the node
2209 # We just have to verify the paths on master and/or master candidates
2210 # as the oob helper is invoked on the master
2211 if ((ninfo.master_candidate or ninfo.master_capable) and
2212 constants.NV_OOB_PATHS in nresult):
2213 for path_result in nresult[constants.NV_OOB_PATHS]:
2214 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2216 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2217 """Verifies and updates the node volume data.
2219 This function will update a L{NodeImage}'s internal structures
2220 with data from the remote call.
2222 @type ninfo: L{objects.Node}
2223 @param ninfo: the node to check
2224 @param nresult: the remote results for the node
2225 @param nimg: the node image object
2226 @param vg_name: the configured VG name
2230 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2232 nimg.lvm_fail = True
2233 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2236 elif isinstance(lvdata, basestring):
2237 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2238 utils.SafeEncode(lvdata))
2239 elif not isinstance(lvdata, dict):
2240 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2242 nimg.volumes = lvdata
2243 nimg.lvm_fail = False
2245 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2246 """Verifies and updates the node instance list.
2248 If the listing was successful, then updates this node's instance
2249 list. Otherwise, it marks the RPC call as failed for the instance list key.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param nimg: the node image object
2258 idata = nresult.get(constants.NV_INSTANCELIST, None)
2259 test = not isinstance(idata, list)
2260 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2261 " (instancelist): %s", utils.SafeEncode(str(idata)))
2263 nimg.hyp_fail = True
2265 nimg.instances = idata
2267 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2268 """Verifies and computes a node information map
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nimg: the node image object
2274 @param vg_name: the configured VG name
2278 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2280 # try to read free memory (from the hypervisor)
2281 hv_info = nresult.get(constants.NV_HVINFO, None)
2282 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2283 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2286 nimg.mfree = int(hv_info["memory_free"])
2287 except (ValueError, TypeError):
2288 _ErrorIf(True, self.ENODERPC, node,
2289 "node returned invalid nodeinfo, check hypervisor")
2291 # FIXME: devise a free space model for file based instances as well
2292 if vg_name is not None:
2293 test = (constants.NV_VGLIST not in nresult or
2294 vg_name not in nresult[constants.NV_VGLIST])
2295 _ErrorIf(test, self.ENODELVM, node,
2296 "node didn't return data for the volume group '%s'"
2297 " - it is either missing or broken", vg_name)
2300 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2301 except (ValueError, TypeError):
2302 _ErrorIf(True, self.ENODERPC, node,
2303 "node returned invalid LVM info, check LVM status")
2305 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2306 """Gets per-disk status information for all instances.
2308 @type nodelist: list of strings
2309 @param nodelist: Node names
2310 @type node_image: dict of (name, L{objects.Node})
2311 @param node_image: Node objects
2312 @type instanceinfo: dict of (name, L{objects.Instance})
2313 @param instanceinfo: Instance objects
2314 @rtype: {instance: {node: [(success, payload)]}}
2315 @return: a dictionary of per-instance dictionaries with nodes as
2316 keys and disk information as values; the disk information is a
2317 list of tuples (success, payload)
2320 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2323 node_disks_devonly = {}
2324 diskless_instances = set()
2325 diskless = constants.DT_DISKLESS
2327 for nname in nodelist:
2328 node_instances = list(itertools.chain(node_image[nname].pinst,
2329 node_image[nname].sinst))
2330 diskless_instances.update(inst for inst in node_instances
2331 if instanceinfo[inst].disk_template == diskless)
2332 disks = [(inst, disk)
2333 for inst in node_instances
2334 for disk in instanceinfo[inst].disks]
2337 # No need to collect data
2340 node_disks[nname] = disks
2342 # Creating copies as SetDiskID below will modify the objects and that can
2343 # lead to incorrect data returned from nodes
2344 devonly = [dev.Copy() for (_, dev) in disks]
2347 self.cfg.SetDiskID(dev, nname)
2349 node_disks_devonly[nname] = devonly
2351 assert len(node_disks) == len(node_disks_devonly)
2353 # Collect data from all nodes with disks
2354 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2357 assert len(result) == len(node_disks)
2361 for (nname, nres) in result.items():
2362 disks = node_disks[nname]
2365 # No data from this node
2366 data = len(disks) * [(False, "node offline")]
2369 _ErrorIf(msg, self.ENODERPC, nname,
2370 "while getting disk information: %s", msg)
2372 # No data from this node
2373 data = len(disks) * [(False, msg)]
2376 for idx, i in enumerate(nres.payload):
2377 if isinstance(i, (tuple, list)) and len(i) == 2:
2380 logging.warning("Invalid result from node %s, entry %d: %s",
2382 data.append((False, "Invalid result from the remote node"))
2384 for ((inst, _), status) in zip(disks, data):
2385 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2387 # Add empty entries for diskless instances.
2388 for inst in diskless_instances:
2389 assert inst not in instdisk
2392 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2393 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2394 compat.all(isinstance(s, (tuple, list)) and
2395 len(s) == 2 for s in statuses)
2396 for inst, nnames in instdisk.items()
2397 for nname, statuses in nnames.items())
2398 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2402 def BuildHooksEnv(self):
2405 Cluster-Verify hooks are only run in the post phase; if they fail, their
2406 output is logged in the verify output and the verification fails.
2410 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2413 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2414 for node in self.my_node_info.values())
2418 def BuildHooksNodes(self):
2419 """Build hooks nodes.
2422 assert self.my_node_names, ("Node list not gathered,"
2423 " has CheckPrereq been executed?")
2424 return ([], self.my_node_names)
2426 def Exec(self, feedback_fn):
2427 """Verify integrity of the node group, performing various test on nodes.
2430 # This method has too many local variables. pylint: disable-msg=R0914
2432 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2433 verbose = self.op.verbose
2434 self._feedback_fn = feedback_fn
2436 vg_name = self.cfg.GetVGName()
2437 drbd_helper = self.cfg.GetDRBDHelper()
2438 cluster = self.cfg.GetClusterInfo()
2439 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2440 hypervisors = cluster.enabled_hypervisors
2441 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2443 i_non_redundant = [] # Non redundant instances
2444 i_non_a_balanced = [] # Non auto-balanced instances
2445 n_offline = 0 # Count of offline nodes
2446 n_drained = 0 # Count of nodes being drained
2447 node_vol_should = {}
2449 # FIXME: verify OS list
2452 filemap = _ComputeAncillaryFiles(cluster, False)
2454 # do local checksums
2455 master_node = self.master_node = self.cfg.GetMasterNode()
2456 master_ip = self.cfg.GetMasterIP()
2458 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2460 # We will make nodes contact all nodes in their group, and one node from
2461 # every other group.
2462 # TODO: should it be a *random* node, different every time?
2463 online_nodes = [node.name for node in node_data_list if not node.offline]
2464 other_group_nodes = {}
2466 for name in sorted(self.all_node_info):
2467 node = self.all_node_info[name]
2468 if (node.group not in other_group_nodes
2469 and node.group != self.group_uuid
2470 and not node.offline):
2471 other_group_nodes[node.group] = node.name
2473 node_verify_param = {
2474 constants.NV_FILELIST:
2475 utils.UniqueSequence(filename
2476 for files in filemap
2477 for filename in files),
2478 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2479 constants.NV_HYPERVISOR: hypervisors,
2480 constants.NV_HVPARAMS:
2481 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2482 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2483 for node in node_data_list
2484 if not node.offline],
2485 constants.NV_INSTANCELIST: hypervisors,
2486 constants.NV_VERSION: None,
2487 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2488 constants.NV_NODESETUP: None,
2489 constants.NV_TIME: None,
2490 constants.NV_MASTERIP: (master_node, master_ip),
2491 constants.NV_OSLIST: None,
2492 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2495 if vg_name is not None:
2496 node_verify_param[constants.NV_VGLIST] = None
2497 node_verify_param[constants.NV_LVLIST] = vg_name
2498 node_verify_param[constants.NV_PVLIST] = [vg_name]
2499 node_verify_param[constants.NV_DRBDLIST] = None
2502 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2505 # FIXME: this needs to be changed per node-group, not cluster-wide
2507 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2508 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2509 bridges.add(default_nicpp[constants.NIC_LINK])
2510 for instance in self.my_inst_info.values():
2511 for nic in instance.nics:
2512 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2513 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2514 bridges.add(full_nic[constants.NIC_LINK])
2517 node_verify_param[constants.NV_BRIDGES] = list(bridges)
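# Illustrative note (hypothetical values): at this point node_verify_param
# maps NV_* keys to the arguments each remote check needs, e.g.
#   {constants.NV_LVLIST: "xenvg",
#    constants.NV_MASTERIP: (master_node, master_ip), ...}
# and is passed unchanged to every node via call_node_verify below.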
2519 # Build our expected cluster state
2520 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2522 vm_capable=node.vm_capable))
2523 for node in node_data_list)
2527 for node in self.all_node_info.values():
2528 path = _SupportsOob(self.cfg, node)
2529 if path and path not in oob_paths:
2530 oob_paths.append(path)
2533 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2535 for instance in self.my_inst_names:
2536 inst_config = self.my_inst_info[instance]
2538 for nname in inst_config.all_nodes:
2539 if nname not in node_image:
2540 gnode = self.NodeImage(name=nname)
2541 gnode.ghost = (nname not in self.all_node_info)
2542 node_image[nname] = gnode
2544 inst_config.MapLVsByNode(node_vol_should)
2546 pnode = inst_config.primary_node
2547 node_image[pnode].pinst.append(instance)
2549 for snode in inst_config.secondary_nodes:
2550 nimg = node_image[snode]
2551 nimg.sinst.append(instance)
2552 if pnode not in nimg.sbp:
2553 nimg.sbp[pnode] = []
2554 nimg.sbp[pnode].append(instance)
2556 # At this point, we have the in-memory data structures complete,
2557 # except for the runtime information, which we'll gather next
2559 # Due to the way our RPC system works, exact response times cannot be
2560 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2561 # time before and after executing the request, we can at least have a time
2563 nvinfo_starttime = time.time()
2564 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2566 self.cfg.GetClusterName())
2567 nvinfo_endtime = time.time()
2569 if self.extra_lv_nodes and vg_name is not None:
2571 self.rpc.call_node_verify(self.extra_lv_nodes,
2572 {constants.NV_LVLIST: vg_name},
2573 self.cfg.GetClusterName())
2575 extra_lv_nvinfo = {}
2577 all_drbd_map = self.cfg.ComputeDRBDMap()
2579 feedback_fn("* Gathering disk information (%s nodes)" %
2580 len(self.my_node_names))
2581 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2584 feedback_fn("* Verifying configuration file consistency")
2586 # If not all nodes are being checked, we need to make sure the master node
2587 # and a non-checked vm_capable node are in the list.
2588 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2590 vf_nvinfo = all_nvinfo.copy()
2591 vf_node_info = list(self.my_node_info.values())
2592 additional_nodes = []
2593 if master_node not in self.my_node_info:
2594 additional_nodes.append(master_node)
2595 vf_node_info.append(self.all_node_info[master_node])
2596 # Add the first vm_capable node we find which is not included
2597 for node in absent_nodes:
2598 nodeinfo = self.all_node_info[node]
2599 if nodeinfo.vm_capable and not nodeinfo.offline:
2600 additional_nodes.append(node)
2601 vf_node_info.append(self.all_node_info[node])
2603 key = constants.NV_FILELIST
2604 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2605 {key: node_verify_param[key]},
2606 self.cfg.GetClusterName()))
2608 vf_nvinfo = all_nvinfo
2609 vf_node_info = self.my_node_info.values()
2611 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2613 feedback_fn("* Verifying node status")
2617 for node_i in node_data_list:
2619 nimg = node_image[node]
2623 feedback_fn("* Skipping offline node %s" % (node,))
2627 if node == master_node:
2629 elif node_i.master_candidate:
2630 ntype = "master candidate"
2631 elif node_i.drained:
2637 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2639 msg = all_nvinfo[node].fail_msg
2640 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2642 nimg.rpc_fail = True
2645 nresult = all_nvinfo[node].payload
2647 nimg.call_ok = self._VerifyNode(node_i, nresult)
2648 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2649 self._VerifyNodeNetwork(node_i, nresult)
2650 self._VerifyOob(node_i, nresult)
2653 self._VerifyNodeLVM(node_i, nresult, vg_name)
2654 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2657 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2658 self._UpdateNodeInstances(node_i, nresult, nimg)
2659 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2660 self._UpdateNodeOS(node_i, nresult, nimg)
2662 if not nimg.os_fail:
2663 if refos_img is None:
2665 self._VerifyNodeOS(node_i, nimg, refos_img)
2666 self._VerifyNodeBridges(node_i, nresult, bridges)
2668 # Check whether all running instances are primary for the node. (This
2669 # can no longer be done from _VerifyInstance below, since some of the
2670 # wrong instances could be from other node groups.)
2671 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2673 for inst in non_primary_inst:
2674 test = inst in self.all_inst_info
2675 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2676 "instance should not run on node %s", node_i.name)
2677 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2678 "node is running unknown instance %s", inst)
2680 for node, result in extra_lv_nvinfo.items():
2681 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2682 node_image[node], vg_name)
2684 feedback_fn("* Verifying instance status")
2685 for instance in self.my_inst_names:
2687 feedback_fn("* Verifying instance %s" % instance)
2688 inst_config = self.my_inst_info[instance]
2689 self._VerifyInstance(instance, inst_config, node_image,
2691 inst_nodes_offline = []
2693 pnode = inst_config.primary_node
2694 pnode_img = node_image[pnode]
2695 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2696 self.ENODERPC, pnode, "instance %s, connection to"
2697 " primary node failed", instance)
2699 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2700 self.EINSTANCEBADNODE, instance,
2701 "instance is marked as running and lives on offline node %s",
2702 inst_config.primary_node)
2704 # If the instance is non-redundant we cannot survive losing its primary
2705 # node, so we are not N+1 compliant. On the other hand we have no disk
2706 # templates with more than one secondary, so that situation is not well supported either.
2708 # FIXME: does not support file-backed instances
2709 if not inst_config.secondary_nodes:
2710 i_non_redundant.append(instance)
2712 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2713 instance, "instance has multiple secondary nodes: %s",
2714 utils.CommaJoin(inst_config.secondary_nodes),
2715 code=self.ETYPE_WARNING)
2717 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2718 pnode = inst_config.primary_node
2719 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720 instance_groups = {}
2722 for node in instance_nodes:
2723 instance_groups.setdefault(self.all_node_info[node].group,
2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728 # Sort so that we always list the primary node first.
2729 for group, nodes in sorted(instance_groups.items(),
2730 key=lambda (_, nodes): pnode in nodes,
2733 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2734 instance, "instance has primary and secondary nodes in"
2735 " different groups: %s", utils.CommaJoin(pretty_list),
2736 code=self.ETYPE_WARNING)
2738 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2739 i_non_a_balanced.append(instance)
2741 for snode in inst_config.secondary_nodes:
2742 s_img = node_image[snode]
2743 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2744 "instance %s, connection to secondary node failed", instance)
2747 inst_nodes_offline.append(snode)
2749 # warn that the instance lives on offline nodes
2750 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2751 "instance has offline secondary node(s) %s",
2752 utils.CommaJoin(inst_nodes_offline))
2753 # ... or ghost/non-vm_capable nodes
2754 for node in inst_config.all_nodes:
2755 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2756 "instance lives on ghost node %s", node)
2757 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2758 instance, "instance lives on non-vm_capable node %s", node)
2760 feedback_fn("* Verifying orphan volumes")
2761 reserved = utils.FieldSet(*cluster.reserved_lvs)
2763 # We will get spurious "unknown volume" warnings if any node of this group
2764 # is secondary for an instance whose primary is in another group. To avoid
2765 # them, we find these instances and add their volumes to node_vol_should.
2766 for inst in self.all_inst_info.values():
2767 for secondary in inst.secondary_nodes:
2768 if (secondary in self.my_node_info
2769 and inst.name not in self.my_inst_info):
2770 inst.MapLVsByNode(node_vol_should)
2773 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2775 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2776 feedback_fn("* Verifying N+1 Memory redundancy")
2777 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2779 feedback_fn("* Other Notes")
2781 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2782 % len(i_non_redundant))
2784 if i_non_a_balanced:
2785 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2786 % len(i_non_a_balanced))
2789 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2792 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2796 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2797 """Analyze the post-hooks' result
2799 This method analyses the hook result, handles it, and sends some
2800 nicely-formatted feedback back to the user.
2802 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2803 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2804 @param hooks_results: the results of the multi-node hooks rpc call
2805 @param feedback_fn: function used to send feedback back to the caller
2806 @param lu_result: previous Exec result
2807 @return: the new Exec result, based on the previous result
2811 # We only really run POST phase hooks, and are only interested in
2813 if phase == constants.HOOKS_PHASE_POST:
2814 # Used to change hooks' output to proper indentation
2815 feedback_fn("* Hooks Results")
2816 assert hooks_results, "invalid result from hooks"
2818 for node_name in hooks_results:
2819 res = hooks_results[node_name]
2821 test = msg and not res.offline
2822 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2823 "Communication failure in hooks execution: %s", msg)
2824 if res.offline or msg:
2825 # No need to investigate payload if node is offline or gave an error.
2826 # override manually lu_result here as _ErrorIf only
2827 # overrides self.bad
2830 for script, hkr, output in res.payload:
2831 test = hkr == constants.HKR_FAIL
2832 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2833 "Script %s failed, output:", script)
2835 output = self._HOOKS_INDENT_RE.sub(' ', output)
2836 feedback_fn("%s" % output)
2842 class LUClusterVerifyDisks(NoHooksLU):
2843 """Verifies the cluster disks status.
2848 def ExpandNames(self):
2849 self.needed_locks = {
2850 locking.LEVEL_NODE: locking.ALL_SET,
2851 locking.LEVEL_INSTANCE: locking.ALL_SET,
2853 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2855 def Exec(self, feedback_fn):
2856 """Verify integrity of cluster disks.
2858 @rtype: tuple of three items
2859 @return: a tuple of (dict of node-to-node_error, list of instances
2860 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2864 result = res_nodes, res_instances, res_missing = {}, [], {}
2866 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2867 instances = self.cfg.GetAllInstancesInfo().values()
2870 for inst in instances:
2872 if not inst.admin_up:
2874 inst.MapLVsByNode(inst_lvs)
2875 # transform {iname: {node: [vol, ...]}} into {(node, vol): iname}
2876 for node, vol_list in inst_lvs.iteritems():
2877 for vol in vol_list:
2878 nv_dict[(node, vol)] = inst
2883 node_lvs = self.rpc.call_lv_list(nodes, [])
2884 for node, node_res in node_lvs.items():
2885 if node_res.offline:
2887 msg = node_res.fail_msg
2889 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2890 res_nodes[node] = msg
2893 lvs = node_res.payload
2894 for lv_name, (_, _, lv_online) in lvs.items():
2895 inst = nv_dict.pop((node, lv_name), None)
2896 if (not lv_online and inst is not None
2897 and inst.name not in res_instances):
2898 res_instances.append(inst.name)
2900 # any leftover items in nv_dict are missing LVs, let's arrange the
2902 for key, inst in nv_dict.iteritems():
2903 if inst.name not in res_missing:
2904 res_missing[inst.name] = []
2905 res_missing[inst.name].append(key)
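# A minimal, self-contained sketch (not used by any LU) of the mapping
# inversion performed inline in LUClusterVerifyDisks.Exec above; the helper
# name is illustrative only.
def _ExampleInvertLvMap(lvs_by_instance):
  """Turns {iname: {node: [vol, ...]}} into {(node, vol): iname}.

  Provided purely as documentation of the transformation commented on in
  LUClusterVerifyDisks.Exec.

  """
  nv_dict = {}
  for iname, node_vols in lvs_by_instance.items():
    for node, vol_list in node_vols.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict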
2910 class LUClusterRepairDiskSizes(NoHooksLU):
2911 """Verifies the cluster disks sizes.
2916 def ExpandNames(self):
2917 if self.op.instances:
2918 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2919 self.needed_locks = {
2920 locking.LEVEL_NODE: [],
2921 locking.LEVEL_INSTANCE: self.wanted_names,
2923 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2925 self.wanted_names = None
2926 self.needed_locks = {
2927 locking.LEVEL_NODE: locking.ALL_SET,
2928 locking.LEVEL_INSTANCE: locking.ALL_SET,
2930 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2932 def DeclareLocks(self, level):
2933 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2934 self._LockInstancesNodes(primary_only=True)
2936 def CheckPrereq(self):
2937 """Check prerequisites.
2939 This only checks the optional instance list against the existing names.
2942 if self.wanted_names is None:
2943 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2945 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2946 in self.wanted_names]
2948 def _EnsureChildSizes(self, disk):
2949 """Ensure children of the disk have the needed disk size.
2951 This is valid mainly for DRBD8 and fixes an issue where the
2952 children have a smaller disk size than the parent.
2954 @param disk: an L{ganeti.objects.Disk} object
2957 if disk.dev_type == constants.LD_DRBD8:
2958 assert disk.children, "Empty children for DRBD8?"
2959 fchild = disk.children[0]
2960 mismatch = fchild.size < disk.size
2962 self.LogInfo("Child disk has size %d, parent %d, fixing",
2963 fchild.size, disk.size)
2964 fchild.size = disk.size
2966 # and we recurse on this child only, not on the metadev
2967 return self._EnsureChildSizes(fchild) or mismatch
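# Illustrative note: for a DRBD8 disk the children are the data device and
# the meta device; only children[0] (the data device) is checked and grown
# here, which is why the recursion above deliberately skips the metadev.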
2971 def Exec(self, feedback_fn):
2972 """Verify the size of cluster disks.
2975 # TODO: check child disks too
2976 # TODO: check differences in size between primary/secondary nodes
2978 for instance in self.wanted_instances:
2979 pnode = instance.primary_node
2980 if pnode not in per_node_disks:
2981 per_node_disks[pnode] = []
2982 for idx, disk in enumerate(instance.disks):
2983 per_node_disks[pnode].append((instance, idx, disk))
2986 for node, dskl in per_node_disks.items():
2987 newl = [v[2].Copy() for v in dskl]
2989 self.cfg.SetDiskID(dsk, node)
2990 result = self.rpc.call_blockdev_getsize(node, newl)
2992 self.LogWarning("Failure in blockdev_getsize call to node"
2993 " %s, ignoring", node)
2995 if len(result.payload) != len(dskl):
2996 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2997 " result.payload=%s", node, len(dskl), result.payload)
2998 self.LogWarning("Invalid result from node %s, ignoring node results",
3001 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3003 self.LogWarning("Disk %d of instance %s did not return size"
3004 " information, ignoring", idx, instance.name)
3006 if not isinstance(size, (int, long)):
3007 self.LogWarning("Disk %d of instance %s did not return valid"
3008 " size information, ignoring", idx, instance.name)
3011 if size != disk.size:
3012 self.LogInfo("Disk %d of instance %s has mismatched size,"
3013 " correcting: recorded %d, actual %d", idx,
3014 instance.name, disk.size, size)
3016 self.cfg.Update(instance, feedback_fn)
3017 changed.append((instance.name, idx, size))
3018 if self._EnsureChildSizes(disk):
3019 self.cfg.Update(instance, feedback_fn)
3020 changed.append((instance.name, idx, disk.size))
3024 class LUClusterRename(LogicalUnit):
3025 """Rename the cluster.
3028 HPATH = "cluster-rename"
3029 HTYPE = constants.HTYPE_CLUSTER
3031 def BuildHooksEnv(self):
3036 "OP_TARGET": self.cfg.GetClusterName(),
3037 "NEW_NAME": self.op.name,
3040 def BuildHooksNodes(self):
3041 """Build hooks nodes.
3044 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3046 def CheckPrereq(self):
3047 """Verify that the passed name is a valid one.
3050 hostname = netutils.GetHostname(name=self.op.name,
3051 family=self.cfg.GetPrimaryIPFamily())
3053 new_name = hostname.name
3054 self.ip = new_ip = hostname.ip
3055 old_name = self.cfg.GetClusterName()
3056 old_ip = self.cfg.GetMasterIP()
3057 if new_name == old_name and new_ip == old_ip:
3058 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3059 " cluster has changed",
3061 if new_ip != old_ip:
3062 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3063 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3064 " reachable on the network" %
3065 new_ip, errors.ECODE_NOTUNIQUE)
3067 self.op.name = new_name
3069 def Exec(self, feedback_fn):
3070 """Rename the cluster.
3073 clustername = self.op.name
3076 # shutdown the master IP
3077 master = self.cfg.GetMasterNode()
3078 result = self.rpc.call_node_stop_master(master, False)
3079 result.Raise("Could not disable the master role")
3082 cluster = self.cfg.GetClusterInfo()
3083 cluster.cluster_name = clustername
3084 cluster.master_ip = ip
3085 self.cfg.Update(cluster, feedback_fn)
3087 # update the known hosts file
3088 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3089 node_list = self.cfg.GetOnlineNodeList()
3091 node_list.remove(master)
3094 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3096 result = self.rpc.call_node_start_master(master, False, False)
3097 msg = result.fail_msg
3099 self.LogWarning("Could not re-enable the master role on"
3100 " the master, please restart manually: %s", msg)
3105 class LUClusterSetParams(LogicalUnit):
3106 """Change the parameters of the cluster.
3109 HPATH = "cluster-modify"
3110 HTYPE = constants.HTYPE_CLUSTER
3113 def CheckArguments(self):
3117 if self.op.uid_pool:
3118 uidpool.CheckUidPool(self.op.uid_pool)
3120 if self.op.add_uids:
3121 uidpool.CheckUidPool(self.op.add_uids)
3123 if self.op.remove_uids:
3124 uidpool.CheckUidPool(self.op.remove_uids)
3126 def ExpandNames(self):
3127 # FIXME: in the future maybe other cluster params won't require checking on
3128 # all nodes to be modified.
3129 self.needed_locks = {
3130 locking.LEVEL_NODE: locking.ALL_SET,
3132 self.share_locks[locking.LEVEL_NODE] = 1
3134 def BuildHooksEnv(self):
3139 "OP_TARGET": self.cfg.GetClusterName(),
3140 "NEW_VG_NAME": self.op.vg_name,
3143 def BuildHooksNodes(self):
3144 """Build hooks nodes.
3147 mn = self.cfg.GetMasterNode()
3150 def CheckPrereq(self):
3151 """Check prerequisites.
3153 This checks that the given parameters do not conflict and
3154 that the given volume group is valid.
3157 if self.op.vg_name is not None and not self.op.vg_name:
3158 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3159 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3160 " instances exist", errors.ECODE_INVAL)
3162 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3163 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3164 raise errors.OpPrereqError("Cannot disable drbd helper while"
3165 " drbd-based instances exist",
3168 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3170 # if vg_name not None, checks given volume group on all nodes
3172 vglist = self.rpc.call_vg_list(node_list)
3173 for node in node_list:
3174 msg = vglist[node].fail_msg
3176 # ignoring down node
3177 self.LogWarning("Error while gathering data on node %s"
3178 " (ignoring node): %s", node, msg)
3180 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3182 constants.MIN_VG_SIZE)
3184 raise errors.OpPrereqError("Error on node '%s': %s" %
3185 (node, vgstatus), errors.ECODE_ENVIRON)
3187 if self.op.drbd_helper:
3188 # checks given drbd helper on all nodes
3189 helpers = self.rpc.call_drbd_helper(node_list)
3190 for node in node_list:
3191 ninfo = self.cfg.GetNodeInfo(node)
3193 self.LogInfo("Not checking drbd helper on offline node %s", node)
3195 msg = helpers[node].fail_msg
3197 raise errors.OpPrereqError("Error checking drbd helper on node"
3198 " '%s': %s" % (node, msg),
3199 errors.ECODE_ENVIRON)
3200 node_helper = helpers[node].payload
3201 if node_helper != self.op.drbd_helper:
3202 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3203 (node, node_helper), errors.ECODE_ENVIRON)
3205 self.cluster = cluster = self.cfg.GetClusterInfo()
3206 # validate params changes
3207 if self.op.beparams:
3208 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3209 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3211 if self.op.ndparams:
3212 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3213 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3215 # TODO: we need a more general way to handle resetting
3216 # cluster-level parameters to default values
3217 if self.new_ndparams["oob_program"] == "":
3218 self.new_ndparams["oob_program"] = \
3219 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3221 if self.op.nicparams:
3222 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3223 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3224 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3227 # check all instances for consistency
3228 for instance in self.cfg.GetAllInstancesInfo().values():
3229 for nic_idx, nic in enumerate(instance.nics):
3230 params_copy = copy.deepcopy(nic.nicparams)
3231 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3233 # check parameter syntax
3235 objects.NIC.CheckParameterSyntax(params_filled)
3236 except errors.ConfigurationError, err:
3237 nic_errors.append("Instance %s, nic/%d: %s" %
3238 (instance.name, nic_idx, err))
3240 # if we're moving instances to routed, check that they have an ip
3241 target_mode = params_filled[constants.NIC_MODE]
3242 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3243 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3244 " address" % (instance.name, nic_idx))
3246 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3247 "\n".join(nic_errors))
3249 # hypervisor list/parameters
3250 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3251 if self.op.hvparams:
3252 for hv_name, hv_dict in self.op.hvparams.items():
3253 if hv_name not in self.new_hvparams:
3254 self.new_hvparams[hv_name] = hv_dict
3256 self.new_hvparams[hv_name].update(hv_dict)
3258 # os hypervisor parameters
3259 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3261 for os_name, hvs in self.op.os_hvp.items():
3262 if os_name not in self.new_os_hvp:
3263 self.new_os_hvp[os_name] = hvs
3265 for hv_name, hv_dict in hvs.items():
3266 if hv_name not in self.new_os_hvp[os_name]:
3267 self.new_os_hvp[os_name][hv_name] = hv_dict
3269 self.new_os_hvp[os_name][hv_name].update(hv_dict)
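# Illustrative example (hypothetical values): with cluster.os_hvp containing
#   {"debian": {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}}
# and self.op.os_hvp set to
#   {"debian": {"xen-pvm": {"root_path": "/dev/xvda1"}}}
# the merged new_os_hvp keeps both parameters for the "debian"/"xen-pvm"
# pair, i.e. per-OS hypervisor overrides are merged key by key rather than
# replaced wholesale.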
3272 self.new_osp = objects.FillDict(cluster.osparams, {})
3273 if self.op.osparams:
3274 for os_name, osp in self.op.osparams.items():
3275 if os_name not in self.new_osp:
3276 self.new_osp[os_name] = {}
3278 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3281 if not self.new_osp[os_name]:
3282 # we removed all parameters
3283 del self.new_osp[os_name]
3285 # check the parameter validity (remote check)
3286 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3287 os_name, self.new_osp[os_name])
3289 # changes to the hypervisor list
3290 if self.op.enabled_hypervisors is not None:
3291 self.hv_list = self.op.enabled_hypervisors
3292 for hv in self.hv_list:
3293 # if the hypervisor doesn't already exist in the cluster
3294 # hvparams, we initialize it to empty, and then (in both
3295 # cases) we make sure to fill the defaults, as we might not
3296 # have a complete defaults list if the hypervisor wasn't
3298 if hv not in new_hvp:
3300 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3301 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3303 self.hv_list = cluster.enabled_hypervisors
3305 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3306 # either the enabled list has changed, or the parameters have, validate
3307 for hv_name, hv_params in self.new_hvparams.items():
3308 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3309 (self.op.enabled_hypervisors and
3310 hv_name in self.op.enabled_hypervisors)):
3311 # either this is a new hypervisor, or its parameters have changed
3312 hv_class = hypervisor.GetHypervisor(hv_name)
3313 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3314 hv_class.CheckParameterSyntax(hv_params)
3315 _CheckHVParams(self, node_list, hv_name, hv_params)
3318 # no need to check any newly-enabled hypervisors, since the
3319 # defaults have already been checked in the above code-block
3320 for os_name, os_hvp in self.new_os_hvp.items():
3321 for hv_name, hv_params in os_hvp.items():
3322 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3323 # we need to fill in the new os_hvp on top of the actual hv_p
3324 cluster_defaults = self.new_hvparams.get(hv_name, {})
3325 new_osp = objects.FillDict(cluster_defaults, hv_params)
3326 hv_class = hypervisor.GetHypervisor(hv_name)
3327 hv_class.CheckParameterSyntax(new_osp)
3328 _CheckHVParams(self, node_list, hv_name, new_osp)
3330 if self.op.default_iallocator:
3331 alloc_script = utils.FindFile(self.op.default_iallocator,
3332 constants.IALLOCATOR_SEARCH_PATH,
3334 if alloc_script is None:
3335 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3336 " specified" % self.op.default_iallocator,
3339 def Exec(self, feedback_fn):
3340 """Change the parameters of the cluster.
3343 if self.op.vg_name is not None:
3344 new_volume = self.op.vg_name
3347 if new_volume != self.cfg.GetVGName():
3348 self.cfg.SetVGName(new_volume)
3350 feedback_fn("Cluster LVM configuration already in desired"
3351 " state, not changing")
3352 if self.op.drbd_helper is not None:
3353 new_helper = self.op.drbd_helper
3356 if new_helper != self.cfg.GetDRBDHelper():
3357 self.cfg.SetDRBDHelper(new_helper)
3359 feedback_fn("Cluster DRBD helper already in desired state,"
3361 if self.op.hvparams:
3362 self.cluster.hvparams = self.new_hvparams
3364 self.cluster.os_hvp = self.new_os_hvp
3365 if self.op.enabled_hypervisors is not None:
3366 self.cluster.hvparams = self.new_hvparams
3367 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3368 if self.op.beparams:
3369 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3370 if self.op.nicparams:
3371 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3372 if self.op.osparams:
3373 self.cluster.osparams = self.new_osp
3374 if self.op.ndparams:
3375 self.cluster.ndparams = self.new_ndparams
3377 if self.op.candidate_pool_size is not None:
3378 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3379 # we need to update the pool size here, otherwise the save will fail
3380 _AdjustCandidatePool(self, [])
3382 if self.op.maintain_node_health is not None:
3383 self.cluster.maintain_node_health = self.op.maintain_node_health
3385 if self.op.prealloc_wipe_disks is not None:
3386 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3388 if self.op.add_uids is not None:
3389 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3391 if self.op.remove_uids is not None:
3392 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3394 if self.op.uid_pool is not None:
3395 self.cluster.uid_pool = self.op.uid_pool
3397 if self.op.default_iallocator is not None:
3398 self.cluster.default_iallocator = self.op.default_iallocator
3400 if self.op.reserved_lvs is not None:
3401 self.cluster.reserved_lvs = self.op.reserved_lvs
3403 def helper_os(aname, mods, desc):
3405 lst = getattr(self.cluster, aname)
3406 for key, val in mods:
3407 if key == constants.DDM_ADD:
3409 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3412 elif key == constants.DDM_REMOVE:
3416 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3418 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3420 if self.op.hidden_os:
3421 helper_os("hidden_os", self.op.hidden_os, "hidden")
3423 if self.op.blacklisted_os:
3424 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3426 if self.op.master_netdev:
3427 master = self.cfg.GetMasterNode()
3428 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3429 self.cluster.master_netdev)
3430 result = self.rpc.call_node_stop_master(master, False)
3431 result.Raise("Could not disable the master ip")
3432 feedback_fn("Changing master_netdev from %s to %s" %
3433 (self.cluster.master_netdev, self.op.master_netdev))
3434 self.cluster.master_netdev = self.op.master_netdev
3436 self.cfg.Update(self.cluster, feedback_fn)
3438 if self.op.master_netdev:
3439 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3440 self.op.master_netdev)
3441 result = self.rpc.call_node_start_master(master, False, False)
3443 self.LogWarning("Could not re-enable the master ip on"
3444 " the master, please restart manually: %s",
3448 def _UploadHelper(lu, nodes, fname):
3449 """Helper for uploading a file and showing warnings.
3452 if os.path.exists(fname):
3453 result = lu.rpc.call_upload_file(nodes, fname)
3454 for to_node, to_result in result.items():
3455 msg = to_result.fail_msg
3457 msg = ("Copy of file %s to node %s failed: %s" %
3458 (fname, to_node, msg))
3459 lu.proc.LogWarning(msg)
3462 def _ComputeAncillaryFiles(cluster, redist):
3463 """Compute files external to Ganeti which need to be consistent.
3465 @type redist: boolean
3466 @param redist: Whether to include files which need to be redistributed
3469 # Compute files for all nodes
3471 constants.SSH_KNOWN_HOSTS_FILE,
3472 constants.CONFD_HMAC_KEY,
3473 constants.CLUSTER_DOMAIN_SECRET_FILE,
3477 files_all.update(constants.ALL_CERT_FILES)
3478 files_all.update(ssconf.SimpleStore().GetFileList())
3480 if cluster.modify_etc_hosts:
3481 files_all.add(constants.ETC_HOSTS)
3483 # Files which must either exist on all nodes or on none
3484 files_all_opt = set([
3485 constants.RAPI_USERS_FILE,
3488 # Files which should only be on master candidates
3491 files_mc.add(constants.CLUSTER_CONF_FILE)
3493 # Files which should only be on VM-capable nodes
3494 files_vm = set(filename
3495 for hv_name in cluster.enabled_hypervisors
3496 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3498 # Filenames must be unique
3499 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3500 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3501 "Found file listed in more than one file list"
3503 return (files_all, files_all_opt, files_mc, files_vm)
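# A minimal sketch (mirroring the file2nodefn construction in _VerifyFiles,
# names illustrative only) of how the four sets returned above are turned
# into a per-file "which nodes should have it" predicate.
def _ExampleFileToNodePredicates(files_all, files_all_opt, files_mc, files_vm,
                                 master_name):
  """Maps each filename to a node filter callable, or None for "all nodes".

  """
  return dict((filename, fn)
              for (files, fn) in [(files_all, None),
                                  (files_all_opt, None),
                                  (files_mc,
                                   lambda node: (node.master_candidate or
                                                 node.name == master_name)),
                                  (files_vm, lambda node: node.vm_capable)]
              for filename in files)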
3506 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3507 """Distribute additional files which are part of the cluster configuration.
3509 ConfigWriter takes care of distributing the config and ssconf files, but
3510 there are more files which should be distributed to all nodes. This function
3511 makes sure those are copied.
3513 @param lu: calling logical unit
3514 @param additional_nodes: list of nodes not in the config to distribute to
3515 @type additional_vm: boolean
3516 @param additional_vm: whether the additional nodes are vm-capable or not
3519 # Gather target nodes
3520 cluster = lu.cfg.GetClusterInfo()
3521 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3523 online_nodes = lu.cfg.GetOnlineNodeList()
3524 vm_nodes = lu.cfg.GetVmCapableNodeList()
3526 if additional_nodes is not None:
3527 online_nodes.extend(additional_nodes)
3529 vm_nodes.extend(additional_nodes)
3531 # Never distribute to master node
3532 for nodelist in [online_nodes, vm_nodes]:
3533 if master_info.name in nodelist:
3534 nodelist.remove(master_info.name)
3537 (files_all, files_all_opt, files_mc, files_vm) = \
3538 _ComputeAncillaryFiles(cluster, True)
3540 # Never re-distribute configuration file from here
3541 assert not (constants.CLUSTER_CONF_FILE in files_all or
3542 constants.CLUSTER_CONF_FILE in files_vm)
3543 assert not files_mc, "Master candidates not handled in this function"
3546 (online_nodes, files_all),
3547 (online_nodes, files_all_opt),
3548 (vm_nodes, files_vm),
3552 for (node_list, files) in filemap:
3554 _UploadHelper(lu, node_list, fname)
3557 class LUClusterRedistConf(NoHooksLU):
3558 """Force the redistribution of cluster configuration.
3560 This is a very simple LU.
3565 def ExpandNames(self):
3566 self.needed_locks = {
3567 locking.LEVEL_NODE: locking.ALL_SET,
3569 self.share_locks[locking.LEVEL_NODE] = 1
3571 def Exec(self, feedback_fn):
3572 """Redistribute the configuration.
3575 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3576 _RedistributeAncillaryFiles(self)
3579 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3580 """Sleep and poll for an instance's disk to sync.
3583 if not instance.disks or disks is not None and not disks:
3586 disks = _ExpandCheckDisks(instance, disks)
3589 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3591 node = instance.primary_node
3594 lu.cfg.SetDiskID(dev, node)
3596 # TODO: Convert to utils.Retry
3599 degr_retries = 10 # in seconds, as we sleep 1 second each time
3603 cumul_degraded = False
3604 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3605 msg = rstats.fail_msg
3607 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3610 raise errors.RemoteError("Can't contact node %s for mirror data,"
3611 " aborting." % node)
3614 rstats = rstats.payload
3616 for i, mstat in enumerate(rstats):
3618 lu.LogWarning("Can't compute data for node %s/%s",
3619 node, disks[i].iv_name)
3622 cumul_degraded = (cumul_degraded or
3623 (mstat.is_degraded and mstat.sync_percent is None))
3624 if mstat.sync_percent is not None:
3626 if mstat.estimated_time is not None:
3627 rem_time = ("%s remaining (estimated)" %
3628 utils.FormatSeconds(mstat.estimated_time))
3629 max_time = mstat.estimated_time
3631 rem_time = "no time estimate"
3632 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3633 (disks[i].iv_name, mstat.sync_percent, rem_time))
3635 # if we're done but degraded, let's do a few small retries, to
3636 # make sure we see a stable and not transient situation; therefore
3637 # we force restart of the loop
3638 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3639 logging.info("Degraded disks found, %d retries left", degr_retries)
3647 time.sleep(min(60, max_time))
3650 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3651 return not cumul_degraded
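# A simplified, standalone sketch of the sleep decision used by _WaitForSync
# above (an assumption for illustration: it takes the largest estimate
# instead of the last one seen, and never sleeps longer than a minute).
def _ExampleSyncSleepTime(sync_estimates):
  """Returns the poll interval given per-disk estimated_time values (sketch).

  @param sync_estimates: iterable of estimated seconds remaining, possibly
      containing None for disks without an estimate

  """
  max_time = 0
  for estimated in sync_estimates:
    if estimated is not None:
      max_time = max(max_time, estimated)
  return min(60, max_time)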
3654 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3655 """Check that mirrors are not degraded.
3657 The ldisk parameter, if True, will change the test from the
3658 is_degraded attribute (which represents overall non-ok status for
3659 the device(s)) to the ldisk (representing the local storage status).
3662 lu.cfg.SetDiskID(dev, node)
3666 if on_primary or dev.AssembleOnSecondary():
3667 rstats = lu.rpc.call_blockdev_find(node, dev)
3668 msg = rstats.fail_msg
3670 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3672 elif not rstats.payload:
3673 lu.LogWarning("Can't find disk on node %s", node)
3677 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3679 result = result and not rstats.payload.is_degraded
3682 for child in dev.children:
3683 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
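# A self-contained sketch (using a stand-in class, not ganeti.objects.Disk) of
# the recursive walk done by _CheckDiskConsistency above: the overall result
# is simply the logical AND of the per-device status over the disk tree.
class _ExampleDisk(object):
  """Stand-in disk node for illustration only."""
  def __init__(self, healthy, children=None):
    self.healthy = healthy
    self.children = children or []


def _ExampleTreeConsistent(disk):
  """Returns True only if the disk and all its descendants are healthy."""
  result = disk.healthy
  for child in disk.children:
    result = result and _ExampleTreeConsistent(child)
  return result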
3688 class LUOobCommand(NoHooksLU):
3689 """Logical unit for OOB handling.
3693 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3695 def ExpandNames(self):
3696 """Gather locks we need.
3699 if self.op.node_names:
3700 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3701 lock_names = self.op.node_names
3703 lock_names = locking.ALL_SET
3705 self.needed_locks = {
3706 locking.LEVEL_NODE: lock_names,
3709 def CheckPrereq(self):
3710 """Check prerequisites.
3713 - the node exists in the configuration
3716 Any errors are signaled by raising errors.OpPrereqError.
3720 self.master_node = self.cfg.GetMasterNode()
3722 assert self.op.power_delay >= 0.0
3724 if self.op.node_names:
3725 if (self.op.command in self._SKIP_MASTER and
3726 self.master_node in self.op.node_names):
3727 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3728 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3730 if master_oob_handler:
3731 additional_text = ("run '%s %s %s' if you want to operate on the"
3732 " master regardless") % (master_oob_handler,
3736 additional_text = "it does not support out-of-band operations"
3738 raise errors.OpPrereqError(("Operating on the master node %s is not"
3739 " allowed for %s; %s") %
3740 (self.master_node, self.op.command,
3741 additional_text), errors.ECODE_INVAL)
else:
3743 self.op.node_names = self.cfg.GetNodeList()
3744 if self.op.command in self._SKIP_MASTER:
3745 self.op.node_names.remove(self.master_node)
3747 if self.op.command in self._SKIP_MASTER:
3748 assert self.master_node not in self.op.node_names
3750 for node_name in self.op.node_names:
3751 node = self.cfg.GetNodeInfo(node_name)
3754 raise errors.OpPrereqError("Node %s not found" % node_name,
3757 self.nodes.append(node)
3759 if (not self.op.ignore_status and
3760 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3761 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3762 " not marked offline") % node_name,
3765 def Exec(self, feedback_fn):
3766 """Execute OOB and return result if we expect any.
3769 master_node = self.master_node
3772 for idx, node in enumerate(utils.NiceSort(self.nodes,
3773 key=lambda node: node.name)):
3774 node_entry = [(constants.RS_NORMAL, node.name)]
3775 ret.append(node_entry)
3777 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
3780 node_entry.append((constants.RS_UNAVAIL, None))
continue
3783 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3784 self.op.command, oob_program, node.name)
3785 result = self.rpc.call_run_oob(master_node, oob_program,
3786 self.op.command, node.name,
3790 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3791 node.name, result.fail_msg)
3792 node_entry.append((constants.RS_NODATA, None))
3795 self._CheckPayload(result)
3796 except errors.OpExecError, err:
3797 self.LogWarning("Payload returned by node '%s' is not valid: %s",
node.name, err)
3799 node_entry.append((constants.RS_NODATA, None))
3801 if self.op.command == constants.OOB_HEALTH:
3802 # For health we should log important events
3803 for item, status in result.payload:
3804 if status in [constants.OOB_STATUS_WARNING,
3805 constants.OOB_STATUS_CRITICAL]:
3806 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3807 item, node.name, status)
3809 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
3811 elif self.op.command == constants.OOB_POWER_OFF:
3812 node.powered = False
3813 elif self.op.command == constants.OOB_POWER_STATUS:
3814 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3815 if powered != node.powered:
3816 logging.warning(("Recorded power state (%s) of node '%s' does not"
3817 " match actual power state (%s)"), node.powered,
3820 # For configuration changing commands we should update the node
3821 if self.op.command in (constants.OOB_POWER_ON,
3822 constants.OOB_POWER_OFF):
3823 self.cfg.Update(node, feedback_fn)
3825 node_entry.append((constants.RS_NORMAL, result.payload))
3827 if (self.op.command == constants.OOB_POWER_ON and
3828 idx < len(self.nodes) - 1):
3829 time.sleep(self.op.power_delay)
3833 def _CheckPayload(self, result):
3834 """Checks if the payload is valid.
3836 @param result: RPC result
3837 @raises errors.OpExecError: If payload is not valid
errs = []
3841 if self.op.command == constants.OOB_HEALTH:
3842 if not isinstance(result.payload, list):
3843 errs.append("command 'health' is expected to return a list but got %s" %
3844 type(result.payload))
else:
3846 for item, status in result.payload:
3847 if status not in constants.OOB_STATUSES:
3848 errs.append("health item '%s' has invalid status '%s'" %
(item, status))
3851 if self.op.command == constants.OOB_POWER_STATUS:
3852 if not isinstance(result.payload, dict):
3853 errs.append("power-status is expected to return a dict but got %s" %
3854 type(result.payload))
3856 if self.op.command in [
3857 constants.OOB_POWER_ON,
3858 constants.OOB_POWER_OFF,
3859 constants.OOB_POWER_CYCLE,
3861 if result.payload is not None:
3862 errs.append("%s is expected to not return payload but got '%s'" %
3863 (self.op.command, result.payload))
if errs:
3866 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3867 utils.CommaJoin(errs))
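# Note added for clarity (not part of the original file): LUOobCommand.Exec
# returns one entry per node, in name order; each entry is a list of
# (status, data) tuples whose status is constants.RS_NORMAL,
# constants.RS_NODATA or constants.RS_UNAVAIL.  An illustrative result for a
# "power-status" run against two nodes could look like (hypothetical
# values):
#
#   [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, {"powered": True})],
#    [(constants.RS_NORMAL, "node2"), (constants.RS_NODATA, None)]]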
3869 class _OsQuery(_QueryBase):
3870 FIELDS = query.OS_FIELDS
3872 def ExpandNames(self, lu):
3873 # Lock all nodes in shared mode
3874 # Temporary removal of locks, should be reverted later
3875 # TODO: reintroduce locks when they are lighter-weight
3876 lu.needed_locks = {}
3877 #self.share_locks[locking.LEVEL_NODE] = 1
3878 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3880 # The following variables interact with _QueryBase._GetNames
3882 self.wanted = self.names
3884 self.wanted = locking.ALL_SET
3886 self.do_locking = self.use_locking
3888 def DeclareLocks(self, lu, level):
3892 def _DiagnoseByOS(rlist):
3893 """Remaps a per-node return list into an a per-os per-node dictionary
3895 @param rlist: a map with node names as keys and OS objects as values
3898 @return: a dictionary with osnames as keys and as value another
3899 map, with nodes as keys and tuples of (path, status, diagnose,
3900 variants, parameters, api_versions) as values, eg::
3902 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3903 (/srv/..., False, "invalid api")],
3904 "node2": [(/srv/..., True, "", [], [])]}
all_os = {}
3909 # we build here the list of nodes that didn't fail the RPC (at RPC
3910 # level), so that nodes with a non-responding node daemon don't
3911 # make all OSes invalid
3912 good_nodes = [node_name for node_name in rlist
3913 if not rlist[node_name].fail_msg]
3914 for node_name, nr in rlist.items():
3915 if nr.fail_msg or not nr.payload:
continue
3917 for (name, path, status, diagnose, variants,
3918 params, api_versions) in nr.payload:
3919 if name not in all_os:
3920 # build a list of nodes for this os containing empty lists
3921 # for each node in node_list
all_os[name] = {}
3923 for nname in good_nodes:
3924 all_os[name][nname] = []
3925 # convert params from [name, help] to (name, help)
3926 params = [tuple(v) for v in params]
3927 all_os[name][node_name].append((path, status, diagnose,
3928 variants, params, api_versions))
return all_os
3931 def _GetQueryData(self, lu):
3932 """Computes the list of nodes and their attributes.
3935 # Locking is not used
3936 assert not (compat.any(lu.glm.is_owned(level)
3937 for level in locking.LEVELS
3938 if level != locking.LEVEL_CLUSTER) or
3939 self.do_locking or self.use_locking)
3941 valid_nodes = [node.name
3942 for node in lu.cfg.GetAllNodesInfo().values()
3943 if not node.offline and node.vm_capable]
3944 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3945 cluster = lu.cfg.GetClusterInfo()
data = {}
3949 for (os_name, os_data) in pol.items():
3950 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3951 hidden=(os_name in cluster.hidden_os),
3952 blacklisted=(os_name in cluster.blacklisted_os))
variants = set()
parameters = set()
3956 api_versions = set()
3958 for idx, osl in enumerate(os_data.values()):
3959 info.valid = bool(info.valid and osl and osl[0][1])
if not info.valid:
break
3963 (node_variants, node_params, node_api) = osl[0][3:6]
if idx == 0:
# first entry
3966 variants.update(node_variants)
3967 parameters.update(node_params)
3968 api_versions.update(node_api)
else:
3970 # Filter out inconsistent values
3971 variants.intersection_update(node_variants)
3972 parameters.intersection_update(node_params)
3973 api_versions.intersection_update(node_api)
3975 info.variants = list(variants)
3976 info.parameters = list(parameters)
3977 info.api_versions = list(api_versions)
3979 data[os_name] = info
3981 # Prepare data in requested order
3982 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
if name in data]
3986 class LUOsDiagnose(NoHooksLU):
3987 """Logical unit for OS diagnose/query.
3993 def _BuildFilter(fields, names):
3994 """Builds a filter for querying OSes.
3997 name_filter = qlang.MakeSimpleFilter("name", names)
3999 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4000 # respective field is not requested
4001 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4002 for fname in ["hidden", "blacklisted"]
4003 if fname not in fields]
4004 if "valid" not in fields:
4005 status_filter.append([qlang.OP_TRUE, "valid"])
if status_filter:
4008 status_filter.insert(0, qlang.OP_AND)
else:
4010 status_filter = None
4012 if name_filter and status_filter:
4013 return [qlang.OP_AND, name_filter, status_filter]
elif name_filter:
return name_filter
else:
4017 return status_filter
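# Example added for illustration (derived from the logic above): when none
# of "hidden", "blacklisted" or "valid" is among the requested fields, the
# status filter has the shape
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# and, when names were requested as well, it is combined with the name
# filter in an outer [qlang.OP_AND, name_filter, status_filter].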
4019 def CheckArguments(self):
4020 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4021 self.op.output_fields, False)
4023 def ExpandNames(self):
4024 self.oq.ExpandNames(self)
4026 def Exec(self, feedback_fn):
4027 return self.oq.OldStyleQuery(self)
4030 class LUNodeRemove(LogicalUnit):
4031 """Logical unit for removing a node.
4034 HPATH = "node-remove"
4035 HTYPE = constants.HTYPE_NODE
4037 def BuildHooksEnv(self):
4040 This doesn't run on the target node in the pre phase as a failed
4041 node would then be impossible to remove.
4045 "OP_TARGET": self.op.node_name,
4046 "NODE_NAME": self.op.node_name,
4049 def BuildHooksNodes(self):
4050 """Build hooks nodes.
4053 all_nodes = self.cfg.GetNodeList()
try:
4055 all_nodes.remove(self.op.node_name)
except ValueError:
4057 logging.warning("Node '%s', which is about to be removed, was not found"
4058 " in the list of all nodes", self.op.node_name)
4059 return (all_nodes, all_nodes)
4061 def CheckPrereq(self):
4062 """Check prerequisites.
4065 - the node exists in the configuration
4066 - it does not have primary or secondary instances
4067 - it's not the master
4069 Any errors are signaled by raising errors.OpPrereqError.
4072 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4073 node = self.cfg.GetNodeInfo(self.op.node_name)
4074 assert node is not None
4076 instance_list = self.cfg.GetInstanceList()
4078 masternode = self.cfg.GetMasterNode()
4079 if node.name == masternode:
4080 raise errors.OpPrereqError("Node is the master node, failover to another"
4081 " node is required", errors.ECODE_INVAL)
4083 for instance_name in instance_list:
4084 instance = self.cfg.GetInstanceInfo(instance_name)
4085 if node.name in instance.all_nodes:
4086 raise errors.OpPrereqError("Instance %s is still running on the node,"
4087 " please remove first" % instance_name,
4089 self.op.node_name = node.name
4092 def Exec(self, feedback_fn):
4093 """Removes the node from the cluster.
4097 logging.info("Stopping the node daemon and removing configs from node %s",
4100 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4102 # Promote nodes to master candidate as needed
4103 _AdjustCandidatePool(self, exceptions=[node.name])
4104 self.context.RemoveNode(node.name)
4106 # Run post hooks on the node before it's removed
4107 _RunPostHook(self, node.name)
4109 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4110 msg = result.fail_msg
4112 self.LogWarning("Errors encountered on the remote node while leaving"
4113 " the cluster: %s", msg)
4115 # Remove node from our /etc/hosts
4116 if self.cfg.GetClusterInfo().modify_etc_hosts:
4117 master_node = self.cfg.GetMasterNode()
4118 result = self.rpc.call_etc_hosts_modify(master_node,
4119 constants.ETC_HOSTS_REMOVE,
4121 result.Raise("Can't update hosts file with new host data")
4122 _RedistributeAncillaryFiles(self)
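# Note added for clarity (not part of the original file): the removal steps
# above are deliberately ordered so the cluster stays consistent even if the
# node is already dead: the candidate pool is adjusted and the node dropped
# from the configuration and context first, the post hooks and the
# node-side cleanup RPC run afterwards (a failure there is only logged as a
# warning), and /etc/hosts plus the ancillary files are refreshed last.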
4125 class _NodeQuery(_QueryBase):
4126 FIELDS = query.NODE_FIELDS
4128 def ExpandNames(self, lu):
4129 lu.needed_locks = {}
4130 lu.share_locks[locking.LEVEL_NODE] = 1
4133 self.wanted = _GetWantedNodes(lu, self.names)
4135 self.wanted = locking.ALL_SET
4137 self.do_locking = (self.use_locking and
4138 query.NQ_LIVE in self.requested_data)
4141 # if we don't request only static fields, we need to lock the nodes
4142 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4144 def DeclareLocks(self, lu, level):
4147 def _GetQueryData(self, lu):
4148 """Computes the list of nodes and their attributes.
4151 all_info = lu.cfg.GetAllNodesInfo()
4153 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4155 # Gather data as requested
4156 if query.NQ_LIVE in self.requested_data:
4157 # filter out non-vm_capable nodes
4158 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4160 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4161 lu.cfg.GetHypervisorType())
4162 live_data = dict((name, nresult.payload)
4163 for (name, nresult) in node_data.items()
4164 if not nresult.fail_msg and nresult.payload)
4168 if query.NQ_INST in self.requested_data:
4169 node_to_primary = dict([(name, set()) for name in nodenames])
4170 node_to_secondary = dict([(name, set()) for name in nodenames])
4172 inst_data = lu.cfg.GetAllInstancesInfo()
4174 for inst in inst_data.values():
4175 if inst.primary_node in node_to_primary:
4176 node_to_primary[inst.primary_node].add(inst.name)
4177 for secnode in inst.secondary_nodes:
4178 if secnode in node_to_secondary:
4179 node_to_secondary[secnode].add(inst.name)
4181 node_to_primary = None
4182 node_to_secondary = None
4184 if query.NQ_OOB in self.requested_data:
4185 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4186 for name, node in all_info.iteritems())
4190 if query.NQ_GROUP in self.requested_data:
4191 groups = lu.cfg.GetAllNodeGroupsInfo()
4195 return query.NodeQueryData([all_info[name] for name in nodenames],
4196 live_data, lu.cfg.GetMasterNode(),
4197 node_to_primary, node_to_secondary, groups,
4198 oob_support, lu.cfg.GetClusterInfo())
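# Note added for clarity (not part of the original file): the amount of work
# done above is driven by the requested_data set: query.NQ_LIVE triggers the
# node_info RPC against vm_capable nodes only, query.NQ_INST builds the
# primary/secondary instance maps from the configuration, query.NQ_OOB
# precomputes out-of-band support per node and query.NQ_GROUP fetches the
# node group objects.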
4201 class LUNodeQuery(NoHooksLU):
4202 """Logical unit for querying nodes.
4205 # pylint: disable-msg=W0142
4208 def CheckArguments(self):
4209 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4210 self.op.output_fields, self.op.use_locking)
4212 def ExpandNames(self):
4213 self.nq.ExpandNames(self)
4215 def Exec(self, feedback_fn):
4216 return self.nq.OldStyleQuery(self)
4219 class LUNodeQueryvols(NoHooksLU):
4220 """Logical unit for getting volumes on node(s).
4224 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4225 _FIELDS_STATIC = utils.FieldSet("node")
4227 def CheckArguments(self):
4228 _CheckOutputFields(static=self._FIELDS_STATIC,
4229 dynamic=self._FIELDS_DYNAMIC,
4230 selected=self.op.output_fields)
4232 def ExpandNames(self):
4233 self.needed_locks = {}
4234 self.share_locks[locking.LEVEL_NODE] = 1
4235 if not self.op.nodes:
4236 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4238 self.needed_locks[locking.LEVEL_NODE] = \
4239 _GetWantedNodes(self, self.op.nodes)
4241 def Exec(self, feedback_fn):
4242 """Computes the list of nodes and their attributes.
4245 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4246 volumes = self.rpc.call_node_volumes(nodenames)
4248 ilist = [self.cfg.GetInstanceInfo(iname) for iname
4249 in self.cfg.GetInstanceList()]
4251 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
output = []
4254 for node in nodenames:
4255 nresult = volumes[node]
4258 msg = nresult.fail_msg
4260 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4263 node_vols = nresult.payload[:]
4264 node_vols.sort(key=lambda vol: vol['dev'])
4266 for vol in node_vols:
node_output = []
4268 for field in self.op.output_fields:
4271 elif field == "phys":
4275 elif field == "name":
4277 elif field == "size":
4278 val = int(float(vol['size']))
4279 elif field == "instance":
4281 if node not in lv_by_node[inst]:
4283 if vol['name'] in lv_by_node[inst][node]:
4289 raise errors.ParameterError(field)
4290 node_output.append(str(val))
4292 output.append(node_output)
return output
4297 class LUNodeQueryStorage(NoHooksLU):
4298 """Logical unit for getting information on storage units on node(s).
4301 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4304 def CheckArguments(self):
4305 _CheckOutputFields(static=self._FIELDS_STATIC,
4306 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4307 selected=self.op.output_fields)
4309 def ExpandNames(self):
4310 self.needed_locks = {}
4311 self.share_locks[locking.LEVEL_NODE] = 1
4314 self.needed_locks[locking.LEVEL_NODE] = \
4315 _GetWantedNodes(self, self.op.nodes)
4317 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4319 def Exec(self, feedback_fn):
4320 """Computes the list of nodes and their attributes.
4323 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4325 # Always get name to sort by
4326 if constants.SF_NAME in self.op.output_fields:
4327 fields = self.op.output_fields[:]
4329 fields = [constants.SF_NAME] + self.op.output_fields
4331 # Never ask for node or type as it's only known to the LU
4332 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4333 while extra in fields:
4334 fields.remove(extra)
4336 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4337 name_idx = field_idx[constants.SF_NAME]
4339 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4340 data = self.rpc.call_storage_list(self.nodes,
4341 self.op.storage_type, st_args,
4342 self.op.name, fields)
4346 for node in utils.NiceSort(self.nodes):
4347 nresult = data[node]
4351 msg = nresult.fail_msg
4353 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4356 rows = dict([(row[name_idx], row) for row in nresult.payload])
4358 for name in utils.NiceSort(rows.keys()):
4363 for field in self.op.output_fields:
4364 if field == constants.SF_NODE:
4366 elif field == constants.SF_TYPE:
4367 val = self.op.storage_type
4368 elif field in field_idx:
4369 val = row[field_idx[field]]
4371 raise errors.ParameterError(field)
4380 class _InstanceQuery(_QueryBase):
4381 FIELDS = query.INSTANCE_FIELDS
4383 def ExpandNames(self, lu):
4384 lu.needed_locks = {}
4385 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4386 lu.share_locks[locking.LEVEL_NODE] = 1
4389 self.wanted = _GetWantedInstances(lu, self.names)
4391 self.wanted = locking.ALL_SET
4393 self.do_locking = (self.use_locking and
4394 query.IQ_LIVE in self.requested_data)
4396 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4397 lu.needed_locks[locking.LEVEL_NODE] = []
4398 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4400 def DeclareLocks(self, lu, level):
4401 if level == locking.LEVEL_NODE and self.do_locking:
4402 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4404 def _GetQueryData(self, lu):
4405 """Computes the list of instances and their attributes.
4408 cluster = lu.cfg.GetClusterInfo()
4409 all_info = lu.cfg.GetAllInstancesInfo()
4411 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4413 instance_list = [all_info[name] for name in instance_names]
4414 nodes = frozenset(itertools.chain(*(inst.all_nodes
4415 for inst in instance_list)))
4416 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4419 wrongnode_inst = set()
4421 # Gather data as requested
4422 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4424 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4426 result = node_data[name]
4428 # offline nodes will be in both lists
4429 assert result.fail_msg
4430 offline_nodes.append(name)
4432 bad_nodes.append(name)
4433 elif result.payload:
4434 for inst in result.payload:
4435 if inst in all_info:
4436 if all_info[inst].primary_node == name:
4437 live_data.update(result.payload)
4439 wrongnode_inst.add(inst)
4441 # orphan instance; we don't list it here as we don't
4442 # handle this case yet in the output of instance listing
4443 logging.warning("Orphan instance '%s' found on node %s",
4445 # else no instance is alive
4449 if query.IQ_DISKUSAGE in self.requested_data:
4450 disk_usage = dict((inst.name,
4451 _ComputeDiskSize(inst.disk_template,
4452 [{constants.IDISK_SIZE: disk.size}
4453 for disk in inst.disks]))
4454 for inst in instance_list)
4458 if query.IQ_CONSOLE in self.requested_data:
4460 for inst in instance_list:
4461 if inst.name in live_data:
4462 # Instance is running
4463 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4465 consinfo[inst.name] = None
4466 assert set(consinfo.keys()) == set(instance_names)
4470 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4471 disk_usage, offline_nodes, bad_nodes,
4472 live_data, wrongnode_inst, consinfo)
4475 class LUQuery(NoHooksLU):
4476 """Query for resources/items of a certain kind.
4479 # pylint: disable-msg=W0142
4482 def CheckArguments(self):
4483 qcls = _GetQueryImplementation(self.op.what)
4485 self.impl = qcls(self.op.filter, self.op.fields, False)
4487 def ExpandNames(self):
4488 self.impl.ExpandNames(self)
4490 def DeclareLocks(self, level):
4491 self.impl.DeclareLocks(self, level)
4493 def Exec(self, feedback_fn):
4494 return self.impl.NewStyleQuery(self)
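# Illustrative example (not from the original file): the filter accepted by
# LUQuery is a nested list in qlang syntax; selecting two nodes by name
# could look like
#
#   [qlang.OP_OR, [qlang.OP_EQUAL, "name", "node1.example.com"],
#                 [qlang.OP_EQUAL, "name", "node2.example.com"]]
#
# which is also the shape produced by qlang.MakeSimpleFilter("name", names)
# used elsewhere in this module; the hostnames above are hypothetical.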
4497 class LUQueryFields(NoHooksLU):
4498 """Query for resources/items of a certain kind.
4501 # pylint: disable-msg=W0142
4504 def CheckArguments(self):
4505 self.qcls = _GetQueryImplementation(self.op.what)
4507 def ExpandNames(self):
4508 self.needed_locks = {}
4510 def Exec(self, feedback_fn):
4511 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4514 class LUNodeModifyStorage(NoHooksLU):
4515 """Logical unit for modifying a storage volume on a node.
4520 def CheckArguments(self):
4521 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4523 storage_type = self.op.storage_type
4526 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4528 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4529 " modified" % storage_type,
4532 diff = set(self.op.changes.keys()) - modifiable
4534 raise errors.OpPrereqError("The following fields can not be modified for"
4535 " storage units of type '%s': %r" %
4536 (storage_type, list(diff)),
4539 def ExpandNames(self):
4540 self.needed_locks = {
4541 locking.LEVEL_NODE: self.op.node_name,
4544 def Exec(self, feedback_fn):
4545 """Computes the list of nodes and their attributes.
4548 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4549 result = self.rpc.call_storage_modify(self.op.node_name,
4550 self.op.storage_type, st_args,
4551 self.op.name, self.op.changes)
4552 result.Raise("Failed to modify storage unit '%s' on %s" %
4553 (self.op.name, self.op.node_name))
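# Illustrative example (hypothetical values, not from the original file):
# for an LVM physical volume the modifiable field set typically only
# contains allocatability, so an opcode for this LU would carry something
# like storage_type=constants.ST_LVM_PV and
# changes={constants.SF_ALLOCATABLE: False}; any key outside
# constants.MODIFIABLE_STORAGE_FIELDS[storage_type] is rejected by
# CheckArguments above.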
4556 class LUNodeAdd(LogicalUnit):
4557 """Logical unit for adding node to the cluster.
4561 HTYPE = constants.HTYPE_NODE
4562 _NFLAGS = ["master_capable", "vm_capable"]
4564 def CheckArguments(self):
4565 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4566 # validate/normalize the node name
4567 self.hostname = netutils.GetHostname(name=self.op.node_name,
4568 family=self.primary_ip_family)
4569 self.op.node_name = self.hostname.name
4571 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4572 raise errors.OpPrereqError("Cannot readd the master node",
4575 if self.op.readd and self.op.group:
4576 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4577 " being readded", errors.ECODE_INVAL)
4579 def BuildHooksEnv(self):
4582 This will run on all nodes before, and on all nodes + the new node after.
4586 "OP_TARGET": self.op.node_name,
4587 "NODE_NAME": self.op.node_name,
4588 "NODE_PIP": self.op.primary_ip,
4589 "NODE_SIP": self.op.secondary_ip,
4590 "MASTER_CAPABLE": str(self.op.master_capable),
4591 "VM_CAPABLE": str(self.op.vm_capable),
4594 def BuildHooksNodes(self):
4595 """Build hooks nodes.
4598 # Exclude added node
4599 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4600 post_nodes = pre_nodes + [self.op.node_name, ]
4602 return (pre_nodes, post_nodes)
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4608 - the new node is not already in the config
4610 - its parameters (single/dual homed) match the cluster
4612 Any errors are signaled by raising errors.OpPrereqError.
cfg = self.cfg
4616 hostname = self.hostname
4617 node = hostname.name
4618 primary_ip = self.op.primary_ip = hostname.ip
4619 if self.op.secondary_ip is None:
4620 if self.primary_ip_family == netutils.IP6Address.family:
4621 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4622 " IPv4 address must be given as secondary",
4624 self.op.secondary_ip = primary_ip
4626 secondary_ip = self.op.secondary_ip
4627 if not netutils.IP4Address.IsValid(secondary_ip):
4628 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4629 " address" % secondary_ip, errors.ECODE_INVAL)
4631 node_list = cfg.GetNodeList()
4632 if not self.op.readd and node in node_list:
4633 raise errors.OpPrereqError("Node %s is already in the configuration" %
4634 node, errors.ECODE_EXISTS)
4635 elif self.op.readd and node not in node_list:
4636 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4639 self.changed_primary_ip = False
4641 for existing_node_name in node_list:
4642 existing_node = cfg.GetNodeInfo(existing_node_name)
4644 if self.op.readd and node == existing_node_name:
4645 if existing_node.secondary_ip != secondary_ip:
4646 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4647 " address configuration as before",
4649 if existing_node.primary_ip != primary_ip:
4650 self.changed_primary_ip = True
4654 if (existing_node.primary_ip == primary_ip or
4655 existing_node.secondary_ip == primary_ip or
4656 existing_node.primary_ip == secondary_ip or
4657 existing_node.secondary_ip == secondary_ip):
4658 raise errors.OpPrereqError("New node ip address(es) conflict with"
4659 " existing node %s" % existing_node.name,
4660 errors.ECODE_NOTUNIQUE)
4662 # After this 'if' block, None is no longer a valid value for the
4663 # _capable op attributes
4665 old_node = self.cfg.GetNodeInfo(node)
4666 assert old_node is not None, "Can't retrieve locked node %s" % node
4667 for attr in self._NFLAGS:
4668 if getattr(self.op, attr) is None:
4669 setattr(self.op, attr, getattr(old_node, attr))
4671 for attr in self._NFLAGS:
4672 if getattr(self.op, attr) is None:
4673 setattr(self.op, attr, True)
4675 if self.op.readd and not self.op.vm_capable:
4676 pri, sec = cfg.GetNodeInstances(node)
4678 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4679 " flag set to false, but it already holds"
4680 " instances" % node,
4683 # check that the type of the node (single versus dual homed) is the
4684 # same as for the master
4685 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4686 master_singlehomed = myself.secondary_ip == myself.primary_ip
4687 newbie_singlehomed = secondary_ip == primary_ip
4688 if master_singlehomed != newbie_singlehomed:
4689 if master_singlehomed:
4690 raise errors.OpPrereqError("The master has no secondary ip but the"
4691 " new node has one",
4694 raise errors.OpPrereqError("The master has a secondary ip but the"
4695 " new node doesn't have one",
4698 # checks reachability
4699 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4700 raise errors.OpPrereqError("Node not reachable by ping",
4701 errors.ECODE_ENVIRON)
4703 if not newbie_singlehomed:
4704 # check reachability from my secondary ip to newbie's secondary ip
4705 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4706 source=myself.secondary_ip):
4707 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4708 " based ping to node daemon port",
4709 errors.ECODE_ENVIRON)
4716 if self.op.master_capable:
4717 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4719 self.master_candidate = False
4722 self.new_node = old_node
4724 node_group = cfg.LookupNodeGroup(self.op.group)
4725 self.new_node = objects.Node(name=node,
4726 primary_ip=primary_ip,
4727 secondary_ip=secondary_ip,
4728 master_candidate=self.master_candidate,
4729 offline=False, drained=False,
4732 if self.op.ndparams:
4733 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4735 def Exec(self, feedback_fn):
4736 """Adds the new node to the cluster.
4739 new_node = self.new_node
4740 node = new_node.name
4742 # We are adding a new node, so we assume it's powered
4743 new_node.powered = True
4745 # for re-adds, reset the offline/drained/master-candidate flags;
4746 # we need to reset here, otherwise offline would prevent RPC calls
4747 # later in the procedure; this also means that if the re-add
4748 # fails, we are left with a non-offlined, broken node
4750 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4751 self.LogInfo("Readding a node, the offline/drained flags were reset")
4752 # if we demote the node, we do cleanup later in the procedure
4753 new_node.master_candidate = self.master_candidate
4754 if self.changed_primary_ip:
4755 new_node.primary_ip = self.op.primary_ip
4757 # copy the master/vm_capable flags
4758 for attr in self._NFLAGS:
4759 setattr(new_node, attr, getattr(self.op, attr))
4761 # notify the user about any possible mc promotion
4762 if new_node.master_candidate:
4763 self.LogInfo("Node will be a master candidate")
4765 if self.op.ndparams:
4766 new_node.ndparams = self.op.ndparams
4768 new_node.ndparams = {}
4770 # check connectivity
4771 result = self.rpc.call_version([node])[node]
4772 result.Raise("Can't get version information from node %s" % node)
4773 if constants.PROTOCOL_VERSION == result.payload:
4774 logging.info("Communication to node %s fine, sw version %s match",
4775 node, result.payload)
4777 raise errors.OpExecError("Version mismatch master version %s,"
4778 " node version %s" %
4779 (constants.PROTOCOL_VERSION, result.payload))
4781 # Add node to our /etc/hosts, and add key to known_hosts
4782 if self.cfg.GetClusterInfo().modify_etc_hosts:
4783 master_node = self.cfg.GetMasterNode()
4784 result = self.rpc.call_etc_hosts_modify(master_node,
4785 constants.ETC_HOSTS_ADD,
4788 result.Raise("Can't update hosts file with new host data")
4790 if new_node.secondary_ip != new_node.primary_ip:
4791 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4794 node_verify_list = [self.cfg.GetMasterNode()]
4795 node_verify_param = {
4796 constants.NV_NODELIST: [node],
4797 # TODO: do a node-net-test as well?
4800 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4801 self.cfg.GetClusterName())
4802 for verifier in node_verify_list:
4803 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4804 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4806 for failed in nl_payload:
4807 feedback_fn("ssh/hostname verification failed"
4808 " (checking from %s): %s" %
4809 (verifier, nl_payload[failed]))
4810 raise errors.OpExecError("ssh/hostname verification failed")
4813 _RedistributeAncillaryFiles(self)
4814 self.context.ReaddNode(new_node)
4815 # make sure we redistribute the config
4816 self.cfg.Update(new_node, feedback_fn)
4817 # and make sure the new node will not have old files around
4818 if not new_node.master_candidate:
4819 result = self.rpc.call_node_demote_from_mc(new_node.name)
4820 msg = result.fail_msg
4822 self.LogWarning("Node failed to demote itself from master"
4823 " candidate status: %s" % msg)
4825 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4826 additional_vm=self.op.vm_capable)
4827 self.context.AddNode(new_node, self.proc.GetECId())
4830 class LUNodeSetParams(LogicalUnit):
4831 """Modifies the parameters of a node.
4833 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4834 to the node role (as _ROLE_*)
4835 @cvar _R2F: a dictionary from node role to tuples of flags
4836 @cvar _FLAGS: a list of attribute names corresponding to the flags
4839 HPATH = "node-modify"
4840 HTYPE = constants.HTYPE_NODE
4842 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
_F2R = {
4844 (True, False, False): _ROLE_CANDIDATE,
4845 (False, True, False): _ROLE_DRAINED,
4846 (False, False, True): _ROLE_OFFLINE,
4847 (False, False, False): _ROLE_REGULAR,
}
4849 _R2F = dict((v, k) for k, v in _F2R.items())
4850 _FLAGS = ["master_candidate", "drained", "offline"]
4852 def CheckArguments(self):
4853 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4854 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4855 self.op.master_capable, self.op.vm_capable,
4856 self.op.secondary_ip, self.op.ndparams]
4857 if all_mods.count(None) == len(all_mods):
4858 raise errors.OpPrereqError("Please pass at least one modification",
4860 if all_mods.count(True) > 1:
4861 raise errors.OpPrereqError("Can't set the node into more than one"
4862 " state at the same time",
4865 # Boolean value that tells us whether we might be demoting from MC
4866 self.might_demote = (self.op.master_candidate == False or
4867 self.op.offline == True or
4868 self.op.drained == True or
4869 self.op.master_capable == False)
4871 if self.op.secondary_ip:
4872 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4873 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4874 " address" % self.op.secondary_ip,
4877 self.lock_all = self.op.auto_promote and self.might_demote
4878 self.lock_instances = self.op.secondary_ip is not None
4880 def ExpandNames(self):
if self.lock_all:
4882 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
else:
4884 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4886 if self.lock_instances:
4887 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4889 def DeclareLocks(self, level):
4890 # If we have locked all instances, before waiting to lock nodes, release
4891 # all the ones living on nodes unrelated to the current operation.
4892 if level == locking.LEVEL_NODE and self.lock_instances:
4893 self.affected_instances = []
4894 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4897 # Build list of instances to release
4898 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4899 instance = self.context.cfg.GetInstanceInfo(instance_name)
4900 if (instance.disk_template in constants.DTS_INT_MIRROR and
4901 self.op.node_name in instance.all_nodes):
4902 instances_keep.append(instance_name)
4903 self.affected_instances.append(instance)
4905 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4907 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4908 set(instances_keep))
4910 def BuildHooksEnv(self):
4913 This runs on the master node.
4917 "OP_TARGET": self.op.node_name,
4918 "MASTER_CANDIDATE": str(self.op.master_candidate),
4919 "OFFLINE": str(self.op.offline),
4920 "DRAINED": str(self.op.drained),
4921 "MASTER_CAPABLE": str(self.op.master_capable),
4922 "VM_CAPABLE": str(self.op.vm_capable),
4925 def BuildHooksNodes(self):
4926 """Build hooks nodes.
4929 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4932 def CheckPrereq(self):
4933 """Check prerequisites.
4935 This only checks the instance list against the existing names.
4938 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4940 if (self.op.master_candidate is not None or
4941 self.op.drained is not None or
4942 self.op.offline is not None):
4943 # we can't change the master's node flags
4944 if self.op.node_name == self.cfg.GetMasterNode():
4945 raise errors.OpPrereqError("The master role can be changed"
4946 " only via master-failover",
4949 if self.op.master_candidate and not node.master_capable:
4950 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4951 " it a master candidate" % node.name,
4954 if self.op.vm_capable == False:
4955 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4957 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4958 " the vm_capable flag" % node.name,
4961 if node.master_candidate and self.might_demote and not self.lock_all:
4962 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4963 # check if after removing the current node, we're missing master candidates
4965 (mc_remaining, mc_should, _) = \
4966 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4967 if mc_remaining < mc_should:
4968 raise errors.OpPrereqError("Not enough master candidates, please"
4969 " pass auto promote option to allow"
4970 " promotion", errors.ECODE_STATE)
4972 self.old_flags = old_flags = (node.master_candidate,
4973 node.drained, node.offline)
4974 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4975 self.old_role = old_role = self._F2R[old_flags]
4977 # Check for ineffective changes
4978 for attr in self._FLAGS:
4979 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4980 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4981 setattr(self.op, attr, None)
4983 # Past this point, any flag change to False means a transition
4984 # away from the respective state, as only real changes are kept
4986 # TODO: We might query the real power state if it supports OOB
4987 if _SupportsOob(self.cfg, node):
4988 if self.op.offline is False and not (node.powered or
4989 self.op.powered == True):
4990 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4991 " offline status can be reset") %
4993 elif self.op.powered is not None:
4994 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4995 " as it does not support out-of-band"
4996 " handling") % self.op.node_name)
4998 # If we're being deofflined/drained, we'll MC ourself if needed
4999 if (self.op.drained == False or self.op.offline == False or
5000 (self.op.master_capable and not node.master_capable)):
5001 if _DecideSelfPromotion(self):
5002 self.op.master_candidate = True
5003 self.LogInfo("Auto-promoting node to master candidate")
5005 # If we're no longer master capable, we'll demote ourselves from MC
5006 if self.op.master_capable == False and node.master_candidate:
5007 self.LogInfo("Demoting from master candidate")
5008 self.op.master_candidate = False
5011 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5012 if self.op.master_candidate:
5013 new_role = self._ROLE_CANDIDATE
5014 elif self.op.drained:
5015 new_role = self._ROLE_DRAINED
5016 elif self.op.offline:
5017 new_role = self._ROLE_OFFLINE
5018 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5019 # False is still in new flags, which means we're un-setting (the
5021 new_role = self._ROLE_REGULAR
5022 else: # no new flags, nothing, keep old role
new_role = old_role
5025 self.new_role = new_role
5027 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5028 # Trying to transition out of offline status
5029 result = self.rpc.call_version([node.name])[node.name]
5031 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5032 " to report its version: %s" %
5033 (node.name, result.fail_msg),
5036 self.LogWarning("Transitioning node from offline to online state"
5037 " without using re-add. Please make sure the node"
5040 if self.op.secondary_ip:
5041 # Ok even without locking, because this can't be changed by any LU
5042 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5043 master_singlehomed = master.secondary_ip == master.primary_ip
5044 if master_singlehomed and self.op.secondary_ip:
5045 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5046 " homed cluster", errors.ECODE_INVAL)
5049 if self.affected_instances:
5050 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5051 " node has instances (%s) configured"
5052 " to use it" % self.affected_instances)
5054 # On online nodes, check that no instances are running, and that
5055 # the node has the new ip and we can reach it.
5056 for instance in self.affected_instances:
5057 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5059 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5060 if master.name != node.name:
5061 # check reachability from master secondary ip to new secondary ip
5062 if not netutils.TcpPing(self.op.secondary_ip,
5063 constants.DEFAULT_NODED_PORT,
5064 source=master.secondary_ip):
5065 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5066 " based ping to node daemon port",
5067 errors.ECODE_ENVIRON)
5069 if self.op.ndparams:
5070 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5071 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5072 self.new_ndparams = new_ndparams
5074 def Exec(self, feedback_fn):
node = self.node
5079 old_role = self.old_role
5080 new_role = self.new_role
result = []
5084 if self.op.ndparams:
5085 node.ndparams = self.new_ndparams
5087 if self.op.powered is not None:
5088 node.powered = self.op.powered
5090 for attr in ["master_capable", "vm_capable"]:
5091 val = getattr(self.op, attr)
5093 setattr(node, attr, val)
5094 result.append((attr, str(val)))
5096 if new_role != old_role:
5097 # Tell the node to demote itself, if no longer MC and not offline
5098 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5099 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5101 self.LogWarning("Node failed to demote itself: %s", msg)
5103 new_flags = self._R2F[new_role]
5104 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5106 result.append((desc, str(nf)))
5107 (node.master_candidate, node.drained, node.offline) = new_flags
5109 # we locked all nodes, we adjust the CP before updating this node
5111 _AdjustCandidatePool(self, [node.name])
5113 if self.op.secondary_ip:
5114 node.secondary_ip = self.op.secondary_ip
5115 result.append(("secondary_ip", self.op.secondary_ip))
5117 # this will trigger configuration file update, if needed
5118 self.cfg.Update(node, feedback_fn)
5120 # this will trigger job queue propagation or cleanup if the mc
5122 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5123 self.context.ReaddNode(node)
return result
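# Note added for clarity (not part of the original file): the three
# exclusive flags are reduced to a single role via _F2R and expanded back
# via _R2F, so a transition such as "drained -> master candidate" is applied
# atomically as (master_candidate, drained, offline) = (True, False, False),
# followed by a candidate pool adjustment when all node locks are held.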
5128 class LUNodePowercycle(NoHooksLU):
5129 """Powercycles a node.
5134 def CheckArguments(self):
5135 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5136 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5137 raise errors.OpPrereqError("The node is the master and the force"
5138 " parameter was not set",
5141 def ExpandNames(self):
5142 """Locking for PowercycleNode.
5144 This is a last-resort option and shouldn't block on other
5145 jobs. Therefore, we grab no locks.
5148 self.needed_locks = {}
5150 def Exec(self, feedback_fn):
5154 result = self.rpc.call_node_powercycle(self.op.node_name,
5155 self.cfg.GetHypervisorType())
5156 result.Raise("Failed to schedule the reboot")
5157 return result.payload
5160 class LUClusterQuery(NoHooksLU):
5161 """Query cluster configuration.
5166 def ExpandNames(self):
5167 self.needed_locks = {}
5169 def Exec(self, feedback_fn):
5170 """Return cluster config.
5173 cluster = self.cfg.GetClusterInfo()
os_hvp = {}
5176 # Filter just for enabled hypervisors
5177 for os_name, hv_dict in cluster.os_hvp.items():
5178 os_hvp[os_name] = {}
5179 for hv_name, hv_params in hv_dict.items():
5180 if hv_name in cluster.enabled_hypervisors:
5181 os_hvp[os_name][hv_name] = hv_params
5183 # Convert ip_family to ip_version
5184 primary_ip_version = constants.IP4_VERSION
5185 if cluster.primary_ip_family == netutils.IP6Address.family:
5186 primary_ip_version = constants.IP6_VERSION
5189 "software_version": constants.RELEASE_VERSION,
5190 "protocol_version": constants.PROTOCOL_VERSION,
5191 "config_version": constants.CONFIG_VERSION,
5192 "os_api_version": max(constants.OS_API_VERSIONS),
5193 "export_version": constants.EXPORT_VERSION,
5194 "architecture": (platform.architecture()[0], platform.machine()),
5195 "name": cluster.cluster_name,
5196 "master": cluster.master_node,
5197 "default_hypervisor": cluster.enabled_hypervisors[0],
5198 "enabled_hypervisors": cluster.enabled_hypervisors,
5199 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5200 for hypervisor_name in cluster.enabled_hypervisors]),
5202 "beparams": cluster.beparams,
5203 "osparams": cluster.osparams,
5204 "nicparams": cluster.nicparams,
5205 "ndparams": cluster.ndparams,
5206 "candidate_pool_size": cluster.candidate_pool_size,
5207 "master_netdev": cluster.master_netdev,
5208 "volume_group_name": cluster.volume_group_name,
5209 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5210 "file_storage_dir": cluster.file_storage_dir,
5211 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5212 "maintain_node_health": cluster.maintain_node_health,
5213 "ctime": cluster.ctime,
5214 "mtime": cluster.mtime,
5215 "uuid": cluster.uuid,
5216 "tags": list(cluster.GetTags()),
5217 "uid_pool": cluster.uid_pool,
5218 "default_iallocator": cluster.default_iallocator,
5219 "reserved_lvs": cluster.reserved_lvs,
5220 "primary_ip_version": primary_ip_version,
5221 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5222 "hidden_os": cluster.hidden_os,
5223 "blacklisted_os": cluster.blacklisted_os,
5229 class LUClusterConfigQuery(NoHooksLU):
5230 """Return configuration values.
5234 _FIELDS_DYNAMIC = utils.FieldSet()
5235 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5236 "watcher_pause", "volume_group_name")
5238 def CheckArguments(self):
5239 _CheckOutputFields(static=self._FIELDS_STATIC,
5240 dynamic=self._FIELDS_DYNAMIC,
5241 selected=self.op.output_fields)
5243 def ExpandNames(self):
5244 self.needed_locks = {}
5246 def Exec(self, feedback_fn):
5247 """Dump a representation of the cluster config to the standard output.
values = []
5251 for field in self.op.output_fields:
5252 if field == "cluster_name":
5253 entry = self.cfg.GetClusterName()
5254 elif field == "master_node":
5255 entry = self.cfg.GetMasterNode()
5256 elif field == "drain_flag":
5257 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5258 elif field == "watcher_pause":
5259 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5260 elif field == "volume_group_name":
5261 entry = self.cfg.GetVGName()
5263 raise errors.ParameterError(field)
5264 values.append(entry)
return values
5268 class LUInstanceActivateDisks(NoHooksLU):
5269 """Bring up an instance's disks.
5274 def ExpandNames(self):
5275 self._ExpandAndLockInstance()
5276 self.needed_locks[locking.LEVEL_NODE] = []
5277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5279 def DeclareLocks(self, level):
5280 if level == locking.LEVEL_NODE:
5281 self._LockInstancesNodes()
5283 def CheckPrereq(self):
5284 """Check prerequisites.
5286 This checks that the instance is in the cluster.
5289 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5290 assert self.instance is not None, \
5291 "Cannot retrieve locked instance %s" % self.op.instance_name
5292 _CheckNodeOnline(self, self.instance.primary_node)
5294 def Exec(self, feedback_fn):
5295 """Activate the disks.
5298 disks_ok, disks_info = \
5299 _AssembleInstanceDisks(self, self.instance,
5300 ignore_size=self.op.ignore_size)
if not disks_ok:
5302 raise errors.OpExecError("Cannot activate block devices")
return disks_info
5307 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5309 """Prepare the block devices for an instance.
5311 This sets up the block devices on all nodes.
5313 @type lu: L{LogicalUnit}
5314 @param lu: the logical unit on whose behalf we execute
5315 @type instance: L{objects.Instance}
5316 @param instance: the instance for whose disks we assemble
5317 @type disks: list of L{objects.Disk} or None
5318 @param disks: which disks to assemble (or all, if None)
5319 @type ignore_secondaries: boolean
5320 @param ignore_secondaries: if true, errors on secondary nodes
5321 won't result in an error return from the function
5322 @type ignore_size: boolean
5323 @param ignore_size: if true, the current known size of the disk
5324 will not be used during the disk activation, useful for cases
5325 when the size is wrong
5326 @return: False if the operation failed, otherwise a list of
5327 (host, instance_visible_name, node_visible_name)
5328 with the mapping from node devices to instance devices
device_info = []
disks_ok = True
5333 iname = instance.name
5334 disks = _ExpandCheckDisks(instance, disks)
5336 # With the two passes mechanism we try to reduce the window of
5337 # opportunity for the race condition of switching DRBD to primary
5338 # before handshaking occurred, but we do not eliminate it
5340 # The proper fix would be to wait (with some limits) until the
5341 # connection has been made and drbd transitions from WFConnection
5342 # into any other network-connected state (Connected, SyncTarget, SyncSource)
5345 # 1st pass, assemble on all nodes in secondary mode
5346 for idx, inst_disk in enumerate(disks):
5347 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5349 node_disk = node_disk.Copy()
5350 node_disk.UnsetSize()
5351 lu.cfg.SetDiskID(node_disk, node)
5352 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5353 msg = result.fail_msg
if msg:
5355 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5356 " (is_primary=False, pass=1): %s",
5357 inst_disk.iv_name, node, msg)
5358 if not ignore_secondaries:
disks_ok = False
5361 # FIXME: race condition on drbd migration to primary
5363 # 2nd pass, do only the primary node
5364 for idx, inst_disk in enumerate(disks):
5367 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5368 if node != instance.primary_node:
continue
5371 node_disk = node_disk.Copy()
5372 node_disk.UnsetSize()
5373 lu.cfg.SetDiskID(node_disk, node)
5374 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5375 msg = result.fail_msg
if msg:
5377 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5378 " (is_primary=True, pass=2): %s",
5379 inst_disk.iv_name, node, msg)
disks_ok = False
else:
5382 dev_path = result.payload
5384 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5386 # leave the disks configured for the primary node
5387 # this is a workaround that would be fixed better by
5388 # improving the logical/physical id handling
for disk in disks:
5390 lu.cfg.SetDiskID(disk, instance.primary_node)
5392 return disks_ok, device_info
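# Usage sketch added for illustration (not from the original file): callers
# typically unpack the return value as
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#
# where device_info is a list of (node_name, iv_name, device_path) tuples
# for the primary node, usable e.g. to expose the block device paths of the
# instance to whoever starts it.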
5395 def _StartInstanceDisks(lu, instance, force):
5396 """Start the disks of an instance.
5399 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5400 ignore_secondaries=force)
if not disks_ok:
5402 _ShutdownInstanceDisks(lu, instance)
5403 if force is not None and not force:
5404 lu.proc.LogWarning("", hint="If the message above refers to a"
5406 " you can retry the operation using '--force'.")
5407 raise errors.OpExecError("Disk consistency error")
5410 class LUInstanceDeactivateDisks(NoHooksLU):
5411 """Shutdown an instance's disks.
5416 def ExpandNames(self):
5417 self._ExpandAndLockInstance()
5418 self.needed_locks[locking.LEVEL_NODE] = []
5419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5421 def DeclareLocks(self, level):
5422 if level == locking.LEVEL_NODE:
5423 self._LockInstancesNodes()
5425 def CheckPrereq(self):
5426 """Check prerequisites.
5428 This checks that the instance is in the cluster.
5431 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5432 assert self.instance is not None, \
5433 "Cannot retrieve locked instance %s" % self.op.instance_name
5435 def Exec(self, feedback_fn):
5436 """Deactivate the disks
5439 instance = self.instance
if self.op.force:
5441 _ShutdownInstanceDisks(self, instance)
else:
5443 _SafeShutdownInstanceDisks(self, instance)
5446 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5447 """Shutdown block devices of an instance.
5449 This function checks if an instance is running, before calling
5450 _ShutdownInstanceDisks.
5453 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5454 _ShutdownInstanceDisks(lu, instance, disks=disks)
5457 def _ExpandCheckDisks(instance, disks):
5458 """Return the instance disks selected by the disks list
5460 @type disks: list of L{objects.Disk} or None
5461 @param disks: selected disks
5462 @rtype: list of L{objects.Disk}
5463 @return: selected instance disks to act on
if disks is None:
5467 return instance.disks
else:
5469 if not set(disks).issubset(instance.disks):
5470 raise errors.ProgrammerError("Can only act on disks belonging to the"
" target instance")
return disks
5475 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5476 """Shutdown block devices of an instance.
5478 This does the shutdown on all nodes of the instance.
5480 If the ignore_primary is false, errors on the primary node are
ignored.
all_result = True
5485 disks = _ExpandCheckDisks(instance, disks)
for disk in disks:
5488 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5489 lu.cfg.SetDiskID(top_disk, node)
5490 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5491 msg = result.fail_msg
if msg:
5493 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5494 disk.iv_name, node, msg)
5495 if ((node == instance.primary_node and not ignore_primary) or
5496 (node != instance.primary_node and not result.offline)):
all_result = False
return all_result
5501 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5502 """Checks if a node has enough free memory.
5504 This function checks if a given node has the needed amount of free
5505 memory. In case the node has less memory or we cannot get the
5506 information from the node, this function raises an OpPrereqError
exception.
5509 @type lu: C{LogicalUnit}
5510 @param lu: a logical unit from which we get configuration data
5512 @param node: the node to check
5513 @type reason: C{str}
5514 @param reason: string to use in the error message
5515 @type requested: C{int}
5516 @param requested: the amount of memory in MiB to check for
5517 @type hypervisor_name: C{str}
5518 @param hypervisor_name: the hypervisor to ask for memory stats
5519 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5520 we cannot check the node
5523 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5524 nodeinfo[node].Raise("Can't get data from node %s" % node,
5525 prereq=True, ecode=errors.ECODE_ENVIRON)
5526 free_mem = nodeinfo[node].payload.get('memory_free', None)
5527 if not isinstance(free_mem, int):
5528 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5529 " was '%s'" % (node, free_mem),
5530 errors.ECODE_ENVIRON)
5531 if requested > free_mem:
5532 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5533 " needed %s MiB, available %s MiB" %
5534 (node, reason, requested, free_mem),
5538 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5539 """Checks if nodes have enough free disk space in the all VGs.
5541 This function check if all given nodes have the needed amount of
5542 free disk. In case any node has less disk or we cannot get the
5543 information from the node, this function raise an OpPrereqError
5546 @type lu: C{LogicalUnit}
5547 @param lu: a logical unit from which we get configuration data
5548 @type nodenames: C{list}
5549 @param nodenames: the list of node names to check
5550 @type req_sizes: C{dict}
5551 @param req_sizes: the hash of vg and corresponding amount of disk in
5553 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5554 or we cannot check the node
5557 for vg, req_size in req_sizes.items():
5558 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5561 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5562 """Checks if nodes have enough free disk space in the specified VG.
5564 This function checks if all given nodes have the needed amount of
5565 free disk. In case any node has less disk or we cannot get the
5566 information from the node, this function raises an OpPrereqError
exception.
5569 @type lu: C{LogicalUnit}
5570 @param lu: a logical unit from which we get configuration data
5571 @type nodenames: C{list}
5572 @param nodenames: the list of node names to check
5574 @param vg: the volume group to check
5575 @type requested: C{int}
5576 @param requested: the amount of disk in MiB to check for
5577 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5578 or we cannot check the node
5581 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5582 for node in nodenames:
5583 info = nodeinfo[node]
5584 info.Raise("Cannot get current information from node %s" % node,
5585 prereq=True, ecode=errors.ECODE_ENVIRON)
5586 vg_free = info.payload.get("vg_free", None)
5587 if not isinstance(vg_free, int):
5588 raise errors.OpPrereqError("Can't compute free disk space on node"
5589 " %s for vg %s, result was '%s'" %
5590 (node, vg, vg_free), errors.ECODE_ENVIRON)
5591 if requested > vg_free:
5592 raise errors.OpPrereqError("Not enough disk space on target node %s"
5593 " vg %s: required %d MiB, available %d MiB" %
5594 (node, vg, requested, vg_free),
errors.ECODE_NORES)
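# Illustrative example (hypothetical names, not from the original file): an
# instance-creation style LU would typically combine these helpers as
#
#   _CheckNodeFreeMemory(self, pnode_name, "creating instance %s" % iname,
#                        be_full[constants.BE_MEMORY], hypervisor_name)
#   _CheckNodesFreeDiskPerVG(self, nodenames, {vg_name: 2 * 1024})
#
# where pnode_name, iname, be_full, hypervisor_name, nodenames and vg_name
# are hypothetical variables and the dict maps each volume group to the
# amount of disk required on it, in MiB.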
5598 class LUInstanceStartup(LogicalUnit):
5599 """Starts an instance.
5602 HPATH = "instance-start"
5603 HTYPE = constants.HTYPE_INSTANCE
5606 def CheckArguments(self):
5608 if self.op.beparams:
5609 # fill the beparams dict
5610 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5615 def BuildHooksEnv(self):
5618 This runs on master, primary and secondary nodes of the instance.
5622 "FORCE": self.op.force,
5625 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5629 def BuildHooksNodes(self):
5630 """Build hooks nodes.
5633 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5636 def CheckPrereq(self):
5637 """Check prerequisites.
5639 This checks that the instance is in the cluster.
5642 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643 assert self.instance is not None, \
5644 "Cannot retrieve locked instance %s" % self.op.instance_name
5647 if self.op.hvparams:
5648 # check hypervisor parameter syntax (locally)
5649 cluster = self.cfg.GetClusterInfo()
5650 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5651 filled_hvp = cluster.FillHV(instance)
5652 filled_hvp.update(self.op.hvparams)
5653 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5654 hv_type.CheckParameterSyntax(filled_hvp)
5655 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
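# Override-order sketch for the hvparams check above (keys and values are
# hypothetical): the cluster defaults are filled in first via FillHV, then the
# per-opcode overrides win, and only the merged dict is syntax-checked.
#
#   >>> filled_hvp = {"root_path": "/dev/vda1", "kernel_args": "ro"}
#   >>> filled_hvp.update({"root_path": "/dev/vda2"})  # self.op.hvparams
#   >>> filled_hvp["root_path"]
#   '/dev/vda2'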
5657 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5659 if self.primary_offline and self.op.ignore_offline_nodes:
5660 self.proc.LogWarning("Ignoring offline primary node")
5662 if self.op.hvparams or self.op.beparams:
5663 self.proc.LogWarning("Overridden parameters are ignored")
5665 _CheckNodeOnline(self, instance.primary_node)
5667 bep = self.cfg.GetClusterInfo().FillBE(instance)
5669 # check bridges existence
5670 _CheckInstanceBridgesExist(self, instance)
5672 remote_info = self.rpc.call_instance_info(instance.primary_node,
5674 instance.hypervisor)
5675 remote_info.Raise("Error checking node %s" % instance.primary_node,
5676 prereq=True, ecode=errors.ECODE_ENVIRON)
5677 if not remote_info.payload: # not running already
5678 _CheckNodeFreeMemory(self, instance.primary_node,
5679 "starting instance %s" % instance.name,
5680 bep[constants.BE_MEMORY], instance.hypervisor)
5682 def Exec(self, feedback_fn):
5683 """Start the instance.
5686 instance = self.instance
5687 force = self.op.force
5689 if not self.op.no_remember:
5690 self.cfg.MarkInstanceUp(instance.name)
5692 if self.primary_offline:
5693 assert self.op.ignore_offline_nodes
5694 self.proc.LogInfo("Primary node offline, marked instance as started")
5696 node_current = instance.primary_node
5698 _StartInstanceDisks(self, instance, force)
5700 result = self.rpc.call_instance_start(node_current, instance,
5701 self.op.hvparams, self.op.beparams)
5702 msg = result.fail_msg
5704 _ShutdownInstanceDisks(self, instance)
5705 raise errors.OpExecError("Could not start instance: %s" % msg)
5708 class LUInstanceReboot(LogicalUnit):
5709 """Reboot an instance.
5712 HPATH = "instance-reboot"
5713 HTYPE = constants.HTYPE_INSTANCE
5716 def ExpandNames(self):
5717 self._ExpandAndLockInstance()
5719 def BuildHooksEnv(self):
5722 This runs on master, primary and secondary nodes of the instance.
5726 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5727 "REBOOT_TYPE": self.op.reboot_type,
5728 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5731 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5735 def BuildHooksNodes(self):
5736 """Build hooks nodes.
5739 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5742 def CheckPrereq(self):
5743 """Check prerequisites.
5745 This checks that the instance is in the cluster.
5748 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5749 assert self.instance is not None, \
5750 "Cannot retrieve locked instance %s" % self.op.instance_name
5752 _CheckNodeOnline(self, instance.primary_node)
5754 # check bridges existence
5755 _CheckInstanceBridgesExist(self, instance)
5757 def Exec(self, feedback_fn):
5758 """Reboot the instance.
5761 instance = self.instance
5762 ignore_secondaries = self.op.ignore_secondaries
5763 reboot_type = self.op.reboot_type
5765 remote_info = self.rpc.call_instance_info(instance.primary_node,
5767 instance.hypervisor)
5768 remote_info.Raise("Error checking node %s" % instance.primary_node)
5769 instance_running = bool(remote_info.payload)
5771 node_current = instance.primary_node
5773 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5774 constants.INSTANCE_REBOOT_HARD]:
5775 for disk in instance.disks:
5776 self.cfg.SetDiskID(disk, node_current)
5777 result = self.rpc.call_instance_reboot(node_current, instance,
5779 self.op.shutdown_timeout)
5780 result.Raise("Could not reboot instance")
5782 if instance_running:
5783 result = self.rpc.call_instance_shutdown(node_current, instance,
5784 self.op.shutdown_timeout)
5785 result.Raise("Could not shutdown instance for full reboot")
5786 _ShutdownInstanceDisks(self, instance)
5788 self.LogInfo("Instance %s was already stopped, starting now",
5790 _StartInstanceDisks(self, instance, ignore_secondaries)
5791 result = self.rpc.call_instance_start(node_current, instance, None, None)
5792 msg = result.fail_msg
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Could not start instance for"
5796 " full reboot: %s" % msg)
5798 self.cfg.MarkInstanceUp(instance.name)
5801 class LUInstanceShutdown(LogicalUnit):
5802 """Shutdown an instance.
5805 HPATH = "instance-stop"
5806 HTYPE = constants.HTYPE_INSTANCE
5809 def ExpandNames(self):
5810 self._ExpandAndLockInstance()
5812 def BuildHooksEnv(self):
5815 This runs on master, primary and secondary nodes of the instance.
5818 env = _BuildInstanceHookEnvByObject(self, self.instance)
5819 env["TIMEOUT"] = self.op.timeout
5822 def BuildHooksNodes(self):
5823 """Build hooks nodes.
5826 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5829 def CheckPrereq(self):
5830 """Check prerequisites.
5832 This checks that the instance is in the cluster.
5835 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5836 assert self.instance is not None, \
5837 "Cannot retrieve locked instance %s" % self.op.instance_name
5839 self.primary_offline = \
5840 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5842 if self.primary_offline and self.op.ignore_offline_nodes:
5843 self.proc.LogWarning("Ignoring offline primary node")
5845 _CheckNodeOnline(self, self.instance.primary_node)
5847 def Exec(self, feedback_fn):
5848 """Shutdown the instance.
5851 instance = self.instance
5852 node_current = instance.primary_node
5853 timeout = self.op.timeout
5855 if not self.op.no_remember:
5856 self.cfg.MarkInstanceDown(instance.name)
5858 if self.primary_offline:
5859 assert self.op.ignore_offline_nodes
5860 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5862 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5863 msg = result.fail_msg
5865 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5867 _ShutdownInstanceDisks(self, instance)
5870 class LUInstanceReinstall(LogicalUnit):
5871 """Reinstall an instance.
5874 HPATH = "instance-reinstall"
5875 HTYPE = constants.HTYPE_INSTANCE
5878 def ExpandNames(self):
5879 self._ExpandAndLockInstance()
5881 def BuildHooksEnv(self):
5884 This runs on master, primary and secondary nodes of the instance.
5887 return _BuildInstanceHookEnvByObject(self, self.instance)
5889 def BuildHooksNodes(self):
5890 """Build hooks nodes.
5893 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5896 def CheckPrereq(self):
5897 """Check prerequisites.
5899 This checks that the instance is in the cluster and is not running.
5902 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5903 assert instance is not None, \
5904 "Cannot retrieve locked instance %s" % self.op.instance_name
5905 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5906 " offline, cannot reinstall")
5907 for node in instance.secondary_nodes:
5908 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5909 " cannot reinstall")
5911 if instance.disk_template == constants.DT_DISKLESS:
5912 raise errors.OpPrereqError("Instance '%s' has no disks" %
5913 self.op.instance_name,
5915 _CheckInstanceDown(self, instance, "cannot reinstall")
5917 if self.op.os_type is not None:
5919 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5920 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5921 instance_os = self.op.os_type
5923 instance_os = instance.os
5925 nodelist = list(instance.all_nodes)
5927 if self.op.osparams:
5928 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5929 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5930 self.os_inst = i_osdict # the new dict (without defaults)
5934 self.instance = instance
5936 def Exec(self, feedback_fn):
5937 """Reinstall the instance.
5940 inst = self.instance
5942 if self.op.os_type is not None:
5943 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5944 inst.os = self.op.os_type
5945 # Write to configuration
5946 self.cfg.Update(inst, feedback_fn)
5948 _StartInstanceDisks(self, inst, None)
5950 feedback_fn("Running the instance OS create scripts...")
5951 # FIXME: pass debug option from opcode to backend
5952 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5953 self.op.debug_level,
5954 osparams=self.os_inst)
5955 result.Raise("Could not install OS for instance %s on node %s" %
5956 (inst.name, inst.primary_node))
5958 _ShutdownInstanceDisks(self, inst)
5961 class LUInstanceRecreateDisks(LogicalUnit):
5962 """Recreate an instance's missing disks.
5965 HPATH = "instance-recreate-disks"
5966 HTYPE = constants.HTYPE_INSTANCE
5969 def CheckArguments(self):
5970 # normalise the disk list
5971 self.op.disks = sorted(frozenset(self.op.disks))
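# Normalisation sketch (input is hypothetical): duplicate disk indices are
# dropped and the resulting list is made deterministic.
#
#   >>> sorted(frozenset([2, 0, 2]))
#   [0, 2]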
5973 def ExpandNames(self):
5974 self._ExpandAndLockInstance()
5975 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5977 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5978 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5980 self.needed_locks[locking.LEVEL_NODE] = []
5982 def DeclareLocks(self, level):
5983 if level == locking.LEVEL_NODE:
5984 # if we replace the nodes, we only need to lock the old primary,
5985 # otherwise we need to lock all nodes for disk re-creation
5986 primary_only = bool(self.op.nodes)
5987 self._LockInstancesNodes(primary_only=primary_only)
5989 def BuildHooksEnv(self):
5992 This runs on master, primary and secondary nodes of the instance.
5995 return _BuildInstanceHookEnvByObject(self, self.instance)
5997 def BuildHooksNodes(self):
5998 """Build hooks nodes.
6001 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6004 def CheckPrereq(self):
6005 """Check prerequisites.
6007 This checks that the instance is in the cluster and is not running.
6010 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6011 assert instance is not None, \
6012 "Cannot retrieve locked instance %s" % self.op.instance_name
6014 if len(self.op.nodes) != len(instance.all_nodes):
6015 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6016 " %d replacement nodes were specified" %
6017 (instance.name, len(instance.all_nodes),
6018 len(self.op.nodes)),
6020 assert instance.disk_template != constants.DT_DRBD8 or \
6021 len(self.op.nodes) == 2
6022 assert instance.disk_template != constants.DT_PLAIN or \
6023 len(self.op.nodes) == 1
6024 primary_node = self.op.nodes[0]
6026 primary_node = instance.primary_node
6027 _CheckNodeOnline(self, primary_node)
6029 if instance.disk_template == constants.DT_DISKLESS:
6030 raise errors.OpPrereqError("Instance '%s' has no disks" %
6031 self.op.instance_name, errors.ECODE_INVAL)
6032 # if we replace nodes *and* the old primary is offline, we don't check that the instance is down
6034 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6035 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6036 if not (self.op.nodes and old_pnode.offline):
6037 _CheckInstanceDown(self, instance, "cannot recreate disks")
6039 if not self.op.disks:
6040 self.op.disks = range(len(instance.disks))
6042 for idx in self.op.disks:
6043 if idx >= len(instance.disks):
6044 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6046 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6047 raise errors.OpPrereqError("Can't recreate disks partially and"
6048 " change the nodes at the same time",
6050 self.instance = instance
6052 def Exec(self, feedback_fn):
6053 """Recreate the disks.
6056 # change primary node, if needed
6058 self.instance.primary_node = self.op.nodes[0]
6059 self.LogWarning("Changing the instance's nodes, you will have to"
6060 " remove any disks left on the older nodes manually")
6063 for idx, disk in enumerate(self.instance.disks):
6064 if idx not in self.op.disks: # disk idx has not been passed in
6067 # update secondaries for disks, if needed
6069 if disk.dev_type == constants.LD_DRBD8:
6070 # need to update the nodes
6071 assert len(self.op.nodes) == 2
6072 logical_id = list(disk.logical_id)
6073 logical_id[0] = self.op.nodes[0]
6074 logical_id[1] = self.op.nodes[1]
6075 disk.logical_id = tuple(logical_id)
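# logical_id layout for LD_DRBD8 disks, as assembled by _GenerateDRBD8Branch
# further below: (node_a, node_b, port, minor_a, minor_b, secret).  Only the
# two leading node slots are rewritten here; port, minors and the shared
# secret are kept as they were (example values are hypothetical).
#
#   >>> disk.logical_id
#   ('node1.example.com', 'node2.example.com', 11000, 0, 1, '0123abcd')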
6078 self.cfg.Update(self.instance, feedback_fn)
6080 _CreateDisks(self, self.instance, to_skip=to_skip)
6083 class LUInstanceRename(LogicalUnit):
6084 """Rename an instance.
6087 HPATH = "instance-rename"
6088 HTYPE = constants.HTYPE_INSTANCE
6090 def CheckArguments(self):
6094 if self.op.ip_check and not self.op.name_check:
6095 # TODO: make the ip check more flexible and not depend on the name check
6096 raise errors.OpPrereqError("IP address check requires a name check",
6099 def BuildHooksEnv(self):
6102 This runs on master, primary and secondary nodes of the instance.
6105 env = _BuildInstanceHookEnvByObject(self, self.instance)
6106 env["INSTANCE_NEW_NAME"] = self.op.new_name
6109 def BuildHooksNodes(self):
6110 """Build hooks nodes.
6113 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6116 def CheckPrereq(self):
6117 """Check prerequisites.
6119 This checks that the instance is in the cluster and is not running.
6122 self.op.instance_name = _ExpandInstanceName(self.cfg,
6123 self.op.instance_name)
6124 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6125 assert instance is not None
6126 _CheckNodeOnline(self, instance.primary_node)
6127 _CheckInstanceDown(self, instance, "cannot rename")
6128 self.instance = instance
6130 new_name = self.op.new_name
6131 if self.op.name_check:
6132 hostname = netutils.GetHostname(name=new_name)
6133 if hostname != new_name:
6134 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6136 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6137 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6138 " same as given hostname '%s'") %
6139 (hostname.name, self.op.new_name),
6141 new_name = self.op.new_name = hostname.name
6142 if (self.op.ip_check and
6143 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6144 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6145 (hostname.ip, new_name),
6146 errors.ECODE_NOTUNIQUE)
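# IP-check sketch (address is hypothetical): the check simply tries a TCP
# connection to the noded port on the candidate address; a reply means the IP
# is already in use and the rename is refused.
#
#   >>> netutils.TcpPing("192.0.2.10", constants.DEFAULT_NODED_PORT)
#   False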
6148 instance_list = self.cfg.GetInstanceList()
6149 if new_name in instance_list and new_name != instance.name:
6150 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6151 new_name, errors.ECODE_EXISTS)
6153 def Exec(self, feedback_fn):
6154 """Rename the instance.
6157 inst = self.instance
6158 old_name = inst.name
6160 rename_file_storage = False
6161 if (inst.disk_template in constants.DTS_FILEBASED and
6162 self.op.new_name != inst.name):
6163 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6164 rename_file_storage = True
6166 self.cfg.RenameInstance(inst.name, self.op.new_name)
6167 # Change the instance lock. This is definitely safe while we hold the BGL.
6168 # Otherwise the new lock would have to be added in acquired mode.
6170 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6171 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6173 # re-read the instance from the configuration after rename
6174 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6176 if rename_file_storage:
6177 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6178 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6179 old_file_storage_dir,
6180 new_file_storage_dir)
6181 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6182 " (but the instance has been renamed in Ganeti)" %
6183 (inst.primary_node, old_file_storage_dir,
6184 new_file_storage_dir))
6186 _StartInstanceDisks(self, inst, None)
6188 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6189 old_name, self.op.debug_level)
6190 msg = result.fail_msg
6192 msg = ("Could not run OS rename script for instance %s on node %s"
6193 " (but the instance has been renamed in Ganeti): %s" %
6194 (inst.name, inst.primary_node, msg))
6195 self.proc.LogWarning(msg)
6197 _ShutdownInstanceDisks(self, inst)
6202 class LUInstanceRemove(LogicalUnit):
6203 """Remove an instance.
6206 HPATH = "instance-remove"
6207 HTYPE = constants.HTYPE_INSTANCE
6210 def ExpandNames(self):
6211 self._ExpandAndLockInstance()
6212 self.needed_locks[locking.LEVEL_NODE] = []
6213 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6215 def DeclareLocks(self, level):
6216 if level == locking.LEVEL_NODE:
6217 self._LockInstancesNodes()
6219 def BuildHooksEnv(self):
6222 This runs on master, primary and secondary nodes of the instance.
6225 env = _BuildInstanceHookEnvByObject(self, self.instance)
6226 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6229 def BuildHooksNodes(self):
6230 """Build hooks nodes.
6233 nl = [self.cfg.GetMasterNode()]
6234 nl_post = list(self.instance.all_nodes) + nl
6235 return (nl, nl_post)
6237 def CheckPrereq(self):
6238 """Check prerequisites.
6240 This checks that the instance is in the cluster.
6243 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6244 assert self.instance is not None, \
6245 "Cannot retrieve locked instance %s" % self.op.instance_name
6247 def Exec(self, feedback_fn):
6248 """Remove the instance.
6251 instance = self.instance
6252 logging.info("Shutting down instance %s on node %s",
6253 instance.name, instance.primary_node)
6255 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6256 self.op.shutdown_timeout)
6257 msg = result.fail_msg
6259 if self.op.ignore_failures:
6260 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6262 raise errors.OpExecError("Could not shutdown instance %s on"
6264 (instance.name, instance.primary_node, msg))
6266 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6269 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6270 """Utility function to remove an instance.
6273 logging.info("Removing block devices for instance %s", instance.name)
6275 if not _RemoveDisks(lu, instance):
6276 if not ignore_failures:
6277 raise errors.OpExecError("Can't remove instance's disks")
6278 feedback_fn("Warning: can't remove instance's disks")
6280 logging.info("Removing instance %s out of cluster config", instance.name)
6282 lu.cfg.RemoveInstance(instance.name)
6284 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6285 "Instance lock removal conflict"
6287 # Remove lock for the instance
6288 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6291 class LUInstanceQuery(NoHooksLU):
6292 """Logical unit for querying instances.
6295 # pylint: disable-msg=W0142
6298 def CheckArguments(self):
6299 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6300 self.op.output_fields, self.op.use_locking)
6302 def ExpandNames(self):
6303 self.iq.ExpandNames(self)
6305 def DeclareLocks(self, level):
6306 self.iq.DeclareLocks(self, level)
6308 def Exec(self, feedback_fn):
6309 return self.iq.OldStyleQuery(self)
6312 class LUInstanceFailover(LogicalUnit):
6313 """Failover an instance.
6316 HPATH = "instance-failover"
6317 HTYPE = constants.HTYPE_INSTANCE
6320 def CheckArguments(self):
6321 """Check the arguments.
6324 self.iallocator = getattr(self.op, "iallocator", None)
6325 self.target_node = getattr(self.op, "target_node", None)
6327 def ExpandNames(self):
6328 self._ExpandAndLockInstance()
6330 if self.op.target_node is not None:
6331 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6333 self.needed_locks[locking.LEVEL_NODE] = []
6334 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6336 ignore_consistency = self.op.ignore_consistency
6337 shutdown_timeout = self.op.shutdown_timeout
6338 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6341 ignore_consistency=ignore_consistency,
6342 shutdown_timeout=shutdown_timeout)
6343 self.tasklets = [self._migrater]
6345 def DeclareLocks(self, level):
6346 if level == locking.LEVEL_NODE:
6347 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6348 if instance.disk_template in constants.DTS_EXT_MIRROR:
6349 if self.op.target_node is None:
6350 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6352 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6353 self.op.target_node]
6354 del self.recalculate_locks[locking.LEVEL_NODE]
6356 self._LockInstancesNodes()
6358 def BuildHooksEnv(self):
6361 This runs on master, primary and secondary nodes of the instance.
6364 instance = self._migrater.instance
6365 source_node = instance.primary_node
6366 target_node = self.op.target_node
6368 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6369 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6370 "OLD_PRIMARY": source_node,
6371 "NEW_PRIMARY": target_node,
6374 if instance.disk_template in constants.DTS_INT_MIRROR:
6375 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6376 env["NEW_SECONDARY"] = source_node
6378 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6380 env.update(_BuildInstanceHookEnvByObject(self, instance))
6384 def BuildHooksNodes(self):
6385 """Build hooks nodes.
6388 instance = self._migrater.instance
6389 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6390 return (nl, nl + [instance.primary_node])
6393 class LUInstanceMigrate(LogicalUnit):
6394 """Migrate an instance.
6396 This is migration without shutting down, compared to the failover,
6397 which is done with shutdown.
6400 HPATH = "instance-migrate"
6401 HTYPE = constants.HTYPE_INSTANCE
6404 def ExpandNames(self):
6405 self._ExpandAndLockInstance()
6407 if self.op.target_node is not None:
6408 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6410 self.needed_locks[locking.LEVEL_NODE] = []
6411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6413 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6414 cleanup=self.op.cleanup,
6416 fallback=self.op.allow_failover)
6417 self.tasklets = [self._migrater]
6419 def DeclareLocks(self, level):
6420 if level == locking.LEVEL_NODE:
6421 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6422 if instance.disk_template in constants.DTS_EXT_MIRROR:
6423 if self.op.target_node is None:
6424 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6426 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6427 self.op.target_node]
6428 del self.recalculate_locks[locking.LEVEL_NODE]
6430 self._LockInstancesNodes()
6432 def BuildHooksEnv(self):
6435 This runs on master, primary and secondary nodes of the instance.
6438 instance = self._migrater.instance
6439 source_node = instance.primary_node
6440 target_node = self.op.target_node
6441 env = _BuildInstanceHookEnvByObject(self, instance)
6443 "MIGRATE_LIVE": self._migrater.live,
6444 "MIGRATE_CLEANUP": self.op.cleanup,
6445 "OLD_PRIMARY": source_node,
6446 "NEW_PRIMARY": target_node,
6449 if instance.disk_template in constants.DTS_INT_MIRROR:
6450 env["OLD_SECONDARY"] = target_node
6451 env["NEW_SECONDARY"] = source_node
6453 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6457 def BuildHooksNodes(self):
6458 """Build hooks nodes.
6461 instance = self._migrater.instance
6462 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6463 return (nl, nl + [instance.primary_node])
6466 class LUInstanceMove(LogicalUnit):
6467 """Move an instance by data-copying.
6470 HPATH = "instance-move"
6471 HTYPE = constants.HTYPE_INSTANCE
6474 def ExpandNames(self):
6475 self._ExpandAndLockInstance()
6476 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6477 self.op.target_node = target_node
6478 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6479 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6481 def DeclareLocks(self, level):
6482 if level == locking.LEVEL_NODE:
6483 self._LockInstancesNodes(primary_only=True)
6485 def BuildHooksEnv(self):
6488 This runs on master, primary and secondary nodes of the instance.
6492 "TARGET_NODE": self.op.target_node,
6493 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6495 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6498 def BuildHooksNodes(self):
6499 """Build hooks nodes.
6503 self.cfg.GetMasterNode(),
6504 self.instance.primary_node,
6505 self.op.target_node,
6509 def CheckPrereq(self):
6510 """Check prerequisites.
6512 This checks that the instance is in the cluster.
6515 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6516 assert self.instance is not None, \
6517 "Cannot retrieve locked instance %s" % self.op.instance_name
6519 node = self.cfg.GetNodeInfo(self.op.target_node)
6520 assert node is not None, \
6521 "Cannot retrieve locked node %s" % self.op.target_node
6523 self.target_node = target_node = node.name
6525 if target_node == instance.primary_node:
6526 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6527 (instance.name, target_node),
6530 bep = self.cfg.GetClusterInfo().FillBE(instance)
6532 for idx, dsk in enumerate(instance.disks):
6533 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6534 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6535 " cannot copy" % idx, errors.ECODE_STATE)
6537 _CheckNodeOnline(self, target_node)
6538 _CheckNodeNotDrained(self, target_node)
6539 _CheckNodeVmCapable(self, target_node)
6541 if instance.admin_up:
6542 # check memory requirements on the target node
6543 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6544 instance.name, bep[constants.BE_MEMORY],
6545 instance.hypervisor)
6547 self.LogInfo("Not checking memory on the secondary node as"
6548 " instance will not be started")
6550 # check bridge existence
6551 _CheckInstanceBridgesExist(self, instance, node=target_node)
6553 def Exec(self, feedback_fn):
6554 """Move an instance.
6556 The move is done by shutting it down on its present node, copying
6557 the data over (slow) and starting it on the new node.
6560 instance = self.instance
6562 source_node = instance.primary_node
6563 target_node = self.target_node
6565 self.LogInfo("Shutting down instance %s on source node %s",
6566 instance.name, source_node)
6568 result = self.rpc.call_instance_shutdown(source_node, instance,
6569 self.op.shutdown_timeout)
6570 msg = result.fail_msg
6572 if self.op.ignore_consistency:
6573 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6574 " Proceeding anyway. Please make sure node"
6575 " %s is down. Error details: %s",
6576 instance.name, source_node, source_node, msg)
6578 raise errors.OpExecError("Could not shutdown instance %s on"
6580 (instance.name, source_node, msg))
6582 # create the target disks
6584 _CreateDisks(self, instance, target_node=target_node)
6585 except errors.OpExecError:
6586 self.LogWarning("Device creation failed, reverting...")
6588 _RemoveDisks(self, instance, target_node=target_node)
6590 self.cfg.ReleaseDRBDMinors(instance.name)
6593 cluster_name = self.cfg.GetClusterInfo().cluster_name
6596 # activate, get path, copy the data over
6597 for idx, disk in enumerate(instance.disks):
6598 self.LogInfo("Copying data for disk %d", idx)
6599 result = self.rpc.call_blockdev_assemble(target_node, disk,
6600 instance.name, True, idx)
6602 self.LogWarning("Can't assemble newly created disk %d: %s",
6603 idx, result.fail_msg)
6604 errs.append(result.fail_msg)
6606 dev_path = result.payload
6607 result = self.rpc.call_blockdev_export(source_node, disk,
6608 target_node, dev_path,
6611 self.LogWarning("Can't copy data over for disk %d: %s",
6612 idx, result.fail_msg)
6613 errs.append(result.fail_msg)
6617 self.LogWarning("Some disks failed to copy, aborting")
6619 _RemoveDisks(self, instance, target_node=target_node)
6621 self.cfg.ReleaseDRBDMinors(instance.name)
6622 raise errors.OpExecError("Errors during disk copy: %s" %
6625 instance.primary_node = target_node
6626 self.cfg.Update(instance, feedback_fn)
6628 self.LogInfo("Removing the disks on the original node")
6629 _RemoveDisks(self, instance, target_node=source_node)
6631 # Only start the instance if it's marked as up
6632 if instance.admin_up:
6633 self.LogInfo("Starting instance %s on node %s",
6634 instance.name, target_node)
6636 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6637 ignore_secondaries=True)
6639 _ShutdownInstanceDisks(self, instance)
6640 raise errors.OpExecError("Can't activate the instance's disks")
6642 result = self.rpc.call_instance_start(target_node, instance, None, None)
6643 msg = result.fail_msg
6645 _ShutdownInstanceDisks(self, instance)
6646 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6647 (instance.name, target_node, msg))
6650 class LUNodeMigrate(LogicalUnit):
6651 """Migrate all instances from a node.
6654 HPATH = "node-migrate"
6655 HTYPE = constants.HTYPE_NODE
6658 def CheckArguments(self):
6661 def ExpandNames(self):
6662 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6664 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6665 self.needed_locks = {
6666 locking.LEVEL_NODE: [self.op.node_name],
6669 def BuildHooksEnv(self):
6672 This runs on the master, the primary and all the secondaries.
6676 "NODE_NAME": self.op.node_name,
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()]
6686 def CheckPrereq(self):
6689 def Exec(self, feedback_fn):
6690 # Prepare jobs for migration instances
6692 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6695 iallocator=self.op.iallocator,
6696 target_node=self.op.target_node)]
6697 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6700 # TODO: Run iallocator in this opcode and pass correct placement options to
6701 # OpInstanceMigrate. Since other jobs can modify the cluster between
6702 # running the iallocator and the actual migration, a good consistency model
6703 # will have to be found.
6705 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6706 frozenset([self.op.node_name]))
6708 return ResultWithJobs(jobs)
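# Jobs shape sketch (instance names are hypothetical): one single-opcode job
# is submitted per primary instance of the evacuated node, e.g.
#
#   [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)]]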
6711 class TLMigrateInstance(Tasklet):
6712 """Tasklet class for instance migration.
6715 @ivar live: whether the migration will be done live or non-live;
6716 this variable is initialized only after CheckPrereq has run
6717 @type cleanup: boolean
6718 @ivar cleanup: Whether we clean up from a failed migration
6719 @type iallocator: string
6720 @ivar iallocator: The iallocator used to determine target_node
6721 @type target_node: string
6722 @ivar target_node: If given, the target_node to reallocate the instance to
6723 @type failover: boolean
6724 @ivar failover: Whether operation results in failover or migration
6725 @type fallback: boolean
6726 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
6728 @type ignore_consistency: boolean
6729 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
6731 @type shutdown_timeout: int
6732 @ivar shutdown_timeout: the timeout to use for the shutdown in case of failover
6735 def __init__(self, lu, instance_name, cleanup=False,
6736 failover=False, fallback=False,
6737 ignore_consistency=False,
6738 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6739 """Initializes this class.
6742 Tasklet.__init__(self, lu)
6745 self.instance_name = instance_name
6746 self.cleanup = cleanup
6747 self.live = False # will be overridden later
6748 self.failover = failover
6749 self.fallback = fallback
6750 self.ignore_consistency = ignore_consistency
6751 self.shutdown_timeout = shutdown_timeout
6753 def CheckPrereq(self):
6754 """Check prerequisites.
6756 This checks that the instance is in the cluster.
6759 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6760 instance = self.cfg.GetInstanceInfo(instance_name)
6761 assert instance is not None
6762 self.instance = instance
6764 if (not self.cleanup and not instance.admin_up and not self.failover and
6766 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6768 self.failover = True
6770 if instance.disk_template not in constants.DTS_MIRRORED:
6775 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6776 " %s" % (instance.disk_template, text),
6779 if instance.disk_template in constants.DTS_EXT_MIRROR:
6780 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6782 if self.lu.op.iallocator:
6783 self._RunAllocator()
6785 # We set self.target_node as it is required by
6787 self.target_node = self.lu.op.target_node
6789 # self.target_node is already populated, either directly or by the iallocator run
6791 target_node = self.target_node
6792 if self.target_node == instance.primary_node:
6793 raise errors.OpPrereqError("Cannot migrate instance %s"
6794 " to its primary (%s)" %
6795 (instance.name, instance.primary_node))
6797 if len(self.lu.tasklets) == 1:
6798 # It is safe to release locks only when we're the only tasklet
6800 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6801 keep=[instance.primary_node, self.target_node])
6804 secondary_nodes = instance.secondary_nodes
6805 if not secondary_nodes:
6806 raise errors.ConfigurationError("No secondary node but using"
6807 " %s disk template" %
6808 instance.disk_template)
6809 target_node = secondary_nodes[0]
6810 if self.lu.op.iallocator or (self.lu.op.target_node and
6811 self.lu.op.target_node != target_node):
6813 text = "failed over"
6816 raise errors.OpPrereqError("Instances with disk template %s cannot"
6817 " be %s to arbitrary nodes"
6818 " (neither an iallocator nor a target"
6819 " node can be passed)" %
6820 (instance.disk_template, text),
6823 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6825 # check memory requirements on the secondary node
6826 if not self.failover or instance.admin_up:
6827 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6828 instance.name, i_be[constants.BE_MEMORY],
6829 instance.hypervisor)
6831 self.lu.LogInfo("Not checking memory on the secondary node as"
6832 " instance will not be started")
6834 # check bridge existence
6835 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6837 if not self.cleanup:
6838 _CheckNodeNotDrained(self.lu, target_node)
6839 if not self.failover:
6840 result = self.rpc.call_instance_migratable(instance.primary_node,
6842 if result.fail_msg and self.fallback:
6843 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6845 self.failover = True
6847 result.Raise("Can't migrate, please use failover",
6848 prereq=True, ecode=errors.ECODE_STATE)
6850 assert not (self.failover and self.cleanup)
6852 if not self.failover:
6853 if self.lu.op.live is not None and self.lu.op.mode is not None:
6854 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6855 " parameters are accepted",
6857 if self.lu.op.live is not None:
6859 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6861 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6862 # reset the 'live' parameter to None so that repeated
6863 # invocations of CheckPrereq do not raise an exception
6864 self.lu.op.live = None
6865 elif self.lu.op.mode is None:
6866 # read the default value from the hypervisor
6867 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6869 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6871 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6873 # Failover is never live
6876 def _RunAllocator(self):
6877 """Run the allocator based on input opcode.
6880 ial = IAllocator(self.cfg, self.rpc,
6881 mode=constants.IALLOCATOR_MODE_RELOC,
6882 name=self.instance_name,
6883 # TODO See why hail breaks with a single node below
6884 relocate_from=[self.instance.primary_node,
6885 self.instance.primary_node],
6888 ial.Run(self.lu.op.iallocator)
6891 raise errors.OpPrereqError("Can't compute nodes using"
6892 " iallocator '%s': %s" %
6893 (self.lu.op.iallocator, ial.info),
6895 if len(ial.result) != ial.required_nodes:
6896 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6897 " of nodes (%s), required %s" %
6898 (self.lu.op.iallocator, len(ial.result),
6899 ial.required_nodes), errors.ECODE_FAULT)
6900 self.target_node = ial.result[0]
6901 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6902 self.instance_name, self.lu.op.iallocator,
6903 utils.CommaJoin(ial.result))
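# Result shape sketch (node name is hypothetical): for a relocation request
# the allocator must return exactly ial.required_nodes node names, and only
# the first entry is used as the migration target.
#
#   >>> ial.result
#   ['node3.example.com']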
6905 def _WaitUntilSync(self):
6906 """Poll with custom rpc for disk sync.
6908 This uses our own step-based rpc call.
6911 self.feedback_fn("* wait until resync is done")
6915 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6917 self.instance.disks)
6919 for node, nres in result.items():
6920 nres.Raise("Cannot resync disks on node %s" % node)
6921 node_done, node_percent = nres.payload
6922 all_done = all_done and node_done
6923 if node_percent is not None:
6924 min_percent = min(min_percent, node_percent)
6926 if min_percent < 100:
6927 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6930 def _EnsureSecondary(self, node):
6931 """Demote a node to secondary.
6934 self.feedback_fn("* switching node %s to secondary mode" % node)
6936 for dev in self.instance.disks:
6937 self.cfg.SetDiskID(dev, node)
6939 result = self.rpc.call_blockdev_close(node, self.instance.name,
6940 self.instance.disks)
6941 result.Raise("Cannot change disk to secondary on node %s" % node)
6943 def _GoStandalone(self):
6944 """Disconnect from the network.
6947 self.feedback_fn("* changing into standalone mode")
6948 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6949 self.instance.disks)
6950 for node, nres in result.items():
6951 nres.Raise("Cannot disconnect disks node %s" % node)
6953 def _GoReconnect(self, multimaster):
6954 """Reconnect to the network.
6960 msg = "single-master"
6961 self.feedback_fn("* changing disks into %s mode" % msg)
6962 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6963 self.instance.disks,
6964 self.instance.name, multimaster)
6965 for node, nres in result.items():
6966 nres.Raise("Cannot change disks config on node %s" % node)
6968 def _ExecCleanup(self):
6969 """Try to cleanup after a failed migration.
6971 The cleanup is done by:
6972 - check that the instance is running only on one node
6973 (and update the config if needed)
6974 - change disks on its secondary node to secondary
6975 - wait until disks are fully synchronized
6976 - disconnect from the network
6977 - change disks into single-master mode
6978 - wait again until disks are fully synchronized
6981 instance = self.instance
6982 target_node = self.target_node
6983 source_node = self.source_node
6985 # check running on only one node
6986 self.feedback_fn("* checking where the instance actually runs"
6987 " (if this hangs, the hypervisor might be in"
6989 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6990 for node, result in ins_l.items():
6991 result.Raise("Can't contact node %s" % node)
6993 runningon_source = instance.name in ins_l[source_node].payload
6994 runningon_target = instance.name in ins_l[target_node].payload
6996 if runningon_source and runningon_target:
6997 raise errors.OpExecError("Instance seems to be running on two nodes,"
6998 " or the hypervisor is confused; you will have"
6999 " to ensure manually that it runs only on one"
7000 " and restart this operation")
7002 if not (runningon_source or runningon_target):
7003 raise errors.OpExecError("Instance does not seem to be running at all;"
7004 " in this case it's safer to repair by"
7005 " running 'gnt-instance stop' to ensure disk"
7006 " shutdown, and then restarting it")
7008 if runningon_target:
7009 # the migration has actually succeeded, we need to update the config
7010 self.feedback_fn("* instance running on secondary node (%s),"
7011 " updating config" % target_node)
7012 instance.primary_node = target_node
7013 self.cfg.Update(instance, self.feedback_fn)
7014 demoted_node = source_node
7016 self.feedback_fn("* instance confirmed to be running on its"
7017 " primary node (%s)" % source_node)
7018 demoted_node = target_node
7020 if instance.disk_template in constants.DTS_INT_MIRROR:
7021 self._EnsureSecondary(demoted_node)
7023 self._WaitUntilSync()
7024 except errors.OpExecError:
7025 # we ignore errors here, since if the device is standalone, it
7026 # won't be able to sync
7028 self._GoStandalone()
7029 self._GoReconnect(False)
7030 self._WaitUntilSync()
7032 self.feedback_fn("* done")
7034 def _RevertDiskStatus(self):
7035 """Try to revert the disk status after a failed migration.
7038 target_node = self.target_node
7039 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7043 self._EnsureSecondary(target_node)
7044 self._GoStandalone()
7045 self._GoReconnect(False)
7046 self._WaitUntilSync()
7047 except errors.OpExecError, err:
7048 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7049 " please try to recover the instance manually;"
7050 " error '%s'" % str(err))
7052 def _AbortMigration(self):
7053 """Call the hypervisor code to abort a started migration.
7056 instance = self.instance
7057 target_node = self.target_node
7058 migration_info = self.migration_info
7060 abort_result = self.rpc.call_finalize_migration(target_node,
7064 abort_msg = abort_result.fail_msg
7066 logging.error("Aborting migration failed on target node %s: %s",
7067 target_node, abort_msg)
7068 # Don't raise an exception here, as we still have to try to revert the
7069 # disk status, even if this step failed.
7071 def _ExecMigration(self):
7072 """Migrate an instance.
7074 The migrate is done by:
7075 - change the disks into dual-master mode
7076 - wait until disks are fully synchronized again
7077 - migrate the instance
7078 - change disks on the new secondary node (the old primary) to secondary
7079 - wait until disks are fully synchronized
7080 - change disks into single-master mode
7083 instance = self.instance
7084 target_node = self.target_node
7085 source_node = self.source_node
7087 self.feedback_fn("* checking disk consistency between source and target")
7088 for dev in instance.disks:
7089 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7090 raise errors.OpExecError("Disk %s is degraded or not fully"
7091 " synchronized on target node,"
7092 " aborting migration" % dev.iv_name)
7094 # First get the migration information from the remote node
7095 result = self.rpc.call_migration_info(source_node, instance)
7096 msg = result.fail_msg
7098 log_err = ("Failed fetching source migration information from %s: %s" %
7100 logging.error(log_err)
7101 raise errors.OpExecError(log_err)
7103 self.migration_info = migration_info = result.payload
7105 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7106 # Then switch the disks to master/master mode
7107 self._EnsureSecondary(target_node)
7108 self._GoStandalone()
7109 self._GoReconnect(True)
7110 self._WaitUntilSync()
7112 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7113 result = self.rpc.call_accept_instance(target_node,
7116 self.nodes_ip[target_node])
7118 msg = result.fail_msg
7120 logging.error("Instance pre-migration failed, trying to revert"
7121 " disk status: %s", msg)
7122 self.feedback_fn("Pre-migration failed, aborting")
7123 self._AbortMigration()
7124 self._RevertDiskStatus()
7125 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7126 (instance.name, msg))
7128 self.feedback_fn("* migrating instance to %s" % target_node)
7129 result = self.rpc.call_instance_migrate(source_node, instance,
7130 self.nodes_ip[target_node],
7132 msg = result.fail_msg
7134 logging.error("Instance migration failed, trying to revert"
7135 " disk status: %s", msg)
7136 self.feedback_fn("Migration failed, aborting")
7137 self._AbortMigration()
7138 self._RevertDiskStatus()
7139 raise errors.OpExecError("Could not migrate instance %s: %s" %
7140 (instance.name, msg))
7142 instance.primary_node = target_node
7143 # distribute new instance config to the other nodes
7144 self.cfg.Update(instance, self.feedback_fn)
7146 result = self.rpc.call_finalize_migration(target_node,
7150 msg = result.fail_msg
7152 logging.error("Instance migration succeeded, but finalization failed:"
7154 raise errors.OpExecError("Could not finalize instance migration: %s" %
7157 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7158 self._EnsureSecondary(source_node)
7159 self._WaitUntilSync()
7160 self._GoStandalone()
7161 self._GoReconnect(False)
7162 self._WaitUntilSync()
7164 self.feedback_fn("* done")
7166 def _ExecFailover(self):
7167 """Failover an instance.
7169 The failover is done by shutting it down on its present node and
7170 starting it on the secondary.
7173 instance = self.instance
7174 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7176 source_node = instance.primary_node
7177 target_node = self.target_node
7179 if instance.admin_up:
7180 self.feedback_fn("* checking disk consistency between source and target")
7181 for dev in instance.disks:
7182 # for drbd, these are drbd over lvm
7183 if not _CheckDiskConsistency(self, dev, target_node, False):
7184 if not self.ignore_consistency:
7185 raise errors.OpExecError("Disk %s is degraded on target node,"
7186 " aborting failover" % dev.iv_name)
7188 self.feedback_fn("* not checking disk consistency as instance is not"
7191 self.feedback_fn("* shutting down instance on source node")
7192 logging.info("Shutting down instance %s on node %s",
7193 instance.name, source_node)
7195 result = self.rpc.call_instance_shutdown(source_node, instance,
7196 self.shutdown_timeout)
7197 msg = result.fail_msg
7199 if self.ignore_consistency or primary_node.offline:
7200 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7201 " proceeding anyway; please make sure node"
7202 " %s is down; error details: %s",
7203 instance.name, source_node, source_node, msg)
7205 raise errors.OpExecError("Could not shutdown instance %s on"
7207 (instance.name, source_node, msg))
7209 self.feedback_fn("* deactivating the instance's disks on source node")
7210 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7211 raise errors.OpExecError("Can't shut down the instance's disks.")
7213 instance.primary_node = target_node
7214 # distribute new instance config to the other nodes
7215 self.cfg.Update(instance, self.feedback_fn)
7217 # Only start the instance if it's marked as up
7218 if instance.admin_up:
7219 self.feedback_fn("* activating the instance's disks on target node")
7220 logging.info("Starting instance %s on node %s",
7221 instance.name, target_node)
7223 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7224 ignore_secondaries=True)
7226 _ShutdownInstanceDisks(self, instance)
7227 raise errors.OpExecError("Can't activate the instance's disks")
7229 self.feedback_fn("* starting the instance on the target node")
7230 result = self.rpc.call_instance_start(target_node, instance, None, None)
7231 msg = result.fail_msg
7233 _ShutdownInstanceDisks(self, instance)
7234 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7235 (instance.name, target_node, msg))
7237 def Exec(self, feedback_fn):
7238 """Perform the migration.
7241 self.feedback_fn = feedback_fn
7242 self.source_node = self.instance.primary_node
7244 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7245 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7246 self.target_node = self.instance.secondary_nodes[0]
7247 # Otherwise self.target_node has been populated either
7248 # directly, or through an iallocator.
7250 self.all_nodes = [self.source_node, self.target_node]
7252 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7253 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7257 feedback_fn("Failover instance %s" % self.instance.name)
7258 self._ExecFailover()
7260 feedback_fn("Migrating instance %s" % self.instance.name)
7263 return self._ExecCleanup()
7265 return self._ExecMigration()
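# Dispatch sketch: self.nodes_ip maps each involved node to its secondary IP
# (it is used for the DRBD network reconfiguration calls above), and exactly
# one of failover, cleanup or migration is then executed.  Example mapping
# with hypothetical addresses:
#
#   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}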
7268 def _CreateBlockDev(lu, node, instance, device, force_create,
7270 """Create a tree of block devices on a given node.
7272 If this device type has to be created on secondaries, create it and all its children.
7275 If not, just recurse to children keeping the same 'force' value.
7277 @param lu: the lu on whose behalf we execute
7278 @param node: the node on which to create the device
7279 @type instance: L{objects.Instance}
7280 @param instance: the instance which owns the device
7281 @type device: L{objects.Disk}
7282 @param device: the device to create
7283 @type force_create: boolean
7284 @param force_create: whether to force creation of this device; this
7285 will be changed to True whenever we find a device whose
7286 CreateOnSecondary() method returns True
7287 @param info: the extra 'metadata' we should attach to the device
7288 (this will be represented as a LVM tag)
7289 @type force_open: boolean
7290 @param force_open: this parameter will be passed to the
7291 L{backend.BlockdevCreate} function where it specifies
7292 whether we run on primary or not, and it affects both
7293 the child assembly and the device's own Open() execution
7296 if device.CreateOnSecondary():
7300 for child in device.children:
7301 _CreateBlockDev(lu, node, instance, child, force_create,
7304 if not force_create:
7307 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7310 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7311 """Create a single block device on a given node.
7313 This will not recurse over children of the device, so they must be created in advance.
7316 @param lu: the lu on whose behalf we execute
7317 @param node: the node on which to create the device
7318 @type instance: L{objects.Instance}
7319 @param instance: the instance which owns the device
7320 @type device: L{objects.Disk}
7321 @param device: the device to create
7322 @param info: the extra 'metadata' we should attach to the device
7323 (this will be represented as a LVM tag)
7324 @type force_open: boolean
7325 @param force_open: this parameter will be passed to the
7326 L{backend.BlockdevCreate} function where it specifies
7327 whether we run on primary or not, and it affects both
7328 the child assembly and the device's own Open() execution
7331 lu.cfg.SetDiskID(device, node)
7332 result = lu.rpc.call_blockdev_create(node, device, device.size,
7333 instance.name, force_open, info)
7334 result.Raise("Can't create block device %s on"
7335 " node %s for instance %s" % (device, node, instance.name))
7336 if device.physical_id is None:
7337 device.physical_id = result.payload
7340 def _GenerateUniqueNames(lu, exts):
7341 """Generate a suitable LV name.
7343 This will generate a logical volume name for the given instance.
7348 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7349 results.append("%s%s" % (new_id, val))
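# Naming sketch (UUID is hypothetical): each requested extension is appended
# to a freshly generated unique ID, so exts like ".disk0" yield LV names of
# the form
#
#   "8f2cbee4-0d2a-4a56-a1fb-223344556677.disk0"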
7353 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7354 iv_name, p_minor, s_minor):
7355 """Generate a drbd8 device complete with its children.
7358 assert len(vgnames) == len(names) == 2
7359 port = lu.cfg.AllocatePort()
7360 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7361 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7362 logical_id=(vgnames[0], names[0]))
7363 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7364 logical_id=(vgnames[1], names[1]))
7365 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7366 logical_id=(primary, secondary, port,
7369 children=[dev_data, dev_meta],
7374 def _GenerateDiskTemplate(lu, template_name,
7375 instance_name, primary_node,
7376 secondary_nodes, disk_info,
7377 file_storage_dir, file_driver,
7378 base_index, feedback_fn):
7379 """Generate the entire disk layout for a given template type.
7382 #TODO: compute space requirements
7384 vgname = lu.cfg.GetVGName()
7385 disk_count = len(disk_info)
7387 if template_name == constants.DT_DISKLESS:
7389 elif template_name == constants.DT_PLAIN:
7390 if len(secondary_nodes) != 0:
7391 raise errors.ProgrammerError("Wrong template configuration")
7393 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7394 for i in range(disk_count)])
7395 for idx, disk in enumerate(disk_info):
7396 disk_index = idx + base_index
7397 vg = disk.get(constants.IDISK_VG, vgname)
7398 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7399 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7400 size=disk[constants.IDISK_SIZE],
7401 logical_id=(vg, names[idx]),
7402 iv_name="disk/%d" % disk_index,
7403 mode=disk[constants.IDISK_MODE])
7404 disks.append(disk_dev)
7405 elif template_name == constants.DT_DRBD8:
7406 if len(secondary_nodes) != 1:
7407 raise errors.ProgrammerError("Wrong template configuration")
7408 remote_node = secondary_nodes[0]
7409 minors = lu.cfg.AllocateDRBDMinor(
7410 [primary_node, remote_node] * len(disk_info), instance_name)
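# Minor-allocation sketch: one DRBD minor is requested per disk per node, in
# the order [primary, secondary, primary, secondary, ...], so for two disks
# the flat result pairs up as (values hypothetical)
#
#   minors = [0, 0, 1, 1]   # disk idx uses minors[2*idx] / minors[2*idx + 1]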
7413 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7414 for i in range(disk_count)]):
7415 names.append(lv_prefix + "_data")
7416 names.append(lv_prefix + "_meta")
7417 for idx, disk in enumerate(disk_info):
7418 disk_index = idx + base_index
7419 data_vg = disk.get(constants.IDISK_VG, vgname)
7420 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7421 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7422 disk[constants.IDISK_SIZE],
7424 names[idx * 2:idx * 2 + 2],
7425 "disk/%d" % disk_index,
7426 minors[idx * 2], minors[idx * 2 + 1])
7427 disk_dev.mode = disk[constants.IDISK_MODE]
7428 disks.append(disk_dev)
7429 elif template_name == constants.DT_FILE:
7430 if len(secondary_nodes) != 0:
7431 raise errors.ProgrammerError("Wrong template configuration")
7433 opcodes.RequireFileStorage()
7435 for idx, disk in enumerate(disk_info):
7436 disk_index = idx + base_index
7437 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7438 size=disk[constants.IDISK_SIZE],
7439 iv_name="disk/%d" % disk_index,
7440 logical_id=(file_driver,
7441 "%s/disk%d" % (file_storage_dir,
7443 mode=disk[constants.IDISK_MODE])
7444 disks.append(disk_dev)
7445 elif template_name == constants.DT_SHARED_FILE:
7446 if len(secondary_nodes) != 0:
7447 raise errors.ProgrammerError("Wrong template configuration")
7449 opcodes.RequireSharedFileStorage()
7451 for idx, disk in enumerate(disk_info):
7452 disk_index = idx + base_index
7453 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7454 size=disk[constants.IDISK_SIZE],
7455 iv_name="disk/%d" % disk_index,
7456 logical_id=(file_driver,
7457 "%s/disk%d" % (file_storage_dir,
7459 mode=disk[constants.IDISK_MODE])
7460 disks.append(disk_dev)
7461 elif template_name == constants.DT_BLOCK:
7462 if len(secondary_nodes) != 0:
7463 raise errors.ProgrammerError("Wrong template configuration")
7465 for idx, disk in enumerate(disk_info):
7466 disk_index = idx + base_index
7467 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7468 size=disk[constants.IDISK_SIZE],
7469 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7470 disk[constants.IDISK_ADOPT]),
7471 iv_name="disk/%d" % disk_index,
7472 mode=disk[constants.IDISK_MODE])
7473 disks.append(disk_dev)
7476 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
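# Shape of a single disk_info entry consumed above (values are hypothetical;
# only the keys actually read by this function are shown):
#
#   {constants.IDISK_SIZE: 10240,              # size in MiB
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg",              # optional, LVM-based templates
#    constants.IDISK_METAVG: "xenvg",          # optional, DRBD metadata VG
#    constants.IDISK_ADOPT: "/dev/sdb1"}       # only for DT_BLOCK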
7480 def _GetInstanceInfoText(instance):
7481 """Compute that text that should be added to the disk's metadata.
7484 return "originstname+%s" % instance.name
7487 def _CalcEta(time_taken, written, total_size):
7488 """Calculates the ETA based on size written and total size.
7490 @param time_taken: The time taken so far
7491 @param written: amount written so far
7492 @param total_size: The total size of data to be written
7493 @return: The remaining time in seconds
7496 avg_time = time_taken / float(written)
7497 return (total_size - written) * avg_time
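# Worked example with hypothetical numbers: if 200 MiB were written in 10
# seconds out of 1000 MiB total, the average time per MiB is 0.05 s, so the
# remaining 800 MiB are estimated to take 40 seconds.
#
#   >>> _CalcEta(10.0, 200, 1000)
#   40.0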
7500 def _WipeDisks(lu, instance):
7501 """Wipes instance disks.
7503 @type lu: L{LogicalUnit}
7504 @param lu: the logical unit on whose behalf we execute
7505 @type instance: L{objects.Instance}
7506 @param instance: the instance whose disks we should wipe
7507 @return: the success of the wipe
7510 node = instance.primary_node
7512 for device in instance.disks:
7513 lu.cfg.SetDiskID(device, node)
7515 logging.info("Pause sync of instance %s disks", instance.name)
7516 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7518 for idx, success in enumerate(result.payload):
7520 logging.warn("pause-sync of instance %s for disks %d failed",
7524 for idx, device in enumerate(instance.disks):
7525 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
7526 # at most MAX_WIPE_CHUNK
7527 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7528 constants.MIN_WIPE_CHUNK_PERCENT)
7529 # we _must_ make this an int, otherwise rounding errors will occur
7531 wipe_chunk_size = int(wipe_chunk_size)
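# Illustrative sizing (assuming MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB; the actual values live in constants.py): a
# 4096 MiB disk would be wiped in min(1024, 4096 * 0.10) = 409 MiB chunks,
# while a 100 GiB disk would be capped at 1024 MiB per chunk.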
7533 lu.LogInfo("* Wiping disk %d", idx)
7534 logging.info("Wiping disk %d for instance %s, node %s using"
7535 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7537 offset = 0
7538 size = device.size
7539 last_output = 0
7540 start_time = time.time()
7542 while offset < size:
7543 wipe_size = min(wipe_chunk_size, size - offset)
7544 logging.debug("Wiping disk %d, offset %s, chunk %s",
7545 idx, offset, wipe_size)
7546 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7547 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7548 (idx, offset, wipe_size))
7549 now = time.time()
7550 offset += wipe_size
7551 if now - last_output >= 60:
7552 eta = _CalcEta(now - start_time, offset, size)
7553 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7554 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7555 last_output = now
7557 logging.info("Resume sync of instance %s disks", instance.name)
7559 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7561 for idx, success in enumerate(result.payload):
7562 if not success:
7563 lu.LogWarning("Resume sync of disk %d failed, please have a"
7564 " look at the status and troubleshoot the issue", idx)
7565 logging.warn("resume-sync of instance %s for disks %d failed",
7566 instance.name, idx)
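# Note: _WipeDisks is invoked from LUInstanceCreate.Exec (below) when the
# cluster-wide prealloc_wipe_disks option is set and the disks were not
# adopted; failures there are logged and the disk setup is then treated as
# aborted (see the disk_abort handling in that method).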
7569 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7570 """Create all disks for an instance.
7572 This abstracts away some work from AddInstance.
7574 @type lu: L{LogicalUnit}
7575 @param lu: the logical unit on whose behalf we execute
7576 @type instance: L{objects.Instance}
7577 @param instance: the instance whose disks we should create
7578 @type to_skip: None or list
7579 @param to_skip: list of indices to skip
7580 @type target_node: string
7581 @param target_node: if passed, overrides the target node for creation
7583 @return: the success of the creation
7585 """
7586 info = _GetInstanceInfoText(instance)
7587 if target_node is None:
7588 pnode = instance.primary_node
7589 all_nodes = instance.all_nodes
7590 else:
7591 pnode = target_node
7592 all_nodes = [pnode]
7594 if instance.disk_template in constants.DTS_FILEBASED:
7595 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7596 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7598 result.Raise("Failed to create directory '%s' on"
7599 " node %s" % (file_storage_dir, pnode))
7601 # Note: this needs to be kept in sync with adding of disks in
7602 # LUInstanceSetParams
7603 for idx, device in enumerate(instance.disks):
7604 if to_skip and idx in to_skip:
7605 continue
7606 logging.info("Creating volume %s for instance %s",
7607 device.iv_name, instance.name)
7609 for node in all_nodes:
7610 f_create = node == pnode
7611 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7614 def _RemoveDisks(lu, instance, target_node=None):
7615 """Remove all disks for an instance.
7617 This abstracts away some work from `AddInstance()` and
7618 `RemoveInstance()`. Note that in case some of the devices couldn't
7619 be removed, the removal will continue with the other ones (compare
7620 with `_CreateDisks()`).
7622 @type lu: L{LogicalUnit}
7623 @param lu: the logical unit on whose behalf we execute
7624 @type instance: L{objects.Instance}
7625 @param instance: the instance whose disks we should remove
7626 @type target_node: string
7627 @param target_node: used to override the node on which to remove the disks
7629 @return: the success of the removal
7631 """
7632 logging.info("Removing block devices for instance %s", instance.name)
7634 all_result = True
7635 for device in instance.disks:
7636 if target_node:
7637 edata = [(target_node, device)]
7638 else:
7639 edata = device.ComputeNodeTree(instance.primary_node)
7640 for node, disk in edata:
7641 lu.cfg.SetDiskID(disk, node)
7642 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7643 if msg:
7644 lu.LogWarning("Could not remove block device %s on node %s,"
7645 " continuing anyway: %s", device.iv_name, node, msg)
7646 all_result = False
7648 if instance.disk_template == constants.DT_FILE:
7649 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7650 if target_node:
7651 tgt = target_node
7652 else:
7653 tgt = instance.primary_node
7654 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7655 if result.fail_msg:
7656 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7657 file_storage_dir, instance.primary_node, result.fail_msg)
7658 all_result = False
7660 return all_result
7663 def _ComputeDiskSizePerVG(disk_template, disks):
7664 """Compute disk size requirements in the volume group
7667 def _compute(disks, payload):
7668 """Universal algorithm.
7673 vgs[disk[constants.IDISK_VG]] = \
7674 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7678 # Required free disk space as a function of disk and swap space
7679 req_size_dict = {
7680 constants.DT_DISKLESS: {},
7681 constants.DT_PLAIN: _compute(disks, 0),
7682 # 128 MB are added for drbd metadata for each disk
7683 constants.DT_DRBD8: _compute(disks, 128),
7684 constants.DT_FILE: {},
7685 constants.DT_SHARED_FILE: {},
7686 }
7688 if disk_template not in req_size_dict:
7689 raise errors.ProgrammerError("Disk template '%s' size requirement"
7690 " is unknown" % disk_template)
7692 return req_size_dict[disk_template]
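# Illustrative example (hypothetical VG names): for two DRBD8 disks of
# 1024 MiB in "xenvg" and 2048 MiB in "othervg", the result would be
# {"xenvg": 1024 + 128, "othervg": 2048 + 128}, i.e. each disk plus the
# 128 MiB of DRBD metadata, grouped per volume group.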
7695 def _ComputeDiskSize(disk_template, disks):
7696 """Compute disk size requirements in the volume group
7699 # Required free disk space as a function of disk and swap space
7700 req_size_dict = {
7701 constants.DT_DISKLESS: None,
7702 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7703 # 128 MB are added for drbd metadata for each disk
7704 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7705 constants.DT_FILE: None,
7706 constants.DT_SHARED_FILE: 0,
7707 constants.DT_BLOCK: 0,
7708 }
7710 if disk_template not in req_size_dict:
7711 raise errors.ProgrammerError("Disk template '%s' size requirement"
7712 " is unknown" % disk_template)
7714 return req_size_dict[disk_template]
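# Illustrative example: two disks of 1024 and 2048 MiB under DT_DRBD8 yield
# 1024 + 128 + 2048 + 128 = 3328 MiB, under DT_PLAIN simply 3072 MiB, and
# None or 0 for the templates that do not consume LVM space.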
7717 def _FilterVmNodes(lu, nodenames):
7718 """Filters out non-vm_capable nodes from a list.
7720 @type lu: L{LogicalUnit}
7721 @param lu: the logical unit for which we check
7722 @type nodenames: list
7723 @param nodenames: the list of nodes on which we should check
7725 @return: the list of vm-capable nodes
7727 """
7728 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7729 return [name for name in nodenames if name not in vm_nodes]
7732 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7733 """Hypervisor parameter validation.
7735 This function abstracts the hypervisor parameter validation to be
7736 used in both instance create and instance modify.
7738 @type lu: L{LogicalUnit}
7739 @param lu: the logical unit for which we check
7740 @type nodenames: list
7741 @param nodenames: the list of nodes on which we should check
7742 @type hvname: string
7743 @param hvname: the name of the hypervisor we should use
7744 @type hvparams: dict
7745 @param hvparams: the parameters which we need to check
7746 @raise errors.OpPrereqError: if the parameters are not valid
7748 """
7749 nodenames = _FilterVmNodes(lu, nodenames)
7750 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7751 hvname,
7752 hvparams)
7753 for node in nodenames:
7754 info = hvinfo[node]
7755 if info.offline:
7756 continue
7757 info.Raise("Hypervisor parameter validation failed on node %s" % node)
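# Typical call site (see LUInstanceCreate.CheckPrereq below):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)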
7760 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7761 """OS parameters validation.
7763 @type lu: L{LogicalUnit}
7764 @param lu: the logical unit for which we check
7765 @type required: boolean
7766 @param required: whether the validation should fail if the OS is not
7767 found
7768 @type nodenames: list
7769 @param nodenames: the list of nodes on which we should check
7770 @type osname: string
7771 @param osname: the name of the OS we should check
7772 @type osparams: dict
7773 @param osparams: the parameters which we need to check
7774 @raise errors.OpPrereqError: if the parameters are not valid
7776 """
7777 nodenames = _FilterVmNodes(lu, nodenames)
7778 result = lu.rpc.call_os_validate(required, nodenames, osname,
7779 [constants.OS_VALIDATE_PARAMETERS],
7780 osparams)
7781 for node, nres in result.items():
7782 # we don't check for offline cases since this should be run only
7783 # against the master node and/or an instance's nodes
7784 nres.Raise("OS Parameters validation failed on node %s" % node)
7785 if not nres.payload:
7786 lu.LogInfo("OS %s not found on node %s, validation skipped",
7787 osname, node)
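# Typical call site (see LUInstanceCreate.CheckPrereq below):
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)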
7790 class LUInstanceCreate(LogicalUnit):
7791 """Create an instance.
7794 HPATH = "instance-add"
7795 HTYPE = constants.HTYPE_INSTANCE
7798 def CheckArguments(self):
7799 """Check arguments.
7801 """
7802 # do not require name_check to ease forward/backward compatibility
7804 if self.op.no_install and self.op.start:
7805 self.LogInfo("No-installation mode selected, disabling startup")
7806 self.op.start = False
7807 # validate/normalize the instance name
7808 self.op.instance_name = \
7809 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7811 if self.op.ip_check and not self.op.name_check:
7812 # TODO: make the ip check more flexible and not depend on the name check
7813 raise errors.OpPrereqError("Cannot do IP address check without a name"
7814 " check", errors.ECODE_INVAL)
7816 # check nics' parameter names
7817 for nic in self.op.nics:
7818 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7820 # check disks. parameter names and consistent adopt/no-adopt strategy
7821 has_adopt = has_no_adopt = False
7822 for disk in self.op.disks:
7823 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7824 if constants.IDISK_ADOPT in disk:
7825 has_adopt = True
7826 else:
7827 has_no_adopt = True
7828 if has_adopt and has_no_adopt:
7829 raise errors.OpPrereqError("Either all disks are adopted or none is",
7830 errors.ECODE_INVAL)
7831 if has_adopt:
7832 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7833 raise errors.OpPrereqError("Disk adoption is not supported for the"
7834 " '%s' disk template" %
7835 self.op.disk_template,
7836 errors.ECODE_INVAL)
7837 if self.op.iallocator is not None:
7838 raise errors.OpPrereqError("Disk adoption not allowed with an"
7839 " iallocator script", errors.ECODE_INVAL)
7840 if self.op.mode == constants.INSTANCE_IMPORT:
7841 raise errors.OpPrereqError("Disk adoption not allowed for"
7842 " instance import", errors.ECODE_INVAL)
7843 else:
7844 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7845 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7846 " but no 'adopt' parameter given" %
7847 self.op.disk_template,
7848 errors.ECODE_INVAL)
7850 self.adopt_disks = has_adopt
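# Illustrative adoption request (hypothetical values): with the plain disk
# template a disk spec such as {constants.IDISK_ADOPT: "existing-lv",
# constants.IDISK_VG: "xenvg"} reuses an existing LV instead of creating a
# new one; with the blockdev template IDISK_ADOPT holds an absolute device
# path under constants.ADOPTABLE_BLOCKDEV_ROOT (see CheckPrereq below).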
7852 # instance name verification
7853 if self.op.name_check:
7854 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7855 self.op.instance_name = self.hostname1.name
7856 # used in CheckPrereq for ip ping check
7857 self.check_ip = self.hostname1.ip
7858 else:
7859 self.check_ip = None
7861 # file storage checks
7862 if (self.op.file_driver and
7863 not self.op.file_driver in constants.FILE_DRIVER):
7864 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7865 self.op.file_driver, errors.ECODE_INVAL)
7867 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7868 raise errors.OpPrereqError("File storage directory path not absolute",
7869 errors.ECODE_INVAL)
7871 ### Node/iallocator related checks
7872 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7874 if self.op.pnode is not None:
7875 if self.op.disk_template in constants.DTS_INT_MIRROR:
7876 if self.op.snode is None:
7877 raise errors.OpPrereqError("The networked disk templates need"
7878 " a mirror node", errors.ECODE_INVAL)
7879 elif self.op.snode:
7880 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7881 " template")
7882 self.op.snode = None
7884 self._cds = _GetClusterDomainSecret()
7886 if self.op.mode == constants.INSTANCE_IMPORT:
7887 # On import force_variant must be True, because if we forced it at
7888 # initial install, our only chance when importing it back is that it
7889 # works again!
7890 self.op.force_variant = True
7892 if self.op.no_install:
7893 self.LogInfo("No-installation mode has no effect during import")
7895 elif self.op.mode == constants.INSTANCE_CREATE:
7896 if self.op.os_type is None:
7897 raise errors.OpPrereqError("No guest OS specified",
7898 errors.ECODE_INVAL)
7899 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7900 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7901 " installation" % self.op.os_type,
7903 if self.op.disk_template is None:
7904 raise errors.OpPrereqError("No disk template specified",
7905 errors.ECODE_INVAL)
7907 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7908 # Check handshake to ensure both clusters have the same domain secret
7909 src_handshake = self.op.source_handshake
7910 if not src_handshake:
7911 raise errors.OpPrereqError("Missing source handshake",
7912 errors.ECODE_INVAL)
7914 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7915 src_handshake)
7916 if errmsg:
7917 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7918 errors.ECODE_INVAL)
7920 # Load and check source CA
7921 self.source_x509_ca_pem = self.op.source_x509_ca
7922 if not self.source_x509_ca_pem:
7923 raise errors.OpPrereqError("Missing source X509 CA",
7924 errors.ECODE_INVAL)
7926 try:
7927 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7928 self._cds)
7929 except OpenSSL.crypto.Error, err:
7930 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7931 (err, ), errors.ECODE_INVAL)
7933 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7934 if errcode is not None:
7935 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7936 errors.ECODE_INVAL)
7938 self.source_x509_ca = cert
7940 src_instance_name = self.op.source_instance_name
7941 if not src_instance_name:
7942 raise errors.OpPrereqError("Missing source instance name",
7943 errors.ECODE_INVAL)
7945 self.source_instance_name = \
7946 netutils.GetHostname(name=src_instance_name).name
7948 else:
7949 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7950 self.op.mode, errors.ECODE_INVAL)
7952 def ExpandNames(self):
7953 """ExpandNames for CreateInstance.
7955 Figure out the right locks for instance creation.
7957 """
7958 self.needed_locks = {}
7960 instance_name = self.op.instance_name
7961 # this is just a preventive check, but someone might still add this
7962 # instance in the meantime, and creation will fail at lock-add time
7963 if instance_name in self.cfg.GetInstanceList():
7964 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7965 instance_name, errors.ECODE_EXISTS)
7967 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7969 if self.op.iallocator:
7970 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7971 else:
7972 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7973 nodelist = [self.op.pnode]
7974 if self.op.snode is not None:
7975 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7976 nodelist.append(self.op.snode)
7977 self.needed_locks[locking.LEVEL_NODE] = nodelist
7979 # in case of import lock the source node too
7980 if self.op.mode == constants.INSTANCE_IMPORT:
7981 src_node = self.op.src_node
7982 src_path = self.op.src_path
7984 if src_path is None:
7985 self.op.src_path = src_path = self.op.instance_name
7987 if src_node is None:
7988 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7989 self.op.src_node = None
7990 if os.path.isabs(src_path):
7991 raise errors.OpPrereqError("Importing an instance from an absolute"
7992 " path requires a source node option",
7995 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7996 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7997 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7998 if not os.path.isabs(src_path):
7999 self.op.src_path = src_path = \
8000 utils.PathJoin(constants.EXPORT_DIR, src_path)
8002 def _RunAllocator(self):
8003 """Run the allocator based on input opcode.
8005 """
8006 nics = [n.ToDict() for n in self.nics]
8007 ial = IAllocator(self.cfg, self.rpc,
8008 mode=constants.IALLOCATOR_MODE_ALLOC,
8009 name=self.op.instance_name,
8010 disk_template=self.op.disk_template,
8011 tags=self.op.tags,
8012 os=self.op.os_type,
8013 vcpus=self.be_full[constants.BE_VCPUS],
8014 memory=self.be_full[constants.BE_MEMORY],
8015 disks=self.disks,
8016 nics=nics,
8017 hypervisor=self.op.hypervisor,
8018 )
8020 ial.Run(self.op.iallocator)
8022 if not ial.success:
8023 raise errors.OpPrereqError("Can't compute nodes using"
8024 " iallocator '%s': %s" %
8025 (self.op.iallocator, ial.info),
8027 if len(ial.result) != ial.required_nodes:
8028 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8029 " of nodes (%s), required %s" %
8030 (self.op.iallocator, len(ial.result),
8031 ial.required_nodes), errors.ECODE_FAULT)
8032 self.op.pnode = ial.result[0]
8033 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8034 self.op.instance_name, self.op.iallocator,
8035 utils.CommaJoin(ial.result))
8036 if ial.required_nodes == 2:
8037 self.op.snode = ial.result[1]
8039 def BuildHooksEnv(self):
8040 """Build hooks env.
8042 This runs on master, primary and secondary nodes of the instance.
8044 """
8045 env = {
8046 "ADD_MODE": self.op.mode,
8047 }
8048 if self.op.mode == constants.INSTANCE_IMPORT:
8049 env["SRC_NODE"] = self.op.src_node
8050 env["SRC_PATH"] = self.op.src_path
8051 env["SRC_IMAGES"] = self.src_images
8053 env.update(_BuildInstanceHookEnv(
8054 name=self.op.instance_name,
8055 primary_node=self.op.pnode,
8056 secondary_nodes=self.secondaries,
8057 status=self.op.start,
8058 os_type=self.op.os_type,
8059 memory=self.be_full[constants.BE_MEMORY],
8060 vcpus=self.be_full[constants.BE_VCPUS],
8061 nics=_NICListToTuple(self, self.nics),
8062 disk_template=self.op.disk_template,
8063 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8064 for d in self.disks],
8067 hypervisor_name=self.op.hypervisor,
8069 ))
8071 return env
8073 def BuildHooksNodes(self):
8074 """Build hooks nodes.
8077 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8080 def _ReadExportInfo(self):
8081 """Reads the export information from disk.
8083 It will override the opcode source node and path with the actual
8084 information, if these two were not specified before.
8086 @return: the export information
8088 """
8089 assert self.op.mode == constants.INSTANCE_IMPORT
8091 src_node = self.op.src_node
8092 src_path = self.op.src_path
8094 if src_node is None:
8095 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8096 exp_list = self.rpc.call_export_list(locked_nodes)
8097 found = False
8098 for node in exp_list:
8099 if exp_list[node].fail_msg:
8100 continue
8101 if src_path in exp_list[node].payload:
8102 found = True
8103 self.op.src_node = src_node = node
8104 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8105 src_path)
8106 break
8107 if not found:
8108 raise errors.OpPrereqError("No export found for relative path %s" %
8109 src_path, errors.ECODE_INVAL)
8111 _CheckNodeOnline(self, src_node)
8112 result = self.rpc.call_export_info(src_node, src_path)
8113 result.Raise("No export or invalid export found in dir %s" % src_path)
8115 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8116 if not export_info.has_section(constants.INISECT_EXP):
8117 raise errors.ProgrammerError("Corrupted export config",
8118 errors.ECODE_ENVIRON)
8120 ei_version = export_info.get(constants.INISECT_EXP, "version")
8121 if (int(ei_version) != constants.EXPORT_VERSION):
8122 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8123 (ei_version, constants.EXPORT_VERSION),
8124 errors.ECODE_ENVIRON)
8126 return export_info
8127 def _ReadExportParams(self, einfo):
8128 """Use export parameters as defaults.
8130 In case the opcode doesn't specify (as in override) some instance
8131 parameters, then try to use them from the export information, if
8132 that declares them.
8134 """
8135 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8137 if self.op.disk_template is None:
8138 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8139 self.op.disk_template = einfo.get(constants.INISECT_INS,
8140 "disk_template")
8141 else:
8142 raise errors.OpPrereqError("No disk template specified and the export"
8143 " is missing the disk_template information",
8146 if not self.op.disks:
8147 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8148 disks = []
8149 # TODO: import the disk iv_name too
8150 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8151 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8152 disks.append({constants.IDISK_SIZE: disk_sz})
8153 self.op.disks = disks
8154 else:
8155 raise errors.OpPrereqError("No disk info specified and the export"
8156 " is missing the disk information",
8159 if (not self.op.nics and
8160 einfo.has_option(constants.INISECT_INS, "nic_count")):
8161 nics = []
8162 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8163 ndict = {}
8164 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8165 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8166 ndict[name] = v
8167 nics.append(ndict)
8168 self.op.nics = nics
8170 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8171 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8173 if (self.op.hypervisor is None and
8174 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8175 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8177 if einfo.has_section(constants.INISECT_HYP):
8178 # use the export parameters but do not override the ones
8179 # specified by the user
8180 for name, value in einfo.items(constants.INISECT_HYP):
8181 if name not in self.op.hvparams:
8182 self.op.hvparams[name] = value
8184 if einfo.has_section(constants.INISECT_BEP):
8185 # use the parameters, without overriding
8186 for name, value in einfo.items(constants.INISECT_BEP):
8187 if name not in self.op.beparams:
8188 self.op.beparams[name] = value
8189 else:
8190 # try to read the parameters old style, from the main section
8191 for name in constants.BES_PARAMETERS:
8192 if (name not in self.op.beparams and
8193 einfo.has_option(constants.INISECT_INS, name)):
8194 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8196 if einfo.has_section(constants.INISECT_OSP):
8197 # use the parameters, without overriding
8198 for name, value in einfo.items(constants.INISECT_OSP):
8199 if name not in self.op.osparams:
8200 self.op.osparams[name] = value
8202 def _RevertToDefaults(self, cluster):
8203 """Revert the instance parameters to the default values.
8207 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8208 for name in self.op.hvparams.keys():
8209 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8210 del self.op.hvparams[name]
8212 be_defs = cluster.SimpleFillBE({})
8213 for name in self.op.beparams.keys():
8214 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8215 del self.op.beparams[name]
8217 nic_defs = cluster.SimpleFillNIC({})
8218 for nic in self.op.nics:
8219 for name in constants.NICS_PARAMETERS:
8220 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8221 del nic[name]
8223 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8224 for name in self.op.osparams.keys():
8225 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8226 del self.op.osparams[name]
8228 def CheckPrereq(self):
8229 """Check prerequisites.
8232 if self.op.mode == constants.INSTANCE_IMPORT:
8233 export_info = self._ReadExportInfo()
8234 self._ReadExportParams(export_info)
8236 if (not self.cfg.GetVGName() and
8237 self.op.disk_template not in constants.DTS_NOT_LVM):
8238 raise errors.OpPrereqError("Cluster does not support lvm-based"
8239 " instances", errors.ECODE_STATE)
8241 if self.op.hypervisor is None:
8242 self.op.hypervisor = self.cfg.GetHypervisorType()
8244 cluster = self.cfg.GetClusterInfo()
8245 enabled_hvs = cluster.enabled_hypervisors
8246 if self.op.hypervisor not in enabled_hvs:
8247 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8248 " cluster (%s)" % (self.op.hypervisor,
8249 ",".join(enabled_hvs)),
8252 # Check tag validity
8253 for tag in self.op.tags:
8254 objects.TaggableObject.ValidateTag(tag)
8256 # check hypervisor parameter syntax (locally)
8257 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8258 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8259 self.op.hvparams)
8260 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8261 hv_type.CheckParameterSyntax(filled_hvp)
8262 self.hv_full = filled_hvp
8263 # check that we don't specify global parameters on an instance
8264 _CheckGlobalHvParams(self.op.hvparams)
8266 # fill and remember the beparams dict
8267 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8268 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8270 # build os parameters
8271 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8273 # now that hvp/bep are in final format, let's reset to defaults,
8275 if self.op.identify_defaults:
8276 self._RevertToDefaults(cluster)
8278 # NIC buildup
8279 self.nics = []
8280 for idx, nic in enumerate(self.op.nics):
8281 nic_mode_req = nic.get(constants.INIC_MODE, None)
8282 nic_mode = nic_mode_req
8283 if nic_mode is None:
8284 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8286 # in routed mode, for the first nic, the default ip is 'auto'
8287 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8288 default_ip_mode = constants.VALUE_AUTO
8289 else:
8290 default_ip_mode = constants.VALUE_NONE
8292 # ip validity checks
8293 ip = nic.get(constants.INIC_IP, default_ip_mode)
8294 if ip is None or ip.lower() == constants.VALUE_NONE:
8295 nic_ip = None
8296 elif ip.lower() == constants.VALUE_AUTO:
8297 if not self.op.name_check:
8298 raise errors.OpPrereqError("IP address set to auto but name checks"
8299 " have been skipped",
8301 nic_ip = self.hostname1.ip
8303 if not netutils.IPAddress.IsValid(ip):
8304 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8308 # TODO: check the ip address for uniqueness
8309 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8310 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8311 errors.ECODE_INVAL)
8313 # MAC address verification
8314 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8315 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8316 mac = utils.NormalizeAndValidateMac(mac)
8318 try:
8319 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8320 except errors.ReservationError:
8321 raise errors.OpPrereqError("MAC address %s already in use"
8322 " in cluster" % mac,
8323 errors.ECODE_NOTUNIQUE)
8325 # Build nic parameters
8326 link = nic.get(constants.INIC_LINK, None)
8327 nicparams = {}
8328 if nic_mode_req:
8329 nicparams[constants.NIC_MODE] = nic_mode_req
8330 if link:
8331 nicparams[constants.NIC_LINK] = link
8333 check_params = cluster.SimpleFillNIC(nicparams)
8334 objects.NIC.CheckParameterSyntax(check_params)
8335 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8337 # disk checks/pre-build
8338 default_vg = self.cfg.GetVGName()
8339 self.disks = []
8340 for disk in self.op.disks:
8341 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8342 if mode not in constants.DISK_ACCESS_SET:
8343 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8344 mode, errors.ECODE_INVAL)
8345 size = disk.get(constants.IDISK_SIZE, None)
8346 if size is None:
8347 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8348 try:
8349 size = int(size)
8350 except (TypeError, ValueError):
8351 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8352 errors.ECODE_INVAL)
8354 data_vg = disk.get(constants.IDISK_VG, default_vg)
8355 new_disk = {
8356 constants.IDISK_SIZE: size,
8357 constants.IDISK_MODE: mode,
8358 constants.IDISK_VG: data_vg,
8359 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8360 }
8361 if constants.IDISK_ADOPT in disk:
8362 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8363 self.disks.append(new_disk)
8365 if self.op.mode == constants.INSTANCE_IMPORT:
8367 # Check that the new instance doesn't have less disks than the export
8368 instance_disks = len(self.disks)
8369 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8370 if instance_disks < export_disks:
8371 raise errors.OpPrereqError("Not enough disks to import."
8372 " (instance: %d, export: %d)" %
8373 (instance_disks, export_disks),
8374 errors.ECODE_INVAL)
8376 disk_images = []
8377 for idx in range(export_disks):
8378 option = 'disk%d_dump' % idx
8379 if export_info.has_option(constants.INISECT_INS, option):
8380 # FIXME: are the old os-es, disk sizes, etc. useful?
8381 export_name = export_info.get(constants.INISECT_INS, option)
8382 image = utils.PathJoin(self.op.src_path, export_name)
8383 disk_images.append(image)
8384 else:
8385 disk_images.append(False)
8387 self.src_images = disk_images
8389 old_name = export_info.get(constants.INISECT_INS, 'name')
8390 try:
8391 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8392 except (TypeError, ValueError), err:
8393 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8394 " an integer: %s" % str(err),
8396 if self.op.instance_name == old_name:
8397 for idx, nic in enumerate(self.nics):
8398 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8399 nic_mac_ini = 'nic%d_mac' % idx
8400 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8402 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8404 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8405 if self.op.ip_check:
8406 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8407 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8408 (self.check_ip, self.op.instance_name),
8409 errors.ECODE_NOTUNIQUE)
8411 #### mac address generation
8412 # By generating here the mac address both the allocator and the hooks get
8413 # the real final mac address rather than the 'auto' or 'generate' value.
8414 # There is a race condition between the generation and the instance object
8415 # creation, which means that we know the mac is valid now, but we're not
8416 # sure it will be when we actually add the instance. If things go bad
8417 # adding the instance will abort because of a duplicate mac, and the
8418 # creation job will fail.
8419 for nic in self.nics:
8420 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8421 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8425 if self.op.iallocator is not None:
8426 self._RunAllocator()
8428 #### node related checks
8430 # check primary node
8431 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8432 assert self.pnode is not None, \
8433 "Cannot retrieve locked node %s" % self.op.pnode
8434 if pnode.offline:
8435 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8436 pnode.name, errors.ECODE_STATE)
8437 if pnode.drained:
8438 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8439 pnode.name, errors.ECODE_STATE)
8440 if not pnode.vm_capable:
8441 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8442 " '%s'" % pnode.name, errors.ECODE_STATE)
8444 self.secondaries = []
8446 # mirror node verification
8447 if self.op.disk_template in constants.DTS_INT_MIRROR:
8448 if self.op.snode == pnode.name:
8449 raise errors.OpPrereqError("The secondary node cannot be the"
8450 " primary node", errors.ECODE_INVAL)
8451 _CheckNodeOnline(self, self.op.snode)
8452 _CheckNodeNotDrained(self, self.op.snode)
8453 _CheckNodeVmCapable(self, self.op.snode)
8454 self.secondaries.append(self.op.snode)
8456 nodenames = [pnode.name] + self.secondaries
8458 if not self.adopt_disks:
8459 # Check lv size requirements, if not adopting
8460 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8461 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8463 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8464 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8465 disk[constants.IDISK_ADOPT])
8466 for disk in self.disks])
8467 if len(all_lvs) != len(self.disks):
8468 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8469 errors.ECODE_INVAL)
8470 for lv_name in all_lvs:
8471 try:
8472 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8473 # to ReserveLV use the same syntax
8474 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8475 except errors.ReservationError:
8476 raise errors.OpPrereqError("LV named %s used by another instance" %
8477 lv_name, errors.ECODE_NOTUNIQUE)
8479 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8480 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8482 node_lvs = self.rpc.call_lv_list([pnode.name],
8483 vg_names.payload.keys())[pnode.name]
8484 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8485 node_lvs = node_lvs.payload
8487 delta = all_lvs.difference(node_lvs.keys())
8488 if delta:
8489 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8490 utils.CommaJoin(delta),
8491 errors.ECODE_INVAL)
8492 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8493 if online_lvs:
8494 raise errors.OpPrereqError("Online logical volumes found, cannot"
8495 " adopt: %s" % utils.CommaJoin(online_lvs),
8496 errors.ECODE_STATE)
8497 # update the size of disk based on what is found
8498 for dsk in self.disks:
8499 dsk[constants.IDISK_SIZE] = \
8500 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8501 dsk[constants.IDISK_ADOPT])][0]))
8503 elif self.op.disk_template == constants.DT_BLOCK:
8504 # Normalize and de-duplicate device paths
8505 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8506 for disk in self.disks])
8507 if len(all_disks) != len(self.disks):
8508 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8509 errors.ECODE_INVAL)
8510 baddisks = [d for d in all_disks
8511 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8512 if baddisks:
8513 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8514 " cannot be adopted" %
8515 (", ".join(baddisks),
8516 constants.ADOPTABLE_BLOCKDEV_ROOT),
8517 errors.ECODE_INVAL)
8519 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8520 list(all_disks))[pnode.name]
8521 node_disks.Raise("Cannot get block device information from node %s" %
8522 pnode.name)
8523 node_disks = node_disks.payload
8524 delta = all_disks.difference(node_disks.keys())
8525 if delta:
8526 raise errors.OpPrereqError("Missing block device(s): %s" %
8527 utils.CommaJoin(delta),
8528 errors.ECODE_INVAL)
8529 for dsk in self.disks:
8530 dsk[constants.IDISK_SIZE] = \
8531 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8533 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8535 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8536 # check OS parameters (remotely)
8537 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8539 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8541 # memory check on primary node
8542 if self.op.start:
8543 _CheckNodeFreeMemory(self, self.pnode.name,
8544 "creating instance %s" % self.op.instance_name,
8545 self.be_full[constants.BE_MEMORY],
8546 self.op.hypervisor)
8548 self.dry_run_result = list(nodenames)
8550 def Exec(self, feedback_fn):
8551 """Create and add the instance to the cluster.
8554 instance = self.op.instance_name
8555 pnode_name = self.pnode.name
8557 ht_kind = self.op.hypervisor
8558 if ht_kind in constants.HTS_REQ_PORT:
8559 network_port = self.cfg.AllocatePort()
8560 else:
8561 network_port = None
8563 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8564 # this is needed because os.path.join does not accept None arguments
8565 if self.op.file_storage_dir is None:
8566 string_file_storage_dir = ""
8567 else:
8568 string_file_storage_dir = self.op.file_storage_dir
8570 # build the full file storage dir path
8571 if self.op.disk_template == constants.DT_SHARED_FILE:
8572 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8573 else:
8574 get_fsd_fn = self.cfg.GetFileStorageDir
8576 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8577 string_file_storage_dir, instance)
8578 else:
8579 file_storage_dir = ""
8581 disks = _GenerateDiskTemplate(self,
8582 self.op.disk_template,
8583 instance, pnode_name,
8584 self.secondaries,
8585 self.disks,
8586 file_storage_dir,
8587 self.op.file_driver,
8588 0,
8589 feedback_fn)
8591 iobj = objects.Instance(name=instance, os=self.op.os_type,
8592 primary_node=pnode_name,
8593 nics=self.nics, disks=disks,
8594 disk_template=self.op.disk_template,
8596 network_port=network_port,
8597 beparams=self.op.beparams,
8598 hvparams=self.op.hvparams,
8599 hypervisor=self.op.hypervisor,
8600 osparams=self.op.osparams,
8601 )
8604 for tag in self.op.tags:
8605 iobj.AddTag(tag)
8607 if self.adopt_disks:
8608 if self.op.disk_template == constants.DT_PLAIN:
8609 # rename LVs to the newly-generated names; we need to construct
8610 # 'fake' LV disks with the old data, plus the new unique_id
8611 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8612 rename_to = []
8613 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8614 rename_to.append(t_dsk.logical_id)
8615 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8616 self.cfg.SetDiskID(t_dsk, pnode_name)
8617 result = self.rpc.call_blockdev_rename(pnode_name,
8618 zip(tmp_disks, rename_to))
8619 result.Raise("Failed to rename adopted LVs")
8621 feedback_fn("* creating instance disks...")
8623 _CreateDisks(self, iobj)
8624 except errors.OpExecError:
8625 self.LogWarning("Device creation failed, reverting...")
8627 _RemoveDisks(self, iobj)
8629 self.cfg.ReleaseDRBDMinors(instance)
8632 feedback_fn("adding instance %s to cluster config" % instance)
8634 self.cfg.AddInstance(iobj, self.proc.GetECId())
8636 # Declare that we don't want to remove the instance lock anymore, as we've
8637 # added the instance to the config
8638 del self.remove_locks[locking.LEVEL_INSTANCE]
8640 if self.op.mode == constants.INSTANCE_IMPORT:
8641 # Release unused nodes
8642 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8643 else:
8644 # Release all nodes
8645 _ReleaseLocks(self, locking.LEVEL_NODE)
8647 disk_abort = False
8648 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8649 feedback_fn("* wiping instance disks...")
8650 try:
8651 _WipeDisks(self, iobj)
8652 except errors.OpExecError, err:
8653 logging.exception("Wiping disks failed")
8654 self.LogWarning("Wiping instance disks failed (%s)", err)
8655 disk_abort = True
8657 if disk_abort:
8658 # Something is already wrong with the disks, don't do anything else
8659 pass
8660 elif self.op.wait_for_sync:
8661 disk_abort = not _WaitForSync(self, iobj)
8662 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8663 # make sure the disks are not degraded (still sync-ing is ok)
8665 feedback_fn("* checking mirrors status")
8666 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8667 else:
8668 disk_abort = False
8670 if disk_abort:
8671 _RemoveDisks(self, iobj)
8672 self.cfg.RemoveInstance(iobj.name)
8673 # Make sure the instance lock gets removed
8674 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8675 raise errors.OpExecError("There are some degraded disks for"
8676 " this instance")
8678 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8679 if self.op.mode == constants.INSTANCE_CREATE:
8680 if not self.op.no_install:
8681 feedback_fn("* running the instance OS create scripts...")
8682 # FIXME: pass debug option from opcode to backend
8683 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8684 self.op.debug_level)
8685 result.Raise("Could not add os for instance %s"
8686 " on node %s" % (instance, pnode_name))
8688 elif self.op.mode == constants.INSTANCE_IMPORT:
8689 feedback_fn("* running the instance OS import scripts...")
8691 transfers = []
8693 for idx, image in enumerate(self.src_images):
8694 if not image:
8695 continue
8697 # FIXME: pass debug option from opcode to backend
8698 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8699 constants.IEIO_FILE, (image, ),
8700 constants.IEIO_SCRIPT,
8701 (iobj.disks[idx], idx),
8702 None)
8703 transfers.append(dt)
8705 import_result = \
8706 masterd.instance.TransferInstanceData(self, feedback_fn,
8707 self.op.src_node, pnode_name,
8708 self.pnode.secondary_ip,
8709 iobj, transfers)
8710 if not compat.all(import_result):
8711 self.LogWarning("Some disks for instance %s on node %s were not"
8712 " imported successfully" % (instance, pnode_name))
8714 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8715 feedback_fn("* preparing remote import...")
8716 # The source cluster will stop the instance before attempting to make a
8717 # connection. In some cases stopping an instance can take a long time,
8718 # hence the shutdown timeout is added to the connection timeout.
8719 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8720 self.op.source_shutdown_timeout)
8721 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8723 assert iobj.primary_node == self.pnode.name
8724 disk_results = \
8725 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8726 self.source_x509_ca,
8727 self._cds, timeouts)
8728 if not compat.all(disk_results):
8729 # TODO: Should the instance still be started, even if some disks
8730 # failed to import (valid for local imports, too)?
8731 self.LogWarning("Some disks for instance %s on node %s were not"
8732 " imported successfully" % (instance, pnode_name))
8734 # Run rename script on newly imported instance
8735 assert iobj.name == instance
8736 feedback_fn("Running rename script for %s" % instance)
8737 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8738 self.source_instance_name,
8739 self.op.debug_level)
8740 if result.fail_msg:
8741 self.LogWarning("Failed to run rename script for %s on node"
8742 " %s: %s" % (instance, pnode_name, result.fail_msg))
8744 else:
8745 # also checked in the prereq part
8746 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8747 % self.op.mode)
8749 if self.op.start:
8750 iobj.admin_up = True
8751 self.cfg.Update(iobj, feedback_fn)
8752 logging.info("Starting instance %s on node %s", instance, pnode_name)
8753 feedback_fn("* starting instance...")
8754 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8755 result.Raise("Could not start instance")
8757 return list(iobj.all_nodes)
8760 class LUInstanceConsole(NoHooksLU):
8761 """Connect to an instance's console.
8763 This is somewhat special in that it returns the command line that
8764 you need to run on the master node in order to connect to the
8765 console.
8767 """
8770 def ExpandNames(self):
8771 self._ExpandAndLockInstance()
8773 def CheckPrereq(self):
8774 """Check prerequisites.
8776 This checks that the instance is in the cluster.
8778 """
8779 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8780 assert self.instance is not None, \
8781 "Cannot retrieve locked instance %s" % self.op.instance_name
8782 _CheckNodeOnline(self, self.instance.primary_node)
8784 def Exec(self, feedback_fn):
8785 """Connect to the console of an instance
8788 instance = self.instance
8789 node = instance.primary_node
8791 node_insts = self.rpc.call_instance_list([node],
8792 [instance.hypervisor])[node]
8793 node_insts.Raise("Can't get node information from %s" % node)
8795 if instance.name not in node_insts.payload:
8796 if instance.admin_up:
8797 state = constants.INSTST_ERRORDOWN
8798 else:
8799 state = constants.INSTST_ADMINDOWN
8800 raise errors.OpExecError("Instance %s is not running (state %s)" %
8801 (instance.name, state))
8803 logging.debug("Connecting to console of %s on %s", instance.name, node)
8805 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8808 def _GetInstanceConsole(cluster, instance):
8809 """Returns console information for an instance.
8811 @type cluster: L{objects.Cluster}
8812 @type instance: L{objects.Instance}
8814 """
8816 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8817 # beparams and hvparams are passed separately, to avoid editing the
8818 # instance and then saving the defaults in the instance itself.
8819 hvparams = cluster.FillHV(instance)
8820 beparams = cluster.FillBE(instance)
8821 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8823 assert console.instance == instance.name
8824 assert console.Validate()
8826 return console.ToDict()
8829 class LUInstanceReplaceDisks(LogicalUnit):
8830 """Replace the disks of an instance.
8833 HPATH = "mirrors-replace"
8834 HTYPE = constants.HTYPE_INSTANCE
8835 REQ_BGL = False
8837 def CheckArguments(self):
8838 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8839 self.op.iallocator)
8841 def ExpandNames(self):
8842 self._ExpandAndLockInstance()
8844 assert locking.LEVEL_NODE not in self.needed_locks
8845 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8847 assert self.op.iallocator is None or self.op.remote_node is None, \
8848 "Conflicting options"
8850 if self.op.remote_node is not None:
8851 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8853 # Warning: do not remove the locking of the new secondary here
8854 # unless DRBD8.AddChildren is changed to work in parallel;
8855 # currently it doesn't since parallel invocations of
8856 # FindUnusedMinor will conflict
8857 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8858 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8859 else:
8860 self.needed_locks[locking.LEVEL_NODE] = []
8861 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8863 if self.op.iallocator is not None:
8864 # iallocator will select a new node in the same group
8865 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8867 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8868 self.op.iallocator, self.op.remote_node,
8869 self.op.disks, False, self.op.early_release)
8871 self.tasklets = [self.replacer]
8873 def DeclareLocks(self, level):
8874 if level == locking.LEVEL_NODEGROUP:
8875 assert self.op.remote_node is None
8876 assert self.op.iallocator is not None
8877 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8879 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8880 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8881 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8883 elif level == locking.LEVEL_NODE:
8884 if self.op.iallocator is not None:
8885 assert self.op.remote_node is None
8886 assert not self.needed_locks[locking.LEVEL_NODE]
8888 # Lock member nodes of all locked groups
8889 self.needed_locks[locking.LEVEL_NODE] = [node_name
8890 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8891 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8892 else:
8893 self._LockInstancesNodes()
8895 def BuildHooksEnv(self):
8896 """Build hooks env.
8898 This runs on the master, the primary and all the secondaries.
8900 """
8901 instance = self.replacer.instance
8903 "MODE": self.op.mode,
8904 "NEW_SECONDARY": self.op.remote_node,
8905 "OLD_SECONDARY": instance.secondary_nodes[0],
8907 env.update(_BuildInstanceHookEnvByObject(self, instance))
8908 return env
8910 def BuildHooksNodes(self):
8911 """Build hooks nodes.
8914 instance = self.replacer.instance
8916 self.cfg.GetMasterNode(),
8917 instance.primary_node,
8918 ]
8919 if self.op.remote_node is not None:
8920 nl.append(self.op.remote_node)
8922 return (nl, nl)
8923 def CheckPrereq(self):
8924 """Check prerequisites.
8927 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8928 self.op.iallocator is None)
8930 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8932 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8933 if owned_groups != groups:
8934 raise errors.OpExecError("Node groups used by instance '%s' changed"
8935 " since lock was acquired, current list is %r,"
8936 " used to be '%s'" %
8937 (self.op.instance_name,
8938 utils.CommaJoin(groups),
8939 utils.CommaJoin(owned_groups)))
8941 return LogicalUnit.CheckPrereq(self)
8944 class TLReplaceDisks(Tasklet):
8945 """Replaces disks for an instance.
8947 Note: Locking is not within the scope of this class.
8949 """
8950 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8951 disks, delay_iallocator, early_release):
8952 """Initializes this class.
8955 Tasklet.__init__(self, lu)
8958 self.instance_name = instance_name
8959 self.mode = mode
8960 self.iallocator_name = iallocator_name
8961 self.remote_node = remote_node
8962 self.disks = disks
8963 self.delay_iallocator = delay_iallocator
8964 self.early_release = early_release
8967 self.instance = None
8968 self.new_node = None
8969 self.target_node = None
8970 self.other_node = None
8971 self.remote_node_info = None
8972 self.node_secondary_ip = None
8974 @staticmethod
8975 def CheckArguments(mode, remote_node, iallocator):
8976 """Helper function for users of this class.
8979 # check for valid parameter combination
8980 if mode == constants.REPLACE_DISK_CHG:
8981 if remote_node is None and iallocator is None:
8982 raise errors.OpPrereqError("When changing the secondary either an"
8983 " iallocator script must be used or the"
8984 " new node given", errors.ECODE_INVAL)
8986 if remote_node is not None and iallocator is not None:
8987 raise errors.OpPrereqError("Give either the iallocator or the new"
8988 " secondary, not both", errors.ECODE_INVAL)
8990 elif remote_node is not None or iallocator is not None:
8991 # Not replacing the secondary
8992 raise errors.OpPrereqError("The iallocator and new node options can"
8993 " only be used when changing the"
8994 " secondary node", errors.ECODE_INVAL)
8996 @staticmethod
8997 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8998 """Compute a new secondary node using an IAllocator.
9001 ial = IAllocator(lu.cfg, lu.rpc,
9002 mode=constants.IALLOCATOR_MODE_RELOC,
9003 name=instance_name,
9004 relocate_from=relocate_from)
9006 ial.Run(iallocator_name)
9008 if not ial.success:
9009 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9010 " %s" % (iallocator_name, ial.info),
9013 if len(ial.result) != ial.required_nodes:
9014 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9015 " of nodes (%s), required %s" %
9016 (iallocator_name,
9017 len(ial.result), ial.required_nodes),
9018 errors.ECODE_FAULT)
9020 remote_node_name = ial.result[0]
9022 lu.LogInfo("Selected new secondary for instance '%s': %s",
9023 instance_name, remote_node_name)
9025 return remote_node_name
9027 def _FindFaultyDisks(self, node_name):
9028 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9029 node_name)
9031 def _CheckDisksActivated(self, instance):
9032 """Checks if the instance disks are activated.
9034 @param instance: The instance to check disks
9035 @return: True if they are activated, False otherwise
9037 """
9038 nodes = instance.all_nodes
9040 for idx, dev in enumerate(instance.disks):
9041 for node in nodes:
9042 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9043 self.cfg.SetDiskID(dev, node)
9045 result = self.rpc.call_blockdev_find(node, dev)
9047 if result.offline:
9048 continue
9049 elif result.fail_msg or not result.payload:
9050 return False
9052 return True
9054 def CheckPrereq(self):
9055 """Check prerequisites.
9057 This checks that the instance is in the cluster.
9059 """
9060 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9061 assert instance is not None, \
9062 "Cannot retrieve locked instance %s" % self.instance_name
9064 if instance.disk_template != constants.DT_DRBD8:
9065 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9066 " instances", errors.ECODE_INVAL)
9068 if len(instance.secondary_nodes) != 1:
9069 raise errors.OpPrereqError("The instance has a strange layout,"
9070 " expected one secondary but found %d" %
9071 len(instance.secondary_nodes),
9074 if not self.delay_iallocator:
9075 self._CheckPrereq2()
9077 def _CheckPrereq2(self):
9078 """Check prerequisites, second part.
9080 This function should always be part of CheckPrereq. It was separated and is
9081 now called from Exec because during node evacuation iallocator was only
9082 called with an unmodified cluster model, not taking planned changes into
9083 account.
9085 """
9086 instance = self.instance
9087 secondary_node = instance.secondary_nodes[0]
9089 if self.iallocator_name is None:
9090 remote_node = self.remote_node
9091 else:
9092 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9093 instance.name, instance.secondary_nodes)
9095 if remote_node is None:
9096 self.remote_node_info = None
9097 else:
9098 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9099 "Remote node '%s' is not locked" % remote_node
9101 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9102 assert self.remote_node_info is not None, \
9103 "Cannot retrieve locked node %s" % remote_node
9105 if remote_node == self.instance.primary_node:
9106 raise errors.OpPrereqError("The specified node is the primary node of"
9107 " the instance", errors.ECODE_INVAL)
9109 if remote_node == secondary_node:
9110 raise errors.OpPrereqError("The specified node is already the"
9111 " secondary node of the instance",
9114 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9115 constants.REPLACE_DISK_CHG):
9116 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9117 errors.ECODE_INVAL)
9119 if self.mode == constants.REPLACE_DISK_AUTO:
9120 if not self._CheckDisksActivated(instance):
9121 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9122 " first" % self.instance_name,
9124 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9125 faulty_secondary = self._FindFaultyDisks(secondary_node)
9127 if faulty_primary and faulty_secondary:
9128 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9129 " one node and can not be repaired"
9130 " automatically" % self.instance_name,
9134 self.disks = faulty_primary
9135 self.target_node = instance.primary_node
9136 self.other_node = secondary_node
9137 check_nodes = [self.target_node, self.other_node]
9138 elif faulty_secondary:
9139 self.disks = faulty_secondary
9140 self.target_node = secondary_node
9141 self.other_node = instance.primary_node
9142 check_nodes = [self.target_node, self.other_node]
9148 # Non-automatic modes
9149 if self.mode == constants.REPLACE_DISK_PRI:
9150 self.target_node = instance.primary_node
9151 self.other_node = secondary_node
9152 check_nodes = [self.target_node, self.other_node]
9154 elif self.mode == constants.REPLACE_DISK_SEC:
9155 self.target_node = secondary_node
9156 self.other_node = instance.primary_node
9157 check_nodes = [self.target_node, self.other_node]
9159 elif self.mode == constants.REPLACE_DISK_CHG:
9160 self.new_node = remote_node
9161 self.other_node = instance.primary_node
9162 self.target_node = secondary_node
9163 check_nodes = [self.new_node, self.other_node]
9165 _CheckNodeNotDrained(self.lu, remote_node)
9166 _CheckNodeVmCapable(self.lu, remote_node)
9168 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9169 assert old_node_info is not None
9170 if old_node_info.offline and not self.early_release:
9171 # doesn't make sense to delay the release
9172 self.early_release = True
9173 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9174 " early-release mode", secondary_node)
9176 else:
9177 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9178 self.mode)
9180 # If not specified all disks should be replaced
9181 if not self.disks:
9182 self.disks = range(len(self.instance.disks))
9184 for node in check_nodes:
9185 _CheckNodeOnline(self.lu, node)
9187 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9188 self.other_node,
9189 self.target_node]
9190 if node_name is not None)
9192 # Release unneeded node locks
9193 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9195 # Release any owned node group
9196 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9197 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9199 # Check whether disks are valid
9200 for disk_idx in self.disks:
9201 instance.FindDisk(disk_idx)
9203 # Get secondary node IP addresses
9204 self.node_secondary_ip = \
9205 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9206 for node_name in touched_nodes)
9208 def Exec(self, feedback_fn):
9209 """Execute disk replacement.
9211 This dispatches the disk replacement to the appropriate handler.
9213 """
9214 if self.delay_iallocator:
9215 self._CheckPrereq2()
9218 # Verify owned locks before starting operation
9219 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9220 assert set(owned_locks) == set(self.node_secondary_ip), \
9221 ("Incorrect node locks, owning %s, expected %s" %
9222 (owned_locks, self.node_secondary_ip.keys()))
9224 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9225 assert list(owned_locks) == [self.instance_name], \
9226 "Instance '%s' not locked" % self.instance_name
9228 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9229 "Should not own any node group lock at this point"
9232 feedback_fn("No disks need replacement")
9235 feedback_fn("Replacing disk(s) %s for %s" %
9236 (utils.CommaJoin(self.disks), self.instance.name))
9238 activate_disks = (not self.instance.admin_up)
9240 # Activate the instance disks if we're replacing them on a down instance
9241 if activate_disks:
9242 _StartInstanceDisks(self.lu, self.instance, True)
9244 try:
9245 # Should we replace the secondary node?
9246 if self.new_node is not None:
9247 fn = self._ExecDrbd8Secondary
9248 else:
9249 fn = self._ExecDrbd8DiskOnly
9251 result = fn(feedback_fn)
9252 finally:
9253 # Deactivate the instance disks if we're replacing them on a
9254 # down instance
9255 if activate_disks:
9256 _SafeShutdownInstanceDisks(self.lu, self.instance)
9259 # Verify owned locks
9260 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9261 nodes = frozenset(self.node_secondary_ip)
9262 assert ((self.early_release and not owned_locks) or
9263 (not self.early_release and not (set(owned_locks) - nodes))), \
9264 ("Not owning the correct locks, early_release=%s, owned=%r,"
9265 " nodes=%r" % (self.early_release, owned_locks, nodes))
9269 def _CheckVolumeGroup(self, nodes):
9270 self.lu.LogInfo("Checking volume groups")
9272 vgname = self.cfg.GetVGName()
9274 # Make sure volume group exists on all involved nodes
9275 results = self.rpc.call_vg_list(nodes)
9276 if not results:
9277 raise errors.OpExecError("Can't list volume groups on the nodes")
9279 for node in nodes:
9280 res = results[node]
9281 res.Raise("Error checking node %s" % node)
9282 if vgname not in res.payload:
9283 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9284 (vgname, node))
9286 def _CheckDisksExistence(self, nodes):
9287 # Check disk existence
9288 for idx, dev in enumerate(self.instance.disks):
9289 if idx not in self.disks:
9290 continue
9292 for node in nodes:
9293 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9294 self.cfg.SetDiskID(dev, node)
9296 result = self.rpc.call_blockdev_find(node, dev)
9298 msg = result.fail_msg
9299 if msg or not result.payload:
9301 msg = "disk not found"
9302 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9305 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9306 for idx, dev in enumerate(self.instance.disks):
9307 if idx not in self.disks:
9308 continue
9310 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9311 (idx, node_name))
9313 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9314 ldisk=ldisk):
9315 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9316 " replace disks for instance %s" %
9317 (node_name, self.instance.name))
9319 def _CreateNewStorage(self, node_name):
9320 iv_names = {}
9322 for idx, dev in enumerate(self.instance.disks):
9323 if idx not in self.disks:
9324 continue
9326 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9328 self.cfg.SetDiskID(dev, node_name)
9330 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9331 names = _GenerateUniqueNames(self.lu, lv_names)
9333 vg_data = dev.children[0].logical_id[0]
9334 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9335 logical_id=(vg_data, names[0]))
9336 vg_meta = dev.children[1].logical_id[0]
9337 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9338 logical_id=(vg_meta, names[1]))
9340 new_lvs = [lv_data, lv_meta]
9341 old_lvs = dev.children
9342 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
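# Illustrative sketch (names assumed, not taken from a real config): for the
# first DRBD disk of an instance, the mapping built above typically ends up as
#
#   iv_names["disk/0"] = (drbd_dev,
#                         [old_data_lv, old_meta_lv],
#                         [new_data_lv, new_meta_lv])
#
# with the new LVs created from unique names such as "<uuid>.disk0_data" and
# "<uuid>.disk0_meta".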
9344 # we pass force_create=True to force the LVM creation
9345 for new_lv in new_lvs:
9346 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9347 _GetInstanceInfoText(self.instance), False)
9351 def _CheckDevices(self, node_name, iv_names):
9352 for name, (dev, _, _) in iv_names.iteritems():
9353 self.cfg.SetDiskID(dev, node_name)
9355 result = self.rpc.call_blockdev_find(node_name, dev)
9357 msg = result.fail_msg
9358 if msg or not result.payload:
9360 msg = "disk not found"
9361 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9364 if result.payload.is_degraded:
9365 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9367 def _RemoveOldStorage(self, node_name, iv_names):
9368 for name, (_, old_lvs, _) in iv_names.iteritems():
9369 self.lu.LogInfo("Remove logical volumes for %s" % name)
9372 self.cfg.SetDiskID(lv, node_name)
9374 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9376 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9377 hint="remove unused LVs manually")
9379 def _ExecDrbd8DiskOnly(self, feedback_fn):
9380 """Replace a disk on the primary or secondary for DRBD 8.
9382 The algorithm for replace is quite complicated:
9384 1. for each disk to be replaced:
9386 1. create new LVs on the target node with unique names
9387 1. detach old LVs from the drbd device
9388 1. rename old LVs to name_replaced.<time_t>
9389 1. rename new LVs to old LVs
9390 1. attach the new LVs (with the old names now) to the drbd device
9392 1. wait for sync across all devices
9394 1. for each modified disk:
9396 1. remove old LVs (which have the name name_replaced.<time_t>)
9398 Failures are not very well handled.
9403 # Step: check device activation
9404 self.lu.LogStep(1, steps_total, "Check device existence")
9405 self._CheckDisksExistence([self.other_node, self.target_node])
9406 self._CheckVolumeGroup([self.target_node, self.other_node])
9408 # Step: check other node consistency
9409 self.lu.LogStep(2, steps_total, "Check peer consistency")
9410 self._CheckDisksConsistency(self.other_node,
9411 self.other_node == self.instance.primary_node,
9414 # Step: create new storage
9415 self.lu.LogStep(3, steps_total, "Allocate new storage")
9416 iv_names = self._CreateNewStorage(self.target_node)
9418 # Step: for each lv, detach+rename*2+attach
9419 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9420 for dev, old_lvs, new_lvs in iv_names.itervalues():
9421 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9423 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9425 result.Raise("Can't detach drbd from local storage on node"
9426 " %s for device %s" % (self.target_node, dev.iv_name))
9428 #cfg.Update(instance)
9430 # ok, we created the new LVs, so now we know we have the needed
9431 # storage; as such, we proceed on the target node to rename
9432 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9433 # using the assumption that logical_id == physical_id (which in
9434 # turn is the unique_id on that node)
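# A minimal sketch of the two rename passes (LV names assumed for
# illustration): an old data LV "xenvg/inst1.disk0_data" is first renamed to
# "xenvg/inst1.disk0_data_replaced-<time_t>", after which the freshly created
# LV is renamed to the original "xenvg/inst1.disk0_data", so the DRBD device
# can later be re-attached to a child carrying the name it expects.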
9436 # FIXME(iustin): use a better name for the replaced LVs
9437 temp_suffix = int(time.time())
9438 ren_fn = lambda d, suff: (d.physical_id[0],
9439 d.physical_id[1] + "_replaced-%s" % suff)
9441 # Build the rename list based on what LVs exist on the node
9442 rename_old_to_new = []
9443 for to_ren in old_lvs:
9444 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9445 if not result.fail_msg and result.payload:
9447 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9449 self.lu.LogInfo("Renaming the old LVs on the target node")
9450 result = self.rpc.call_blockdev_rename(self.target_node,
9452 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9454 # Now we rename the new LVs to the old LVs
9455 self.lu.LogInfo("Renaming the new LVs on the target node")
9456 rename_new_to_old = [(new, old.physical_id)
9457 for old, new in zip(old_lvs, new_lvs)]
9458 result = self.rpc.call_blockdev_rename(self.target_node,
9460 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9462 for old, new in zip(old_lvs, new_lvs):
9463 new.logical_id = old.logical_id
9464 self.cfg.SetDiskID(new, self.target_node)
9466 for disk in old_lvs:
9467 disk.logical_id = ren_fn(disk, temp_suffix)
9468 self.cfg.SetDiskID(disk, self.target_node)
9470 # Now that the new lvs have the old name, we can add them to the device
9471 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9472 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9474 msg = result.fail_msg
9476 for new_lv in new_lvs:
9477 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9480 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9481 hint=("cleanup manually the unused logical"
9483 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9485 dev.children = new_lvs
9487 self.cfg.Update(self.instance, feedback_fn)
9490 if self.early_release:
9491 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9493 self._RemoveOldStorage(self.target_node, iv_names)
9494 # WARNING: we release both node locks here, do not do other RPCs
9495 # than WaitForSync to the primary node
9496 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9497 names=[self.target_node, self.other_node])
9500 # This can fail as the old devices are degraded and _WaitForSync
9501 # does a combined result over all disks, so we don't check its return value
9502 self.lu.LogStep(cstep, steps_total, "Sync devices")
9504 _WaitForSync(self.lu, self.instance)
9506 # Check all devices manually
9507 self._CheckDevices(self.instance.primary_node, iv_names)
9509 # Step: remove old storage
9510 if not self.early_release:
9511 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9513 self._RemoveOldStorage(self.target_node, iv_names)
9515 def _ExecDrbd8Secondary(self, feedback_fn):
9516 """Replace the secondary node for DRBD 8.
9518 The algorithm for replace is quite complicated:
9519 - for all disks of the instance:
9520 - create new LVs on the new node with same names
9521 - shutdown the drbd device on the old secondary
9522 - disconnect the drbd network on the primary
9523 - create the drbd device on the new secondary
9524 - network attach the drbd on the primary, using an artifice:
9525 the drbd code for Attach() will connect to the network if it
9526 finds a device which is connected to the good local disks but
9528 - wait for sync across all devices
9529 - remove all disks from the old secondary
9531 Failures are not very well handled.
9536 # Step: check device activation
9537 self.lu.LogStep(1, steps_total, "Check device existence")
9538 self._CheckDisksExistence([self.instance.primary_node])
9539 self._CheckVolumeGroup([self.instance.primary_node])
9541 # Step: check other node consistency
9542 self.lu.LogStep(2, steps_total, "Check peer consistency")
9543 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9545 # Step: create new storage
9546 self.lu.LogStep(3, steps_total, "Allocate new storage")
9547 for idx, dev in enumerate(self.instance.disks):
9548 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9549 (self.new_node, idx))
9550 # we pass force_create=True to force LVM creation
9551 for new_lv in dev.children:
9552 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9553 _GetInstanceInfoText(self.instance), False)
9555 # Step 4: drbd minors and drbd setup changes
9556 # after this, we must manually remove the drbd minors on both the
9557 # error and the success paths
9558 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9559 minors = self.cfg.AllocateDRBDMinor([self.new_node
9560 for dev in self.instance.disks],
9562 logging.debug("Allocated minors %r", minors)
9565 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9566 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9567 (self.new_node, idx))
9568 # create new devices on new_node; note that we create two IDs:
9569 # one without port, so the drbd will be activated without
9570 # networking information on the new node at this stage, and one
9571 # with network, for the later activation in step 4
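# For illustration only (hostnames, port and minor numbers assumed): a DRBD8
# logical_id is a 6-tuple, so the two IDs built below look roughly like
#
#   new_alone_id = ("node1.example.com", "node4.example.com", None,
#                   p_minor, new_minor, o_secret)
#   new_net_id   = ("node1.example.com", "node4.example.com", 11000,
#                   p_minor, new_minor, o_secret)
#
# i.e. identical except that the standalone variant carries no network port.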
9572 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9573 if self.instance.primary_node == o_node1:
9576 assert self.instance.primary_node == o_node2, "Three-node instance?"
9579 new_alone_id = (self.instance.primary_node, self.new_node, None,
9580 p_minor, new_minor, o_secret)
9581 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9582 p_minor, new_minor, o_secret)
9584 iv_names[idx] = (dev, dev.children, new_net_id)
9585 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9587 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9588 logical_id=new_alone_id,
9589 children=dev.children,
9592 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9593 _GetInstanceInfoText(self.instance), False)
9594 except errors.GenericError:
9595 self.cfg.ReleaseDRBDMinors(self.instance.name)
9598 # We have new devices, shutdown the drbd on the old secondary
9599 for idx, dev in enumerate(self.instance.disks):
9600 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9601 self.cfg.SetDiskID(dev, self.target_node)
9602 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9604 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9605 "node: %s" % (idx, msg),
9606 hint=("Please cleanup this device manually as"
9607 " soon as possible"))
9609 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9610 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9611 self.node_secondary_ip,
9612 self.instance.disks)\
9613 [self.instance.primary_node]
9615 msg = result.fail_msg
9617 # detaches didn't succeed (unlikely)
9618 self.cfg.ReleaseDRBDMinors(self.instance.name)
9619 raise errors.OpExecError("Can't detach the disks from the network on"
9620 " old node: %s" % (msg,))
9622 # if we managed to detach at least one, we update all the disks of
9623 # the instance to point to the new secondary
9624 self.lu.LogInfo("Updating instance configuration")
9625 for dev, _, new_logical_id in iv_names.itervalues():
9626 dev.logical_id = new_logical_id
9627 self.cfg.SetDiskID(dev, self.instance.primary_node)
9629 self.cfg.Update(self.instance, feedback_fn)
9631 # and now perform the drbd attach
9632 self.lu.LogInfo("Attaching primary drbds to new secondary"
9633 " (standalone => connected)")
9634 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9636 self.node_secondary_ip,
9637 self.instance.disks,
9640 for to_node, to_result in result.items():
9641 msg = to_result.fail_msg
9643 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9645 hint=("please do a gnt-instance info to see the"
9646 " status of disks"))
9648 if self.early_release:
9649 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9651 self._RemoveOldStorage(self.target_node, iv_names)
9652 # WARNING: we release all node locks here, do not do other RPCs
9653 # than WaitForSync to the primary node
9654 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9655 names=[self.instance.primary_node,
9660 # This can fail as the old devices are degraded and _WaitForSync
9661 # does a combined result over all disks, so we don't check its return value
9662 self.lu.LogStep(cstep, steps_total, "Sync devices")
9664 _WaitForSync(self.lu, self.instance)
9666 # Check all devices manually
9667 self._CheckDevices(self.instance.primary_node, iv_names)
9669 # Step: remove old storage
9670 if not self.early_release:
9671 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9672 self._RemoveOldStorage(self.target_node, iv_names)
9675 class LURepairNodeStorage(NoHooksLU):
9676 """Repairs the volume group on a node.
9681 def CheckArguments(self):
9682 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9684 storage_type = self.op.storage_type
9686 if (constants.SO_FIX_CONSISTENCY not in
9687 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9688 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9689 " repaired" % storage_type,
9692 def ExpandNames(self):
9693 self.needed_locks = {
9694 locking.LEVEL_NODE: [self.op.node_name],
9697 def _CheckFaultyDisks(self, instance, node_name):
9698 """Ensure faulty disks abort the opcode or at least warn."""
9700 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9702 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9703 " node '%s'" % (instance.name, node_name),
9705 except errors.OpPrereqError, err:
9706 if self.op.ignore_consistency:
9707 self.proc.LogWarning(str(err.args[0]))
9711 def CheckPrereq(self):
9712 """Check prerequisites.
9715 # Check whether any instance on this node has faulty disks
9716 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9717 if not inst.admin_up:
9719 check_nodes = set(inst.all_nodes)
9720 check_nodes.discard(self.op.node_name)
9721 for inst_node_name in check_nodes:
9722 self._CheckFaultyDisks(inst, inst_node_name)
9724 def Exec(self, feedback_fn):
9725 feedback_fn("Repairing storage unit '%s' on %s ..." %
9726 (self.op.name, self.op.node_name))
9728 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9729 result = self.rpc.call_storage_execute(self.op.node_name,
9730 self.op.storage_type, st_args,
9732 constants.SO_FIX_CONSISTENCY)
9733 result.Raise("Failed to repair storage unit '%s' on %s" %
9734 (self.op.name, self.op.node_name))
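# Hedged usage sketch (exact CLI syntax assumed): this LU backs the
# "gnt-node repair-storage" command, e.g.
#
#   gnt-node repair-storage node2.example.com lvm-vg xenvg
#
# which submits the corresponding repair opcode for the given node and
# storage unit.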
9737 class LUNodeEvacStrategy(NoHooksLU):
9738 """Computes the node evacuation strategy.
9743 def CheckArguments(self):
9744 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9746 def ExpandNames(self):
9747 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9748 self.needed_locks = locks = {}
9749 if self.op.remote_node is None:
9750 locks[locking.LEVEL_NODE] = locking.ALL_SET
9752 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9753 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9755 def Exec(self, feedback_fn):
9757 for node in self.op.nodes:
9758 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9762 if self.op.remote_node is not None:
9765 if i.primary_node == self.op.remote_node:
9766 raise errors.OpPrereqError("Node %s is the primary node of"
9767 " instance %s, cannot use it as"
9769 (self.op.remote_node, i.name),
9771 result.append([i.name, self.op.remote_node])
9773 ial = IAllocator(self.cfg, self.rpc,
9774 mode=constants.IALLOCATOR_MODE_MEVAC,
9775 evac_nodes=self.op.nodes)
9776 ial.Run(self.op.iallocator, validate=True)
9778 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9784 class LUInstanceGrowDisk(LogicalUnit):
9785 """Grow a disk of an instance.
9789 HTYPE = constants.HTYPE_INSTANCE
9792 def ExpandNames(self):
9793 self._ExpandAndLockInstance()
9794 self.needed_locks[locking.LEVEL_NODE] = []
9795 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9797 def DeclareLocks(self, level):
9798 if level == locking.LEVEL_NODE:
9799 self._LockInstancesNodes()
9801 def BuildHooksEnv(self):
9804 This runs on the master, the primary and all the secondaries.
9808 "DISK": self.op.disk,
9809 "AMOUNT": self.op.amount,
9811 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9814 def BuildHooksNodes(self):
9815 """Build hooks nodes.
9818 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9821 def CheckPrereq(self):
9822 """Check prerequisites.
9824 This checks that the instance is in the cluster.
9827 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9828 assert instance is not None, \
9829 "Cannot retrieve locked instance %s" % self.op.instance_name
9830 nodenames = list(instance.all_nodes)
9831 for node in nodenames:
9832 _CheckNodeOnline(self, node)
9834 self.instance = instance
9836 if instance.disk_template not in constants.DTS_GROWABLE:
9837 raise errors.OpPrereqError("Instance's disk layout does not support"
9838 " growing", errors.ECODE_INVAL)
9840 self.disk = instance.FindDisk(self.op.disk)
9842 if instance.disk_template not in (constants.DT_FILE,
9843 constants.DT_SHARED_FILE):
9844 # TODO: check the free disk space for file, when that feature will be
9846 _CheckNodesFreeDiskPerVG(self, nodenames,
9847 self.disk.ComputeGrowth(self.op.amount))
9849 def Exec(self, feedback_fn):
9850 """Execute disk grow.
9853 instance = self.instance
9856 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9858 raise errors.OpExecError("Cannot activate block device to grow")
9860 # First run all grow ops in dry-run mode
9861 for node in instance.all_nodes:
9862 self.cfg.SetDiskID(disk, node)
9863 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9864 result.Raise("Grow request failed to node %s" % node)
9866 # We know that (as far as we can test) operations across different
9867 # nodes will succeed, time to run it for real
9868 for node in instance.all_nodes:
9869 self.cfg.SetDiskID(disk, node)
9870 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9871 result.Raise("Grow request failed to node %s" % node)
9873 # TODO: Rewrite code to work properly
9874 # DRBD goes into sync mode for a short amount of time after executing the
9875 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9876 # calling "resize" in sync mode fails. Sleeping for a short amount of
9877 # time is a work-around.
9880 disk.RecordGrow(self.op.amount)
9881 self.cfg.Update(instance, feedback_fn)
9882 if self.op.wait_for_sync:
9883 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9885 self.proc.LogWarning("Disk sync-ing has not returned a good"
9886 " status; please check the instance")
9887 if not instance.admin_up:
9888 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9889 elif not instance.admin_up:
9890 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9891 " not supposed to be running because no wait for"
9892 " sync mode was requested")
9895 class LUInstanceQueryData(NoHooksLU):
9896 """Query runtime instance data.
9901 def ExpandNames(self):
9902 self.needed_locks = {}
9904 # Use locking if requested or when non-static information is wanted
9905 if not (self.op.static or self.op.use_locking):
9906 self.LogWarning("Non-static data requested, locks need to be acquired")
9907 self.op.use_locking = True
9909 if self.op.instances or not self.op.use_locking:
9910 # Expand instance names right here
9911 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9913 # Will use acquired locks
9914 self.wanted_names = None
9916 if self.op.use_locking:
9917 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9919 if self.wanted_names is None:
9920 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9922 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9924 self.needed_locks[locking.LEVEL_NODE] = []
9925 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9926 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9928 def DeclareLocks(self, level):
9929 if self.op.use_locking and level == locking.LEVEL_NODE:
9930 self._LockInstancesNodes()
9932 def CheckPrereq(self):
9933 """Check prerequisites.
9935 This only checks the optional instance list against the existing names.
9938 if self.wanted_names is None:
9939 assert self.op.use_locking, "Locking was not used"
9940 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9942 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9943 for name in self.wanted_names]
9945 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9946 """Returns the status of a block device
9949 if self.op.static or not node:
9952 self.cfg.SetDiskID(dev, node)
9954 result = self.rpc.call_blockdev_find(node, dev)
9958 result.Raise("Can't compute disk status for %s" % instance_name)
9960 status = result.payload
9964 return (status.dev_path, status.major, status.minor,
9965 status.sync_percent, status.estimated_time,
9966 status.is_degraded, status.ldisk_status)
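# Purely illustrative (values assumed): the tuple returned above typically
# looks like
#
#   ("/dev/drbd0", 147, 0, 90.5, 30, False, constants.LDS_OKAY)
#
# i.e. device path, major and minor numbers, sync percentage, estimated time
# in seconds, the degraded flag and the local-disk status.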
9968 def _ComputeDiskStatus(self, instance, snode, dev):
9969 """Compute block device status.
9972 if dev.dev_type in constants.LDS_DRBD:
9973 # we change the snode then (otherwise we use the one passed in)
9974 if dev.logical_id[0] == instance.primary_node:
9975 snode = dev.logical_id[1]
9977 snode = dev.logical_id[0]
9979 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9981 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9984 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9985 for child in dev.children]
9990 "iv_name": dev.iv_name,
9991 "dev_type": dev.dev_type,
9992 "logical_id": dev.logical_id,
9993 "physical_id": dev.physical_id,
9994 "pstatus": dev_pstatus,
9995 "sstatus": dev_sstatus,
9996 "children": dev_children,
10001 def Exec(self, feedback_fn):
10002 """Gather and return data"""
10005 cluster = self.cfg.GetClusterInfo()
10007 for instance in self.wanted_instances:
10008 if not self.op.static:
10009 remote_info = self.rpc.call_instance_info(instance.primary_node,
10011 instance.hypervisor)
10012 remote_info.Raise("Error checking node %s" % instance.primary_node)
10013 remote_info = remote_info.payload
10014 if remote_info and "state" in remote_info:
10015 remote_state = "up"
10017 remote_state = "down"
10019 remote_state = None
10020 if instance.admin_up:
10021 config_state = "up"
10023 config_state = "down"
10025 disks = [self._ComputeDiskStatus(instance, None, device)
10026 for device in instance.disks]
10028 result[instance.name] = {
10029 "name": instance.name,
10030 "config_state": config_state,
10031 "run_state": remote_state,
10032 "pnode": instance.primary_node,
10033 "snodes": instance.secondary_nodes,
10035 # this happens to be the same format used for hooks
10036 "nics": _NICListToTuple(self, instance.nics),
10037 "disk_template": instance.disk_template,
10039 "hypervisor": instance.hypervisor,
10040 "network_port": instance.network_port,
10041 "hv_instance": instance.hvparams,
10042 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10043 "be_instance": instance.beparams,
10044 "be_actual": cluster.FillBE(instance),
10045 "os_instance": instance.osparams,
10046 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10047 "serial_no": instance.serial_no,
10048 "mtime": instance.mtime,
10049 "ctime": instance.ctime,
10050 "uuid": instance.uuid,
10056 class LUInstanceSetParams(LogicalUnit):
10057 """Modifies an instances's parameters.
10060 HPATH = "instance-modify"
10061 HTYPE = constants.HTYPE_INSTANCE
10064 def CheckArguments(self):
10065 if not (self.op.nics or self.op.disks or self.op.disk_template or
10066 self.op.hvparams or self.op.beparams or self.op.os_name):
10067 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10069 if self.op.hvparams:
10070 _CheckGlobalHvParams(self.op.hvparams)
10074 for disk_op, disk_dict in self.op.disks:
10075 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10076 if disk_op == constants.DDM_REMOVE:
10077 disk_addremove += 1
10079 elif disk_op == constants.DDM_ADD:
10080 disk_addremove += 1
10082 if not isinstance(disk_op, int):
10083 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10084 if not isinstance(disk_dict, dict):
10085 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10086 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10088 if disk_op == constants.DDM_ADD:
10089 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10090 if mode not in constants.DISK_ACCESS_SET:
10091 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10092 errors.ECODE_INVAL)
10093 size = disk_dict.get(constants.IDISK_SIZE, None)
10095 raise errors.OpPrereqError("Required disk parameter size missing",
10096 errors.ECODE_INVAL)
10099 except (TypeError, ValueError), err:
10100 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10101 str(err), errors.ECODE_INVAL)
10102 disk_dict[constants.IDISK_SIZE] = size
10104 # modification of disk
10105 if constants.IDISK_SIZE in disk_dict:
10106 raise errors.OpPrereqError("Disk size change not possible, use"
10107 " grow-disk", errors.ECODE_INVAL)
10109 if disk_addremove > 1:
10110 raise errors.OpPrereqError("Only one disk add or remove operation"
10111 " supported at a time", errors.ECODE_INVAL)
10113 if self.op.disks and self.op.disk_template is not None:
10114 raise errors.OpPrereqError("Disk template conversion and other disk"
10115 " changes not supported at the same time",
10116 errors.ECODE_INVAL)
10118 if (self.op.disk_template and
10119 self.op.disk_template in constants.DTS_INT_MIRROR and
10120 self.op.remote_node is None):
10121 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10122 " one requires specifying a secondary node",
10123 errors.ECODE_INVAL)
10127 for nic_op, nic_dict in self.op.nics:
10128 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10129 if nic_op == constants.DDM_REMOVE:
10132 elif nic_op == constants.DDM_ADD:
10135 if not isinstance(nic_op, int):
10136 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10137 if not isinstance(nic_dict, dict):
10138 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10139 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10141 # nic_dict should be a dict
10142 nic_ip = nic_dict.get(constants.INIC_IP, None)
10143 if nic_ip is not None:
10144 if nic_ip.lower() == constants.VALUE_NONE:
10145 nic_dict[constants.INIC_IP] = None
10147 if not netutils.IPAddress.IsValid(nic_ip):
10148 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10149 errors.ECODE_INVAL)
10151 nic_bridge = nic_dict.get('bridge', None)
10152 nic_link = nic_dict.get(constants.INIC_LINK, None)
10153 if nic_bridge and nic_link:
10154 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10155 " at the same time", errors.ECODE_INVAL)
10156 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10157 nic_dict['bridge'] = None
10158 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10159 nic_dict[constants.INIC_LINK] = None
10161 if nic_op == constants.DDM_ADD:
10162 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10163 if nic_mac is None:
10164 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10166 if constants.INIC_MAC in nic_dict:
10167 nic_mac = nic_dict[constants.INIC_MAC]
10168 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10169 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10171 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10172 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10173 " modifying an existing nic",
10174 errors.ECODE_INVAL)
10176 if nic_addremove > 1:
10177 raise errors.OpPrereqError("Only one NIC add or remove operation"
10178 " supported at a time", errors.ECODE_INVAL)
10180 def ExpandNames(self):
10181 self._ExpandAndLockInstance()
10182 self.needed_locks[locking.LEVEL_NODE] = []
10183 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10185 def DeclareLocks(self, level):
10186 if level == locking.LEVEL_NODE:
10187 self._LockInstancesNodes()
10188 if self.op.disk_template and self.op.remote_node:
10189 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10190 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10192 def BuildHooksEnv(self):
10193 """Build hooks env.
10195 This runs on the master, primary and secondaries.
10199 if constants.BE_MEMORY in self.be_new:
10200 args['memory'] = self.be_new[constants.BE_MEMORY]
10201 if constants.BE_VCPUS in self.be_new:
10202 args['vcpus'] = self.be_new[constants.BE_VCPUS]
10203 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10204 # information at all.
10207 nic_override = dict(self.op.nics)
10208 for idx, nic in enumerate(self.instance.nics):
10209 if idx in nic_override:
10210 this_nic_override = nic_override[idx]
10212 this_nic_override = {}
10213 if constants.INIC_IP in this_nic_override:
10214 ip = this_nic_override[constants.INIC_IP]
10217 if constants.INIC_MAC in this_nic_override:
10218 mac = this_nic_override[constants.INIC_MAC]
10221 if idx in self.nic_pnew:
10222 nicparams = self.nic_pnew[idx]
10224 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10225 mode = nicparams[constants.NIC_MODE]
10226 link = nicparams[constants.NIC_LINK]
10227 args['nics'].append((ip, mac, mode, link))
10228 if constants.DDM_ADD in nic_override:
10229 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10230 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10231 nicparams = self.nic_pnew[constants.DDM_ADD]
10232 mode = nicparams[constants.NIC_MODE]
10233 link = nicparams[constants.NIC_LINK]
10234 args['nics'].append((ip, mac, mode, link))
10235 elif constants.DDM_REMOVE in nic_override:
10236 del args['nics'][-1]
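# For illustration (addresses assumed): after this block args['nics'] is a
# list of (ip, mac, mode, link) tuples, e.g.
#
#   [("198.51.100.10", "aa:00:00:35:d1:2f", "bridged", "xen-br0")]
#
# which is then fed to _BuildInstanceHookEnvByObject below as an override for
# the per-NIC hook environment.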
10238 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10239 if self.op.disk_template:
10240 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10244 def BuildHooksNodes(self):
10245 """Build hooks nodes.
10248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10251 def CheckPrereq(self):
10252 """Check prerequisites.
10254 This only checks the instance list against the existing names.
10257 # checking the new params on the primary/secondary nodes
10259 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10260 cluster = self.cluster = self.cfg.GetClusterInfo()
10261 assert self.instance is not None, \
10262 "Cannot retrieve locked instance %s" % self.op.instance_name
10263 pnode = instance.primary_node
10264 nodelist = list(instance.all_nodes)
10267 if self.op.os_name and not self.op.force:
10268 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10269 self.op.force_variant)
10270 instance_os = self.op.os_name
10272 instance_os = instance.os
10274 if self.op.disk_template:
10275 if instance.disk_template == self.op.disk_template:
10276 raise errors.OpPrereqError("Instance already has disk template %s" %
10277 instance.disk_template, errors.ECODE_INVAL)
10279 if (instance.disk_template,
10280 self.op.disk_template) not in self._DISK_CONVERSIONS:
10281 raise errors.OpPrereqError("Unsupported disk template conversion from"
10282 " %s to %s" % (instance.disk_template,
10283 self.op.disk_template),
10284 errors.ECODE_INVAL)
10285 _CheckInstanceDown(self, instance, "cannot change disk template")
10286 if self.op.disk_template in constants.DTS_INT_MIRROR:
10287 if self.op.remote_node == pnode:
10288 raise errors.OpPrereqError("Given new secondary node %s is the same"
10289 " as the primary node of the instance" %
10290 self.op.remote_node, errors.ECODE_STATE)
10291 _CheckNodeOnline(self, self.op.remote_node)
10292 _CheckNodeNotDrained(self, self.op.remote_node)
10293 # FIXME: here we assume that the old instance type is DT_PLAIN
10294 assert instance.disk_template == constants.DT_PLAIN
10295 disks = [{constants.IDISK_SIZE: d.size,
10296 constants.IDISK_VG: d.logical_id[0]}
10297 for d in instance.disks]
10298 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10299 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10301 # hvparams processing
10302 if self.op.hvparams:
10303 hv_type = instance.hypervisor
10304 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10305 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10306 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10309 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10310 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10311 self.hv_new = hv_new # the new actual values
10312 self.hv_inst = i_hvdict # the new dict (without defaults)
10314 self.hv_new = self.hv_inst = {}
10316 # beparams processing
10317 if self.op.beparams:
10318 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10320 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10321 be_new = cluster.SimpleFillBE(i_bedict)
10322 self.be_new = be_new # the new actual values
10323 self.be_inst = i_bedict # the new dict (without defaults)
10325 self.be_new = self.be_inst = {}
10326 be_old = cluster.FillBE(instance)
10328 # osparams processing
10329 if self.op.osparams:
10330 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10331 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10332 self.os_inst = i_osdict # the new dict (without defaults)
10338 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10339 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10340 mem_check_list = [pnode]
10341 if be_new[constants.BE_AUTO_BALANCE]:
10342 # either we changed auto_balance to yes or it was from before
10343 mem_check_list.extend(instance.secondary_nodes)
10344 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10345 instance.hypervisor)
10346 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10347 instance.hypervisor)
10348 pninfo = nodeinfo[pnode]
10349 msg = pninfo.fail_msg
10351 # Assume the primary node is unreachable and go ahead
10352 self.warn.append("Can't get info from primary node %s: %s" %
10354 elif not isinstance(pninfo.payload.get('memory_free', None), int):
10355 self.warn.append("Node data from primary node %s doesn't contain"
10356 " free memory information" % pnode)
10357 elif instance_info.fail_msg:
10358 self.warn.append("Can't get instance runtime information: %s" %
10359 instance_info.fail_msg)
10361 if instance_info.payload:
10362 current_mem = int(instance_info.payload['memory'])
10364 # Assume instance not running
10365 # (there is a slight race condition here, but it's not very probable,
10366 # and we have no other way to check)
10368 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10369 pninfo.payload['memory_free'])
10371 raise errors.OpPrereqError("This change will prevent the instance"
10372 " from starting, due to %d MB of memory"
10373 " missing on its primary node" % miss_mem,
10374 errors.ECODE_NORES)
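# Worked example (numbers assumed): raising BE_MEMORY to 4096 MB while the
# instance currently uses 1024 MB and the primary node reports 2048 MB free
# gives miss_mem = 4096 - 1024 - 2048 = 1024 MB, so the change is refused
# with ECODE_NORES.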
10376 if be_new[constants.BE_AUTO_BALANCE]:
10377 for node, nres in nodeinfo.items():
10378 if node not in instance.secondary_nodes:
10380 nres.Raise("Can't get info from secondary node %s" % node,
10381 prereq=True, ecode=errors.ECODE_STATE)
10382 if not isinstance(nres.payload.get('memory_free', None), int):
10383 raise errors.OpPrereqError("Secondary node %s didn't return free"
10384 " memory information" % node,
10385 errors.ECODE_STATE)
10386 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10387 raise errors.OpPrereqError("This change will prevent the instance"
10388 " from failover to its secondary node"
10389 " %s, due to not enough memory" % node,
10390 errors.ECODE_STATE)
10394 self.nic_pinst = {}
10395 for nic_op, nic_dict in self.op.nics:
10396 if nic_op == constants.DDM_REMOVE:
10397 if not instance.nics:
10398 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10399 errors.ECODE_INVAL)
10401 if nic_op != constants.DDM_ADD:
10403 if not instance.nics:
10404 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10405 " no NICs" % nic_op,
10406 errors.ECODE_INVAL)
10407 if nic_op < 0 or nic_op >= len(instance.nics):
10408 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10410 (nic_op, len(instance.nics) - 1),
10411 errors.ECODE_INVAL)
10412 old_nic_params = instance.nics[nic_op].nicparams
10413 old_nic_ip = instance.nics[nic_op].ip
10415 old_nic_params = {}
10418 update_params_dict = dict([(key, nic_dict[key])
10419 for key in constants.NICS_PARAMETERS
10420 if key in nic_dict])
10422 if 'bridge' in nic_dict:
10423 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10425 new_nic_params = _GetUpdatedParams(old_nic_params,
10426 update_params_dict)
10427 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10428 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10429 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10430 self.nic_pinst[nic_op] = new_nic_params
10431 self.nic_pnew[nic_op] = new_filled_nic_params
10432 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10434 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10435 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10436 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10438 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10440 self.warn.append(msg)
10442 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10443 if new_nic_mode == constants.NIC_MODE_ROUTED:
10444 if constants.INIC_IP in nic_dict:
10445 nic_ip = nic_dict[constants.INIC_IP]
10447 nic_ip = old_nic_ip
10449 raise errors.OpPrereqError('Cannot set the nic ip to None'
10450 ' on a routed nic', errors.ECODE_INVAL)
10451 if constants.INIC_MAC in nic_dict:
10452 nic_mac = nic_dict[constants.INIC_MAC]
10453 if nic_mac is None:
10454 raise errors.OpPrereqError('Cannot set the nic mac to None',
10455 errors.ECODE_INVAL)
10456 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10457 # otherwise generate the mac
10458 nic_dict[constants.INIC_MAC] = \
10459 self.cfg.GenerateMAC(self.proc.GetECId())
10461 # or validate/reserve the current one
10463 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10464 except errors.ReservationError:
10465 raise errors.OpPrereqError("MAC address %s already in use"
10466 " in cluster" % nic_mac,
10467 errors.ECODE_NOTUNIQUE)
10470 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10471 raise errors.OpPrereqError("Disk operations not supported for"
10472 " diskless instances",
10473 errors.ECODE_INVAL)
10474 for disk_op, _ in self.op.disks:
10475 if disk_op == constants.DDM_REMOVE:
10476 if len(instance.disks) == 1:
10477 raise errors.OpPrereqError("Cannot remove the last disk of"
10478 " an instance", errors.ECODE_INVAL)
10479 _CheckInstanceDown(self, instance, "cannot remove disks")
10481 if (disk_op == constants.DDM_ADD and
10482 len(instance.disks) >= constants.MAX_DISKS):
10483 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10484 " add more" % constants.MAX_DISKS,
10485 errors.ECODE_STATE)
10486 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10488 if disk_op < 0 or disk_op >= len(instance.disks):
10489 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10491 (disk_op, len(instance.disks)),
10492 errors.ECODE_INVAL)
10496 def _ConvertPlainToDrbd(self, feedback_fn):
10497 """Converts an instance from plain to drbd.
10500 feedback_fn("Converting template to drbd")
10501 instance = self.instance
10502 pnode = instance.primary_node
10503 snode = self.op.remote_node
10505 # create a fake disk info for _GenerateDiskTemplate
10506 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10507 constants.IDISK_VG: d.logical_id[0]}
10508 for d in instance.disks]
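# Illustrative only (size and VG name assumed): for a plain instance with a
# single 10 GiB disk, disk_info built above looks like
#
#   [{constants.IDISK_SIZE: 10240,
#     constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]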
10509 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10510 instance.name, pnode, [snode],
10511 disk_info, None, None, 0, feedback_fn)
10512 info = _GetInstanceInfoText(instance)
10513 feedback_fn("Creating aditional volumes...")
10514 # first, create the missing data and meta devices
10515 for disk in new_disks:
10516 # unfortunately this is... not too nice
10517 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10519 for child in disk.children:
10520 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10521 # at this stage, all new LVs have been created, we can rename the
10523 feedback_fn("Renaming original volumes...")
10524 rename_list = [(o, n.children[0].logical_id)
10525 for (o, n) in zip(instance.disks, new_disks)]
10526 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10527 result.Raise("Failed to rename original LVs")
10529 feedback_fn("Initializing DRBD devices...")
10530 # all child devices are in place, we can now create the DRBD devices
10531 for disk in new_disks:
10532 for node in [pnode, snode]:
10533 f_create = node == pnode
10534 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10536 # at this point, the instance has been modified
10537 instance.disk_template = constants.DT_DRBD8
10538 instance.disks = new_disks
10539 self.cfg.Update(instance, feedback_fn)
10541 # disks are created, waiting for sync
10542 disk_abort = not _WaitForSync(self, instance,
10543 oneshot=not self.op.wait_for_sync)
10545 raise errors.OpExecError("There are some degraded disks for"
10546 " this instance, please cleanup manually")
10548 def _ConvertDrbdToPlain(self, feedback_fn):
10549 """Converts an instance from drbd to plain.
10552 instance = self.instance
10553 assert len(instance.secondary_nodes) == 1
10554 pnode = instance.primary_node
10555 snode = instance.secondary_nodes[0]
10556 feedback_fn("Converting template to plain")
10558 old_disks = instance.disks
10559 new_disks = [d.children[0] for d in old_disks]
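# Note: for DRBD8 disks children[0] is the data LV and children[1] the
# metadata LV (see _CreateNewStorage above), so keeping only children[0]
# leaves exactly the plain LVs that will back the converted instance.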
10561 # copy over size and mode
10562 for parent, child in zip(old_disks, new_disks):
10563 child.size = parent.size
10564 child.mode = parent.mode
10566 # update instance structure
10567 instance.disks = new_disks
10568 instance.disk_template = constants.DT_PLAIN
10569 self.cfg.Update(instance, feedback_fn)
10571 feedback_fn("Removing volumes on the secondary node...")
10572 for disk in old_disks:
10573 self.cfg.SetDiskID(disk, snode)
10574 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10576 self.LogWarning("Could not remove block device %s on node %s,"
10577 " continuing anyway: %s", disk.iv_name, snode, msg)
10579 feedback_fn("Removing unneeded volumes on the primary node...")
10580 for idx, disk in enumerate(old_disks):
10581 meta = disk.children[1]
10582 self.cfg.SetDiskID(meta, pnode)
10583 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10585 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10586 " continuing anyway: %s", idx, pnode, msg)
10588 def Exec(self, feedback_fn):
10589 """Modifies an instance.
10591 All parameters take effect only at the next restart of the instance.
10594 # Process here the warnings from CheckPrereq, as we don't have a
10595 # feedback_fn there.
10596 for warn in self.warn:
10597 feedback_fn("WARNING: %s" % warn)
10600 instance = self.instance
10602 for disk_op, disk_dict in self.op.disks:
10603 if disk_op == constants.DDM_REMOVE:
10604 # remove the last disk
10605 device = instance.disks.pop()
10606 device_idx = len(instance.disks)
10607 for node, disk in device.ComputeNodeTree(instance.primary_node):
10608 self.cfg.SetDiskID(disk, node)
10609 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10611 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10612 " continuing anyway", device_idx, node, msg)
10613 result.append(("disk/%d" % device_idx, "remove"))
10614 elif disk_op == constants.DDM_ADD:
10616 if instance.disk_template in (constants.DT_FILE,
10617 constants.DT_SHARED_FILE):
10618 file_driver, file_path = instance.disks[0].logical_id
10619 file_path = os.path.dirname(file_path)
10621 file_driver = file_path = None
10622 disk_idx_base = len(instance.disks)
10623 new_disk = _GenerateDiskTemplate(self,
10624 instance.disk_template,
10625 instance.name, instance.primary_node,
10626 instance.secondary_nodes,
10630 disk_idx_base, feedback_fn)[0]
10631 instance.disks.append(new_disk)
10632 info = _GetInstanceInfoText(instance)
10634 logging.info("Creating volume %s for instance %s",
10635 new_disk.iv_name, instance.name)
10636 # Note: this needs to be kept in sync with _CreateDisks
10638 for node in instance.all_nodes:
10639 f_create = node == instance.primary_node
10641 _CreateBlockDev(self, node, instance, new_disk,
10642 f_create, info, f_create)
10643 except errors.OpExecError, err:
10644 self.LogWarning("Failed to create volume %s (%s) on"
10646 new_disk.iv_name, new_disk, node, err)
10647 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10648 (new_disk.size, new_disk.mode)))
10650 # change a given disk
10651 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10652 result.append(("disk.mode/%d" % disk_op,
10653 disk_dict[constants.IDISK_MODE]))
10655 if self.op.disk_template:
10656 r_shut = _ShutdownInstanceDisks(self, instance)
10658 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10659 " proceed with disk template conversion")
10660 mode = (instance.disk_template, self.op.disk_template)
10662 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10664 self.cfg.ReleaseDRBDMinors(instance.name)
10666 result.append(("disk_template", self.op.disk_template))
10669 for nic_op, nic_dict in self.op.nics:
10670 if nic_op == constants.DDM_REMOVE:
10671 # remove the last nic
10672 del instance.nics[-1]
10673 result.append(("nic.%d" % len(instance.nics), "remove"))
10674 elif nic_op == constants.DDM_ADD:
10675 # mac and bridge should be set by now
10676 mac = nic_dict[constants.INIC_MAC]
10677 ip = nic_dict.get(constants.INIC_IP, None)
10678 nicparams = self.nic_pinst[constants.DDM_ADD]
10679 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10680 instance.nics.append(new_nic)
10681 result.append(("nic.%d" % (len(instance.nics) - 1),
10682 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10683 (new_nic.mac, new_nic.ip,
10684 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10685 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10688 for key in (constants.INIC_MAC, constants.INIC_IP):
10689 if key in nic_dict:
10690 setattr(instance.nics[nic_op], key, nic_dict[key])
10691 if nic_op in self.nic_pinst:
10692 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10693 for key, val in nic_dict.iteritems():
10694 result.append(("nic.%s/%d" % (key, nic_op), val))
10697 if self.op.hvparams:
10698 instance.hvparams = self.hv_inst
10699 for key, val in self.op.hvparams.iteritems():
10700 result.append(("hv/%s" % key, val))
10703 if self.op.beparams:
10704 instance.beparams = self.be_inst
10705 for key, val in self.op.beparams.iteritems():
10706 result.append(("be/%s" % key, val))
10709 if self.op.os_name:
10710 instance.os = self.op.os_name
10713 if self.op.osparams:
10714 instance.osparams = self.os_inst
10715 for key, val in self.op.osparams.iteritems():
10716 result.append(("os/%s" % key, val))
10718 self.cfg.Update(instance, feedback_fn)
10722 _DISK_CONVERSIONS = {
10723 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10724 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10728 class LUBackupQuery(NoHooksLU):
10729 """Query the exports list
10734 def ExpandNames(self):
10735 self.needed_locks = {}
10736 self.share_locks[locking.LEVEL_NODE] = 1
10737 if not self.op.nodes:
10738 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10740 self.needed_locks[locking.LEVEL_NODE] = \
10741 _GetWantedNodes(self, self.op.nodes)
10743 def Exec(self, feedback_fn):
10744 """Compute the list of all the exported system images.
10747 @return: a dictionary with the structure node->(export-list)
10748 where export-list is a list of the instances exported on
10752 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10753 rpcresult = self.rpc.call_export_list(self.nodes)
10755 for node in rpcresult:
10756 if rpcresult[node].fail_msg:
10757 result[node] = False
10759 result[node] = rpcresult[node].payload
10764 class LUBackupPrepare(NoHooksLU):
10765 """Prepares an instance for an export and returns useful information.
10770 def ExpandNames(self):
10771 self._ExpandAndLockInstance()
10773 def CheckPrereq(self):
10774 """Check prerequisites.
10777 instance_name = self.op.instance_name
10779 self.instance = self.cfg.GetInstanceInfo(instance_name)
10780 assert self.instance is not None, \
10781 "Cannot retrieve locked instance %s" % self.op.instance_name
10782 _CheckNodeOnline(self, self.instance.primary_node)
10784 self._cds = _GetClusterDomainSecret()
10786 def Exec(self, feedback_fn):
10787 """Prepares an instance for an export.
10790 instance = self.instance
10792 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10793 salt = utils.GenerateSecret(8)
10795 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10796 result = self.rpc.call_x509_cert_create(instance.primary_node,
10797 constants.RIE_CERT_VALIDITY)
10798 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10800 (name, cert_pem) = result.payload
10802 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10806 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10807 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10809 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10815 class LUBackupExport(LogicalUnit):
10816 """Export an instance to an image in the cluster.
10819 HPATH = "instance-export"
10820 HTYPE = constants.HTYPE_INSTANCE
10823 def CheckArguments(self):
10824 """Check the arguments.
10827 self.x509_key_name = self.op.x509_key_name
10828 self.dest_x509_ca_pem = self.op.destination_x509_ca
10830 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10831 if not self.x509_key_name:
10832 raise errors.OpPrereqError("Missing X509 key name for encryption",
10833 errors.ECODE_INVAL)
10835 if not self.dest_x509_ca_pem:
10836 raise errors.OpPrereqError("Missing destination X509 CA",
10837 errors.ECODE_INVAL)
10839 def ExpandNames(self):
10840 self._ExpandAndLockInstance()
10842 # Lock all nodes for local exports
10843 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10844 # FIXME: lock only instance primary and destination node
10846 # Sad but true, for now we have to lock all nodes, as we don't know where
10847 # the previous export might be, and in this LU we search for it and
10848 # remove it from its current node. In the future we could fix this by:
10849 # - making a tasklet to search (share-lock all), then create the
10850 # new one, then one to remove, after
10851 # - removing the removal operation altogether
10852 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10854 def DeclareLocks(self, level):
10855 """Last minute lock declaration."""
10856 # All nodes are locked anyway, so nothing to do here.
10858 def BuildHooksEnv(self):
10859 """Build hooks env.
10861 This will run on the master, primary node and target node.
10865 "EXPORT_MODE": self.op.mode,
10866 "EXPORT_NODE": self.op.target_node,
10867 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10868 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10869 # TODO: Generic function for boolean env variables
10870 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10873 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10877 def BuildHooksNodes(self):
10878 """Build hooks nodes.
10881 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10883 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10884 nl.append(self.op.target_node)
10888 def CheckPrereq(self):
10889 """Check prerequisites.
10891 This checks that the instance and node names are valid.
10894 instance_name = self.op.instance_name
10896 self.instance = self.cfg.GetInstanceInfo(instance_name)
10897 assert self.instance is not None, \
10898 "Cannot retrieve locked instance %s" % self.op.instance_name
10899 _CheckNodeOnline(self, self.instance.primary_node)
10901 if (self.op.remove_instance and self.instance.admin_up and
10902 not self.op.shutdown):
10903 raise errors.OpPrereqError("Can not remove instance without shutting it"
10906 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10907 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10908 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10909 assert self.dst_node is not None
10911 _CheckNodeOnline(self, self.dst_node.name)
10912 _CheckNodeNotDrained(self, self.dst_node.name)
10915 self.dest_disk_info = None
10916 self.dest_x509_ca = None
10918 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10919 self.dst_node = None
10921 if len(self.op.target_node) != len(self.instance.disks):
10922 raise errors.OpPrereqError(("Received destination information for %s"
10923 " disks, but instance %s has %s disks") %
10924 (len(self.op.target_node), instance_name,
10925 len(self.instance.disks)),
10926 errors.ECODE_INVAL)
10928 cds = _GetClusterDomainSecret()
10930 # Check X509 key name
10932 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10933 except (TypeError, ValueError), err:
10934 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10936 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10937 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10938 errors.ECODE_INVAL)
10940 # Load and verify CA
10942 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10943 except OpenSSL.crypto.Error, err:
10944 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10945 (err, ), errors.ECODE_INVAL)
10947 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10948 if errcode is not None:
10949 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10950 (msg, ), errors.ECODE_INVAL)
10952 self.dest_x509_ca = cert
10954 # Verify target information
10956 for idx, disk_data in enumerate(self.op.target_node):
10958 (host, port, magic) = \
10959 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10960 except errors.GenericError, err:
10961 raise errors.OpPrereqError("Target info for disk %s: %s" %
10962 (idx, err), errors.ECODE_INVAL)
10964 disk_info.append((host, port, magic))
10966 assert len(disk_info) == len(self.op.target_node)
10967 self.dest_disk_info = disk_info
10970 raise errors.ProgrammerError("Unhandled export mode %r" %
10973 # instance disk type verification
10974 # TODO: Implement export support for file-based disks
10975 for disk in self.instance.disks:
10976 if disk.dev_type == constants.LD_FILE:
10977 raise errors.OpPrereqError("Export not supported for instances with"
10978 " file-based disks", errors.ECODE_INVAL)
10980 def _CleanupExports(self, feedback_fn):
10981 """Removes exports of current instance from all other nodes.
10983 If an instance in a cluster with nodes A..D was exported to node C, its
10984 exports will be removed from the nodes A, B and D.
10987 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10989 nodelist = self.cfg.GetNodeList()
10990 nodelist.remove(self.dst_node.name)
10992 # on one-node clusters nodelist will be empty after the removal
10993 # if we proceed, the backup would be removed because OpBackupQuery
10994 # substitutes an empty list with the full cluster node list.
10995 iname = self.instance.name
10997 feedback_fn("Removing old exports for instance %s" % iname)
10998 exportlist = self.rpc.call_export_list(nodelist)
10999 for node in exportlist:
11000 if exportlist[node].fail_msg:
11002 if iname in exportlist[node].payload:
11003 msg = self.rpc.call_export_remove(node, iname).fail_msg
11005 self.LogWarning("Could not remove older export for instance %s"
11006 " on node %s: %s", iname, node, msg)
11008 def Exec(self, feedback_fn):
11009 """Export an instance to an image in the cluster.
11012 assert self.op.mode in constants.EXPORT_MODES
11014 instance = self.instance
11015 src_node = instance.primary_node
11017 if self.op.shutdown:
11018 # shutdown the instance, but not the disks
11019 feedback_fn("Shutting down instance %s" % instance.name)
11020 result = self.rpc.call_instance_shutdown(src_node, instance,
11021 self.op.shutdown_timeout)
11022 # TODO: Maybe ignore failures if ignore_remove_failures is set
11023 result.Raise("Could not shutdown instance %s on"
11024 " node %s" % (instance.name, src_node))
11026 # set the disks ID correctly since call_instance_start needs the
11027 # correct drbd minor to create the symlinks
11028 for disk in instance.disks:
11029 self.cfg.SetDiskID(disk, src_node)
11031 activate_disks = (not instance.admin_up)
11034 # Activate the instance disks if we're exporting a stopped instance
11035 feedback_fn("Activating disks for %s" % instance.name)
11036 _StartInstanceDisks(self, instance, None)
11039 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11042 helper.CreateSnapshots()
11044 if (self.op.shutdown and instance.admin_up and
11045 not self.op.remove_instance):
11046 assert not activate_disks
11047 feedback_fn("Starting instance %s" % instance.name)
11048 result = self.rpc.call_instance_start(src_node, instance, None, None)
11049 msg = result.fail_msg
11050 if msg:
11051 feedback_fn("Failed to start instance: %s" % msg)
11052 _ShutdownInstanceDisks(self, instance)
11053 raise errors.OpExecError("Could not start instance: %s" % msg)
11055 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11056 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11057 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11058 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11059 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11061 (key_name, _, _) = self.x509_key_name
11063 dest_ca_pem = \
11064 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11065 self.dest_x509_ca)
11067 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11068 key_name, dest_ca_pem,
11069 timeouts)
11070 finally:
11071 helper.Cleanup()
11073 # Check for backwards compatibility
11074 assert len(dresults) == len(instance.disks)
11075 assert compat.all(isinstance(i, bool) for i in dresults), \
11076 "Not all results are boolean: %r" % dresults
11080 feedback_fn("Deactivating disks for %s" % instance.name)
11081 _ShutdownInstanceDisks(self, instance)
11083 if not (compat.all(dresults) and fin_resu):
11084 failures = []
11085 if not fin_resu:
11086 failures.append("export finalization")
11087 if not compat.all(dresults):
11088 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11089 if not dsk)
11090 failures.append("disk export: disk(s) %s" % fdsk)
11092 raise errors.OpExecError("Export failed, errors in %s" %
11093 utils.CommaJoin(failures))
11095 # At this point, the export was successful, we can cleanup/finish
11097 # Remove instance if requested
11098 if self.op.remove_instance:
11099 feedback_fn("Removing instance %s" % instance.name)
11100 _RemoveInstance(self, feedback_fn, instance,
11101 self.op.ignore_remove_failures)
11103 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11104 self._CleanupExports(feedback_fn)
11106 return fin_resu, dresults
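# Hedged usage sketch (hypothetical caller): Exec() returns the finalization
# status plus one boolean per exported disk, which could be summarized as:
#
#   fin_resu, dresults = lu.Exec(feedback_fn)
#   failed = [idx for (idx, ok) in enumerate(dresults) if not ok]
#   if failed or not fin_resu:
#     print("Export incomplete, failed disk indices: %s" % failed)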
11109 class LUBackupRemove(NoHooksLU):
11110 """Remove exports related to the named instance.
11115 def ExpandNames(self):
11116 self.needed_locks = {}
11117 # We need all nodes to be locked in order for RemoveExport to work, but we
11118 # don't need to lock the instance itself, as nothing will happen to it (and
11119 # we can remove exports also for a removed instance)
11120 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11122 def Exec(self, feedback_fn):
11123 """Remove any export.
11126 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11127 # If the instance was not found we'll try with the name that was passed in.
11128 # This will only work if it was an FQDN, though.
11129 fqdn_warn = False
11130 if not instance_name:
11131 fqdn_warn = True
11132 instance_name = self.op.instance_name
11134 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11135 exportlist = self.rpc.call_export_list(locked_nodes)
11136 found = False
11137 for node in exportlist:
11138 msg = exportlist[node].fail_msg
11139 if msg:
11140 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11141 continue
11142 if instance_name in exportlist[node].payload:
11143 found = True
11144 result = self.rpc.call_export_remove(node, instance_name)
11145 msg = result.fail_msg
11146 if msg:
11147 logging.error("Could not remove export for instance %s"
11148 " on node %s: %s", instance_name, node, msg)
11150 if fqdn_warn and not found:
11151 feedback_fn("Export not found. If trying to remove an export belonging"
11152 " to a deleted instance please use its Fully Qualified"
11156 class LUGroupAdd(LogicalUnit):
11157 """Logical unit for creating node groups.
11160 HPATH = "group-add"
11161 HTYPE = constants.HTYPE_GROUP
11164 def ExpandNames(self):
11165 # We need the new group's UUID here so that we can create and acquire the
11166 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11167 # that it should not check whether the UUID exists in the configuration.
11168 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11169 self.needed_locks = {}
11170 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11172 def CheckPrereq(self):
11173 """Check prerequisites.
11175 This checks that the given group name is not an existing node group
11179 try:
11180 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11181 except errors.OpPrereqError:
11182 pass
11183 else:
11184 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11185 " node group (UUID: %s)" %
11186 (self.op.group_name, existing_uuid),
11187 errors.ECODE_EXISTS)
11189 if self.op.ndparams:
11190 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11192 def BuildHooksEnv(self):
11193 """Build hooks env.
11197 "GROUP_NAME": self.op.group_name,
11200 def BuildHooksNodes(self):
11201 """Build hooks nodes.
11204 mn = self.cfg.GetMasterNode()
11205 return ([mn], [mn])
11207 def Exec(self, feedback_fn):
11208 """Add the node group to the cluster.
11211 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11212 uuid=self.group_uuid,
11213 alloc_policy=self.op.alloc_policy,
11214 ndparams=self.op.ndparams)
11216 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11217 del self.remove_locks[locking.LEVEL_NODEGROUP]
11220 class LUGroupAssignNodes(NoHooksLU):
11221 """Logical unit for assigning nodes to groups.
11226 def ExpandNames(self):
11227 # These raise errors.OpPrereqError on their own:
11228 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11229 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11231 # We want to lock all the affected nodes and groups. We have readily
11232 # available the list of nodes, and the *destination* group. To gather the
11233 # list of "source" groups, we need to fetch node information later on.
11234 self.needed_locks = {
11235 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11236 locking.LEVEL_NODE: self.op.nodes,
11237 }
11239 def DeclareLocks(self, level):
11240 if level == locking.LEVEL_NODEGROUP:
11241 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11243 # Try to get all affected nodes' groups without having the group or node
11244 # lock yet. Needs verification later in the code flow.
11245 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11247 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11249 def CheckPrereq(self):
11250 """Check prerequisites.
11253 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11254 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11255 frozenset(self.op.nodes))
11257 expected_locks = (set([self.group_uuid]) |
11258 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11259 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11260 if actual_locks != expected_locks:
11261 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11262 " current groups are '%s', used to be '%s'" %
11263 (utils.CommaJoin(expected_locks),
11264 utils.CommaJoin(actual_locks)))
11266 self.node_data = self.cfg.GetAllNodesInfo()
11267 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11268 instance_data = self.cfg.GetAllInstancesInfo()
11270 if self.group is None:
11271 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11272 (self.op.group_name, self.group_uuid))
11274 (new_splits, previous_splits) = \
11275 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11276 for node in self.op.nodes],
11277 self.node_data, instance_data)
11279 if new_splits:
11280 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11282 if not self.op.force:
11283 raise errors.OpExecError("The following instances get split by this"
11284 " change and --force was not given: %s" %
11285 fmt_new_splits)
11286 else:
11287 self.LogWarning("This operation will split the following instances: %s",
11288 fmt_new_splits)
11290 if previous_splits:
11291 self.LogWarning("In addition, these already-split instances continue"
11292 " to be split across groups: %s",
11293 utils.CommaJoin(utils.NiceSort(previous_splits)))
11295 def Exec(self, feedback_fn):
11296 """Assign nodes to a new group.
11299 for node in self.op.nodes:
11300 self.node_data[node].group = self.group_uuid
11302 # FIXME: Depends on side-effects of modifying the result of
11303 # C{cfg.GetAllNodesInfo}
11305 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11307 @staticmethod
11308 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11309 """Check for split instances after a node assignment.
11311 This method considers a series of node assignments as an atomic operation,
11312 and returns information about split instances after applying the set of
11315 In particular, it returns information about newly split instances, and
11316 instances that were already split, and remain so after the change.
11318 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11321 @type changes: list of (node_name, new_group_uuid) pairs.
11322 @param changes: list of node assignments to consider.
11323 @param node_data: a dict with data for all nodes
11324 @param instance_data: a dict with all instances to consider
11325 @rtype: a two-tuple
11326 @return: a list of instances that were previously okay and result split as a
11327 consequence of this change, and a list of instances that were previously
11328 split and this change does not fix.
11331 changed_nodes = dict((node, group) for node, group in changes
11332 if node_data[node].group != group)
11334 all_split_instances = set()
11335 previously_split_instances = set()
11337 def InstanceNodes(instance):
11338 return [instance.primary_node] + list(instance.secondary_nodes)
11340 for inst in instance_data.values():
11341 if inst.disk_template not in constants.DTS_INT_MIRROR:
11342 continue
11344 instance_nodes = InstanceNodes(inst)
11346 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11347 previously_split_instances.add(inst.name)
11349 if len(set(changed_nodes.get(node, node_data[node].group)
11350 for node in instance_nodes)) > 1:
11351 all_split_instances.add(inst.name)
11353 return (list(all_split_instances - previously_split_instances),
11354 list(previously_split_instances & all_split_instances))
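# Illustrative sketch of the split check above, using hypothetical stand-ins
# for the configuration objects. An internally mirrored instance becomes
# "split" when its nodes end up in more than one group after the changes.
#
#   import collections
#   Node = collections.namedtuple("Node", "group")
#   node_data = {"node1": Node("g1"), "node2": Node("g1")}
#   changes = [("node2", "g2")]   # would split a DRBD instance on node1/node2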
11357 class _GroupQuery(_QueryBase):
11358 FIELDS = query.GROUP_FIELDS
11360 def ExpandNames(self, lu):
11361 lu.needed_locks = {}
11363 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11364 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11366 if not self.names:
11367 self.wanted = [name_to_uuid[name]
11368 for name in utils.NiceSort(name_to_uuid.keys())]
11369 else:
11370 # Accept names to be either names or UUIDs.
11371 missing = []
11372 self.wanted = []
11373 all_uuid = frozenset(self._all_groups.keys())
11375 for name in self.names:
11376 if name in all_uuid:
11377 self.wanted.append(name)
11378 elif name in name_to_uuid:
11379 self.wanted.append(name_to_uuid[name])
11380 else:
11381 missing.append(name)
11383 if missing:
11384 raise errors.OpPrereqError("Some groups do not exist: %s" %
11385 utils.CommaJoin(missing),
11386 errors.ECODE_NOENT)
11388 def DeclareLocks(self, lu, level):
11389 pass
11391 def _GetQueryData(self, lu):
11392 """Computes the list of node groups and their attributes.
11395 do_nodes = query.GQ_NODE in self.requested_data
11396 do_instances = query.GQ_INST in self.requested_data
11398 group_to_nodes = None
11399 group_to_instances = None
11401 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11402 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11403 # latter GetAllInstancesInfo() is not enough, for we have to go through
11404 # instance->node. Hence, we will need to process nodes even if we only need
11405 # instance information.
11406 if do_nodes or do_instances:
11407 all_nodes = lu.cfg.GetAllNodesInfo()
11408 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11409 node_to_group = {}
11411 for node in all_nodes.values():
11412 if node.group in group_to_nodes:
11413 group_to_nodes[node.group].append(node.name)
11414 node_to_group[node.name] = node.group
11416 if do_instances:
11417 all_instances = lu.cfg.GetAllInstancesInfo()
11418 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11420 for instance in all_instances.values():
11421 node = instance.primary_node
11422 if node in node_to_group:
11423 group_to_instances[node_to_group[node]].append(instance.name)
11425 if not do_nodes:
11426 # Do not pass on node information if it was not requested.
11427 group_to_nodes = None
11429 return query.GroupQueryData([self._all_groups[uuid]
11430 for uuid in self.wanted],
11431 group_to_nodes, group_to_instances)
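# Small sketch of the group->nodes and node->group maps built above, with
# hypothetical names:
#
#   group_to_nodes = {"uuid-a": [], "uuid-b": []}
#   node_to_group = {}
#   for name, group in [("node1", "uuid-a"), ("node2", "uuid-b")]:
#     group_to_nodes[group].append(name)
#     node_to_group[name] = group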
11434 class LUGroupQuery(NoHooksLU):
11435 """Logical unit for querying node groups.
11440 def CheckArguments(self):
11441 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11442 self.op.output_fields, False)
11444 def ExpandNames(self):
11445 self.gq.ExpandNames(self)
11447 def Exec(self, feedback_fn):
11448 return self.gq.OldStyleQuery(self)
11451 class LUGroupSetParams(LogicalUnit):
11452 """Modifies the parameters of a node group.
11455 HPATH = "group-modify"
11456 HTYPE = constants.HTYPE_GROUP
11459 def CheckArguments(self):
11460 all_changes = [
11461 self.op.ndparams,
11462 self.op.alloc_policy,
11463 ]
11465 if all_changes.count(None) == len(all_changes):
11466 raise errors.OpPrereqError("Please pass at least one modification",
11467 errors.ECODE_INVAL)
11469 def ExpandNames(self):
11470 # This raises errors.OpPrereqError on its own:
11471 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11473 self.needed_locks = {
11474 locking.LEVEL_NODEGROUP: [self.group_uuid],
11475 }
11477 def CheckPrereq(self):
11478 """Check prerequisites.
11481 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11483 if self.group is None:
11484 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11485 (self.op.group_name, self.group_uuid))
11487 if self.op.ndparams:
11488 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11489 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11490 self.new_ndparams = new_ndparams
11492 def BuildHooksEnv(self):
11493 """Build hooks env.
11497 "GROUP_NAME": self.op.group_name,
11498 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11501 def BuildHooksNodes(self):
11502 """Build hooks nodes.
11505 mn = self.cfg.GetMasterNode()
11506 return ([mn], [mn])
11508 def Exec(self, feedback_fn):
11509 """Modifies the node group.
11512 result = []
11514 if self.op.ndparams:
11515 self.group.ndparams = self.new_ndparams
11516 result.append(("ndparams", str(self.group.ndparams)))
11518 if self.op.alloc_policy:
11519 self.group.alloc_policy = self.op.alloc_policy
11521 self.cfg.Update(self.group, feedback_fn)
11523 return result
11526 class LUGroupRemove(LogicalUnit):
11527 HPATH = "group-remove"
11528 HTYPE = constants.HTYPE_GROUP
11531 def ExpandNames(self):
11532 # This raises errors.OpPrereqError on its own:
11533 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11534 self.needed_locks = {
11535 locking.LEVEL_NODEGROUP: [self.group_uuid],
11536 }
11538 def CheckPrereq(self):
11539 """Check prerequisites.
11541 This checks that the given group name exists as a node group, that is
11542 empty (i.e., contains no nodes), and that is not the last group of the
11546 # Verify that the group is empty.
11547 group_nodes = [node.name
11548 for node in self.cfg.GetAllNodesInfo().values()
11549 if node.group == self.group_uuid]
11552 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11554 (self.op.group_name,
11555 utils.CommaJoin(utils.NiceSort(group_nodes))),
11556 errors.ECODE_STATE)
11558 # Verify the cluster would not be left group-less.
11559 if len(self.cfg.GetNodeGroupList()) == 1:
11560 raise errors.OpPrereqError("Group '%s' is the only group,"
11561 " cannot be removed" %
11562 self.op.group_name,
11563 errors.ECODE_STATE)
11565 def BuildHooksEnv(self):
11566 """Build hooks env.
11570 "GROUP_NAME": self.op.group_name,
11573 def BuildHooksNodes(self):
11574 """Build hooks nodes.
11577 mn = self.cfg.GetMasterNode()
11578 return ([mn], [mn])
11580 def Exec(self, feedback_fn):
11581 """Remove the node group.
11584 try:
11585 self.cfg.RemoveNodeGroup(self.group_uuid)
11586 except errors.ConfigurationError:
11587 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11588 (self.op.group_name, self.group_uuid))
11590 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11593 class LUGroupRename(LogicalUnit):
11594 HPATH = "group-rename"
11595 HTYPE = constants.HTYPE_GROUP
11598 def ExpandNames(self):
11599 # This raises errors.OpPrereqError on its own:
11600 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11602 self.needed_locks = {
11603 locking.LEVEL_NODEGROUP: [self.group_uuid],
11604 }
11606 def CheckPrereq(self):
11607 """Check prerequisites.
11609 Ensures requested new name is not yet used.
11612 try:
11613 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11614 except errors.OpPrereqError:
11615 pass
11616 else:
11617 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11618 " node group (UUID: %s)" %
11619 (self.op.new_name, new_name_uuid),
11620 errors.ECODE_EXISTS)
11622 def BuildHooksEnv(self):
11623 """Build hooks env.
11627 "OLD_NAME": self.op.group_name,
11628 "NEW_NAME": self.op.new_name,
11631 def BuildHooksNodes(self):
11632 """Build hooks nodes.
11635 mn = self.cfg.GetMasterNode()
11637 all_nodes = self.cfg.GetAllNodesInfo()
11638 all_nodes.pop(mn, None)
11640 run_nodes = [mn]
11641 run_nodes.extend(node.name for node in all_nodes.values()
11642 if node.group == self.group_uuid)
11644 return (run_nodes, run_nodes)
11646 def Exec(self, feedback_fn):
11647 """Rename the node group.
11650 group = self.cfg.GetNodeGroup(self.group_uuid)
11653 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11654 (self.op.group_name, self.group_uuid))
11656 group.name = self.op.new_name
11657 self.cfg.Update(group, feedback_fn)
11659 return self.op.new_name
11662 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11663 """Generic tags LU.
11665 This is an abstract class which is the parent of all the other tags LUs.
11668 def ExpandNames(self):
11669 self.group_uuid = None
11670 self.needed_locks = {}
11671 if self.op.kind == constants.TAG_NODE:
11672 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11673 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11674 elif self.op.kind == constants.TAG_INSTANCE:
11675 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11676 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11677 elif self.op.kind == constants.TAG_NODEGROUP:
11678 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11680 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11681 # not possible to acquire the BGL based on opcode parameters)
11683 def CheckPrereq(self):
11684 """Check prerequisites.
11687 if self.op.kind == constants.TAG_CLUSTER:
11688 self.target = self.cfg.GetClusterInfo()
11689 elif self.op.kind == constants.TAG_NODE:
11690 self.target = self.cfg.GetNodeInfo(self.op.name)
11691 elif self.op.kind == constants.TAG_INSTANCE:
11692 self.target = self.cfg.GetInstanceInfo(self.op.name)
11693 elif self.op.kind == constants.TAG_NODEGROUP:
11694 self.target = self.cfg.GetNodeGroup(self.group_uuid)
11696 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11697 str(self.op.kind), errors.ECODE_INVAL)
11700 class LUTagsGet(TagsLU):
11701 """Returns the tags of a given object.
11706 def ExpandNames(self):
11707 TagsLU.ExpandNames(self)
11709 # Share locks as this is only a read operation
11710 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11712 def Exec(self, feedback_fn):
11713 """Returns the tag list.
11716 return list(self.target.GetTags())
11719 class LUTagsSearch(NoHooksLU):
11720 """Searches the tags for a given pattern.
11725 def ExpandNames(self):
11726 self.needed_locks = {}
11728 def CheckPrereq(self):
11729 """Check prerequisites.
11731 This checks the pattern passed for validity by compiling it.
11734 try:
11735 self.re = re.compile(self.op.pattern)
11736 except re.error, err:
11737 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11738 (self.op.pattern, err), errors.ECODE_INVAL)
11740 def Exec(self, feedback_fn):
11741 """Returns the tag list.
11744 cfg = self.cfg
11745 tgts = [("/cluster", cfg.GetClusterInfo())]
11746 ilist = cfg.GetAllInstancesInfo().values()
11747 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11748 nlist = cfg.GetAllNodesInfo().values()
11749 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11750 tgts.extend(("/nodegroup/%s" % n.name, n)
11751 for n in cfg.GetAllNodeGroupsInfo().values())
11752 results = []
11753 for path, target in tgts:
11754 for tag in target.GetTags():
11755 if self.re.search(tag):
11756 results.append((path, tag))
11757 return results
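# Minimal sketch of the pattern matching performed above (tag values are
# illustrative only):
#
#   import re
#   pattern = re.compile("^env:")
#   tags = ["env:prod", "owner:ops"]
#   matches = [t for t in tags if pattern.search(t)]   # ["env:prod"]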
11760 class LUTagsSet(TagsLU):
11761 """Sets a tag on a given object.
11766 def CheckPrereq(self):
11767 """Check prerequisites.
11769 This checks the type and length of the tag name and value.
11772 TagsLU.CheckPrereq(self)
11773 for tag in self.op.tags:
11774 objects.TaggableObject.ValidateTag(tag)
11776 def Exec(self, feedback_fn):
11780 try:
11781 for tag in self.op.tags:
11782 self.target.AddTag(tag)
11783 except errors.TagError, err:
11784 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11785 self.cfg.Update(self.target, feedback_fn)
11788 class LUTagsDel(TagsLU):
11789 """Delete a list of tags from a given object.
11794 def CheckPrereq(self):
11795 """Check prerequisites.
11797 This checks that we have the given tag.
11800 TagsLU.CheckPrereq(self)
11801 for tag in self.op.tags:
11802 objects.TaggableObject.ValidateTag(tag)
11803 del_tags = frozenset(self.op.tags)
11804 cur_tags = self.target.GetTags()
11806 diff_tags = del_tags - cur_tags
11807 if diff_tags:
11808 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11809 raise errors.OpPrereqError("Tag(s) %s not found" %
11810 (utils.CommaJoin(diff_names), ),
11811 errors.ECODE_NOENT)
11813 def Exec(self, feedback_fn):
11814 """Remove the tag from the object.
11817 for tag in self.op.tags:
11818 self.target.RemoveTag(tag)
11819 self.cfg.Update(self.target, feedback_fn)
11822 class LUTestDelay(NoHooksLU):
11823 """Sleep for a specified amount of time.
11825 This LU sleeps on the master and/or nodes for a specified amount of
11826 time.
11831 def ExpandNames(self):
11832 """Expand names and set required locks.
11834 This expands the node list, if any.
11837 self.needed_locks = {}
11838 if self.op.on_nodes:
11839 # _GetWantedNodes can be used here, but is not always appropriate to use
11840 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11841 # more information.
11842 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11843 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11845 def _TestDelay(self):
11846 """Do the actual sleep.
11849 if self.op.on_master:
11850 if not utils.TestDelay(self.op.duration):
11851 raise errors.OpExecError("Error during master delay test")
11852 if self.op.on_nodes:
11853 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11854 for node, node_result in result.items():
11855 node_result.Raise("Failure during rpc call to node %s" % node)
11857 def Exec(self, feedback_fn):
11858 """Execute the test delay opcode, with the wanted repetitions.
11861 if self.op.repeat == 0:
11862 self._TestDelay()
11863 else:
11864 top_value = self.op.repeat - 1
11865 for i in range(self.op.repeat):
11866 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11867 self._TestDelay()
11870 class LUTestJqueue(NoHooksLU):
11871 """Utility LU to test some aspects of the job queue.
11876 # Must be lower than default timeout for WaitForJobChange to see whether it
11877 # notices changed jobs
11878 _CLIENT_CONNECT_TIMEOUT = 20.0
11879 _CLIENT_CONFIRM_TIMEOUT = 60.0
11881 @classmethod
11882 def _NotifyUsingSocket(cls, cb, errcls):
11883 """Opens a Unix socket and waits for another program to connect.
11886 @param cb: Callback to send socket name to client
11887 @type errcls: class
11888 @param errcls: Exception class to use for errors
11891 # Using a temporary directory as there's no easy way to create temporary
11892 # sockets without writing a custom loop around tempfile.mktemp and
11894 tmpdir = tempfile.mkdtemp()
11895 try:
11896 tmpsock = utils.PathJoin(tmpdir, "sock")
11898 logging.debug("Creating temporary socket at %s", tmpsock)
11899 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11900 try:
11901 sock.bind(tmpsock)
11902 sock.listen(1)
11904 # Send details to client
11905 cb(tmpsock)
11907 # Wait for client to connect before continuing
11908 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11909 try:
11910 (conn, _) = sock.accept()
11911 except socket.error, err:
11912 raise errcls("Client didn't connect in time (%s)" % err)
11916 # Remove as soon as client is connected
11917 shutil.rmtree(tmpdir)
11919 # Wait for client to close
11920 try:
11921 try:
11922 # pylint: disable-msg=E1101
11923 # Instance of '_socketobject' has no ... member
11924 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11925 conn.recv(1)
11926 except socket.error, err:
11927 raise errcls("Client failed to confirm notification (%s)" % err)
11928 finally:
11929 conn.close()
11931 def _SendNotification(self, test, arg, sockname):
11932 """Sends a notification to the client.
11935 @param test: Test name
11936 @param arg: Test argument (depends on test)
11937 @type sockname: string
11938 @param sockname: Socket path
11941 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11943 def _Notify(self, prereq, test, arg):
11944 """Notifies the client of a test.
11947 @param prereq: Whether this is a prereq-phase test
11949 @param test: Test name
11950 @param arg: Test argument (depends on test)
11953 if prereq:
11954 errcls = errors.OpPrereqError
11955 else:
11956 errcls = errors.OpExecError
11958 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11959 test, arg),
11960 errcls)
11962 def CheckArguments(self):
11963 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11964 self.expandnames_calls = 0
11966 def ExpandNames(self):
11967 checkargs_calls = getattr(self, "checkargs_calls", 0)
11968 if checkargs_calls < 1:
11969 raise errors.ProgrammerError("CheckArguments was not called")
11971 self.expandnames_calls += 1
11973 if self.op.notify_waitlock:
11974 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11976 self.LogInfo("Expanding names")
11978 # Get lock on master node (just to get a lock, not for a particular reason)
11979 self.needed_locks = {
11980 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11981 }
11983 def Exec(self, feedback_fn):
11984 if self.expandnames_calls < 1:
11985 raise errors.ProgrammerError("ExpandNames was not called")
11987 if self.op.notify_exec:
11988 self._Notify(False, constants.JQT_EXEC, None)
11990 self.LogInfo("Executing")
11992 if self.op.log_messages:
11993 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11994 for idx, msg in enumerate(self.op.log_messages):
11995 self.LogInfo("Sending log message %s", idx + 1)
11996 feedback_fn(constants.JQT_MSGPREFIX + msg)
11997 # Report how many test messages have been sent
11998 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12001 raise errors.OpExecError("Opcode failure was requested")
12006 class IAllocator(object):
12007 """IAllocator framework.
12009 An IAllocator instance has four sets of attributes:
12010 - cfg that is needed to query the cluster
12011 - input data (all members of the _KEYS class attribute are required)
12012 - four buffer attributes (in|out_data|text), that represent the
12013 input (to the external script) in text and data structure format,
12014 and the output from it, again in two formats
12015 - the result variables from the script (success, info, nodes) for
12016 easy usage
12019 # pylint: disable-msg=R0902
12020 # lots of instance attributes
12022 def __init__(self, cfg, rpc, mode, **kwargs):
12023 self.cfg = cfg
12024 self.rpc = rpc
12025 # init buffer variables
12026 self.in_text = self.out_text = self.in_data = self.out_data = None
12027 # init all input fields so that pylint is happy
12028 self.mode = mode
12029 self.memory = self.disks = self.disk_template = None
12030 self.os = self.tags = self.nics = self.vcpus = None
12031 self.hypervisor = None
12032 self.relocate_from = None
12033 self.name = None
12034 self.evac_nodes = None
12035 self.instances = None
12036 self.evac_mode = None
12037 self.target_groups = []
12039 self.required_nodes = None
12040 # init result fields
12041 self.success = self.info = self.result = None
12043 try:
12044 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12045 except KeyError:
12046 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12047 " IAllocator" % self.mode)
12049 keyset = [n for (n, _) in keydata]
12051 for key in kwargs:
12052 if key not in keyset:
12053 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12054 " IAllocator" % key)
12055 setattr(self, key, kwargs[key])
12057 for key in keyset:
12058 if key not in kwargs:
12059 raise errors.ProgrammerError("Missing input parameter '%s' to"
12060 " IAllocator" % key)
12061 self._BuildInputData(compat.partial(fn, self), keydata)
12063 def _ComputeClusterData(self):
12064 """Compute the generic allocator input data.
12066 This is the data that is independent of the actual operation.
12069 cfg = self.cfg
12070 cluster_info = cfg.GetClusterInfo()
12072 data = {
12073 "version": constants.IALLOCATOR_VERSION,
12074 "cluster_name": cfg.GetClusterName(),
12075 "cluster_tags": list(cluster_info.GetTags()),
12076 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12077 # we don't have job IDs
12078 }
12079 ninfo = cfg.GetAllNodesInfo()
12080 iinfo = cfg.GetAllInstancesInfo().values()
12081 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12084 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12086 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12087 hypervisor_name = self.hypervisor
12088 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12089 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12091 hypervisor_name = cluster_info.enabled_hypervisors[0]
12093 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12094 hypervisor_name)
12095 node_iinfo = \
12096 self.rpc.call_all_instances_info(node_list,
12097 cluster_info.enabled_hypervisors)
12099 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12101 config_ndata = self._ComputeBasicNodeData(ninfo)
12102 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12103 i_list, config_ndata)
12104 assert len(data["nodes"]) == len(ninfo), \
12105 "Incomplete node data computed"
12107 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12109 self.in_data = data
12111 @staticmethod
12112 def _ComputeNodeGroupData(cfg):
12113 """Compute node groups data.
12116 ng = dict((guuid, {
12117 "name": gdata.name,
12118 "alloc_policy": gdata.alloc_policy,
12120 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12124 @staticmethod
12125 def _ComputeBasicNodeData(node_cfg):
12126 """Compute global node data.
12129 @returns: a dict of name: (node dict, node config)
12132 # fill in static (config-based) values
12133 node_results = dict((ninfo.name, {
12134 "tags": list(ninfo.GetTags()),
12135 "primary_ip": ninfo.primary_ip,
12136 "secondary_ip": ninfo.secondary_ip,
12137 "offline": ninfo.offline,
12138 "drained": ninfo.drained,
12139 "master_candidate": ninfo.master_candidate,
12140 "group": ninfo.group,
12141 "master_capable": ninfo.master_capable,
12142 "vm_capable": ninfo.vm_capable,
12144 for ninfo in node_cfg.values())
12146 return node_results
12148 @staticmethod
12149 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12150 node_results):
12151 """Compute global node data.
12153 @param node_results: the basic node structures as filled from the config
12156 # make a copy of the current dict
12157 node_results = dict(node_results)
12158 for nname, nresult in node_data.items():
12159 assert nname in node_results, "Missing basic data for node %s" % nname
12160 ninfo = node_cfg[nname]
12162 if not (ninfo.offline or ninfo.drained):
12163 nresult.Raise("Can't get data for node %s" % nname)
12164 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12165 nname)
12166 remote_info = nresult.payload
12168 for attr in ['memory_total', 'memory_free', 'memory_dom0',
12169 'vg_size', 'vg_free', 'cpu_total']:
12170 if attr not in remote_info:
12171 raise errors.OpExecError("Node '%s' didn't return attribute"
12172 " '%s'" % (nname, attr))
12173 if not isinstance(remote_info[attr], int):
12174 raise errors.OpExecError("Node '%s' returned invalid value"
12176 (nname, attr, remote_info[attr]))
12177 # compute memory used by primary instances
12178 i_p_mem = i_p_up_mem = 0
12179 for iinfo, beinfo in i_list:
12180 if iinfo.primary_node == nname:
12181 i_p_mem += beinfo[constants.BE_MEMORY]
12182 if iinfo.name not in node_iinfo[nname].payload:
12183 i_used_mem = 0
12184 else:
12185 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12186 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12187 remote_info['memory_free'] -= max(0, i_mem_diff)
12189 if iinfo.admin_up:
12190 i_p_up_mem += beinfo[constants.BE_MEMORY]
12192 # compute memory used by instances
12193 pnr_dyn = {
12194 "total_memory": remote_info['memory_total'],
12195 "reserved_memory": remote_info['memory_dom0'],
12196 "free_memory": remote_info['memory_free'],
12197 "total_disk": remote_info['vg_size'],
12198 "free_disk": remote_info['vg_free'],
12199 "total_cpus": remote_info['cpu_total'],
12200 "i_pri_memory": i_p_mem,
12201 "i_pri_up_memory": i_p_up_mem,
12203 pnr_dyn.update(node_results[nname])
12204 node_results[nname] = pnr_dyn
12206 return node_results
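# Worked example of the free-memory correction above (numbers are made up):
# when an instance's configured BE_MEMORY exceeds the memory the hypervisor
# reports as actually used, the difference is subtracted from the node's
# reported free memory.
#
#   be_memory, used_mem, memory_free = 1024, 512, 4096
#   memory_free -= max(0, be_memory - used_mem)   # 4096 - 512 = 3584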
12208 @staticmethod
12209 def _ComputeInstanceData(cluster_info, i_list):
12210 """Compute global instance data.
12213 instance_data = {}
12214 for iinfo, beinfo in i_list:
12215 nic_data = []
12216 for nic in iinfo.nics:
12217 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12221 "mode": filled_params[constants.NIC_MODE],
12222 "link": filled_params[constants.NIC_LINK],
12224 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12225 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12226 nic_data.append(nic_dict)
12228 "tags": list(iinfo.GetTags()),
12229 "admin_up": iinfo.admin_up,
12230 "vcpus": beinfo[constants.BE_VCPUS],
12231 "memory": beinfo[constants.BE_MEMORY],
12233 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12235 "disks": [{constants.IDISK_SIZE: dsk.size,
12236 constants.IDISK_MODE: dsk.mode}
12237 for dsk in iinfo.disks],
12238 "disk_template": iinfo.disk_template,
12239 "hypervisor": iinfo.hypervisor,
12241 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12243 instance_data[iinfo.name] = pir
12245 return instance_data
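# Sketch of the per-instance structure produced above; field values are
# illustrative only and not taken from a real cluster:
#
#   pir_example = {
#     "tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#   }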
12247 def _AddNewInstance(self):
12248 """Add new instance data to allocator structure.
12250 This in combination with _AllocatorGetClusterData will create the
12251 correct structure needed as input for the allocator.
12253 The checks for the completeness of the opcode must have already been
12257 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12259 if self.disk_template in constants.DTS_INT_MIRROR:
12260 self.required_nodes = 2
12261 else:
12262 self.required_nodes = 1
12266 "disk_template": self.disk_template,
12269 "vcpus": self.vcpus,
12270 "memory": self.memory,
12271 "disks": self.disks,
12272 "disk_space_total": disk_space,
12274 "required_nodes": self.required_nodes,
12275 "hypervisor": self.hypervisor,
12280 def _AddRelocateInstance(self):
12281 """Add relocate instance data to allocator structure.
12283 This in combination with _IAllocatorGetClusterData will create the
12284 correct structure needed as input for the allocator.
12286 The checks for the completeness of the opcode must have already been
12290 instance = self.cfg.GetInstanceInfo(self.name)
12291 if instance is None:
12292 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12293 " IAllocator" % self.name)
12295 if instance.disk_template not in constants.DTS_MIRRORED:
12296 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12297 errors.ECODE_INVAL)
12299 if instance.disk_template in constants.DTS_INT_MIRROR and \
12300 len(instance.secondary_nodes) != 1:
12301 raise errors.OpPrereqError("Instance has not exactly one secondary node",
12302 errors.ECODE_STATE)
12304 self.required_nodes = 1
12305 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12306 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12310 "disk_space_total": disk_space,
12311 "required_nodes": self.required_nodes,
12312 "relocate_from": self.relocate_from,
12316 def _AddEvacuateNodes(self):
12317 """Add evacuate nodes data to allocator structure.
12321 "evac_nodes": self.evac_nodes
12325 def _AddNodeEvacuate(self):
12326 """Get data for node-evacuate requests.
12330 "instances": self.instances,
12331 "evac_mode": self.evac_mode,
12334 def _AddChangeGroup(self):
12335 """Get data for node-evacuate requests.
12339 "instances": self.instances,
12340 "target_groups": self.target_groups,
12343 def _BuildInputData(self, fn, keydata):
12344 """Build input data structures.
12347 self._ComputeClusterData()
12350 request["type"] = self.mode
12351 for keyname, keytype in keydata:
12352 if keyname not in request:
12353 raise errors.ProgrammerError("Request parameter %s is missing" %
12355 val = request[keyname]
12356 if not keytype(val):
12357 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12358 " validation, value %s, expected"
12359 " type %s" % (keyname, val, keytype))
12360 self.in_data["request"] = request
12362 self.in_text = serializer.Dump(self.in_data)
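# Hedged sketch of the serialized request for the relocate mode, based on the
# keys assembled by _AddRelocateInstance above (values are illustrative):
#
#   request_example = {
#     "type": "relocate",
#     "name": "instance1.example.com",
#     "disk_space_total": 10240,
#     "required_nodes": 1,
#     "relocate_from": ["node2.example.com"],
#   }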
12364 _STRING_LIST = ht.TListOf(ht.TString)
12365 _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12366 # pylint: disable-msg=E1101
12367 # Class '...' has no 'OP_ID' member
12368 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12369 opcodes.OpInstanceMigrate.OP_ID,
12370 opcodes.OpInstanceReplaceDisks.OP_ID])
12373 constants.IALLOCATOR_MODE_ALLOC:
12376 ("name", ht.TString),
12377 ("memory", ht.TInt),
12378 ("disks", ht.TListOf(ht.TDict)),
12379 ("disk_template", ht.TString),
12380 ("os", ht.TString),
12381 ("tags", _STRING_LIST),
12382 ("nics", ht.TListOf(ht.TDict)),
12383 ("vcpus", ht.TInt),
12384 ("hypervisor", ht.TString),
12386 constants.IALLOCATOR_MODE_RELOC:
12387 (_AddRelocateInstance,
12388 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12390 constants.IALLOCATOR_MODE_MEVAC:
12391 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12392 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12393 constants.IALLOCATOR_MODE_NODE_EVAC:
12394 (_AddNodeEvacuate, [
12395 ("instances", _STRING_LIST),
12396 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12398 constants.IALLOCATOR_MODE_CHG_GROUP:
12399 (_AddChangeGroup, [
12400 ("instances", _STRING_LIST),
12401 ("target_groups", _STRING_LIST),
12405 def Run(self, name, validate=True, call_fn=None):
12406 """Run an instance allocator and return the results.
12409 if call_fn is None:
12410 call_fn = self.rpc.call_iallocator_runner
12412 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12413 result.Raise("Failure while running the iallocator script")
12415 self.out_text = result.payload
12416 if validate:
12417 self._ValidateResult()
12419 def _ValidateResult(self):
12420 """Process the allocator results.
12422 This will process and if successful save the result in
12423 self.out_data and the other parameters.
12426 try:
12427 rdict = serializer.Load(self.out_text)
12428 except Exception, err:
12429 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12431 if not isinstance(rdict, dict):
12432 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12434 # TODO: remove backwards compatibility in later versions
12435 if "nodes" in rdict and "result" not in rdict:
12436 rdict["result"] = rdict["nodes"]
12437 del rdict["nodes"]
12439 for key in "success", "info", "result":
12440 if key not in rdict:
12441 raise errors.OpExecError("Can't parse iallocator results:"
12442 " missing key '%s'" % key)
12443 setattr(self, key, rdict[key])
12445 if not self._result_check(self.result):
12446 raise errors.OpExecError("Iallocator returned invalid result,"
12447 " expected %s, got %s" %
12448 (self._result_check, self.result),
12449 errors.ECODE_INVAL)
12451 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12452 constants.IALLOCATOR_MODE_MEVAC):
12453 node2group = dict((name, ndata["group"])
12454 for (name, ndata) in self.in_data["nodes"].items())
12456 fn = compat.partial(self._NodesToGroups, node2group,
12457 self.in_data["nodegroups"])
12459 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12460 assert self.relocate_from is not None
12461 assert self.required_nodes == 1
12463 request_groups = fn(self.relocate_from)
12464 result_groups = fn(rdict["result"])
12466 if result_groups != request_groups:
12467 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12468 " differ from original groups (%s)" %
12469 (utils.CommaJoin(result_groups),
12470 utils.CommaJoin(request_groups)))
12471 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12472 request_groups = fn(self.evac_nodes)
12473 for (instance_name, secnode) in self.result:
12474 result_groups = fn([secnode])
12475 if result_groups != request_groups:
12476 raise errors.OpExecError("Iallocator returned new secondary node"
12477 " '%s' (group '%s') for instance '%s'"
12478 " which is not in original group '%s'" %
12479 (secnode, utils.CommaJoin(result_groups),
12480 instance_name,
12481 utils.CommaJoin(request_groups)))
12482 else:
12483 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12485 self.out_data = rdict
12487 @staticmethod
12488 def _NodesToGroups(node2group, groups, nodes):
12489 """Returns a list of unique group names for a list of nodes.
12491 @type node2group: dict
12492 @param node2group: Map from node name to group UUID
12494 @param groups: Group information
12496 @param nodes: Node names
12499 result = set()
12501 for node in nodes:
12502 try:
12503 group_uuid = node2group[node]
12504 except KeyError:
12505 # Ignore unknown node
12506 continue
12508 try:
12509 group = groups[group_uuid]
12510 except KeyError:
12511 # Can't find group, let's use UUID
12512 group_name = group_uuid
12513 else:
12514 group_name = group["name"]
12516 result.add(group_name)
12518 return sorted(result)
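# Small usage sketch for the helper above (hypothetical data): unknown nodes
# are ignored and unknown groups fall back to their UUID.
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   # IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "x"])
#   # -> ["default", "rack2"]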
12521 class LUTestAllocator(NoHooksLU):
12522 """Run allocator tests.
12524 This LU runs the allocator tests
12527 def CheckPrereq(self):
12528 """Check prerequisites.
12530 This checks the opcode parameters depending on the director and mode test.
12533 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12534 for attr in ["memory", "disks", "disk_template",
12535 "os", "tags", "nics", "vcpus"]:
12536 if not hasattr(self.op, attr):
12537 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12538 attr, errors.ECODE_INVAL)
12539 iname = self.cfg.ExpandInstanceName(self.op.name)
12540 if iname is not None:
12541 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12542 iname, errors.ECODE_EXISTS)
12543 if not isinstance(self.op.nics, list):
12544 raise errors.OpPrereqError("Invalid parameter 'nics'",
12545 errors.ECODE_INVAL)
12546 if not isinstance(self.op.disks, list):
12547 raise errors.OpPrereqError("Invalid parameter 'disks'",
12548 errors.ECODE_INVAL)
12549 for row in self.op.disks:
12550 if (not isinstance(row, dict) or
12551 constants.IDISK_SIZE not in row or
12552 not isinstance(row[constants.IDISK_SIZE], int) or
12553 constants.IDISK_MODE not in row or
12554 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12555 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12556 " parameter", errors.ECODE_INVAL)
12557 if self.op.hypervisor is None:
12558 self.op.hypervisor = self.cfg.GetHypervisorType()
12559 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12560 fname = _ExpandInstanceName(self.cfg, self.op.name)
12561 self.op.name = fname
12562 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12563 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12564 if not hasattr(self.op, "evac_nodes"):
12565 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12566 " opcode input", errors.ECODE_INVAL)
12567 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
12568 constants.IALLOCATOR_MODE_NODE_EVAC):
12569 if not self.op.instances:
12570 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
12571 self.op.instances = _GetWantedInstances(self, self.op.instances)
12573 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12574 self.op.mode, errors.ECODE_INVAL)
12576 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12577 if self.op.allocator is None:
12578 raise errors.OpPrereqError("Missing allocator name",
12579 errors.ECODE_INVAL)
12580 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12581 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12582 self.op.direction, errors.ECODE_INVAL)
12584 def Exec(self, feedback_fn):
12585 """Run the allocator test.
12588 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12589 ial = IAllocator(self.cfg, self.rpc,
12592 memory=self.op.memory,
12593 disks=self.op.disks,
12594 disk_template=self.op.disk_template,
12598 vcpus=self.op.vcpus,
12599 hypervisor=self.op.hypervisor,
12601 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12602 ial = IAllocator(self.cfg, self.rpc,
12605 relocate_from=list(self.relocate_from),
12607 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12608 ial = IAllocator(self.cfg, self.rpc,
12610 evac_nodes=self.op.evac_nodes)
12611 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
12612 ial = IAllocator(self.cfg, self.rpc,
12614 instances=self.op.instances,
12615 target_groups=self.op.target_groups)
12616 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
12617 ial = IAllocator(self.cfg, self.rpc,
12619 instances=self.op.instances,
12620 evac_mode=self.op.evac_mode)
12622 raise errors.ProgrammerError("Uncatched mode %s in"
12623 " LUTestAllocator.Exec", self.op.mode)
12625 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12626 result = ial.in_text
12627 else:
12628 ial.Run(self.op.allocator, validate=False)
12629 result = ial.out_text
12630 return result
12633 #: Query type implementations
12634 _QUERY_IMPL = {
12635 constants.QR_INSTANCE: _InstanceQuery,
12636 constants.QR_NODE: _NodeQuery,
12637 constants.QR_GROUP: _GroupQuery,
12638 constants.QR_OS: _OsQuery,
12639 }
12641 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12644 def _GetQueryImplementation(name):
12645 """Returns the implemtnation for a query type.
12647 @param name: Query type, must be one of L{constants.QR_VIA_OP}
12650 try:
12651 return _QUERY_IMPL[name]
12652 except KeyError:
12653 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12654 errors.ECODE_INVAL)