4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
class ResultWithJobs(object):
  """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
    @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods don't have to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {}  # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
272 def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes for a phase,
      an empty list must be returned (and not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
    It should be called from DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we really have been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
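  # Illustrative sketch only (the LU name below is made up): instance-level
  # LUs typically combine _ExpandAndLockInstance and _LockInstancesNodes as
  # follows:
  #
  #   class LUInstanceSomething(LogicalUnit):
  #     def ExpandNames(self):
  #       self._ExpandAndLockInstance()
  #       self.needed_locks[locking.LEVEL_NODE] = []
  #       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #     def DeclareLocks(self, level):
  #       if level == locking.LEVEL_NODE:
  #         self._LockInstancesNodes()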
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
class Tasklet(object):
  """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
    """Check prerequisites for this tasklet.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
573 return dict.fromkeys(locking.LEVELS, 1)
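# Illustrative example: read-only LUs typically combine _ShareAll with their
# lock declaration in ExpandNames, e.g.
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {
#     locking.LEVEL_NODE: locking.ALL_SET,
#     }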
576 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
577 """Checks if the owned node groups are still correct for an instance.
579 @type cfg: L{config.ConfigWriter}
580 @param cfg: The cluster configuration
581 @type instance_name: string
582 @param instance_name: Instance name
583 @type owned_groups: set or frozenset
584 @param owned_groups: List of currently owned node groups
587 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
589 if not owned_groups.issuperset(inst_groups):
590 raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
595 utils.CommaJoin(inst_groups),
596 utils.CommaJoin(owned_groups)),
602 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
603 """Checks if the instances in a node group are still correct.
605 @type cfg: L{config.ConfigWriter}
606 @param cfg: The cluster configuration
607 @type group_uuid: string
608 @param group_uuid: Node group UUID
609 @type owned_instances: set or frozenset
610 @param owned_instances: List of currently owned instances
613 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
614 if owned_instances != wanted_instances:
615 raise errors.OpPrereqError("Instances in node group '%s' changed since"
616 " locks were acquired, wanted '%s', have '%s';"
617 " retry the operation" %
619 utils.CommaJoin(wanted_instances),
620 utils.CommaJoin(owned_instances)),
623 return wanted_instances
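# Illustrative sketch (hypothetical group-level LU): the check above is meant
# to be re-run in CheckPrereq once the instance locks are held, e.g.
#
#   owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
#   _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)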
626 def _SupportsOob(cfg, node):
627 """Tells if node supports OOB.
629 @type cfg: L{config.ConfigWriter}
630 @param cfg: The cluster configuration
631 @type node: L{objects.Node}
632 @param node: The node
633 @return: The OOB script if supported or an empty string otherwise
636 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
639 def _GetWantedNodes(lu, nodes):
640 """Returns list of checked and expanded node names.
642 @type lu: L{LogicalUnit}
643 @param lu: the logical unit on whose behalf we execute
645 @param nodes: list of node names or None for all nodes
647 @return: the list of nodes, sorted
648 @raise errors.ProgrammerError: if the nodes parameter is wrong type
  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

654 return utils.NiceSort(lu.cfg.GetNodeList())
657 def _GetWantedInstances(lu, instances):
658 """Returns list of checked and expanded instance names.
660 @type lu: L{LogicalUnit}
661 @param lu: the logical unit on whose behalf we execute
662 @type instances: list
663 @param instances: list of instance names or None for all instances
665 @return: the list of instances, sorted
666 @raise errors.OpPrereqError: if the instances parameter is wrong type
667 @raise errors.OpPrereqError: if any of the passed instances is not found
  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
677 def _GetUpdatedParams(old_params, update_dict,
678 use_default=True, use_none=False):
679 """Return the new version of a parameter dictionary.
681 @type old_params: dict
682 @param old_params: old parameters
683 @type update_dict: dict
684 @param update_dict: dict containing new parameter values, or
685 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
694 @return: the new parameter dictionary
697 params_copy = copy.deepcopy(old_params)
698 for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      params_copy.pop(key, None)
    else:
      params_copy[key] = val
  return params_copy
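# Illustrative values: with use_default=True,
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# drops "a" (reset to its default) and returns {"b": 2, "c": 3}.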
710 def _ReleaseLocks(lu, level, names=None, keep=None):
711 """Releases locks owned by an LU.
713 @type lu: L{LogicalUnit}
714 @param level: Lock level
715 @type names: list or None
716 @param names: Names of locks to release
717 @type keep: list or None
718 @param keep: Names of locks to retain
721 assert not (keep is not None and names is not None), \
722 "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass
  elif should_release:
    # Determine which locks to release
    release = [name for name in owned if should_release(name)]
    retain = [name for name in owned if not should_release(name)]
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
    # Release just some locks
    lu.glm.release(level, names=release)
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)
    assert not lu.glm.is_owned(level), "No locks should be owned"
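# Illustrative example: once an LU has narrowed down which nodes it really
# needs, it can drop the remaining node locks it acquired earlier, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] + list(instance.secondary_nodes))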
760 def _MapInstanceDisksToNodes(instances):
761 """Creates a map from (node, volume) to instance name.
763 @type instances: list of L{objects.Instance}
764 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
767 return dict(((node, vol), inst.name)
768 for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
773 def _RunPostHook(lu, node_name):
774 """Runs the post-hook for an opcode on a single node.
777 hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
785 def _CheckOutputFields(static, dynamic, selected):
786 """Checks whether all selected fields are valid.
788 @type static: L{utils.FieldSet}
789 @param static: static fields set
790 @type dynamic: L{utils.FieldSet}
791 @param dynamic: dynamic fields set
798 delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
804 def _CheckGlobalHvParams(params):
805 """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  global parameters.
811 used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
819 def _CheckNodeOnline(lu, node, msg=None):
820 """Ensure that a given node is online.
822 @param lu: the LU on behalf of which we make the check
823 @param node: the node to check
824 @param msg: if passed, should be a message to replace the default one
825 @raise errors.OpPrereqError: if the node is offline
  """
  if msg is None:
    msg = "Can't use offline node"
830 if lu.cfg.GetNodeInfo(node).offline:
831 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
834 def _CheckNodeNotDrained(lu, node):
835 """Ensure that a given node is not drained.
837 @param lu: the LU on behalf of which we make the check
838 @param node: the node to check
839 @raise errors.OpPrereqError: if the node is drained
842 if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
847 def _CheckNodeVmCapable(lu, node):
848 """Ensure that a given node is vm capable.
850 @param lu: the LU on behalf of which we make the check
851 @param node: the node to check
852 @raise errors.OpPrereqError: if the node is not vm capable
855 if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
860 def _CheckNodeHasOS(lu, node, os_name, force_variant):
861 """Ensure that a node supports a given OS.
863 @param lu: the LU on behalf of which we make the check
864 @param node: the node to check
865 @param os_name: the OS to query about
866 @param force_variant: whether to ignore variant errors
867 @raise errors.OpPrereqError: if the node is not supporting the OS
870 result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
874 if not force_variant:
875 _CheckOSVariant(result.payload, os_name)
878 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
879 """Ensure that a node has the given secondary ip.
881 @type lu: L{LogicalUnit}
882 @param lu: the LU on behalf of which we make the check
884 @param node: the node to check
885 @type secondary_ip: string
886 @param secondary_ip: the ip to check
887 @type prereq: boolean
888 @param prereq: whether to throw a prerequisite or an execute error
889 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
890 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
893 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
894 result.Raise("Failure checking secondary ip on node %s" % node,
895 prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
905 def _GetClusterDomainSecret():
906 """Reads the cluster domain secret.
909 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
913 def _CheckInstanceState(lu, instance, req_states, msg=None):
914 """Ensure that an instance is in one of the required states.
916 @param lu: the LU on behalf of which we make the check
917 @param instance: the instance to check
918 @param msg: if passed, should be a message to replace the default one
919 @raise errors.OpPrereqError: if the instance is not in the required state
  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
924 if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)
929 if constants.ADMINST_UP not in req_states:
930 pnode = instance.primary_node
931 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
932 ins_l.Raise("Can't contact node %s for instance information" % pnode,
933 prereq=True, ecode=errors.ECODE_ENVIRON)
935 if instance.name in ins_l.payload:
936 raise errors.OpPrereqError("Instance %s is running, %s" %
937 (instance.name, msg), errors.ECODE_STATE)
940 def _ExpandItemName(fn, name, kind):
941 """Expand an item name.
943 @param fn: the function to use for expansion
944 @param name: requested item name
945 @param kind: text description ('Node' or 'Instance')
946 @return: the resolved (full) name
947 @raise errors.OpPrereqError: if the item is not found
  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
957 def _ExpandNodeName(cfg, name):
958 """Wrapper over L{_ExpandItemName} for nodes."""
959 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
962 def _ExpandInstanceName(cfg, name):
963 """Wrapper over L{_ExpandItemName} for instance."""
964 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
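# Illustrative example: LUs canonicalize user-supplied names early (usually in
# ExpandNames or CheckPrereq) with these wrappers, e.g.
#
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)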
967 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
968 memory, vcpus, nics, disk_template, disks,
969 bep, hvp, hypervisor_name, tags):
970 """Builds instance related env variables for hooks
972 This builds the hook environment from individual variables.
975 @param name: the name of the instance
976 @type primary_node: string
977 @param primary_node: the name of the instance's primary node
978 @type secondary_nodes: list
979 @param secondary_nodes: list of secondary nodes as strings
980 @type os_type: string
981 @param os_type: the name of the instance's OS
983 @param status: the desired status of the instance
985 @param memory: the memory size of the instance
987 @param vcpus: the count of VCPUs the instance has
989 @param nics: list of tuples (ip, mac, mode, link) representing
990 the NICs the instance has
991 @type disk_template: string
992 @param disk_template: the disk template of the instance
994 @param disks: the list of (size, mode) pairs
996 @param bep: the backend parameters for the instance
998 @param hvp: the hypervisor parameters for the instance
999 @type hypervisor_name: string
1000 @param hypervisor_name: the hypervisor for the instance
1002 @param tags: list of instance tags as strings
1004 @return: the hook environment for this instance
  """
  env = {
    "INSTANCE_NAME": name,
1010 "INSTANCE_PRIMARY": primary_node,
1011 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1012 "INSTANCE_OS_TYPE": os_type,
1013 "INSTANCE_STATUS": status,
1014 "INSTANCE_MEMORY": memory,
1015 "INSTANCE_VCPUS": vcpus,
1016 "INSTANCE_DISK_TEMPLATE": disk_template,
1017 "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
1022 for idx, (ip, mac, mode, link) in enumerate(nics):
1025 env["INSTANCE_NIC%d_IP" % idx] = ip
1026 env["INSTANCE_NIC%d_MAC" % idx] = mac
1027 env["INSTANCE_NIC%d_MODE" % idx] = mode
1028 env["INSTANCE_NIC%d_LINK" % idx] = link
1029 if mode == constants.NIC_MODE_BRIDGED:
1030 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count
  if disks:
    disk_count = len(disks)
1038 for idx, (size, mode) in enumerate(disks):
1039 env["INSTANCE_DISK%d_SIZE" % idx] = size
1040 env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count
1049 env["INSTANCE_TAGS"] = " ".join(tags)
1051 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1052 for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
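# Illustrative example: for an instance with one bridged NIC and one disk the
# resulting environment contains keys such as INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE,
# INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE and INSTANCE_DISK0_MODE; the hooks
# runner later prefixes every key with "GANETI_".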
1058 def _NICListToTuple(lu, nics):
1059 """Build a list of nic information tuples.
1061 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1062 value in LUInstanceQueryData.
1064 @type lu: L{LogicalUnit}
1065 @param lu: the logical unit on whose behalf we execute
1066 @type nics: list of L{objects.NIC}
1067 @param nics: list of nics to convert to hooks tuples
1071 cluster = lu.cfg.GetClusterInfo()
  hooks_nics = []
  for nic in nics:
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((nic.ip, nic.mac, mode, link))
  return hooks_nics
1082 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1083 """Builds instance related env variables for hooks from an object.
1085 @type lu: L{LogicalUnit}
1086 @param lu: the logical unit on whose behalf we execute
1087 @type instance: L{objects.Instance}
1088 @param instance: the instance for which we should build the
1090 @type override: dict
1091 @param override: dictionary with key/values that will override
1094 @return: the hook environment dictionary
1097 cluster = lu.cfg.GetClusterInfo()
1098 bep = cluster.FillBE(instance)
1099 hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
1102 "primary_node": instance.primary_node,
1103 "secondary_nodes": instance.secondary_nodes,
1104 "os_type": instance.os,
1105 "status": instance.admin_state,
1106 "memory": bep[constants.BE_MEMORY],
1107 "vcpus": bep[constants.BE_VCPUS],
1108 "nics": _NICListToTuple(lu, instance.nics),
1109 "disk_template": instance.disk_template,
1110 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1113 "hypervisor_name": instance.hypervisor,
1114 "tags": instance.tags,
  }
  if override:
    args.update(override)
1118 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1121 def _AdjustCandidatePool(lu, exceptions):
1122 """Adjust the candidate pool after node operations.
1125 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1127 lu.LogInfo("Promoted nodes to master candidate role: %s",
1128 utils.CommaJoin(node.name for node in mod_list))
1129 for name in mod_list:
1130 lu.context.ReaddNode(name)
1131 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1133 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1137 def _DecideSelfPromotion(lu, exceptions=None):
1138 """Decide whether I should promote myself as a master candidate.
1141 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1142 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1143 # the new node will increase mc_max with one, so:
1144 mc_should = min(mc_should + 1, cp_size)
1145 return mc_now < mc_should
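# Illustrative numbers: with candidate_pool_size = 10, mc_now = 3 and
# mc_should = 3, adding the new node gives mc_should = min(3 + 1, 10) = 4,
# and since 3 < 4 the node decides to promote itself.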
1148 def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
1152 cluster = lu.cfg.GetClusterInfo()
1153 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1154 brlist = [params[constants.NIC_LINK] for params in paramslist
1155 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1157 result = lu.rpc.call_bridges_exist(target_node, brlist)
1158 result.Raise("Error checking bridges on destination node '%s'" %
1159 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1162 def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
1167 node = instance.primary_node
1168 _CheckNicsBridgesExist(lu, instance.nics, node)
1171 def _CheckOSVariant(os_obj, name):
1172 """Check whether an OS name conforms to the os variants specification.
1174 @type os_obj: L{objects.OS}
1175 @param os_obj: OS object to check
1177 @param name: OS name passed by the user, to check for validity
1180 variant = objects.OS.GetVariant(name)
1181 if not os_obj.supported_variants:
1183 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1184 " passed)" % (os_obj.name, variant),
1188 raise errors.OpPrereqError("OS name must include a variant",
1191 if variant not in os_obj.supported_variants:
1192 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
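# Illustrative example: variants are requested as "<os>+<variant>", e.g.
# "debootstrap+default"; objects.OS.GetVariant() extracts the part after the
# "+", which must then be listed in os_obj.supported_variants.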
1195 def _GetNodeInstancesInner(cfg, fn):
1196 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1199 def _GetNodeInstances(cfg, node_name):
1200 """Returns a list of all primary and secondary instances on a node.
1204 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1207 def _GetNodePrimaryInstances(cfg, node_name):
1208 """Returns primary instances on a node.
1211 return _GetNodeInstancesInner(cfg,
1212 lambda inst: node_name == inst.primary_node)
1215 def _GetNodeSecondaryInstances(cfg, node_name):
1216 """Returns secondary instances on a node.
1219 return _GetNodeInstancesInner(cfg,
1220 lambda inst: node_name in inst.secondary_nodes)
1223 def _GetStorageTypeArgs(cfg, storage_type):
1224 """Returns the arguments for a storage type.
1227 # Special case for file storage
1228 if storage_type == constants.ST_FILE:
1229 # storage.FileStorage wants a list of storage directories
1230 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1235 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1238 for dev in instance.disks:
1239 cfg.SetDiskID(dev, node_name)
1241 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1242 result.Raise("Failed to get disk status from node %s" % node_name,
1243 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1245 for idx, bdev_status in enumerate(result.payload):
1246 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1252 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1253 """Check the sanity of iallocator and node arguments and use the
1254 cluster-wide iallocator if appropriate.
1256 Check that at most one of (iallocator, node) is specified. If none is
1257 specified, then the LU's opcode's iallocator slot is filled with the
1258 cluster-wide default iallocator.
1260 @type iallocator_slot: string
1261 @param iallocator_slot: the name of the opcode iallocator slot
1262 @type node_slot: string
1263 @param node_slot: the name of the opcode target node slot
1266 node = getattr(lu.op, node_slot, None)
1267 iallocator = getattr(lu.op, iallocator_slot, None)
1269 if node is not None and iallocator is not None:
1270 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1272 elif node is None and iallocator is None:
1273 default_iallocator = lu.cfg.GetDefaultIAllocator()
1274 if default_iallocator:
1275 setattr(lu.op, iallocator_slot, default_iallocator)
1277 raise errors.OpPrereqError("No iallocator or node given and no"
1278 " cluster-wide default iallocator found;"
1279 " please specify either an iallocator or a"
1280 " node, or set a cluster-wide default"
1284 def _GetDefaultIAllocator(cfg, iallocator):
1285 """Decides on which iallocator to use.
1287 @type cfg: L{config.ConfigWriter}
1288 @param cfg: Cluster configuration object
1289 @type iallocator: string or None
1290 @param iallocator: Iallocator specified in opcode
1292 @return: Iallocator name
1296 # Use default iallocator
1297 iallocator = cfg.GetDefaultIAllocator()
1300 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1301 " opcode nor as a cluster-wide default",
1307 class LUClusterPostInit(LogicalUnit):
1308 """Logical unit for running hooks after cluster initialization.
1311 HPATH = "cluster-init"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1326 return ([], [self.cfg.GetMasterNode()])
1328 def Exec(self, feedback_fn):
1335 class LUClusterDestroy(LogicalUnit):
1336 """Logical unit for destroying the cluster.
1339 HPATH = "cluster-destroy"
1340 HTYPE = constants.HTYPE_CLUSTER
1342 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1350 def BuildHooksNodes(self):
1351 """Build hooks nodes.
1356 def CheckPrereq(self):
1357 """Check prerequisites.
1359 This checks whether the cluster is empty.
1361 Any errors are signaled by raising errors.OpPrereqError.
1364 master = self.cfg.GetMasterNode()
1366 nodelist = self.cfg.GetNodeList()
1367 if len(nodelist) != 1 or nodelist[0] != master:
1368 raise errors.OpPrereqError("There are still %d node(s) in"
1369 " this cluster." % (len(nodelist) - 1),
1371 instancelist = self.cfg.GetInstanceList()
1373 raise errors.OpPrereqError("There are still %d instance(s) in"
1374 " this cluster." % len(instancelist),
1377 def Exec(self, feedback_fn):
1378 """Destroys the cluster.
1381 master_params = self.cfg.GetMasterNetworkParameters()
1383 # Run post hooks on master node before it's removed
1384 _RunPostHook(self, master_params.name)
1386 ems = self.cfg.GetUseExternalMipScript()
1387 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1389 result.Raise("Could not disable the master role")
1391 return master_params.name
1394 def _VerifyCertificate(filename):
1395 """Verifies a certificate for L{LUClusterVerifyConfig}.
1397 @type filename: string
1398 @param filename: Path to PEM file
  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
1404 except Exception, err: # pylint: disable=W0703
1405 return (LUClusterVerifyConfig.ETYPE_ERROR,
1406 "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)
1413 fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
1419 elif errcode == utils.CERT_WARNING:
1420 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1421 elif errcode == utils.CERT_ERROR:
1422 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1424 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1427 def _GetAllHypervisorParameters(cluster, instances):
1428 """Compute the set of all hypervisor parameters.
1430 @type cluster: L{objects.Cluster}
1431 @param cluster: the cluster object
1432 @param instances: list of L{objects.Instance}
1433 @param instances: additional instances from which to obtain parameters
1434 @rtype: list of (origin, hypervisor, parameters)
1435 @return: a list with all parameters found, indicating the hypervisor they
1436 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1441 for hv_name in cluster.enabled_hypervisors:
1442 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1444 for os_name, os_hvp in cluster.os_hvp.items():
1445 for hv_name, hv_params in os_hvp.items():
1447 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1448 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1450 # TODO: collapse identical parameter values in a single one
1451 for instance in instances:
1452 if instance.hvparams:
1453 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1454 cluster.FillHV(instance)))
1459 class _VerifyErrors(object):
1460 """Mix-in for cluster/group verify LUs.
1462 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1463 self.op and self._feedback_fn to be available.)
1467 ETYPE_FIELD = "code"
1468 ETYPE_ERROR = "ERROR"
1469 ETYPE_WARNING = "WARNING"
1471 def _Error(self, ecode, item, msg, *args, **kwargs):
1472 """Format an error message.
1474 Based on the opcode's error_codes parameter, either format a
1475 parseable error code, or a simpler error string.
1477 This must be called only from Exec and functions called from Exec.
1480 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1481 itype, etxt, _ = ecode
1482 # first complete the msg
1485 # then format the whole message
1486 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1487 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1493 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1494 # and finally report it via the feedback_fn
1495 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1497 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1498 """Log an error message if the passed condition is True.
    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors)  # pylint: disable=E1101
    # If the error code is in the list of ignored errors, demote the error to
    # a warning
1506 (_, etxt, _) = ecode
1507 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1508 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
    if cond:
      self._Error(ecode, *args, **kwargs)
1513 # do not mark the operation as failed for WARN cases only
1514 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1515 self.bad = self.bad or cond
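  # Illustrative example: the verification code below reports problems through
  # this mix-in, e.g.
  #
  #   self._ErrorIf(test, constants.CV_ENODERPC, node,
  #                 "unable to verify node: no data returned")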
1518 class LUClusterVerify(NoHooksLU):
1519 """Submits all jobs necessary to verify the cluster.
1524 def ExpandNames(self):
1525 self.needed_locks = {}
1527 def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]
1544 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1545 ignore_errors=self.op.ignore_errors,
1546 depends=depends_fn())]
1547 for group in groups)
1549 # Fix up all parameters
1550 for op in itertools.chain(*jobs): # pylint: disable=W0142
1551 op.debug_simulate_errors = self.op.debug_simulate_errors
1552 op.verbose = self.op.verbose
1553 op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
1556 except AttributeError:
1557 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1559 return ResultWithJobs(jobs)
1562 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1563 """Verifies the cluster config.
1568 def _VerifyHVP(self, hvp_data):
1569 """Verifies locally the syntax of the hypervisor parameters.
1572 for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
1579 except errors.GenericError, err:
1580 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1582 def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
1585 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1586 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1587 self.all_node_info = self.cfg.GetAllNodesInfo()
1588 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1589 self.needed_locks = {}
1591 def Exec(self, feedback_fn):
1592 """Verify integrity of cluster, performing various test on nodes.
1596 self._feedback_fn = feedback_fn
1598 feedback_fn("* Verifying cluster config")
1600 for msg in self.cfg.VerifyConfig():
1601 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1603 feedback_fn("* Verifying cluster certificate files")
1605 for cert_filename in constants.ALL_CERT_FILES:
1606 (errcode, msg) = _VerifyCertificate(cert_filename)
1607 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1609 feedback_fn("* Verifying hypervisor parameters")
1611 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1612 self.all_inst_info.values()))
1614 feedback_fn("* Verifying all nodes belong to an existing group")
1616 # We do this verification here because, should this bogus circumstance
1617 # occur, it would never be caught by VerifyGroup, which only acts on
1618 # nodes/instances reachable from existing node groups.
1620 dangling_nodes = set(node.name for node in self.all_node_info.values()
1621 if node.group not in self.all_group_info)
1623 dangling_instances = {}
1624 no_node_instances = []
1626 for inst in self.all_inst_info.values():
1627 if inst.primary_node in dangling_nodes:
1628 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1629 elif inst.primary_node not in self.all_node_info:
1630 no_node_instances.append(inst.name)
1635 utils.CommaJoin(dangling_instances.get(node.name,
1637 for node in dangling_nodes]
1639 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1641 "the following nodes (and their instances) belong to a non"
1642 " existing group: %s", utils.CommaJoin(pretty_dangling))
1644 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1646 "the following instances have a non-existing primary-node:"
1647 " %s", utils.CommaJoin(no_node_instances))
1652 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1653 """Verifies the status of a node group.
1656 HPATH = "cluster-verify"
1657 HTYPE = constants.HTYPE_CLUSTER
1660 _HOOKS_INDENT_RE = re.compile("^", re.M)
1662 class NodeImage(object):
1663 """A class representing the logical and physical status of a node.
1666 @ivar name: the node name to which this object refers
1667 @ivar volumes: a structure as returned from
1668 L{ganeti.backend.GetVolumeList} (runtime)
1669 @ivar instances: a list of running instances (runtime)
1670 @ivar pinst: list of configured primary instances (config)
1671 @ivar sinst: list of configured secondary instances (config)
1672 @ivar sbp: dictionary of {primary-node: list of instances} for all
1673 instances for which this node is secondary (config)
1674 @ivar mfree: free memory, as reported by hypervisor (runtime)
1675 @ivar dfree: free disk, as reported by the node (runtime)
1676 @ivar offline: the offline status (config)
1677 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1679 not whether the individual keys were correct) (runtime)
1680 @type lvm_fail: boolean
1681 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1682 @type hyp_fail: boolean
1683 @ivar hyp_fail: whether the RPC call didn't return the instance list
1684 @type ghost: boolean
1685 @ivar ghost: whether this is a known node or not (config)
1686 @type os_fail: boolean
1687 @ivar os_fail: whether the RPC call didn't return valid OS data
1689 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1690 @type vm_capable: boolean
1691 @ivar vm_capable: whether the node can host instances
1694 def __init__(self, offline=False, name=None, vm_capable=True):
1703 self.offline = offline
1704 self.vm_capable = vm_capable
1705 self.rpc_fail = False
1706 self.lvm_fail = False
1707 self.hyp_fail = False
1709 self.os_fail = False
1712 def ExpandNames(self):
1713 # This raises errors.OpPrereqError on its own:
1714 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1716 # Get instances in node group; this is unsafe and needs verification later
1717 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1719 self.needed_locks = {
1720 locking.LEVEL_INSTANCE: inst_names,
1721 locking.LEVEL_NODEGROUP: [self.group_uuid],
1722 locking.LEVEL_NODE: [],
1725 self.share_locks = _ShareAll()
1727 def DeclareLocks(self, level):
1728 if level == locking.LEVEL_NODE:
1729 # Get members of node group; this is unsafe and needs verification later
1730 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1732 all_inst_info = self.cfg.GetAllInstancesInfo()
1734 # In Exec(), we warn about mirrored instances that have primary and
1735 # secondary living in separate node groups. To fully verify that
1736 # volumes for these instances are healthy, we will need to do an
1737 # extra call to their secondaries. We ensure here those nodes will
1739 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1740 # Important: access only the instances whose lock is owned
1741 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1742 nodes.update(all_inst_info[inst].secondary_nodes)
1744 self.needed_locks[locking.LEVEL_NODE] = nodes
1746 def CheckPrereq(self):
1747 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1748 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1750 group_nodes = set(self.group_info.members)
1751 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1756 unlocked_instances = \
1757 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))
1763 if unlocked_instances:
1764 raise errors.OpPrereqError("Missing lock for instances: %s" %
1765 utils.CommaJoin(unlocked_instances))
1767 self.all_node_info = self.cfg.GetAllNodesInfo()
1768 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1770 self.my_node_names = utils.NiceSort(group_nodes)
1771 self.my_inst_names = utils.NiceSort(group_instances)
1773 self.my_node_info = dict((name, self.all_node_info[name])
1774 for name in self.my_node_names)
1776 self.my_inst_info = dict((name, self.all_inst_info[name])
1777 for name in self.my_inst_names)
1779 # We detect here the nodes that will need the extra RPC calls for verifying
1780 # split LV volumes; they should be locked.
1781 extra_lv_nodes = set()
1783 for inst in self.my_inst_info.values():
1784 if inst.disk_template in constants.DTS_INT_MIRROR:
1785 group = self.my_node_info[inst.primary_node].group
1786 for nname in inst.secondary_nodes:
1787 if self.all_node_info[nname].group != group:
1788 extra_lv_nodes.add(nname)
1790 unlocked_lv_nodes = \
1791 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1793 if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
1796 self.extra_lv_nodes = list(extra_lv_nodes)
1798 def _VerifyNode(self, ninfo, nresult):
1799 """Perform some basic validation on data returned from a node.
1801 - check the result data structure is well formed and has all the
1803 - check ganeti version
1805 @type ninfo: L{objects.Node}
1806 @param ninfo: the node to check
1807 @param nresult: the results from the node
1809 @return: whether overall this call was successful (and we can expect
1810 reasonable values in the respose)
1814 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1816 # main result, nresult should be a non-empty dict
1817 test = not nresult or not isinstance(nresult, dict)
1818 _ErrorIf(test, constants.CV_ENODERPC, node,
1819 "unable to verify node: no data returned")
1823 # compares ganeti version
1824 local_version = constants.PROTOCOL_VERSION
1825 remote_version = nresult.get("version", None)
1826 test = not (remote_version and
1827 isinstance(remote_version, (list, tuple)) and
1828 len(remote_version) == 2)
1829 _ErrorIf(test, constants.CV_ENODERPC, node,
1830 "connection to node returned invalid data")
1834 test = local_version != remote_version[0]
1835 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1836 "incompatible protocol versions: master %s,"
1837 " node %s", local_version, remote_version[0])
1841 # node seems compatible, we can actually try to look into its results
1843 # full package version
1844 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1845 constants.CV_ENODEVERSION, node,
1846 "software version mismatch: master %s, node %s",
1847 constants.RELEASE_VERSION, remote_version[1],
1848 code=self.ETYPE_WARNING)
1850 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1851 if ninfo.vm_capable and isinstance(hyp_result, dict):
1852 for hv_name, hv_result in hyp_result.iteritems():
1853 test = hv_result is not None
1854 _ErrorIf(test, constants.CV_ENODEHV, node,
1855 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1857 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1858 if ninfo.vm_capable and isinstance(hvp_result, list):
1859 for item, hv_name, hv_result in hvp_result:
1860 _ErrorIf(True, constants.CV_ENODEHV, node,
1861 "hypervisor %s parameter verify failure (source %s): %s",
1862 hv_name, item, hv_result)
1864 test = nresult.get(constants.NV_NODESETUP,
1865 ["Missing NODESETUP results"])
1866 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1871 def _VerifyNodeTime(self, ninfo, nresult,
1872 nvinfo_starttime, nvinfo_endtime):
1873 """Check the node time.
1875 @type ninfo: L{objects.Node}
1876 @param ninfo: the node to check
1877 @param nresult: the remote results for the node
1878 @param nvinfo_starttime: the start time of the RPC call
1879 @param nvinfo_endtime: the end time of the RPC call
1883 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1885 ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
1888 except (ValueError, TypeError):
1889 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1892 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1893 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1894 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None
1899 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1900 "Node time diverges by at least %s from master node time",
1903 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1904 """Check the node LVM results.
1906 @type ninfo: L{objects.Node}
1907 @param ninfo: the node to check
1908 @param nresult: the remote results for the node
1909 @param vg_name: the configured VG name
1916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1918 # checks vg existence and size > 20G
1919 vglist = nresult.get(constants.NV_VGLIST, None)
    test = vglist is None
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
1923 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1924 constants.MIN_VG_SIZE)
1925 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1928 pvlist = nresult.get(constants.NV_PVLIST, None)
1929 test = pvlist is None
1930 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1932 # check that ':' is not present in PV names, since it's a
1933 # special character for lvcreate (denotes the range of PEs to
1935 for _, pvname, owner_vg in pvlist:
1936 test = ":" in pvname
1937 _ErrorIf(test, constants.CV_ENODELVM, node,
1938 "Invalid character ':' in PV '%s' of VG '%s'",
1941 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1942 """Check the node bridges.
1944 @type ninfo: L{objects.Node}
1945 @param ninfo: the node to check
1946 @param nresult: the remote results for the node
1947 @param bridges: the expected list of bridges
1954 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1956 missing = nresult.get(constants.NV_BRIDGES, None)
1957 test = not isinstance(missing, list)
1958 _ErrorIf(test, constants.CV_ENODENET, node,
1959 "did not return valid bridge information")
1961 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1962 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1964 def _VerifyNodeUserScripts(self, ninfo, nresult):
1965 """Check the results of user scripts presence and executability on the node
1967 @type ninfo: L{objects.Node}
1968 @param ninfo: the node to check
1969 @param nresult: the remote results for the node
1974 test = constants.NV_USERSCRIPTS not in nresult
1975 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1976 "did not return user scripts information")
1978 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1980 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1981 "user scripts not present or not executable: %s" %
1982 utils.CommaJoin(sorted(broken_scripts)))
1984 def _VerifyNodeNetwork(self, ninfo, nresult):
1985 """Check the node network connectivity results.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1993 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1995 test = constants.NV_NODELIST not in nresult
1996 _ErrorIf(test, constants.CV_ENODESSH, node,
1997 "node hasn't returned node ssh connectivity data")
1999 if nresult[constants.NV_NODELIST]:
2000 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2001 _ErrorIf(True, constants.CV_ENODESSH, node,
2002 "ssh communication with node '%s': %s", a_node, a_msg)
2004 test = constants.NV_NODENETTEST not in nresult
2005 _ErrorIf(test, constants.CV_ENODENET, node,
2006 "node hasn't returned node tcp connectivity data")
2008 if nresult[constants.NV_NODENETTEST]:
2009 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2011 _ErrorIf(True, constants.CV_ENODENET, node,
2012 "tcp communication with node '%s': %s",
2013 anode, nresult[constants.NV_NODENETTEST][anode])
2015 test = constants.NV_MASTERIP not in nresult
2016 _ErrorIf(test, constants.CV_ENODENET, node,
2017 "node hasn't returned node master IP reachability data")
2019 if not nresult[constants.NV_MASTERIP]:
2020 if node == self.master_node:
2021 msg = "the master node cannot reach the master IP (not configured?)"
2023 msg = "cannot reach the master IP"
2024 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2026 def _VerifyInstance(self, instance, instanceconfig, node_image,
2028 """Verify an instance.
2030 This function checks to see if the required block devices are
2031 available on the instance's node.
2034 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2035 node_current = instanceconfig.primary_node
2037 node_vol_should = {}
2038 instanceconfig.MapLVsByNode(node_vol_should)
2040 for node in node_vol_should:
2041 n_img = node_image[node]
2042 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043 # ignore missing volumes on offline or broken nodes
2045 for volume in node_vol_should[node]:
2046 test = volume not in n_img.volumes
2047 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2048 "volume %s missing on node %s", volume, node)
2050 if instanceconfig.admin_state == constants.ADMINST_UP:
2051 pri_img = node_image[node_current]
2052 test = instance not in pri_img.instances and not pri_img.offline
2053 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2054 "instance not running on its primary node %s",
2057 diskdata = [(nname, success, status, idx)
2058 for (nname, disks) in diskstatus.items()
2059 for idx, (success, status) in enumerate(disks)]
2061 for nname, success, bdev_status, idx in diskdata:
2062 # the 'ghost node' construction in Exec() ensures that we have a
2064 snode = node_image[nname]
2065 bad_snode = snode.ghost or snode.offline
2066 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2067 not success and not bad_snode,
2068 constants.CV_EINSTANCEFAULTYDISK, instance,
2069 "couldn't retrieve status for disk/%s on %s: %s",
2070 idx, nname, bdev_status)
2071 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2072 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2073 constants.CV_EINSTANCEFAULTYDISK, instance,
2074 "disk/%s on %s is faulty", idx, nname)
2076 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2077 """Verify if there are any unknown volumes in the cluster.
2079 The .os, .swap and backup volumes are ignored. All other volumes are
2080 reported as unknown.
2082 @type reserved: L{ganeti.utils.FieldSet}
2083 @param reserved: a FieldSet of reserved volume names
2086 for node, n_img in node_image.items():
2087 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2088 # skip non-healthy nodes
2090 for volume in n_img.volumes:
2091 test = ((node not in node_vol_should or
2092 volume not in node_vol_should[node]) and
2093 not reserved.Matches(volume))
2094 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2095 "volume %s is unknown", volume)
2097 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2098 """Verify N+1 Memory Resilience.
2100 Check that if one single node dies we can still start all the
2101 instances it was primary for.
2104 cluster_info = self.cfg.GetClusterInfo()
2105 for node, n_img in node_image.items():
2106 # This code checks that every node which is now listed as
2107 # secondary has enough memory to host all instances it is
2108 # supposed to, should a single other node in the cluster fail.
2109 # FIXME: not ready for failover to an arbitrary node
2110 # FIXME: does not support file-backed instances
2111 # WARNING: we currently take into account down instances as well
2112 # as up ones, considering that even if they're down someone
2113 # might want to start them even in the event of a node failure.
2115 # we're skipping offline nodes from the N+1 warning, since
2116 # most likely we don't have good memory information from them;
2117 # we already list instances living on such nodes, and that's
2120 for prinode, instances in n_img.sbp.items():
2122 for instance in instances:
2123 bep = cluster_info.FillBE(instance_cfg[instance])
2124 if bep[constants.BE_AUTO_BALANCE]:
2125 needed_mem += bep[constants.BE_MEMORY]
2126 test = n_img.mfree < needed_mem
2127 self._ErrorIf(test, constants.CV_ENODEN1, node,
2128 "not enough memory to accomodate instance failovers"
2129 " should node %s fail (%dMiB needed, %dMiB available)",
2130 prinode, needed_mem, n_img.mfree)
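# Worked example with hypothetical numbers: suppose node B is secondary for
# two instances whose primary is node A, both with auto_balance enabled and
# BE_MEMORY of 2048 and 4096 MiB. Then needed_mem is 6144 MiB, and with
# n_img.mfree == 4096 MiB the loop above reports CV_ENODEN1 for node B,
# naming node A as the primary whose failure could not be absorbed.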
2133 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2134 (files_all, files_opt, files_mc, files_vm)):
2135 """Verifies file checksums collected from all nodes.
2137 @param errorif: Callback for reporting errors
2138 @param nodeinfo: List of L{objects.Node} objects
2139 @param master_node: Name of master node
2140 @param all_nvinfo: RPC results
2143 # Define functions determining which nodes to consider for a file
2146 (files_mc, lambda node: (node.master_candidate or
2147 node.name == master_node)),
2148 (files_vm, lambda node: node.vm_capable),
2151 # Build mapping from filename to list of nodes which should have the file
2153 for (files, fn) in files2nodefn:
2155 filenodes = nodeinfo
2157 filenodes = filter(fn, nodeinfo)
2158 nodefiles.update((filename,
2159 frozenset(map(operator.attrgetter("name"), filenodes)))
2160 for filename in files)
2162 assert set(nodefiles) == (files_all | files_mc | files_vm)
2164 fileinfo = dict((filename, {}) for filename in nodefiles)
2165 ignore_nodes = set()
2167 for node in nodeinfo:
2169 ignore_nodes.add(node.name)
2172 nresult = all_nvinfo[node.name]
2174 if nresult.fail_msg or not nresult.payload:
2177 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2179 test = not (node_files and isinstance(node_files, dict))
2180 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2181 "Node did not return file checksum data")
2183 ignore_nodes.add(node.name)
2186 # Build per-checksum mapping from filename to nodes having it
2187 for (filename, checksum) in node_files.items():
2188 assert filename in nodefiles
2189 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2191 for (filename, checksums) in fileinfo.items():
2192 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2194 # Nodes having the file
2195 with_file = frozenset(node_name
2196 for nodes in fileinfo[filename].values()
2197 for node_name in nodes) - ignore_nodes
2199 expected_nodes = nodefiles[filename] - ignore_nodes
2201 # Nodes missing file
2202 missing_file = expected_nodes - with_file
2204 if filename in files_opt:
2206 errorif(missing_file and missing_file != expected_nodes,
2207 constants.CV_ECLUSTERFILECHECK, None,
2208 "File %s is optional, but it must exist on all or no"
2209 " nodes (not found on %s)",
2210 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2212 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2213 "File %s is missing from node(s) %s", filename,
2214 utils.CommaJoin(utils.NiceSort(missing_file)))
2216 # Warn if a node has a file it shouldn't
2217 unexpected = with_file - expected_nodes
2219 constants.CV_ECLUSTERFILECHECK, None,
2220 "File %s should not exist on node(s) %s",
2221 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2223 # See if there are multiple versions of the file
2224 test = len(checksums) > 1
2226 variants = ["variant %s on %s" %
2227 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2228 for (idx, (checksum, nodes)) in
2229 enumerate(sorted(checksums.items()))]
2233 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2234 "File %s found with %s different checksums (%s)",
2235 filename, len(checksums), "; ".join(variants))
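# Data-shape sketch with made-up values: after collection, fileinfo can look
# like
#   {"/etc/hosts": {"abc123...": set(["node1", "node2"]),
#                   "def456...": set(["node3"])}}
# i.e. filename -> checksum -> nodes holding that checksum. Two distinct
# checksums for one file produce the "different checksums" error above, with
# one "variant" listed per checksum.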
2237 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2239 """Verifies and the node DRBD status.
2241 @type ninfo: L{objects.Node}
2242 @param ninfo: the node to check
2243 @param nresult: the remote results for the node
2244 @param instanceinfo: the dict of instances
2245 @param drbd_helper: the configured DRBD usermode helper
2246 @param drbd_map: the DRBD map as returned by
2247 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2251 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2254 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2255 test = (helper_result is None)
2256 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2257 "no drbd usermode helper returned")
2259 status, payload = helper_result
2261 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2262 "drbd usermode helper check unsuccessful: %s", payload)
2263 test = status and (payload != drbd_helper)
2264 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2265 "wrong drbd usermode helper: %s", payload)
2267 # compute the DRBD minors
2269 for minor, instance in drbd_map[node].items():
2270 test = instance not in instanceinfo
2271 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2272 "ghost instance '%s' in temporary DRBD map", instance)
2273 # ghost instance should not be running, but otherwise we
2274 # don't give double warnings (both ghost instance and
2275 # unallocated minor in use)
2277 node_drbd[minor] = (instance, False)
2279 instance = instanceinfo[instance]
2280 node_drbd[minor] = (instance.name,
2281 instance.admin_state == constants.ADMINST_UP)
2283 # and now check them
2284 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2285 test = not isinstance(used_minors, (tuple, list))
2286 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2287 "cannot parse drbd status file: %s", str(used_minors))
2289 # we cannot check drbd status
2292 for minor, (iname, must_exist) in node_drbd.items():
2293 test = minor not in used_minors and must_exist
2294 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2295 "drbd minor %d of instance %s is not active", minor, iname)
2296 for minor in used_minors:
2297 test = minor not in node_drbd
2298 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2299 "unallocated drbd minor %d is in use", minor)
2301 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2302 """Builds the node OS structures.
2304 @type ninfo: L{objects.Node}
2305 @param ninfo: the node to check
2306 @param nresult: the remote results for the node
2307 @param nimg: the node image object
2311 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2313 remote_os = nresult.get(constants.NV_OSLIST, None)
2314 test = (not isinstance(remote_os, list) or
2315 not compat.all(isinstance(v, list) and len(v) == 7
2316 for v in remote_os))
2318 _ErrorIf(test, constants.CV_ENODEOS, node,
2319 "node hasn't returned valid OS data")
2328 for (name, os_path, status, diagnose,
2329 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2331 if name not in os_dict:
2334 # parameters is a list of lists instead of list of tuples due to
2335 # JSON lacking a real tuple type, fix it:
2336 parameters = [tuple(v) for v in parameters]
2337 os_dict[name].append((os_path, status, diagnose,
2338 set(variants), set(parameters), set(api_ver)))
2340 nimg.oslist = os_dict
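# Shape sketch with a hypothetical OS: nimg.oslist ends up as
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions) tuples; _VerifyNodeOS below compares the first tuple of each
# entry against the reference node.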
2342 def _VerifyNodeOS(self, ninfo, nimg, base):
2343 """Verifies the node OS list.
2345 @type ninfo: L{objects.Node}
2346 @param ninfo: the node to check
2347 @param nimg: the node image object
2348 @param base: the 'template' node we match against (e.g. from the master)
2352 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2354 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2356 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2357 for os_name, os_data in nimg.oslist.items():
2358 assert os_data, "Empty OS status for OS %s?!" % os_name
2359 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2360 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2361 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2362 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2363 "OS '%s' has multiple entries (first one shadows the rest): %s",
2364 os_name, utils.CommaJoin([v[0] for v in os_data]))
2365 # comparisons with the 'base' image
2366 test = os_name not in base.oslist
2367 _ErrorIf(test, constants.CV_ENODEOS, node,
2368 "Extra OS %s not present on reference node (%s)",
2372 assert base.oslist[os_name], "Base node has empty OS status?"
2373 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2375 # base OS is invalid, skipping
2377 for kind, a, b in [("API version", f_api, b_api),
2378 ("variants list", f_var, b_var),
2379 ("parameters", beautify_params(f_param),
2380 beautify_params(b_param))]:
2381 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2382 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2383 kind, os_name, base.name,
2384 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2386 # check any missing OSes
2387 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2388 _ErrorIf(missing, constants.CV_ENODEOS, node,
2389 "OSes present on reference node %s but missing on this node: %s",
2390 base.name, utils.CommaJoin(missing))
2392 def _VerifyOob(self, ninfo, nresult):
2393 """Verifies out of band functionality of a node.
2395 @type ninfo: L{objects.Node}
2396 @param ninfo: the node to check
2397 @param nresult: the remote results for the node
2401 # We just have to verify the paths on master and/or master candidates
2402 # as the oob helper is invoked on the master
2403 if ((ninfo.master_candidate or ninfo.master_capable) and
2404 constants.NV_OOB_PATHS in nresult):
2405 for path_result in nresult[constants.NV_OOB_PATHS]:
2406 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2408 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2409 """Verifies and updates the node volume data.
2411 This function will update a L{NodeImage}'s internal structures
2412 with data from the remote call.
2414 @type ninfo: L{objects.Node}
2415 @param ninfo: the node to check
2416 @param nresult: the remote results for the node
2417 @param nimg: the node image object
2418 @param vg_name: the configured VG name
2422 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2424 nimg.lvm_fail = True
2425 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2428 elif isinstance(lvdata, basestring):
2429 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2430 utils.SafeEncode(lvdata))
2431 elif not isinstance(lvdata, dict):
2432 _ErrorIf(True, constants.CV_ENODELVM, node,
2433 "rpc call to node failed (lvlist)")
2435 nimg.volumes = lvdata
2436 nimg.lvm_fail = False
2438 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2439 """Verifies and updates the node instance list.
2441 If the listing was successful, then updates this node's instance
2442 list. Otherwise, it marks the RPC call as failed for the instance
2445 @type ninfo: L{objects.Node}
2446 @param ninfo: the node to check
2447 @param nresult: the remote results for the node
2448 @param nimg: the node image object
2451 idata = nresult.get(constants.NV_INSTANCELIST, None)
2452 test = not isinstance(idata, list)
2453 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2454 "rpc call to node failed (instancelist): %s",
2455 utils.SafeEncode(str(idata)))
2457 nimg.hyp_fail = True
2459 nimg.instances = idata
2461 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2462 """Verifies and computes a node information map
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param nimg: the node image object
2468 @param vg_name: the configured VG name
2472 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2474 # try to read free memory (from the hypervisor)
2475 hv_info = nresult.get(constants.NV_HVINFO, None)
2476 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2477 _ErrorIf(test, constants.CV_ENODEHV, node,
2478 "rpc call to node failed (hvinfo)")
2481 nimg.mfree = int(hv_info["memory_free"])
2482 except (ValueError, TypeError):
2483 _ErrorIf(True, constants.CV_ENODERPC, node,
2484 "node returned invalid nodeinfo, check hypervisor")
2486 # FIXME: devise a free space model for file based instances as well
2487 if vg_name is not None:
2488 test = (constants.NV_VGLIST not in nresult or
2489 vg_name not in nresult[constants.NV_VGLIST])
2490 _ErrorIf(test, constants.CV_ENODELVM, node,
2491 "node didn't return data for the volume group '%s'"
2492 " - it is either missing or broken", vg_name)
2495 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496 except (ValueError, TypeError):
2497 _ErrorIf(True, constants.CV_ENODERPC, node,
2498 "node returned invalid LVM info, check LVM status")
2500 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501 """Gets per-disk status information for all instances.
2503 @type nodelist: list of strings
2504 @param nodelist: Node names
2505 @type node_image: dict of (name, L{objects.Node})
2506 @param node_image: Node objects
2507 @type instanceinfo: dict of (name, L{objects.Instance})
2508 @param instanceinfo: Instance objects
2509 @rtype: {instance: {node: [(success, payload)]}}
2510 @return: a dictionary of per-instance dictionaries with nodes as
2511 keys and disk information as values; the disk information is a
2512 list of tuples (success, payload)
2515 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2518 node_disks_devonly = {}
2519 diskless_instances = set()
2520 diskless = constants.DT_DISKLESS
2522 for nname in nodelist:
2523 node_instances = list(itertools.chain(node_image[nname].pinst,
2524 node_image[nname].sinst))
2525 diskless_instances.update(inst for inst in node_instances
2526 if instanceinfo[inst].disk_template == diskless)
2527 disks = [(inst, disk)
2528 for inst in node_instances
2529 for disk in instanceinfo[inst].disks]
2532 # No need to collect data
2535 node_disks[nname] = disks
2537 # Creating copies as SetDiskID below will modify the objects and that can
2538 # lead to incorrect data returned from nodes
2539 devonly = [dev.Copy() for (_, dev) in disks]
2542 self.cfg.SetDiskID(dev, nname)
2544 node_disks_devonly[nname] = devonly
2546 assert len(node_disks) == len(node_disks_devonly)
2548 # Collect data from all nodes with disks
2549 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2552 assert len(result) == len(node_disks)
2556 for (nname, nres) in result.items():
2557 disks = node_disks[nname]
2560 # No data from this node
2561 data = len(disks) * [(False, "node offline")]
2564 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2565 "while getting disk information: %s", msg)
2567 # No data from this node
2568 data = len(disks) * [(False, msg)]
2571 for idx, i in enumerate(nres.payload):
2572 if isinstance(i, (tuple, list)) and len(i) == 2:
2575 logging.warning("Invalid result from node %s, entry %d: %s",
2577 data.append((False, "Invalid result from the remote node"))
2579 for ((inst, _), status) in zip(disks, data):
2580 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2582 # Add empty entries for diskless instances.
2583 for inst in diskless_instances:
2584 assert inst not in instdisk
2587 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589 compat.all(isinstance(s, (tuple, list)) and
2590 len(s) == 2 for s in statuses)
2591 for inst, nnames in instdisk.items()
2592 for nname, statuses in nnames.items())
2593 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
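# Return-value sketch with hypothetical names: for instance "inst1" with two
# disks on primary "nodeA" and secondary "nodeB", instdisk looks like
#   {"inst1": {"nodeA": [(True, status0), (True, status1)],
#              "nodeB": [(True, status0), (True, status1)]}}
# Diskless instances are added with an empty inner dict just above.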
2598 def _SshNodeSelector(group_uuid, all_nodes):
2599 """Create endless iterators for all potential SSH check hosts.
2602 nodes = [node for node in all_nodes
2603 if (node.group != group_uuid and
2605 keyfunc = operator.attrgetter("group")
2607 return map(itertools.cycle,
2608 [sorted(map(operator.attrgetter("name"), names))
2609 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2613 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614 """Choose which nodes should talk to which other nodes.
2616 We will make nodes contact all nodes in their group, and one node from
2619 @warning: This algorithm has a known issue if one node group is much
2620 smaller than others (e.g. just one node). In such a case all other
2621 nodes will talk to the single node.
2624 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2627 return (online_nodes,
2628 dict((name, sorted([i.next() for i in sel]))
2629 for name in online_nodes))
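# Illustrative example with hypothetical groups: verifying group G1 with
# nodes n1, n2 in a cluster that also has group G2 = {n3} returns
# (["n1", "n2"], {"n1": ["n3"], "n2": ["n3"]}), i.e. each online node of the
# verified group is told to contact one cycled pick from every other group --
# which is also the small-group imbalance warned about above.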
2631 def BuildHooksEnv(self):
2634 Cluster-Verify hooks run only in the post phase; their failure is logged
2635 in the verify output and causes the verification to fail.
2639 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2642 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643 for node in self.my_node_info.values())
2647 def BuildHooksNodes(self):
2648 """Build hooks nodes.
2651 return ([], self.my_node_names)
2653 def Exec(self, feedback_fn):
2654 """Verify integrity of the node group, performing various test on nodes.
2657 # This method has too many local variables. pylint: disable=R0914
2658 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2660 if not self.my_node_names:
2662 feedback_fn("* Empty node group, skipping verification")
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667 verbose = self.op.verbose
2668 self._feedback_fn = feedback_fn
2670 vg_name = self.cfg.GetVGName()
2671 drbd_helper = self.cfg.GetDRBDHelper()
2672 cluster = self.cfg.GetClusterInfo()
2673 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674 hypervisors = cluster.enabled_hypervisors
2675 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2677 i_non_redundant = [] # Non redundant instances
2678 i_non_a_balanced = [] # Non auto-balanced instances
2679 i_offline = 0 # Count of offline instances
2680 n_offline = 0 # Count of offline nodes
2681 n_drained = 0 # Count of nodes being drained
2682 node_vol_should = {}
2684 # FIXME: verify OS list
2687 filemap = _ComputeAncillaryFiles(cluster, False)
2689 # do local checksums
2690 master_node = self.master_node = self.cfg.GetMasterNode()
2691 master_ip = self.cfg.GetMasterIP()
2693 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2696 if self.cfg.GetUseExternalMipScript():
2697 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2699 node_verify_param = {
2700 constants.NV_FILELIST:
2701 utils.UniqueSequence(filename
2702 for files in filemap
2703 for filename in files),
2704 constants.NV_NODELIST:
2705 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2706 self.all_node_info.values()),
2707 constants.NV_HYPERVISOR: hypervisors,
2708 constants.NV_HVPARAMS:
2709 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2710 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2711 for node in node_data_list
2712 if not node.offline],
2713 constants.NV_INSTANCELIST: hypervisors,
2714 constants.NV_VERSION: None,
2715 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2716 constants.NV_NODESETUP: None,
2717 constants.NV_TIME: None,
2718 constants.NV_MASTERIP: (master_node, master_ip),
2719 constants.NV_OSLIST: None,
2720 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2721 constants.NV_USERSCRIPTS: user_scripts,
2724 if vg_name is not None:
2725 node_verify_param[constants.NV_VGLIST] = None
2726 node_verify_param[constants.NV_LVLIST] = vg_name
2727 node_verify_param[constants.NV_PVLIST] = [vg_name]
2728 node_verify_param[constants.NV_DRBDLIST] = None
2731 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2734 # FIXME: this needs to be changed per node-group, not cluster-wide
2736 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2737 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2738 bridges.add(default_nicpp[constants.NIC_LINK])
2739 for instance in self.my_inst_info.values():
2740 for nic in instance.nics:
2741 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2742 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2743 bridges.add(full_nic[constants.NIC_LINK])
2746 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2748 # Build our expected cluster state
2749 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2751 vm_capable=node.vm_capable))
2752 for node in node_data_list)
2756 for node in self.all_node_info.values():
2757 path = _SupportsOob(self.cfg, node)
2758 if path and path not in oob_paths:
2759 oob_paths.append(path)
2762 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2764 for instance in self.my_inst_names:
2765 inst_config = self.my_inst_info[instance]
2767 for nname in inst_config.all_nodes:
2768 if nname not in node_image:
2769 gnode = self.NodeImage(name=nname)
2770 gnode.ghost = (nname not in self.all_node_info)
2771 node_image[nname] = gnode
2773 inst_config.MapLVsByNode(node_vol_should)
2775 pnode = inst_config.primary_node
2776 node_image[pnode].pinst.append(instance)
2778 for snode in inst_config.secondary_nodes:
2779 nimg = node_image[snode]
2780 nimg.sinst.append(instance)
2781 if pnode not in nimg.sbp:
2782 nimg.sbp[pnode] = []
2783 nimg.sbp[pnode].append(instance)
2785 # At this point, we have the in-memory data structures complete,
2786 # except for the runtime information, which we'll gather next
2788 # Due to the way our RPC system works, exact response times cannot be
2789 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2790 # time before and after executing the request, we can at least have a time
2792 nvinfo_starttime = time.time()
2793 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2795 self.cfg.GetClusterName())
2796 nvinfo_endtime = time.time()
2798 if self.extra_lv_nodes and vg_name is not None:
2800 self.rpc.call_node_verify(self.extra_lv_nodes,
2801 {constants.NV_LVLIST: vg_name},
2802 self.cfg.GetClusterName())
2804 extra_lv_nvinfo = {}
2806 all_drbd_map = self.cfg.ComputeDRBDMap()
2808 feedback_fn("* Gathering disk information (%s nodes)" %
2809 len(self.my_node_names))
2810 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2813 feedback_fn("* Verifying configuration file consistency")
2815 # If not all nodes are being checked, we need to make sure the master node
2816 # and a non-checked vm_capable node are in the list.
2817 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2819 vf_nvinfo = all_nvinfo.copy()
2820 vf_node_info = list(self.my_node_info.values())
2821 additional_nodes = []
2822 if master_node not in self.my_node_info:
2823 additional_nodes.append(master_node)
2824 vf_node_info.append(self.all_node_info[master_node])
2825 # Add the first vm_capable node we find which is not included
2826 for node in absent_nodes:
2827 nodeinfo = self.all_node_info[node]
2828 if nodeinfo.vm_capable and not nodeinfo.offline:
2829 additional_nodes.append(node)
2830 vf_node_info.append(self.all_node_info[node])
2832 key = constants.NV_FILELIST
2833 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2834 {key: node_verify_param[key]},
2835 self.cfg.GetClusterName()))
2837 vf_nvinfo = all_nvinfo
2838 vf_node_info = self.my_node_info.values()
2840 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2842 feedback_fn("* Verifying node status")
2846 for node_i in node_data_list:
2848 nimg = node_image[node]
2852 feedback_fn("* Skipping offline node %s" % (node,))
2856 if node == master_node:
2858 elif node_i.master_candidate:
2859 ntype = "master candidate"
2860 elif node_i.drained:
2866 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2868 msg = all_nvinfo[node].fail_msg
2869 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2872 nimg.rpc_fail = True
2875 nresult = all_nvinfo[node].payload
2877 nimg.call_ok = self._VerifyNode(node_i, nresult)
2878 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2879 self._VerifyNodeNetwork(node_i, nresult)
2880 self._VerifyNodeUserScripts(node_i, nresult)
2881 self._VerifyOob(node_i, nresult)
2884 self._VerifyNodeLVM(node_i, nresult, vg_name)
2885 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2888 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2889 self._UpdateNodeInstances(node_i, nresult, nimg)
2890 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2891 self._UpdateNodeOS(node_i, nresult, nimg)
2893 if not nimg.os_fail:
2894 if refos_img is None:
2896 self._VerifyNodeOS(node_i, nimg, refos_img)
2897 self._VerifyNodeBridges(node_i, nresult, bridges)
2899 # Check whether all running instances are primary for the node. (This
2900 # can no longer be done from _VerifyInstance below, since some of the
2901 # wrong instances could be from other node groups.)
2902 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2904 for inst in non_primary_inst:
2905 # FIXME: investigate best way to handle offline insts
2906 if inst.admin_state == constants.ADMINST_OFFLINE:
2908 feedback_fn("* Skipping offline instance %s" % inst.name)
2911 test = inst in self.all_inst_info
2912 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2913 "instance should not run on node %s", node_i.name)
2914 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2915 "node is running unknown instance %s", inst)
2917 for node, result in extra_lv_nvinfo.items():
2918 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2919 node_image[node], vg_name)
2921 feedback_fn("* Verifying instance status")
2922 for instance in self.my_inst_names:
2924 feedback_fn("* Verifying instance %s" % instance)
2925 inst_config = self.my_inst_info[instance]
2926 self._VerifyInstance(instance, inst_config, node_image,
2928 inst_nodes_offline = []
2930 pnode = inst_config.primary_node
2931 pnode_img = node_image[pnode]
2932 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2933 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2934 " primary node failed", instance)
2936 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2938 constants.CV_EINSTANCEBADNODE, instance,
2939 "instance is marked as running and lives on offline node %s",
2940 inst_config.primary_node)
2942 # If the instance is non-redundant we cannot survive losing its primary
2943 # node, so we are not N+1 compliant. On the other hand we have no disk
2944 # templates with more than one secondary so that situation is not well
2946 # FIXME: does not support file-backed instances
2947 if not inst_config.secondary_nodes:
2948 i_non_redundant.append(instance)
2950 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2951 constants.CV_EINSTANCELAYOUT,
2952 instance, "instance has multiple secondary nodes: %s",
2953 utils.CommaJoin(inst_config.secondary_nodes),
2954 code=self.ETYPE_WARNING)
2956 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2957 pnode = inst_config.primary_node
2958 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2959 instance_groups = {}
2961 for node in instance_nodes:
2962 instance_groups.setdefault(self.all_node_info[node].group,
2966 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2967 # Sort so that we always list the primary node first.
2968 for group, nodes in sorted(instance_groups.items(),
2969 key=lambda (_, nodes): pnode in nodes,
2972 self._ErrorIf(len(instance_groups) > 1,
2973 constants.CV_EINSTANCESPLITGROUPS,
2974 instance, "instance has primary and secondary nodes in"
2975 " different groups: %s", utils.CommaJoin(pretty_list),
2976 code=self.ETYPE_WARNING)
2978 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2979 i_non_a_balanced.append(instance)
2981 for snode in inst_config.secondary_nodes:
2982 s_img = node_image[snode]
2983 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2984 snode, "instance %s, connection to secondary node failed",
2988 inst_nodes_offline.append(snode)
2990 # warn that the instance lives on offline nodes
2991 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2992 "instance has offline secondary node(s) %s",
2993 utils.CommaJoin(inst_nodes_offline))
2994 # ... or ghost/non-vm_capable nodes
2995 for node in inst_config.all_nodes:
2996 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2997 instance, "instance lives on ghost node %s", node)
2998 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2999 instance, "instance lives on non-vm_capable node %s", node)
3001 feedback_fn("* Verifying orphan volumes")
3002 reserved = utils.FieldSet(*cluster.reserved_lvs)
3004 # We will get spurious "unknown volume" warnings if any node of this group
3005 # is secondary for an instance whose primary is in another group. To avoid
3006 # them, we find these instances and add their volumes to node_vol_should.
3007 for inst in self.all_inst_info.values():
3008 for secondary in inst.secondary_nodes:
3009 if (secondary in self.my_node_info
3010 and inst.name not in self.my_inst_info):
3011 inst.MapLVsByNode(node_vol_should)
3014 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3016 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3017 feedback_fn("* Verifying N+1 Memory redundancy")
3018 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3020 feedback_fn("* Other Notes")
3022 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3023 % len(i_non_redundant))
3025 if i_non_a_balanced:
3026 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3027 % len(i_non_a_balanced))
3030 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3033 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3036 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3040 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3041 """Analyze the post-hooks' result
3043 This method analyses the hook result, handles it, and sends some
3044 nicely-formatted feedback back to the user.
3046 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3047 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3048 @param hooks_results: the results of the multi-node hooks rpc call
3049 @param feedback_fn: function used to send feedback back to the caller
3050 @param lu_result: previous Exec result
3051 @return: the new Exec result, based on the previous result
3055 # We only really run POST phase hooks, only for non-empty groups,
3056 # and are only interested in their results
3057 if not self.my_node_names:
3060 elif phase == constants.HOOKS_PHASE_POST:
3061 # Used to change hooks' output to proper indentation
3062 feedback_fn("* Hooks Results")
3063 assert hooks_results, "invalid result from hooks"
3065 for node_name in hooks_results:
3066 res = hooks_results[node_name]
3068 test = msg and not res.offline
3069 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3070 "Communication failure in hooks execution: %s", msg)
3071 if res.offline or msg:
3072 # No need to investigate payload if node is offline or gave
3075 for script, hkr, output in res.payload:
3076 test = hkr == constants.HKR_FAIL
3077 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3078 "Script %s failed, output:", script)
3080 output = self._HOOKS_INDENT_RE.sub(" ", output)
3081 feedback_fn("%s" % output)
3087 class LUClusterVerifyDisks(NoHooksLU):
3088 """Verifies the cluster disks status.
3093 def ExpandNames(self):
3094 self.share_locks = _ShareAll()
3095 self.needed_locks = {
3096 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3099 def Exec(self, feedback_fn):
3100 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3102 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3103 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3104 for group in group_names])
3107 class LUGroupVerifyDisks(NoHooksLU):
3108 """Verifies the status of all disks in a node group.
3113 def ExpandNames(self):
3114 # Raises errors.OpPrereqError on its own if group can't be found
3115 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3117 self.share_locks = _ShareAll()
3118 self.needed_locks = {
3119 locking.LEVEL_INSTANCE: [],
3120 locking.LEVEL_NODEGROUP: [],
3121 locking.LEVEL_NODE: [],
3124 def DeclareLocks(self, level):
3125 if level == locking.LEVEL_INSTANCE:
3126 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3128 # Lock instances optimistically, needs verification once node and group
3129 # locks have been acquired
3130 self.needed_locks[locking.LEVEL_INSTANCE] = \
3131 self.cfg.GetNodeGroupInstances(self.group_uuid)
3133 elif level == locking.LEVEL_NODEGROUP:
3134 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3136 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3137 set([self.group_uuid] +
3138 # Lock all groups used by instances optimistically; this requires
3139 # going via the node before it's locked, requiring verification
3142 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3143 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3145 elif level == locking.LEVEL_NODE:
3146 # This will only lock the nodes in the group to be verified which contain
3148 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3149 self._LockInstancesNodes()
3151 # Lock all nodes in group to be verified
3152 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3153 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3154 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3156 def CheckPrereq(self):
3157 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3158 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3159 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3161 assert self.group_uuid in owned_groups
3163 # Check if locked instances are still correct
3164 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3166 # Get instance information
3167 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3169 # Check if node groups for locked instances are still correct
3170 for (instance_name, inst) in self.instances.items():
3171 assert owned_nodes.issuperset(inst.all_nodes), \
3172 "Instance %s's nodes changed while we kept the lock" % instance_name
3174 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3177 assert self.group_uuid in inst_groups, \
3178 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3180 def Exec(self, feedback_fn):
3181 """Verify integrity of cluster disks.
3183 @rtype: tuple of three items
3184 @return: a tuple of (dict of node-to-node_error, list of instances
3185 which need activate-disks, dict of instance: (node, volume) for
3190 res_instances = set()
3193 nv_dict = _MapInstanceDisksToNodes([inst
3194 for inst in self.instances.values()
3195 if inst.admin_state == constants.ADMINST_UP])
3198 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3199 set(self.cfg.GetVmCapableNodeList()))
3201 node_lvs = self.rpc.call_lv_list(nodes, [])
3203 for (node, node_res) in node_lvs.items():
3204 if node_res.offline:
3207 msg = node_res.fail_msg
3209 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3210 res_nodes[node] = msg
3213 for lv_name, (_, _, lv_online) in node_res.payload.items():
3214 inst = nv_dict.pop((node, lv_name), None)
3215 if not (lv_online or inst is None):
3216 res_instances.add(inst)
3218 # any leftover items in nv_dict are missing LVs, let's arrange the data
3220 for key, inst in nv_dict.iteritems():
3221 res_missing.setdefault(inst, []).append(list(key))
3223 return (res_nodes, list(res_instances), res_missing)
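# Return-value sketch with hypothetical names:
#   ({"node3": "Connection failed"},            # per-node RPC errors
#    ["inst2"],                                 # instances needing activate-disks
#    {"inst5": [["node1", "xenvg/disk0"]]})     # instances with missing LVs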
3226 class LUClusterRepairDiskSizes(NoHooksLU):
3227 """Verifies the cluster disks sizes.
3232 def ExpandNames(self):
3233 if self.op.instances:
3234 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3235 self.needed_locks = {
3236 locking.LEVEL_NODE_RES: [],
3237 locking.LEVEL_INSTANCE: self.wanted_names,
3239 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3241 self.wanted_names = None
3242 self.needed_locks = {
3243 locking.LEVEL_NODE_RES: locking.ALL_SET,
3244 locking.LEVEL_INSTANCE: locking.ALL_SET,
3246 self.share_locks = _ShareAll()
3248 def DeclareLocks(self, level):
3249 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3250 self._LockInstancesNodes(primary_only=True, level=level)
3252 def CheckPrereq(self):
3253 """Check prerequisites.
3255 This only checks the optional instance list against the existing names.
3258 if self.wanted_names is None:
3259 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3261 self.wanted_instances = \
3262 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3264 def _EnsureChildSizes(self, disk):
3265 """Ensure children of the disk have the needed disk size.
3267 This is valid mainly for DRBD8 and fixes an issue where the
3268 children have smaller disk size.
3270 @param disk: an L{ganeti.objects.Disk} object
3273 if disk.dev_type == constants.LD_DRBD8:
3274 assert disk.children, "Empty children for DRBD8?"
3275 fchild = disk.children[0]
3276 mismatch = fchild.size < disk.size
3278 self.LogInfo("Child disk has size %d, parent %d, fixing",
3279 fchild.size, disk.size)
3280 fchild.size = disk.size
3282 # and we recurse on this child only, not on the metadev
3283 return self._EnsureChildSizes(fchild) or mismatch
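# Example with hypothetical sizes: a DRBD8 disk of 10240 MiB whose first
# child (the data LV) is recorded at 10230 MiB gets the child grown to 10240
# in the configuration and the method returns True, so Exec() below knows the
# instance configuration must be written back.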
3287 def Exec(self, feedback_fn):
3288 """Verify the size of cluster disks.
3291 # TODO: check child disks too
3292 # TODO: check differences in size between primary/secondary nodes
3294 for instance in self.wanted_instances:
3295 pnode = instance.primary_node
3296 if pnode not in per_node_disks:
3297 per_node_disks[pnode] = []
3298 for idx, disk in enumerate(instance.disks):
3299 per_node_disks[pnode].append((instance, idx, disk))
3301 assert not (frozenset(per_node_disks.keys()) -
3302 self.owned_locks(locking.LEVEL_NODE_RES)), \
3303 "Not owning correct locks"
3304 assert not self.owned_locks(locking.LEVEL_NODE)
3307 for node, dskl in per_node_disks.items():
3308 newl = [v[2].Copy() for v in dskl]
3310 self.cfg.SetDiskID(dsk, node)
3311 result = self.rpc.call_blockdev_getsize(node, newl)
3313 self.LogWarning("Failure in blockdev_getsize call to node"
3314 " %s, ignoring", node)
3316 if len(result.payload) != len(dskl):
3317 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3318 " result.payload=%s", node, len(dskl), result.payload)
3319 self.LogWarning("Invalid result from node %s, ignoring node results",
3322 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3324 self.LogWarning("Disk %d of instance %s did not return size"
3325 " information, ignoring", idx, instance.name)
3327 if not isinstance(size, (int, long)):
3328 self.LogWarning("Disk %d of instance %s did not return valid"
3329 " size information, ignoring", idx, instance.name)
3332 if size != disk.size:
3333 self.LogInfo("Disk %d of instance %s has mismatched size,"
3334 " correcting: recorded %d, actual %d", idx,
3335 instance.name, disk.size, size)
3337 self.cfg.Update(instance, feedback_fn)
3338 changed.append((instance.name, idx, size))
3339 if self._EnsureChildSizes(disk):
3340 self.cfg.Update(instance, feedback_fn)
3341 changed.append((instance.name, idx, disk.size))
3345 class LUClusterRename(LogicalUnit):
3346 """Rename the cluster.
3349 HPATH = "cluster-rename"
3350 HTYPE = constants.HTYPE_CLUSTER
3352 def BuildHooksEnv(self):
3357 "OP_TARGET": self.cfg.GetClusterName(),
3358 "NEW_NAME": self.op.name,
3361 def BuildHooksNodes(self):
3362 """Build hooks nodes.
3365 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3367 def CheckPrereq(self):
3368 """Verify that the passed name is a valid one.
3371 hostname = netutils.GetHostname(name=self.op.name,
3372 family=self.cfg.GetPrimaryIPFamily())
3374 new_name = hostname.name
3375 self.ip = new_ip = hostname.ip
3376 old_name = self.cfg.GetClusterName()
3377 old_ip = self.cfg.GetMasterIP()
3378 if new_name == old_name and new_ip == old_ip:
3379 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3380 " cluster has changed",
3382 if new_ip != old_ip:
3383 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3384 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3385 " reachable on the network" %
3386 new_ip, errors.ECODE_NOTUNIQUE)
3388 self.op.name = new_name
3390 def Exec(self, feedback_fn):
3391 """Rename the cluster.
3394 clustername = self.op.name
3397 # shutdown the master IP
3398 master_params = self.cfg.GetMasterNetworkParameters()
3399 ems = self.cfg.GetUseExternalMipScript()
3400 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3402 result.Raise("Could not disable the master role")
3405 cluster = self.cfg.GetClusterInfo()
3406 cluster.cluster_name = clustername
3407 cluster.master_ip = new_ip
3408 self.cfg.Update(cluster, feedback_fn)
3410 # update the known hosts file
3411 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3412 node_list = self.cfg.GetOnlineNodeList()
3414 node_list.remove(master_params.name)
3417 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3419 master_params.ip = new_ip
3420 result = self.rpc.call_node_activate_master_ip(master_params.name,
3422 msg = result.fail_msg
3424 self.LogWarning("Could not re-enable the master role on"
3425 " the master, please restart manually: %s", msg)
3430 def _ValidateNetmask(cfg, netmask):
3431 """Checks if a netmask is valid.
3433 @type cfg: L{config.ConfigWriter}
3434 @param cfg: The cluster configuration
3436 @param netmask: the netmask to be verified
3437 @raise errors.OpPrereqError: if the validation fails
3440 ip_family = cfg.GetPrimaryIPFamily()
3442 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3443 except errors.ProgrammerError:
3444 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3446 if not ipcls.ValidateNetmask(netmask):
3447 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
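# Usage sketch with assumed values: on an IPv4 cluster a CIDR-style netmask
# such as 24 passes, while something like 33 makes ipcls.ValidateNetmask()
# fail and OpPrereqError is raised; the IP class is chosen from the cluster's
# primary IP family as shown above.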
3451 class LUClusterSetParams(LogicalUnit):
3452 """Change the parameters of the cluster.
3455 HPATH = "cluster-modify"
3456 HTYPE = constants.HTYPE_CLUSTER
3459 def CheckArguments(self):
3463 if self.op.uid_pool:
3464 uidpool.CheckUidPool(self.op.uid_pool)
3466 if self.op.add_uids:
3467 uidpool.CheckUidPool(self.op.add_uids)
3469 if self.op.remove_uids:
3470 uidpool.CheckUidPool(self.op.remove_uids)
3472 if self.op.master_netmask is not None:
3473 _ValidateNetmask(self.cfg, self.op.master_netmask)
3475 def ExpandNames(self):
3476 # FIXME: in the future maybe other cluster params won't require checking on
3477 # all nodes to be modified.
3478 self.needed_locks = {
3479 locking.LEVEL_NODE: locking.ALL_SET,
3481 self.share_locks[locking.LEVEL_NODE] = 1
3483 def BuildHooksEnv(self):
3488 "OP_TARGET": self.cfg.GetClusterName(),
3489 "NEW_VG_NAME": self.op.vg_name,
3492 def BuildHooksNodes(self):
3493 """Build hooks nodes.
3496 mn = self.cfg.GetMasterNode()
3499 def CheckPrereq(self):
3500 """Check prerequisites.
3502 This checks whether the given params don't conflict and
3503 if the given volume group is valid.
3506 if self.op.vg_name is not None and not self.op.vg_name:
3507 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3508 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3509 " instances exist", errors.ECODE_INVAL)
3511 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3512 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3513 raise errors.OpPrereqError("Cannot disable drbd helper while"
3514 " drbd-based instances exist",
3517 node_list = self.owned_locks(locking.LEVEL_NODE)
3519 # if vg_name not None, checks given volume group on all nodes
3521 vglist = self.rpc.call_vg_list(node_list)
3522 for node in node_list:
3523 msg = vglist[node].fail_msg
3525 # ignoring down node
3526 self.LogWarning("Error while gathering data on node %s"
3527 " (ignoring node): %s", node, msg)
3529 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3531 constants.MIN_VG_SIZE)
3533 raise errors.OpPrereqError("Error on node '%s': %s" %
3534 (node, vgstatus), errors.ECODE_ENVIRON)
3536 if self.op.drbd_helper:
3537 # checks given drbd helper on all nodes
3538 helpers = self.rpc.call_drbd_helper(node_list)
3539 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3541 self.LogInfo("Not checking drbd helper on offline node %s", node)
3543 msg = helpers[node].fail_msg
3545 raise errors.OpPrereqError("Error checking drbd helper on node"
3546 " '%s': %s" % (node, msg),
3547 errors.ECODE_ENVIRON)
3548 node_helper = helpers[node].payload
3549 if node_helper != self.op.drbd_helper:
3550 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3551 (node, node_helper), errors.ECODE_ENVIRON)
3553 self.cluster = cluster = self.cfg.GetClusterInfo()
3554 # validate params changes
3555 if self.op.beparams:
3556 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3557 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3559 if self.op.ndparams:
3560 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3561 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3563 # TODO: we need a more general way to handle resetting
3564 # cluster-level parameters to default values
3565 if self.new_ndparams["oob_program"] == "":
3566 self.new_ndparams["oob_program"] = \
3567 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3569 if self.op.nicparams:
3570 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3571 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3572 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3575 # check all instances for consistency
3576 for instance in self.cfg.GetAllInstancesInfo().values():
3577 for nic_idx, nic in enumerate(instance.nics):
3578 params_copy = copy.deepcopy(nic.nicparams)
3579 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3581 # check parameter syntax
3583 objects.NIC.CheckParameterSyntax(params_filled)
3584 except errors.ConfigurationError, err:
3585 nic_errors.append("Instance %s, nic/%d: %s" %
3586 (instance.name, nic_idx, err))
3588 # if we're moving instances to routed, check that they have an ip
3589 target_mode = params_filled[constants.NIC_MODE]
3590 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3591 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3592 " address" % (instance.name, nic_idx))
3594 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3595 "\n".join(nic_errors))
3597 # hypervisor list/parameters
3598 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3599 if self.op.hvparams:
3600 for hv_name, hv_dict in self.op.hvparams.items():
3601 if hv_name not in self.new_hvparams:
3602 self.new_hvparams[hv_name] = hv_dict
3604 self.new_hvparams[hv_name].update(hv_dict)
3606 # os hypervisor parameters
3607 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3609 for os_name, hvs in self.op.os_hvp.items():
3610 if os_name not in self.new_os_hvp:
3611 self.new_os_hvp[os_name] = hvs
3613 for hv_name, hv_dict in hvs.items():
3614 if hv_name not in self.new_os_hvp[os_name]:
3615 self.new_os_hvp[os_name][hv_name] = hv_dict
3617 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3620 self.new_osp = objects.FillDict(cluster.osparams, {})
3621 if self.op.osparams:
3622 for os_name, osp in self.op.osparams.items():
3623 if os_name not in self.new_osp:
3624 self.new_osp[os_name] = {}
3626 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3629 if not self.new_osp[os_name]:
3630 # we removed all parameters
3631 del self.new_osp[os_name]
3633 # check the parameter validity (remote check)
3634 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3635 os_name, self.new_osp[os_name])
3637 # changes to the hypervisor list
3638 if self.op.enabled_hypervisors is not None:
3639 self.hv_list = self.op.enabled_hypervisors
3640 for hv in self.hv_list:
3641 # if the hypervisor doesn't already exist in the cluster
3642 # hvparams, we initialize it to empty, and then (in both
3643 # cases) we make sure to fill the defaults, as we might not
3644 # have a complete defaults list if the hypervisor wasn't
3646 if hv not in new_hvp:
3648 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3649 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3651 self.hv_list = cluster.enabled_hypervisors
3653 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3654 # either the enabled list has changed, or the parameters have, validate
3655 for hv_name, hv_params in self.new_hvparams.items():
3656 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3657 (self.op.enabled_hypervisors and
3658 hv_name in self.op.enabled_hypervisors)):
3659 # either this is a new hypervisor, or its parameters have changed
3660 hv_class = hypervisor.GetHypervisor(hv_name)
3661 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3662 hv_class.CheckParameterSyntax(hv_params)
3663 _CheckHVParams(self, node_list, hv_name, hv_params)
3666 # no need to check any newly-enabled hypervisors, since the
3667 # defaults have already been checked in the above code-block
3668 for os_name, os_hvp in self.new_os_hvp.items():
3669 for hv_name, hv_params in os_hvp.items():
3670 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3671 # we need to fill in the new os_hvp on top of the actual hv_p
3672 cluster_defaults = self.new_hvparams.get(hv_name, {})
3673 new_osp = objects.FillDict(cluster_defaults, hv_params)
3674 hv_class = hypervisor.GetHypervisor(hv_name)
3675 hv_class.CheckParameterSyntax(new_osp)
3676 _CheckHVParams(self, node_list, hv_name, new_osp)
3678 if self.op.default_iallocator:
3679 alloc_script = utils.FindFile(self.op.default_iallocator,
3680 constants.IALLOCATOR_SEARCH_PATH,
3682 if alloc_script is None:
3683 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3684 " specified" % self.op.default_iallocator,
3687 def Exec(self, feedback_fn):
3688 """Change the parameters of the cluster.
3691 if self.op.vg_name is not None:
3692 new_volume = self.op.vg_name
3695 if new_volume != self.cfg.GetVGName():
3696 self.cfg.SetVGName(new_volume)
3698 feedback_fn("Cluster LVM configuration already in desired"
3699 " state, not changing")
3700 if self.op.drbd_helper is not None:
3701 new_helper = self.op.drbd_helper
3704 if new_helper != self.cfg.GetDRBDHelper():
3705 self.cfg.SetDRBDHelper(new_helper)
3707 feedback_fn("Cluster DRBD helper already in desired state,"
3709 if self.op.hvparams:
3710 self.cluster.hvparams = self.new_hvparams
3712 self.cluster.os_hvp = self.new_os_hvp
3713 if self.op.enabled_hypervisors is not None:
3714 self.cluster.hvparams = self.new_hvparams
3715 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3716 if self.op.beparams:
3717 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3718 if self.op.nicparams:
3719 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3720 if self.op.osparams:
3721 self.cluster.osparams = self.new_osp
3722 if self.op.ndparams:
3723 self.cluster.ndparams = self.new_ndparams
3725 if self.op.candidate_pool_size is not None:
3726 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3727 # we need to update the pool size here, otherwise the save will fail
3728 _AdjustCandidatePool(self, [])
3730 if self.op.maintain_node_health is not None:
3731 self.cluster.maintain_node_health = self.op.maintain_node_health
3733 if self.op.prealloc_wipe_disks is not None:
3734 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3736 if self.op.add_uids is not None:
3737 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3739 if self.op.remove_uids is not None:
3740 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3742 if self.op.uid_pool is not None:
3743 self.cluster.uid_pool = self.op.uid_pool
3745 if self.op.default_iallocator is not None:
3746 self.cluster.default_iallocator = self.op.default_iallocator
3748 if self.op.reserved_lvs is not None:
3749 self.cluster.reserved_lvs = self.op.reserved_lvs
3751 if self.op.use_external_mip_script is not None:
3752 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3754 def helper_os(aname, mods, desc):
3755 desc += " OS list"
3756 lst = getattr(self.cluster, aname)
3757 for key, val in mods:
3758 if key == constants.DDM_ADD:
3759 if val in lst:
3760 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3761 else:
3762 lst.append(val)
3763 elif key == constants.DDM_REMOVE:
3764 if val in lst:
3765 lst.remove(val)
3766 else:
3767 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3768 else:
3769 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3771 if self.op.hidden_os:
3772 helper_os("hidden_os", self.op.hidden_os, "hidden")
3774 if self.op.blacklisted_os:
3775 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
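# Illustrative sketch of the modification lists consumed above (hypothetical
# values, not taken from this module): each entry is a (DDM_ADD|DDM_REMOVE,
# os_name) pair, roughly as built by the cluster-modify opcode, e.g.
#   hidden_os=[(constants.DDM_ADD, "debian-image"),
#              (constants.DDM_REMOVE, "lenny-image")]
# helper_os() then appends/removes the names on the corresponding cluster
# attribute and reports no-op modifications through feedback_fn instead of
# failing.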
3777 if self.op.master_netdev:
3778 master_params = self.cfg.GetMasterNetworkParameters()
3779 ems = self.cfg.GetUseExternalMipScript()
3780 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3781 self.cluster.master_netdev)
3782 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3783 master_params, ems)
3784 result.Raise("Could not disable the master ip")
3785 feedback_fn("Changing master_netdev from %s to %s" %
3786 (master_params.netdev, self.op.master_netdev))
3787 self.cluster.master_netdev = self.op.master_netdev
3789 if self.op.master_netmask:
3790 master_params = self.cfg.GetMasterNetworkParameters()
3791 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3792 result = self.rpc.call_node_change_master_netmask(master_params.name,
3793 master_params.netmask,
3794 self.op.master_netmask,
3795 master_params.ip,
3796 master_params.netdev)
3797 if result.fail_msg:
3798 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3799 feedback_fn(msg)
3801 self.cluster.master_netmask = self.op.master_netmask
3803 self.cfg.Update(self.cluster, feedback_fn)
3805 if self.op.master_netdev:
3806 master_params = self.cfg.GetMasterNetworkParameters()
3807 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3808 self.op.master_netdev)
3809 ems = self.cfg.GetUseExternalMipScript()
3810 result = self.rpc.call_node_activate_master_ip(master_params.name,
3811 master_params, ems)
3812 if result.fail_msg:
3813 self.LogWarning("Could not re-enable the master ip on"
3814 " the master, please restart manually: %s",
3815 result.fail_msg)
3818 def _UploadHelper(lu, nodes, fname):
3819 """Helper for uploading a file and showing warnings.
3822 if os.path.exists(fname):
3823 result = lu.rpc.call_upload_file(nodes, fname)
3824 for to_node, to_result in result.items():
3825 msg = to_result.fail_msg
3826 if msg:
3827 msg = ("Copy of file %s to node %s failed: %s" %
3828 (fname, to_node, msg))
3829 lu.proc.LogWarning(msg)
3832 def _ComputeAncillaryFiles(cluster, redist):
3833 """Compute files external to Ganeti which need to be consistent.
3835 @type redist: boolean
3836 @param redist: Whether to include files which need to be redistributed
3839 # Compute files for all nodes
3840 files_all = set([
3841 constants.SSH_KNOWN_HOSTS_FILE,
3842 constants.CONFD_HMAC_KEY,
3843 constants.CLUSTER_DOMAIN_SECRET_FILE,
3844 constants.SPICE_CERT_FILE,
3845 constants.SPICE_CACERT_FILE,
3846 constants.RAPI_USERS_FILE,
3847 ])
3849 if not redist:
3850 files_all.update(constants.ALL_CERT_FILES)
3851 files_all.update(ssconf.SimpleStore().GetFileList())
3852 else:
3853 # we need to ship at least the RAPI certificate
3854 files_all.add(constants.RAPI_CERT_FILE)
3856 if cluster.modify_etc_hosts:
3857 files_all.add(constants.ETC_HOSTS)
3859 # Files which are optional, these must:
3860 # - be present in one other category as well
3861 # - either exist or not exist on all nodes of that category (mc, vm all)
3862 files_opt = set([
3863 constants.RAPI_USERS_FILE,
3864 ])
3866 # Files which should only be on master candidates
3867 files_mc = set()
3869 if not redist:
3870 files_mc.add(constants.CLUSTER_CONF_FILE)
3872 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3873 # replication
3874 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3876 # Files which should only be on VM-capable nodes
3877 files_vm = set(filename
3878 for hv_name in cluster.enabled_hypervisors
3879 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3881 files_opt |= set(filename
3882 for hv_name in cluster.enabled_hypervisors
3883 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3885 # Filenames in each category must be unique
3886 all_files_set = files_all | files_mc | files_vm
3887 assert (len(all_files_set) ==
3888 sum(map(len, [files_all, files_mc, files_vm]))), \
3889 "Found file listed in more than one file list"
3891 # Optional files must be present in one other category
3892 assert all_files_set.issuperset(files_opt), \
3893 "Optional file not in a different required list"
3895 return (files_all, files_opt, files_mc, files_vm)
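# A rough, illustrative sketch of how the categories above relate:
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # every optional file also belongs to one of the required categories
#   assert files_opt <= (files_all | files_mc | files_vm)
#   # with redist=True the cluster configuration file itself is never listed;
#   # ConfigWriter distributes it separately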
3898 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3899 """Distribute additional files which are part of the cluster configuration.
3901 ConfigWriter takes care of distributing the config and ssconf files, but
3902 there are more files which should be distributed to all nodes. This function
3903 makes sure those are copied.
3905 @param lu: calling logical unit
3906 @param additional_nodes: list of nodes not in the config to distribute to
3907 @type additional_vm: boolean
3908 @param additional_vm: whether the additional nodes are vm-capable or not
3911 # Gather target nodes
3912 cluster = lu.cfg.GetClusterInfo()
3913 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3915 online_nodes = lu.cfg.GetOnlineNodeList()
3916 vm_nodes = lu.cfg.GetVmCapableNodeList()
3918 if additional_nodes is not None:
3919 online_nodes.extend(additional_nodes)
3920 if additional_vm:
3921 vm_nodes.extend(additional_nodes)
3923 # Never distribute to master node
3924 for nodelist in [online_nodes, vm_nodes]:
3925 if master_info.name in nodelist:
3926 nodelist.remove(master_info.name)
3929 (files_all, _, files_mc, files_vm) = \
3930 _ComputeAncillaryFiles(cluster, True)
3932 # Never re-distribute configuration file from here
3933 assert not (constants.CLUSTER_CONF_FILE in files_all or
3934 constants.CLUSTER_CONF_FILE in files_vm)
3935 assert not files_mc, "Master candidates not handled in this function"
3937 filemap = [
3938 (online_nodes, files_all),
3939 (vm_nodes, files_vm),
3940 ]
3943 for (node_list, files) in filemap:
3944 for fname in files:
3945 _UploadHelper(lu, node_list, fname)
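# Typical call sites, as seen later in this module: a plain
# _RedistributeAncillaryFiles(lu) after a configuration change, or, when a
# node is added,
#   _RedistributeAncillaryFiles(lu, additional_nodes=[node],
#                               additional_vm=op.vm_capable)
# so that the not-yet-configured node receives the same set of files.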
3948 class LUClusterRedistConf(NoHooksLU):
3949 """Force the redistribution of cluster configuration.
3951 This is a very simple LU.
3956 def ExpandNames(self):
3957 self.needed_locks = {
3958 locking.LEVEL_NODE: locking.ALL_SET,
3960 self.share_locks[locking.LEVEL_NODE] = 1
3962 def Exec(self, feedback_fn):
3963 """Redistribute the configuration.
3966 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3967 _RedistributeAncillaryFiles(self)
3970 class LUClusterActivateMasterIp(NoHooksLU):
3971 """Activate the master IP on the master node.
3974 def Exec(self, feedback_fn):
3975 """Activate the master IP.
3978 master_params = self.cfg.GetMasterNetworkParameters()
3979 ems = self.cfg.GetUseExternalMipScript()
3980 self.rpc.call_node_activate_master_ip(master_params.name,
3981 master_params, ems)
3984 class LUClusterDeactivateMasterIp(NoHooksLU):
3985 """Deactivate the master IP on the master node.
3988 def Exec(self, feedback_fn):
3989 """Deactivate the master IP.
3992 master_params = self.cfg.GetMasterNetworkParameters()
3993 ems = self.cfg.GetUseExternalMipScript()
3994 self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
3995 ems)
3998 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3999 """Sleep and poll for an instance's disk to sync.
4002 if not instance.disks or disks is not None and not disks:
4005 disks = _ExpandCheckDisks(instance, disks)
4008 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4010 node = instance.primary_node
4013 lu.cfg.SetDiskID(dev, node)
4015 # TODO: Convert to utils.Retry
4018 degr_retries = 10 # in seconds, as we sleep 1 second each time
4022 cumul_degraded = False
4023 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4024 msg = rstats.fail_msg
4025 if msg:
4026 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4029 raise errors.RemoteError("Can't contact node %s for mirror data,"
4030 " aborting." % node)
4033 rstats = rstats.payload
4035 for i, mstat in enumerate(rstats):
4036 if mstat is None:
4037 lu.LogWarning("Can't compute data for node %s/%s",
4038 node, disks[i].iv_name)
4041 cumul_degraded = (cumul_degraded or
4042 (mstat.is_degraded and mstat.sync_percent is None))
4043 if mstat.sync_percent is not None:
4045 if mstat.estimated_time is not None:
4046 rem_time = ("%s remaining (estimated)" %
4047 utils.FormatSeconds(mstat.estimated_time))
4048 max_time = mstat.estimated_time
4049 else:
4050 rem_time = "no time estimate"
4051 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4052 (disks[i].iv_name, mstat.sync_percent, rem_time))
4054 # if we're done but degraded, let's do a few small retries, to
4055 # make sure we see a stable and not transient situation; therefore
4056 # we force restart of the loop
4057 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4058 logging.info("Degraded disks found, %d retries left", degr_retries)
4066 time.sleep(min(60, max_time))
4069 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4070 return not cumul_degraded
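# Sketch of the usual calling pattern (illustrative; "lu" and "instance" are
# assumed to be a LogicalUnit and an already looked-up objects.Instance):
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("disk sync did not complete cleanly")
# The return value is True only if no disk was left degraded; with
# oneshot=True the status is roughly polled once instead of looping until the
# resync finishes.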
4073 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4074 """Check that mirrors are not degraded.
4076 The ldisk parameter, if True, will change the test from the
4077 is_degraded attribute (which represents overall non-ok status for
4078 the device(s)) to the ldisk (representing the local storage status).
4081 lu.cfg.SetDiskID(dev, node)
4083 result = True
4085 if on_primary or dev.AssembleOnSecondary():
4086 rstats = lu.rpc.call_blockdev_find(node, dev)
4087 msg = rstats.fail_msg
4088 if msg:
4089 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4090 result = False
4091 elif not rstats.payload:
4092 lu.LogWarning("Can't find disk on node %s", node)
4093 result = False
4094 else:
4095 if ldisk:
4096 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4097 else:
4098 result = result and not rstats.payload.is_degraded
4100 if dev.children:
4101 for child in dev.children:
4102 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4104 return result
4107 class LUOobCommand(NoHooksLU):
4108 """Logical unit for OOB handling.
4112 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4114 def ExpandNames(self):
4115 """Gather locks we need.
4118 if self.op.node_names:
4119 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4120 lock_names = self.op.node_names
4122 lock_names = locking.ALL_SET
4124 self.needed_locks = {
4125 locking.LEVEL_NODE: lock_names,
4128 def CheckPrereq(self):
4129 """Check prerequisites.
4132 - the node exists in the configuration
4135 Any errors are signaled by raising errors.OpPrereqError.
4139 self.master_node = self.cfg.GetMasterNode()
4141 assert self.op.power_delay >= 0.0
4143 if self.op.node_names:
4144 if (self.op.command in self._SKIP_MASTER and
4145 self.master_node in self.op.node_names):
4146 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4147 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4149 if master_oob_handler:
4150 additional_text = ("run '%s %s %s' if you want to operate on the"
4151 " master regardless") % (master_oob_handler,
4152 self.op.command,
4153 self.master_node)
4154 else:
4155 additional_text = "it does not support out-of-band operations"
4157 raise errors.OpPrereqError(("Operating on the master node %s is not"
4158 " allowed for %s; %s") %
4159 (self.master_node, self.op.command,
4160 additional_text), errors.ECODE_INVAL)
4161 else:
4162 self.op.node_names = self.cfg.GetNodeList()
4163 if self.op.command in self._SKIP_MASTER:
4164 self.op.node_names.remove(self.master_node)
4166 if self.op.command in self._SKIP_MASTER:
4167 assert self.master_node not in self.op.node_names
4169 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4170 if node is None:
4171 raise errors.OpPrereqError("Node %s not found" % node_name,
4174 self.nodes.append(node)
4176 if (not self.op.ignore_status and
4177 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4178 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4179 " not marked offline") % node_name,
4182 def Exec(self, feedback_fn):
4183 """Execute OOB and return result if we expect any.
4186 master_node = self.master_node
4189 for idx, node in enumerate(utils.NiceSort(self.nodes,
4190 key=lambda node: node.name)):
4191 node_entry = [(constants.RS_NORMAL, node.name)]
4192 ret.append(node_entry)
4194 oob_program = _SupportsOob(self.cfg, node)
4196 if not oob_program:
4197 node_entry.append((constants.RS_UNAVAIL, None))
4198 continue
4200 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4201 self.op.command, oob_program, node.name)
4202 result = self.rpc.call_run_oob(master_node, oob_program,
4203 self.op.command, node.name,
4204 self.op.timeout)
4206 if result.fail_msg:
4207 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4208 node.name, result.fail_msg)
4209 node_entry.append((constants.RS_NODATA, None))
4210 else:
4211 try:
4212 self._CheckPayload(result)
4213 except errors.OpExecError, err:
4214 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4215 node.name, err)
4216 node_entry.append((constants.RS_NODATA, None))
4217 else:
4218 if self.op.command == constants.OOB_HEALTH:
4219 # For health we should log important events
4220 for item, status in result.payload:
4221 if status in [constants.OOB_STATUS_WARNING,
4222 constants.OOB_STATUS_CRITICAL]:
4223 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4224 item, node.name, status)
4226 if self.op.command == constants.OOB_POWER_ON:
4227 node.powered = True
4228 elif self.op.command == constants.OOB_POWER_OFF:
4229 node.powered = False
4230 elif self.op.command == constants.OOB_POWER_STATUS:
4231 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4232 if powered != node.powered:
4233 logging.warning(("Recorded power state (%s) of node '%s' does not"
4234 " match actual power state (%s)"), node.powered,
4235 node.name, powered)
4237 # For configuration changing commands we should update the node
4238 if self.op.command in (constants.OOB_POWER_ON,
4239 constants.OOB_POWER_OFF):
4240 self.cfg.Update(node, feedback_fn)
4242 node_entry.append((constants.RS_NORMAL, result.payload))
4244 if (self.op.command == constants.OOB_POWER_ON and
4245 idx < len(self.nodes) - 1):
4246 time.sleep(self.op.power_delay)
4248 return ret
4250 def _CheckPayload(self, result):
4251 """Checks if the payload is valid.
4253 @param result: RPC result
4254 @raises errors.OpExecError: If payload is not valid
4256 errs = []
4258 if self.op.command == constants.OOB_HEALTH:
4259 if not isinstance(result.payload, list):
4260 errs.append("command 'health' is expected to return a list but got %s" %
4261 type(result.payload))
4262 else:
4263 for item, status in result.payload:
4264 if status not in constants.OOB_STATUSES:
4265 errs.append("health item '%s' has invalid status '%s'" %
4266 (item, status))
4268 if self.op.command == constants.OOB_POWER_STATUS:
4269 if not isinstance(result.payload, dict):
4270 errs.append("power-status is expected to return a dict but got %s" %
4271 type(result.payload))
4273 if self.op.command in [
4274 constants.OOB_POWER_ON,
4275 constants.OOB_POWER_OFF,
4276 constants.OOB_POWER_CYCLE,
4278 if result.payload is not None:
4279 errs.append("%s is expected to not return payload but got '%s'" %
4280 (self.op.command, result.payload))
4282 if errs:
4283 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4284 utils.CommaJoin(errs))
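# Summary of the payload shapes checked above (derived from the checks, not a
# formal spec): "health" must return a list of (item, status) pairs with each
# status in constants.OOB_STATUSES; "power-status" must return a dict
# containing constants.OOB_POWER_STATUS_POWERED; power-on/power-off/
# power-cycle must return no payload at all.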
4287 class _OsQuery(_QueryBase):
4288 FIELDS = query.OS_FIELDS
4290 def ExpandNames(self, lu):
4291 # Lock all nodes in shared mode
4292 # Temporary removal of locks, should be reverted later
4293 # TODO: reintroduce locks when they are lighter-weight
4294 lu.needed_locks = {}
4295 #self.share_locks[locking.LEVEL_NODE] = 1
4296 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4298 # The following variables interact with _QueryBase._GetNames
4299 if self.names:
4300 self.wanted = self.names
4301 else:
4302 self.wanted = locking.ALL_SET
4304 self.do_locking = self.use_locking
4306 def DeclareLocks(self, lu, level):
4307 pass
4309 @staticmethod
4310 def _DiagnoseByOS(rlist):
4311 """Remaps a per-node return list into a per-os per-node dictionary
4313 @param rlist: a map with node names as keys and OS objects as values
4316 @return: a dictionary with osnames as keys and as value another
4317 map, with nodes as keys and tuples of (path, status, diagnose,
4318 variants, parameters, api_versions) as values, eg::
4320 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4321 (/srv/..., False, "invalid api")],
4322 "node2": [(/srv/..., True, "", [], [])]}
4326 all_os = {}
4327 # we build here the list of nodes that didn't fail the RPC (at RPC
4328 # level), so that nodes with a non-responding node daemon don't
4329 # make all OSes invalid
4330 good_nodes = [node_name for node_name in rlist
4331 if not rlist[node_name].fail_msg]
4332 for node_name, nr in rlist.items():
4333 if nr.fail_msg or not nr.payload:
4334 continue
4335 for (name, path, status, diagnose, variants,
4336 params, api_versions) in nr.payload:
4337 if name not in all_os:
4338 # build a list of nodes for this os containing empty lists
4339 # for each node in node_list
4340 all_os[name] = {}
4341 for nname in good_nodes:
4342 all_os[name][nname] = []
4343 # convert params from [name, help] to (name, help)
4344 params = [tuple(v) for v in params]
4345 all_os[name][node_name].append((path, status, diagnose,
4346 variants, params, api_versions))
4348 return all_os
4349 def _GetQueryData(self, lu):
4350 """Computes the list of nodes and their attributes.
4353 # Locking is not used
4354 assert not (compat.any(lu.glm.is_owned(level)
4355 for level in locking.LEVELS
4356 if level != locking.LEVEL_CLUSTER) or
4357 self.do_locking or self.use_locking)
4359 valid_nodes = [node.name
4360 for node in lu.cfg.GetAllNodesInfo().values()
4361 if not node.offline and node.vm_capable]
4362 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4363 cluster = lu.cfg.GetClusterInfo()
4365 data = {}
4367 for (os_name, os_data) in pol.items():
4368 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4369 hidden=(os_name in cluster.hidden_os),
4370 blacklisted=(os_name in cluster.blacklisted_os))
4372 variants = set()
4373 parameters = set()
4374 api_versions = set()
4376 for idx, osl in enumerate(os_data.values()):
4377 info.valid = bool(info.valid and osl and osl[0][1])
4378 if not info.valid:
4379 break
4381 (node_variants, node_params, node_api) = osl[0][3:6]
4382 if idx == 0:
4383 # First entry
4384 variants.update(node_variants)
4385 parameters.update(node_params)
4386 api_versions.update(node_api)
4387 else:
4388 # Filter out inconsistent values
4389 variants.intersection_update(node_variants)
4390 parameters.intersection_update(node_params)
4391 api_versions.intersection_update(node_api)
4393 info.variants = list(variants)
4394 info.parameters = list(parameters)
4395 info.api_versions = list(api_versions)
4397 data[os_name] = info
4399 # Prepare data in requested order
4400 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4401 if name in data]
4404 class LUOsDiagnose(NoHooksLU):
4405 """Logical unit for OS diagnose/query.
4411 def _BuildFilter(fields, names):
4412 """Builds a filter for querying OSes.
4415 name_filter = qlang.MakeSimpleFilter("name", names)
4417 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4418 # respective field is not requested
4419 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4420 for fname in ["hidden", "blacklisted"]
4421 if fname not in fields]
4422 if "valid" not in fields:
4423 status_filter.append([qlang.OP_TRUE, "valid"])
4426 status_filter.insert(0, qlang.OP_AND)
4428 status_filter = None
4430 if name_filter and status_filter:
4431 return [qlang.OP_AND, name_filter, status_filter]
4435 return status_filter
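# Illustrative example (assuming qlang.MakeSimpleFilter builds an OR-of-EQUAL
# filter, as used elsewhere in this module): for fields=["name"] and
# names=["debian-8"] the method would return roughly
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-8"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless those
# fields were explicitly requested.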
4437 def CheckArguments(self):
4438 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4439 self.op.output_fields, False)
4441 def ExpandNames(self):
4442 self.oq.ExpandNames(self)
4444 def Exec(self, feedback_fn):
4445 return self.oq.OldStyleQuery(self)
4448 class LUNodeRemove(LogicalUnit):
4449 """Logical unit for removing a node.
4452 HPATH = "node-remove"
4453 HTYPE = constants.HTYPE_NODE
4455 def BuildHooksEnv(self):
4458 This doesn't run on the target node in the pre phase as a failed
4459 node would then be impossible to remove.
4462 return {
4463 "OP_TARGET": self.op.node_name,
4464 "NODE_NAME": self.op.node_name,
4465 }
4467 def BuildHooksNodes(self):
4468 """Build hooks nodes.
4471 all_nodes = self.cfg.GetNodeList()
4472 try:
4473 all_nodes.remove(self.op.node_name)
4474 except ValueError:
4475 logging.warning("Node '%s', which is about to be removed, was not found"
4476 " in the list of all nodes", self.op.node_name)
4477 return (all_nodes, all_nodes)
4479 def CheckPrereq(self):
4480 """Check prerequisites.
4483 - the node exists in the configuration
4484 - it does not have primary or secondary instances
4485 - it's not the master
4487 Any errors are signaled by raising errors.OpPrereqError.
4490 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4491 node = self.cfg.GetNodeInfo(self.op.node_name)
4492 assert node is not None
4494 masternode = self.cfg.GetMasterNode()
4495 if node.name == masternode:
4496 raise errors.OpPrereqError("Node is the master node, failover to another"
4497 " node is required", errors.ECODE_INVAL)
4499 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4500 if node.name in instance.all_nodes:
4501 raise errors.OpPrereqError("Instance %s is still running on the node,"
4502 " please remove first" % instance_name,
4504 self.op.node_name = node.name
4507 def Exec(self, feedback_fn):
4508 """Removes the node from the cluster.
4511 node = self.node
4512 logging.info("Stopping the node daemon and removing configs from node %s",
4513 node.name)
4515 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4517 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4520 # Promote nodes to master candidate as needed
4521 _AdjustCandidatePool(self, exceptions=[node.name])
4522 self.context.RemoveNode(node.name)
4524 # Run post hooks on the node before it's removed
4525 _RunPostHook(self, node.name)
4527 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4528 msg = result.fail_msg
4530 self.LogWarning("Errors encountered on the remote node while leaving"
4531 " the cluster: %s", msg)
4533 # Remove node from our /etc/hosts
4534 if self.cfg.GetClusterInfo().modify_etc_hosts:
4535 master_node = self.cfg.GetMasterNode()
4536 result = self.rpc.call_etc_hosts_modify(master_node,
4537 constants.ETC_HOSTS_REMOVE,
4539 result.Raise("Can't update hosts file with new host data")
4540 _RedistributeAncillaryFiles(self)
4543 class _NodeQuery(_QueryBase):
4544 FIELDS = query.NODE_FIELDS
4546 def ExpandNames(self, lu):
4547 lu.needed_locks = {}
4548 lu.share_locks = _ShareAll()
4550 if self.names:
4551 self.wanted = _GetWantedNodes(lu, self.names)
4552 else:
4553 self.wanted = locking.ALL_SET
4555 self.do_locking = (self.use_locking and
4556 query.NQ_LIVE in self.requested_data)
4558 if self.do_locking:
4559 # If any non-static field is requested we need to lock the nodes
4560 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4562 def DeclareLocks(self, lu, level):
4563 pass
4565 def _GetQueryData(self, lu):
4566 """Computes the list of nodes and their attributes.
4569 all_info = lu.cfg.GetAllNodesInfo()
4571 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4573 # Gather data as requested
4574 if query.NQ_LIVE in self.requested_data:
4575 # filter out non-vm_capable nodes
4576 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4578 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4579 lu.cfg.GetHypervisorType())
4580 live_data = dict((name, nresult.payload)
4581 for (name, nresult) in node_data.items()
4582 if not nresult.fail_msg and nresult.payload)
4583 else:
4584 live_data = None
4586 if query.NQ_INST in self.requested_data:
4587 node_to_primary = dict([(name, set()) for name in nodenames])
4588 node_to_secondary = dict([(name, set()) for name in nodenames])
4590 inst_data = lu.cfg.GetAllInstancesInfo()
4592 for inst in inst_data.values():
4593 if inst.primary_node in node_to_primary:
4594 node_to_primary[inst.primary_node].add(inst.name)
4595 for secnode in inst.secondary_nodes:
4596 if secnode in node_to_secondary:
4597 node_to_secondary[secnode].add(inst.name)
4598 else:
4599 node_to_primary = None
4600 node_to_secondary = None
4602 if query.NQ_OOB in self.requested_data:
4603 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4604 for name, node in all_info.iteritems())
4605 else:
4606 oob_support = None
4608 if query.NQ_GROUP in self.requested_data:
4609 groups = lu.cfg.GetAllNodeGroupsInfo()
4610 else:
4611 groups = None
4613 return query.NodeQueryData([all_info[name] for name in nodenames],
4614 live_data, lu.cfg.GetMasterNode(),
4615 node_to_primary, node_to_secondary, groups,
4616 oob_support, lu.cfg.GetClusterInfo())
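# Roughly, each NQ_* flag in requested_data enables one extra data source:
# NQ_LIVE issues call_node_info RPCs against vm_capable nodes, NQ_INST builds
# the primary/secondary instance maps from the configuration, NQ_OOB records
# per-node out-of-band support and NQ_GROUP fetches the node group objects;
# e.g. a query for static fields only leaves all of these as None and needs
# no RPC at all.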
4619 class LUNodeQuery(NoHooksLU):
4620 """Logical unit for querying nodes.
4623 # pylint: disable=W0142
4626 def CheckArguments(self):
4627 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4628 self.op.output_fields, self.op.use_locking)
4630 def ExpandNames(self):
4631 self.nq.ExpandNames(self)
4633 def DeclareLocks(self, level):
4634 self.nq.DeclareLocks(self, level)
4636 def Exec(self, feedback_fn):
4637 return self.nq.OldStyleQuery(self)
4640 class LUNodeQueryvols(NoHooksLU):
4641 """Logical unit for getting volumes on node(s).
4645 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4646 _FIELDS_STATIC = utils.FieldSet("node")
4648 def CheckArguments(self):
4649 _CheckOutputFields(static=self._FIELDS_STATIC,
4650 dynamic=self._FIELDS_DYNAMIC,
4651 selected=self.op.output_fields)
4653 def ExpandNames(self):
4654 self.share_locks = _ShareAll()
4655 self.needed_locks = {}
4657 if not self.op.nodes:
4658 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4660 self.needed_locks[locking.LEVEL_NODE] = \
4661 _GetWantedNodes(self, self.op.nodes)
4663 def Exec(self, feedback_fn):
4664 """Computes the list of nodes and their attributes.
4667 nodenames = self.owned_locks(locking.LEVEL_NODE)
4668 volumes = self.rpc.call_node_volumes(nodenames)
4670 ilist = self.cfg.GetAllInstancesInfo()
4671 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4673 output = []
4674 for node in nodenames:
4675 nresult = volumes[node]
4676 if nresult.offline:
4677 continue
4678 msg = nresult.fail_msg
4679 if msg:
4680 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4681 continue
4683 node_vols = sorted(nresult.payload,
4684 key=operator.itemgetter("dev"))
4686 for vol in node_vols:
4687 node_output = []
4688 for field in self.op.output_fields:
4689 if field == "node":
4690 val = node
4691 elif field == "phys":
4692 val = vol["dev"]
4693 elif field == "vg":
4694 val = vol["vg"]
4695 elif field == "name":
4696 val = vol["name"]
4697 elif field == "size":
4698 val = int(float(vol["size"]))
4699 elif field == "instance":
4700 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4701 else:
4702 raise errors.ParameterError(field)
4703 node_output.append(str(val))
4705 output.append(node_output)
4707 return output
4710 class LUNodeQueryStorage(NoHooksLU):
4711 """Logical unit for getting information on storage units on node(s).
4714 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4717 def CheckArguments(self):
4718 _CheckOutputFields(static=self._FIELDS_STATIC,
4719 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4720 selected=self.op.output_fields)
4722 def ExpandNames(self):
4723 self.share_locks = _ShareAll()
4724 self.needed_locks = {}
4727 self.needed_locks[locking.LEVEL_NODE] = \
4728 _GetWantedNodes(self, self.op.nodes)
4730 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4732 def Exec(self, feedback_fn):
4733 """Computes the list of nodes and their attributes.
4736 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4738 # Always get name to sort by
4739 if constants.SF_NAME in self.op.output_fields:
4740 fields = self.op.output_fields[:]
4742 fields = [constants.SF_NAME] + self.op.output_fields
4744 # Never ask for node or type as it's only known to the LU
4745 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4746 while extra in fields:
4747 fields.remove(extra)
4749 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4750 name_idx = field_idx[constants.SF_NAME]
4752 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4753 data = self.rpc.call_storage_list(self.nodes,
4754 self.op.storage_type, st_args,
4755 self.op.name, fields)
4757 result = []
4759 for node in utils.NiceSort(self.nodes):
4760 nresult = data[node]
4761 if nresult.offline:
4762 continue
4764 msg = nresult.fail_msg
4765 if msg:
4766 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4767 continue
4769 rows = dict([(row[name_idx], row) for row in nresult.payload])
4771 for name in utils.NiceSort(rows.keys()):
4772 row = rows[name]
4773 out = []
4776 for field in self.op.output_fields:
4777 if field == constants.SF_NODE:
4778 val = node
4779 elif field == constants.SF_TYPE:
4780 val = self.op.storage_type
4781 elif field in field_idx:
4782 val = row[field_idx[field]]
4783 else:
4784 raise errors.ParameterError(field)
4786 out.append(val)
4788 result.append(out)
4790 return result
4793 class _InstanceQuery(_QueryBase):
4794 FIELDS = query.INSTANCE_FIELDS
4796 def ExpandNames(self, lu):
4797 lu.needed_locks = {}
4798 lu.share_locks = _ShareAll()
4800 if self.names:
4801 self.wanted = _GetWantedInstances(lu, self.names)
4802 else:
4803 self.wanted = locking.ALL_SET
4805 self.do_locking = (self.use_locking and
4806 query.IQ_LIVE in self.requested_data)
4807 if self.do_locking:
4808 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4809 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4810 lu.needed_locks[locking.LEVEL_NODE] = []
4811 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4813 self.do_grouplocks = (self.do_locking and
4814 query.IQ_NODES in self.requested_data)
4816 def DeclareLocks(self, lu, level):
4818 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4819 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4821 # Lock all groups used by instances optimistically; this requires going
4822 # via the node before it's locked, requiring verification later on
4823 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4824 set(group_uuid
4825 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4826 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4827 elif level == locking.LEVEL_NODE:
4828 lu._LockInstancesNodes() # pylint: disable=W0212
4830 @staticmethod
4831 def _CheckGroupLocks(lu):
4832 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4833 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4835 # Check if node groups for locked instances are still correct
4836 for instance_name in owned_instances:
4837 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4839 def _GetQueryData(self, lu):
4840 """Computes the list of instances and their attributes.
4843 if self.do_grouplocks:
4844 self._CheckGroupLocks(lu)
4846 cluster = lu.cfg.GetClusterInfo()
4847 all_info = lu.cfg.GetAllInstancesInfo()
4849 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4851 instance_list = [all_info[name] for name in instance_names]
4852 nodes = frozenset(itertools.chain(*(inst.all_nodes
4853 for inst in instance_list)))
4854 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4855 bad_nodes = []
4856 offline_nodes = []
4857 wrongnode_inst = set()
4859 # Gather data as requested
4860 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4861 live_data = {}
4862 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4863 for name in nodes:
4864 result = node_data[name]
4865 if result.offline:
4866 # offline nodes will be in both lists
4867 assert result.fail_msg
4868 offline_nodes.append(name)
4869 if result.fail_msg:
4870 bad_nodes.append(name)
4871 elif result.payload:
4872 for inst in result.payload:
4873 if inst in all_info:
4874 if all_info[inst].primary_node == name:
4875 live_data.update(result.payload)
4876 else:
4877 wrongnode_inst.add(inst)
4878 else:
4879 # orphan instance; we don't list it here as we don't
4880 # handle this case yet in the output of instance listing
4881 logging.warning("Orphan instance '%s' found on node %s",
4882 inst, name)
4883 # else no instance is alive
4884 else:
4885 live_data = {}
4887 if query.IQ_DISKUSAGE in self.requested_data:
4888 disk_usage = dict((inst.name,
4889 _ComputeDiskSize(inst.disk_template,
4890 [{constants.IDISK_SIZE: disk.size}
4891 for disk in inst.disks]))
4892 for inst in instance_list)
4893 else:
4894 disk_usage = None
4896 if query.IQ_CONSOLE in self.requested_data:
4897 consinfo = {}
4898 for inst in instance_list:
4899 if inst.name in live_data:
4900 # Instance is running
4901 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4902 else:
4903 consinfo[inst.name] = None
4904 assert set(consinfo.keys()) == set(instance_names)
4905 else:
4906 consinfo = None
4908 if query.IQ_NODES in self.requested_data:
4909 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4910 instance_list)))
4911 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4912 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4913 for uuid in set(map(operator.attrgetter("group"),
4914 nodes.values())))
4915 else:
4916 nodes = None
4917 groups = None
4919 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4920 disk_usage, offline_nodes, bad_nodes,
4921 live_data, wrongnode_inst, consinfo,
4922 nodes, groups)
4925 class LUQuery(NoHooksLU):
4926 """Query for resources/items of a certain kind.
4929 # pylint: disable=W0142
4932 def CheckArguments(self):
4933 qcls = _GetQueryImplementation(self.op.what)
4935 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4937 def ExpandNames(self):
4938 self.impl.ExpandNames(self)
4940 def DeclareLocks(self, level):
4941 self.impl.DeclareLocks(self, level)
4943 def Exec(self, feedback_fn):
4944 return self.impl.NewStyleQuery(self)
4947 class LUQueryFields(NoHooksLU):
4948 """Query for resources/items of a certain kind.
4951 # pylint: disable=W0142
4954 def CheckArguments(self):
4955 self.qcls = _GetQueryImplementation(self.op.what)
4957 def ExpandNames(self):
4958 self.needed_locks = {}
4960 def Exec(self, feedback_fn):
4961 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4964 class LUNodeModifyStorage(NoHooksLU):
4965 """Logical unit for modifying a storage volume on a node.
4970 def CheckArguments(self):
4971 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4973 storage_type = self.op.storage_type
4975 try:
4976 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4977 except KeyError:
4978 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4979 " modified" % storage_type,
4980 errors.ECODE_INVAL)
4982 diff = set(self.op.changes.keys()) - modifiable
4983 if diff:
4984 raise errors.OpPrereqError("The following fields can not be modified for"
4985 " storage units of type '%s': %r" %
4986 (storage_type, list(diff)),
4987 errors.ECODE_INVAL)
4989 def ExpandNames(self):
4990 self.needed_locks = {
4991 locking.LEVEL_NODE: self.op.node_name,
4994 def Exec(self, feedback_fn):
4995 """Computes the list of nodes and their attributes.
4998 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4999 result = self.rpc.call_storage_modify(self.op.node_name,
5000 self.op.storage_type, st_args,
5001 self.op.name, self.op.changes)
5002 result.Raise("Failed to modify storage unit '%s' on %s" %
5003 (self.op.name, self.op.node_name))
5006 class LUNodeAdd(LogicalUnit):
5007 """Logical unit for adding node to the cluster.
5010 HPATH = "node-add"
5011 HTYPE = constants.HTYPE_NODE
5012 _NFLAGS = ["master_capable", "vm_capable"]
5014 def CheckArguments(self):
5015 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5016 # validate/normalize the node name
5017 self.hostname = netutils.GetHostname(name=self.op.node_name,
5018 family=self.primary_ip_family)
5019 self.op.node_name = self.hostname.name
5021 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5022 raise errors.OpPrereqError("Cannot readd the master node",
5025 if self.op.readd and self.op.group:
5026 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5027 " being readded", errors.ECODE_INVAL)
5029 def BuildHooksEnv(self):
5032 This will run on all nodes before, and on all nodes + the new node after.
5036 "OP_TARGET": self.op.node_name,
5037 "NODE_NAME": self.op.node_name,
5038 "NODE_PIP": self.op.primary_ip,
5039 "NODE_SIP": self.op.secondary_ip,
5040 "MASTER_CAPABLE": str(self.op.master_capable),
5041 "VM_CAPABLE": str(self.op.vm_capable),
5044 def BuildHooksNodes(self):
5045 """Build hooks nodes.
5048 # Exclude added node
5049 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5050 post_nodes = pre_nodes + [self.op.node_name, ]
5052 return (pre_nodes, post_nodes)
5054 def CheckPrereq(self):
5055 """Check prerequisites.
5058 - the new node is not already in the config
5060 - its parameters (single/dual homed) matches the cluster
5062 Any errors are signaled by raising errors.OpPrereqError.
5066 hostname = self.hostname
5067 node = hostname.name
5068 primary_ip = self.op.primary_ip = hostname.ip
5069 if self.op.secondary_ip is None:
5070 if self.primary_ip_family == netutils.IP6Address.family:
5071 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5072 " IPv4 address must be given as secondary",
5074 self.op.secondary_ip = primary_ip
5076 secondary_ip = self.op.secondary_ip
5077 if not netutils.IP4Address.IsValid(secondary_ip):
5078 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5079 " address" % secondary_ip, errors.ECODE_INVAL)
5081 node_list = cfg.GetNodeList()
5082 if not self.op.readd and node in node_list:
5083 raise errors.OpPrereqError("Node %s is already in the configuration" %
5084 node, errors.ECODE_EXISTS)
5085 elif self.op.readd and node not in node_list:
5086 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5089 self.changed_primary_ip = False
5091 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5092 if self.op.readd and node == existing_node_name:
5093 if existing_node.secondary_ip != secondary_ip:
5094 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5095 " address configuration as before",
5097 if existing_node.primary_ip != primary_ip:
5098 self.changed_primary_ip = True
5102 if (existing_node.primary_ip == primary_ip or
5103 existing_node.secondary_ip == primary_ip or
5104 existing_node.primary_ip == secondary_ip or
5105 existing_node.secondary_ip == secondary_ip):
5106 raise errors.OpPrereqError("New node ip address(es) conflict with"
5107 " existing node %s" % existing_node.name,
5108 errors.ECODE_NOTUNIQUE)
5110 # After this 'if' block, None is no longer a valid value for the
5111 # _capable op attributes
5112 if self.op.readd:
5113 old_node = self.cfg.GetNodeInfo(node)
5114 assert old_node is not None, "Can't retrieve locked node %s" % node
5115 for attr in self._NFLAGS:
5116 if getattr(self.op, attr) is None:
5117 setattr(self.op, attr, getattr(old_node, attr))
5118 else:
5119 for attr in self._NFLAGS:
5120 if getattr(self.op, attr) is None:
5121 setattr(self.op, attr, True)
5123 if self.op.readd and not self.op.vm_capable:
5124 pri, sec = cfg.GetNodeInstances(node)
5126 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5127 " flag set to false, but it already holds"
5128 " instances" % node,
5131 # check that the type of the node (single versus dual homed) is the
5132 # same as for the master
5133 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5134 master_singlehomed = myself.secondary_ip == myself.primary_ip
5135 newbie_singlehomed = secondary_ip == primary_ip
5136 if master_singlehomed != newbie_singlehomed:
5137 if master_singlehomed:
5138 raise errors.OpPrereqError("The master has no secondary ip but the"
5139 " new node has one",
5142 raise errors.OpPrereqError("The master has a secondary ip but the"
5143 " new node doesn't have one",
5146 # checks reachability
5147 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5148 raise errors.OpPrereqError("Node not reachable by ping",
5149 errors.ECODE_ENVIRON)
5151 if not newbie_singlehomed:
5152 # check reachability from my secondary ip to newbie's secondary ip
5153 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5154 source=myself.secondary_ip):
5155 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5156 " based ping to node daemon port",
5157 errors.ECODE_ENVIRON)
5159 if self.op.readd:
5160 exceptions = [node]
5161 else:
5162 exceptions = []
5164 if self.op.master_capable:
5165 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5166 else:
5167 self.master_candidate = False
5169 if self.op.readd:
5170 self.new_node = old_node
5171 else:
5172 node_group = cfg.LookupNodeGroup(self.op.group)
5173 self.new_node = objects.Node(name=node,
5174 primary_ip=primary_ip,
5175 secondary_ip=secondary_ip,
5176 master_candidate=self.master_candidate,
5177 offline=False, drained=False,
5180 if self.op.ndparams:
5181 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5183 def Exec(self, feedback_fn):
5184 """Adds the new node to the cluster.
5187 new_node = self.new_node
5188 node = new_node.name
5190 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5193 # We are adding a new node, so we assume it's powered
5194 new_node.powered = True
5196 # for re-adds, reset the offline/drained/master-candidate flags;
5197 # we need to reset here, otherwise offline would prevent RPC calls
5198 # later in the procedure; this also means that if the re-add
5199 # fails, we are left with a non-offlined, broken node
5200 if self.op.readd:
5201 new_node.drained = new_node.offline = False # pylint: disable=W0201
5202 self.LogInfo("Readding a node, the offline/drained flags were reset")
5203 # if we demote the node, we do cleanup later in the procedure
5204 new_node.master_candidate = self.master_candidate
5205 if self.changed_primary_ip:
5206 new_node.primary_ip = self.op.primary_ip
5208 # copy the master/vm_capable flags
5209 for attr in self._NFLAGS:
5210 setattr(new_node, attr, getattr(self.op, attr))
5212 # notify the user about any possible mc promotion
5213 if new_node.master_candidate:
5214 self.LogInfo("Node will be a master candidate")
5216 if self.op.ndparams:
5217 new_node.ndparams = self.op.ndparams
5219 new_node.ndparams = {}
5221 # check connectivity
5222 result = self.rpc.call_version([node])[node]
5223 result.Raise("Can't get version information from node %s" % node)
5224 if constants.PROTOCOL_VERSION == result.payload:
5225 logging.info("Communication to node %s fine, sw version %s match",
5226 node, result.payload)
5228 raise errors.OpExecError("Version mismatch master version %s,"
5229 " node version %s" %
5230 (constants.PROTOCOL_VERSION, result.payload))
5232 # Add node to our /etc/hosts, and add key to known_hosts
5233 if self.cfg.GetClusterInfo().modify_etc_hosts:
5234 master_node = self.cfg.GetMasterNode()
5235 result = self.rpc.call_etc_hosts_modify(master_node,
5236 constants.ETC_HOSTS_ADD,
5239 result.Raise("Can't update hosts file with new host data")
5241 if new_node.secondary_ip != new_node.primary_ip:
5242 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5245 node_verify_list = [self.cfg.GetMasterNode()]
5246 node_verify_param = {
5247 constants.NV_NODELIST: ([node], {}),
5248 # TODO: do a node-net-test as well?
5251 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5252 self.cfg.GetClusterName())
5253 for verifier in node_verify_list:
5254 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5255 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5256 if nl_payload:
5257 for failed in nl_payload:
5258 feedback_fn("ssh/hostname verification failed"
5259 " (checking from %s): %s" %
5260 (verifier, nl_payload[failed]))
5261 raise errors.OpExecError("ssh/hostname verification failed")
5263 if self.op.readd:
5264 _RedistributeAncillaryFiles(self)
5265 self.context.ReaddNode(new_node)
5266 # make sure we redistribute the config
5267 self.cfg.Update(new_node, feedback_fn)
5268 # and make sure the new node will not have old files around
5269 if not new_node.master_candidate:
5270 result = self.rpc.call_node_demote_from_mc(new_node.name)
5271 msg = result.fail_msg
5272 if msg:
5273 self.LogWarning("Node failed to demote itself from master"
5274 " candidate status: %s" % msg)
5275 else:
5276 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5277 additional_vm=self.op.vm_capable)
5278 self.context.AddNode(new_node, self.proc.GetECId())
5281 class LUNodeSetParams(LogicalUnit):
5282 """Modifies the parameters of a node.
5284 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5285 to the node role (as _ROLE_*)
5286 @cvar _R2F: a dictionary from node role to tuples of flags
5287 @cvar _FLAGS: a list of attribute names corresponding to the flags
5290 HPATH = "node-modify"
5291 HTYPE = constants.HTYPE_NODE
5293 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5294 _F2R = {
5295 (True, False, False): _ROLE_CANDIDATE,
5296 (False, True, False): _ROLE_DRAINED,
5297 (False, False, True): _ROLE_OFFLINE,
5298 (False, False, False): _ROLE_REGULAR,
5299 }
5300 _R2F = dict((v, k) for k, v in _F2R.items())
5301 _FLAGS = ["master_candidate", "drained", "offline"]
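# Illustrative mapping: a node with flags (master_candidate=True,
# drained=False, offline=False) maps to _ROLE_CANDIDATE, and
# _R2F[_ROLE_OFFLINE] gives back (False, False, True); at most one of the
# three flags may be set at a time, which CheckArguments below enforces for
# the requested changes.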
5303 def CheckArguments(self):
5304 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5305 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5306 self.op.master_capable, self.op.vm_capable,
5307 self.op.secondary_ip, self.op.ndparams]
5308 if all_mods.count(None) == len(all_mods):
5309 raise errors.OpPrereqError("Please pass at least one modification",
5311 if all_mods.count(True) > 1:
5312 raise errors.OpPrereqError("Can't set the node into more than one"
5313 " state at the same time",
5316 # Boolean value that tells us whether we might be demoting from MC
5317 self.might_demote = (self.op.master_candidate == False or
5318 self.op.offline == True or
5319 self.op.drained == True or
5320 self.op.master_capable == False)
5322 if self.op.secondary_ip:
5323 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5324 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5325 " address" % self.op.secondary_ip,
5328 self.lock_all = self.op.auto_promote and self.might_demote
5329 self.lock_instances = self.op.secondary_ip is not None
5331 def _InstanceFilter(self, instance):
5332 """Filter for getting affected instances.
5335 return (instance.disk_template in constants.DTS_INT_MIRROR and
5336 self.op.node_name in instance.all_nodes)
5338 def ExpandNames(self):
5339 if self.lock_all:
5340 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5341 else:
5342 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5344 # Since modifying a node can have severe effects on currently running
5345 # operations the resource lock is at least acquired in shared mode
5346 self.needed_locks[locking.LEVEL_NODE_RES] = \
5347 self.needed_locks[locking.LEVEL_NODE]
5349 # Get node resource and instance locks in shared mode; they are not used
5350 # for anything but read-only access
5351 self.share_locks[locking.LEVEL_NODE_RES] = 1
5352 self.share_locks[locking.LEVEL_INSTANCE] = 1
5354 if self.lock_instances:
5355 self.needed_locks[locking.LEVEL_INSTANCE] = \
5356 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5358 def BuildHooksEnv(self):
5361 This runs on the master node.
5365 "OP_TARGET": self.op.node_name,
5366 "MASTER_CANDIDATE": str(self.op.master_candidate),
5367 "OFFLINE": str(self.op.offline),
5368 "DRAINED": str(self.op.drained),
5369 "MASTER_CAPABLE": str(self.op.master_capable),
5370 "VM_CAPABLE": str(self.op.vm_capable),
5373 def BuildHooksNodes(self):
5374 """Build hooks nodes.
5377 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5380 def CheckPrereq(self):
5381 """Check prerequisites.
5383 This only checks the instance list against the existing names.
5386 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5388 if self.lock_instances:
5389 affected_instances = \
5390 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5392 # Verify instance locks
5393 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5394 wanted_instances = frozenset(affected_instances.keys())
5395 if wanted_instances - owned_instances:
5396 raise errors.OpPrereqError("Instances affected by changing node %s's"
5397 " secondary IP address have changed since"
5398 " locks were acquired, wanted '%s', have"
5399 " '%s'; retry the operation" %
5401 utils.CommaJoin(wanted_instances),
5402 utils.CommaJoin(owned_instances)),
5405 affected_instances = None
5407 if (self.op.master_candidate is not None or
5408 self.op.drained is not None or
5409 self.op.offline is not None):
5410 # we can't change the master's node flags
5411 if self.op.node_name == self.cfg.GetMasterNode():
5412 raise errors.OpPrereqError("The master role can be changed"
5413 " only via master-failover",
5416 if self.op.master_candidate and not node.master_capable:
5417 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5418 " it a master candidate" % node.name,
5421 if self.op.vm_capable == False:
5422 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5423 if ipri or isec:
5424 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5425 " the vm_capable flag" % node.name,
5428 if node.master_candidate and self.might_demote and not self.lock_all:
5429 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5430 # check if after removing the current node, we're missing master
5432 (mc_remaining, mc_should, _) = \
5433 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5434 if mc_remaining < mc_should:
5435 raise errors.OpPrereqError("Not enough master candidates, please"
5436 " pass auto promote option to allow"
5437 " promotion", errors.ECODE_STATE)
5439 self.old_flags = old_flags = (node.master_candidate,
5440 node.drained, node.offline)
5441 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5442 self.old_role = old_role = self._F2R[old_flags]
5444 # Check for ineffective changes
5445 for attr in self._FLAGS:
5446 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5447 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5448 setattr(self.op, attr, None)
5450 # Past this point, any flag change to False means a transition
5451 # away from the respective state, as only real changes are kept
5453 # TODO: We might query the real power state if it supports OOB
5454 if _SupportsOob(self.cfg, node):
5455 if self.op.offline is False and not (node.powered or
5456 self.op.powered == True):
5457 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5458 " offline status can be reset") %
5460 elif self.op.powered is not None:
5461 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5462 " as it does not support out-of-band"
5463 " handling") % self.op.node_name)
5465 # If we're being deofflined/drained, we'll MC ourself if needed
5466 if (self.op.drained == False or self.op.offline == False or
5467 (self.op.master_capable and not node.master_capable)):
5468 if _DecideSelfPromotion(self):
5469 self.op.master_candidate = True
5470 self.LogInfo("Auto-promoting node to master candidate")
5472 # If we're no longer master capable, we'll demote ourselves from MC
5473 if self.op.master_capable == False and node.master_candidate:
5474 self.LogInfo("Demoting from master candidate")
5475 self.op.master_candidate = False
5478 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5479 if self.op.master_candidate:
5480 new_role = self._ROLE_CANDIDATE
5481 elif self.op.drained:
5482 new_role = self._ROLE_DRAINED
5483 elif self.op.offline:
5484 new_role = self._ROLE_OFFLINE
5485 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5486 # False is still in new flags, which means we're un-setting (the
5488 new_role = self._ROLE_REGULAR
5489 else: # no new flags, nothing, keep old role
5490 new_role = old_role
5492 self.new_role = new_role
5494 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5495 # Trying to transition out of offline status
5496 # TODO: Use standard RPC runner, but make sure it works when the node is
5497 # still marked offline
5498 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5500 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5501 " to report its version: %s" %
5502 (node.name, result.fail_msg),
5505 self.LogWarning("Transitioning node from offline to online state"
5506 " without using re-add. Please make sure the node"
5509 if self.op.secondary_ip:
5510 # Ok even without locking, because this can't be changed by any LU
5511 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5512 master_singlehomed = master.secondary_ip == master.primary_ip
5513 if master_singlehomed and self.op.secondary_ip:
5514 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5515 " homed cluster", errors.ECODE_INVAL)
5517 assert not (frozenset(affected_instances) -
5518 self.owned_locks(locking.LEVEL_INSTANCE))
5520 if node.offline:
5521 if affected_instances:
5522 raise errors.OpPrereqError("Cannot change secondary IP address:"
5523 " offline node has instances (%s)"
5524 " configured to use it" %
5525 utils.CommaJoin(affected_instances.keys()))
5526 else:
5527 # On online nodes, check that no instances are running, and that
5528 # the node has the new ip and we can reach it.
5529 for instance in affected_instances.values():
5530 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5531 msg="cannot change secondary ip")
5533 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5534 if master.name != node.name:
5535 # check reachability from master secondary ip to new secondary ip
5536 if not netutils.TcpPing(self.op.secondary_ip,
5537 constants.DEFAULT_NODED_PORT,
5538 source=master.secondary_ip):
5539 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5540 " based ping to node daemon port",
5541 errors.ECODE_ENVIRON)
5543 if self.op.ndparams:
5544 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5545 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5546 self.new_ndparams = new_ndparams
5548 def Exec(self, feedback_fn):
5552 node = self.node
5553 old_role = self.old_role
5554 new_role = self.new_role
5556 result = []
5558 if self.op.ndparams:
5559 node.ndparams = self.new_ndparams
5561 if self.op.powered is not None:
5562 node.powered = self.op.powered
5564 for attr in ["master_capable", "vm_capable"]:
5565 val = getattr(self.op, attr)
5566 if val is not None:
5567 setattr(node, attr, val)
5568 result.append((attr, str(val)))
5570 if new_role != old_role:
5571 # Tell the node to demote itself, if no longer MC and not offline
5572 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5573 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5574 if msg:
5575 self.LogWarning("Node failed to demote itself: %s", msg)
5577 new_flags = self._R2F[new_role]
5578 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5579 if of != nf:
5580 result.append((desc, str(nf)))
5581 (node.master_candidate, node.drained, node.offline) = new_flags
5583 # we locked all nodes, we adjust the CP before updating this node
5584 if self.lock_all:
5585 _AdjustCandidatePool(self, [node.name])
5587 if self.op.secondary_ip:
5588 node.secondary_ip = self.op.secondary_ip
5589 result.append(("secondary_ip", self.op.secondary_ip))
5591 # this will trigger configuration file update, if needed
5592 self.cfg.Update(node, feedback_fn)
5594 # this will trigger job queue propagation or cleanup if the mc
5596 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5597 self.context.ReaddNode(node)
5599 return result
5602 class LUNodePowercycle(NoHooksLU):
5603 """Powercycles a node.
5608 def CheckArguments(self):
5609 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5610 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5611 raise errors.OpPrereqError("The node is the master and the force"
5612 " parameter was not set",
5615 def ExpandNames(self):
5616 """Locking for PowercycleNode.
5618 This is a last-resort option and shouldn't block on other
5619 jobs. Therefore, we grab no locks.
5622 self.needed_locks = {}
5624 def Exec(self, feedback_fn):
5628 result = self.rpc.call_node_powercycle(self.op.node_name,
5629 self.cfg.GetHypervisorType())
5630 result.Raise("Failed to schedule the reboot")
5631 return result.payload
5634 class LUClusterQuery(NoHooksLU):
5635 """Query cluster configuration.
5640 def ExpandNames(self):
5641 self.needed_locks = {}
5643 def Exec(self, feedback_fn):
5644 """Return cluster config.
5647 cluster = self.cfg.GetClusterInfo()
5649 os_hvp = {}
5650 # Filter just for enabled hypervisors
5651 for os_name, hv_dict in cluster.os_hvp.items():
5652 os_hvp[os_name] = {}
5653 for hv_name, hv_params in hv_dict.items():
5654 if hv_name in cluster.enabled_hypervisors:
5655 os_hvp[os_name][hv_name] = hv_params
5657 # Convert ip_family to ip_version
5658 primary_ip_version = constants.IP4_VERSION
5659 if cluster.primary_ip_family == netutils.IP6Address.family:
5660 primary_ip_version = constants.IP6_VERSION
5662 result = {
5663 "software_version": constants.RELEASE_VERSION,
5664 "protocol_version": constants.PROTOCOL_VERSION,
5665 "config_version": constants.CONFIG_VERSION,
5666 "os_api_version": max(constants.OS_API_VERSIONS),
5667 "export_version": constants.EXPORT_VERSION,
5668 "architecture": (platform.architecture()[0], platform.machine()),
5669 "name": cluster.cluster_name,
5670 "master": cluster.master_node,
5671 "default_hypervisor": cluster.enabled_hypervisors[0],
5672 "enabled_hypervisors": cluster.enabled_hypervisors,
5673 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5674 for hypervisor_name in cluster.enabled_hypervisors]),
5676 "beparams": cluster.beparams,
5677 "osparams": cluster.osparams,
5678 "nicparams": cluster.nicparams,
5679 "ndparams": cluster.ndparams,
5680 "candidate_pool_size": cluster.candidate_pool_size,
5681 "master_netdev": cluster.master_netdev,
5682 "master_netmask": cluster.master_netmask,
5683 "use_external_mip_script": cluster.use_external_mip_script,
5684 "volume_group_name": cluster.volume_group_name,
5685 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5686 "file_storage_dir": cluster.file_storage_dir,
5687 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5688 "maintain_node_health": cluster.maintain_node_health,
5689 "ctime": cluster.ctime,
5690 "mtime": cluster.mtime,
5691 "uuid": cluster.uuid,
5692 "tags": list(cluster.GetTags()),
5693 "uid_pool": cluster.uid_pool,
5694 "default_iallocator": cluster.default_iallocator,
5695 "reserved_lvs": cluster.reserved_lvs,
5696 "primary_ip_version": primary_ip_version,
5697 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5698 "hidden_os": cluster.hidden_os,
5699 "blacklisted_os": cluster.blacklisted_os,
5705 class LUClusterConfigQuery(NoHooksLU):
5706 """Return configuration values.
5710 _FIELDS_DYNAMIC = utils.FieldSet()
5711 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5712 "watcher_pause", "volume_group_name")
5714 def CheckArguments(self):
5715 _CheckOutputFields(static=self._FIELDS_STATIC,
5716 dynamic=self._FIELDS_DYNAMIC,
5717 selected=self.op.output_fields)
5719 def ExpandNames(self):
5720 self.needed_locks = {}
5722 def Exec(self, feedback_fn):
5723 """Dump a representation of the cluster config to the standard output.
5727 for field in self.op.output_fields:
5728 if field == "cluster_name":
5729 entry = self.cfg.GetClusterName()
5730 elif field == "master_node":
5731 entry = self.cfg.GetMasterNode()
5732 elif field == "drain_flag":
5733 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5734 elif field == "watcher_pause":
5735 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5736 elif field == "volume_group_name":
5737 entry = self.cfg.GetVGName()
5739 raise errors.ParameterError(field)
5740 values.append(entry)
5744 class LUInstanceActivateDisks(NoHooksLU):
5745 """Bring up an instance's disks.
5750 def ExpandNames(self):
5751 self._ExpandAndLockInstance()
5752 self.needed_locks[locking.LEVEL_NODE] = []
5753 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5755 def DeclareLocks(self, level):
5756 if level == locking.LEVEL_NODE:
5757 self._LockInstancesNodes()
5759 def CheckPrereq(self):
5760 """Check prerequisites.
5762 This checks that the instance is in the cluster.
5765 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5766 assert self.instance is not None, \
5767 "Cannot retrieve locked instance %s" % self.op.instance_name
5768 _CheckNodeOnline(self, self.instance.primary_node)
5770 def Exec(self, feedback_fn):
5771 """Activate the disks.
5774 disks_ok, disks_info = \
5775 _AssembleInstanceDisks(self, self.instance,
5776 ignore_size=self.op.ignore_size)
5778 raise errors.OpExecError("Cannot activate block devices")
5783 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5785 """Prepare the block devices for an instance.
5787 This sets up the block devices on all nodes.
5789 @type lu: L{LogicalUnit}
5790 @param lu: the logical unit on whose behalf we execute
5791 @type instance: L{objects.Instance}
5792 @param instance: the instance for whose disks we assemble
5793 @type disks: list of L{objects.Disk} or None
5794 @param disks: which disks to assemble (or all, if None)
5795 @type ignore_secondaries: boolean
5796 @param ignore_secondaries: if true, errors on secondary nodes
5797 won't result in an error return from the function
5798 @type ignore_size: boolean
5799 @param ignore_size: if true, the current known size of the disk
5800 will not be used during the disk activation, useful for cases
5801 when the size is wrong
5802 @return: a tuple (disks_ok, device_info); disks_ok is False if the operation
5803 failed, and device_info is a list of (host, instance_visible_name,
5804 node_visible_name) tuples mapping node devices to instance devices
5809 iname = instance.name
5810 disks = _ExpandCheckDisks(instance, disks)
5812 # With the two-pass mechanism we try to reduce the window of
5813 # opportunity for the race condition of switching DRBD to primary
5814 # before the handshake has occurred, but we do not eliminate it
5816 # The proper fix would be to wait (with some limits) until the
5817 # connection has been made and drbd transitions from WFConnection
5818 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
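# Concrete illustration (hypothetical two-node DRBD disk on nodes A and B,
# A being the primary): pass 1 assembles the device on both A and B in
# secondary mode; pass 2 then promotes it on A only, so the window in which
# A is primary while the A<->B handshake is still in WFConnection is
# narrowed, but not closed.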
5821 # 1st pass, assemble on all nodes in secondary mode
5822 for idx, inst_disk in enumerate(disks):
5823 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5825 node_disk = node_disk.Copy()
5826 node_disk.UnsetSize()
5827 lu.cfg.SetDiskID(node_disk, node)
5828 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5829 msg = result.fail_msg
5831 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5832 " (is_primary=False, pass=1): %s",
5833 inst_disk.iv_name, node, msg)
5834 if not ignore_secondaries:
5837 # FIXME: race condition on drbd migration to primary
5839 # 2nd pass, do only the primary node
5840 for idx, inst_disk in enumerate(disks):
5843 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5844 if node != instance.primary_node:
5847 node_disk = node_disk.Copy()
5848 node_disk.UnsetSize()
5849 lu.cfg.SetDiskID(node_disk, node)
5850 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5851 msg = result.fail_msg
5853 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5854 " (is_primary=True, pass=2): %s",
5855 inst_disk.iv_name, node, msg)
5858 dev_path = result.payload
5860 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5862 # leave the disks configured for the primary node
5863 # this is a workaround that would be fixed better by
5864 # improving the logical/physical id handling
5866 lu.cfg.SetDiskID(disk, instance.primary_node)
5868 return disks_ok, device_info
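# Minimal usage sketch (this mirrors LUInstanceActivateDisks.Exec above):
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")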
5871 def _StartInstanceDisks(lu, instance, force):
5872 """Start the disks of an instance.
5875 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5876 ignore_secondaries=force)
5878 _ShutdownInstanceDisks(lu, instance)
5879 if force is not None and not force:
5880 lu.proc.LogWarning("", hint="If the message above refers to a"
5882 " you can retry the operation using '--force'.")
5883 raise errors.OpExecError("Disk consistency error")
5886 class LUInstanceDeactivateDisks(NoHooksLU):
5887 """Shutdown an instance's disks.
5892 def ExpandNames(self):
5893 self._ExpandAndLockInstance()
5894 self.needed_locks[locking.LEVEL_NODE] = []
5895 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5897 def DeclareLocks(self, level):
5898 if level == locking.LEVEL_NODE:
5899 self._LockInstancesNodes()
5901 def CheckPrereq(self):
5902 """Check prerequisites.
5904 This checks that the instance is in the cluster.
5907 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5908 assert self.instance is not None, \
5909 "Cannot retrieve locked instance %s" % self.op.instance_name
5911 def Exec(self, feedback_fn):
5912 """Deactivate the disks
5915 instance = self.instance
5917 _ShutdownInstanceDisks(self, instance)
5919 _SafeShutdownInstanceDisks(self, instance)
5922 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5923 """Shutdown block devices of an instance.
5925 This function checks if an instance is running, before calling
5926 _ShutdownInstanceDisks.
5929 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5930 _ShutdownInstanceDisks(lu, instance, disks=disks)
5933 def _ExpandCheckDisks(instance, disks):
5934 """Return the instance disks selected by the disks list
5936 @type disks: list of L{objects.Disk} or None
5937 @param disks: selected disks
5938 @rtype: list of L{objects.Disk}
5939 @return: selected instance disks to act on
5943 return instance.disks
5945 if not set(disks).issubset(instance.disks):
5946 raise errors.ProgrammerError("Can only act on disks belonging to the"
5951 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5952 """Shutdown block devices of an instance.
5954 This does the shutdown on all nodes of the instance.
5956 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
5961 disks = _ExpandCheckDisks(instance, disks)
5964 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5965 lu.cfg.SetDiskID(top_disk, node)
5966 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5967 msg = result.fail_msg
5969 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5970 disk.iv_name, node, msg)
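# The condition below treats the error as a real failure only if it happened
# on the primary node (and ignore_primary is not set) or on a secondary node
# that is online; errors from offline secondaries are merely logged above.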
5971 if ((node == instance.primary_node and not ignore_primary) or
5972 (node != instance.primary_node and not result.offline)):
5977 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5978 """Checks if a node has enough free memory.
5980 This function checks if a given node has the needed amount of free
5981 memory. In case the node has less memory or we cannot get the
5982 information from the node, this function raises an OpPrereqError
5985 @type lu: C{LogicalUnit}
5986 @param lu: a logical unit from which we get configuration data
5988 @param node: the node to check
5989 @type reason: C{str}
5990 @param reason: string to use in the error message
5991 @type requested: C{int}
5992 @param requested: the amount of memory in MiB to check for
5993 @type hypervisor_name: C{str}
5994 @param hypervisor_name: the hypervisor to ask for memory stats
5995 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5996 we cannot check the node
5999 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6000 nodeinfo[node].Raise("Can't get data from node %s" % node,
6001 prereq=True, ecode=errors.ECODE_ENVIRON)
6002 free_mem = nodeinfo[node].payload.get("memory_free", None)
6003 if not isinstance(free_mem, int):
6004 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6005 " was '%s'" % (node, free_mem),
6006 errors.ECODE_ENVIRON)
6007 if requested > free_mem:
6008 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6009 " needed %s MiB, available %s MiB" %
6010 (node, reason, requested, free_mem),
6014 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6015 """Checks if nodes have enough free disk space in the all VGs.
6017 This function check if all given nodes have the needed amount of
6018 free disk. In case any node has less disk or we cannot get the
6019 information from the node, this function raise an OpPrereqError
6022 @type lu: C{LogicalUnit}
6023 @param lu: a logical unit from which we get configuration data
6024 @type nodenames: C{list}
6025 @param nodenames: the list of node names to check
6026 @type req_sizes: C{dict}
6027 @param req_sizes: the hash of vg and corresponding amount of disk in
6029 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6030 or we cannot check the node
6033 for vg, req_size in req_sizes.items():
6034 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
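# Illustrative shape of the req_sizes argument handled above (hypothetical
# values): {"xenvg": 10240, "datavg": 2048}, i.e. VG name -> required MiB,
# each entry being checked on every node via _CheckNodesFreeDiskOnVG.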
6037 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6038 """Checks if nodes have enough free disk space in the specified VG.
6040 This function checks if all given nodes have the needed amount of
6041 free disk. In case any node has less disk or we cannot get the
6042 information from the node, this function raises an OpPrereqError
6045 @type lu: C{LogicalUnit}
6046 @param lu: a logical unit from which we get configuration data
6047 @type nodenames: C{list}
6048 @param nodenames: the list of node names to check
6050 @param vg: the volume group to check
6051 @type requested: C{int}
6052 @param requested: the amount of disk in MiB to check for
6053 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6054 or we cannot check the node
6057 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6058 for node in nodenames:
6059 info = nodeinfo[node]
6060 info.Raise("Cannot get current information from node %s" % node,
6061 prereq=True, ecode=errors.ECODE_ENVIRON)
6062 vg_free = info.payload.get("vg_free", None)
6063 if not isinstance(vg_free, int):
6064 raise errors.OpPrereqError("Can't compute free disk space on node"
6065 " %s for vg %s, result was '%s'" %
6066 (node, vg, vg_free), errors.ECODE_ENVIRON)
6067 if requested > vg_free:
6068 raise errors.OpPrereqError("Not enough disk space on target node %s"
6069 " vg %s: required %d MiB, available %d MiB" %
6070 (node, vg, requested, vg_free),
6074 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6075 """Checks if nodes have enough physical CPUs
6077 This function checks if all given nodes have the needed number of
6078 physical CPUs. In case any node has less CPUs or we cannot get the
6079 information from the node, this function raises an OpPrereqError
6082 @type lu: C{LogicalUnit}
6083 @param lu: a logical unit from which we get configuration data
6084 @type nodenames: C{list}
6085 @param nodenames: the list of node names to check
6086 @type requested: C{int}
6087 @param requested: the minimum acceptable number of physical CPUs
6088 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6089 or we cannot check the node
6092 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6093 for node in nodenames:
6094 info = nodeinfo[node]
6095 info.Raise("Cannot get current information from node %s" % node,
6096 prereq=True, ecode=errors.ECODE_ENVIRON)
6097 num_cpus = info.payload.get("cpu_total", None)
6098 if not isinstance(num_cpus, int):
6099 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6100 " on node %s, result was '%s'" %
6101 (node, num_cpus), errors.ECODE_ENVIRON)
6102 if requested > num_cpus:
6103 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6104 "required" % (node, num_cpus, requested),
6108 class LUInstanceStartup(LogicalUnit):
6109 """Starts an instance.
6112 HPATH = "instance-start"
6113 HTYPE = constants.HTYPE_INSTANCE
6116 def CheckArguments(self):
6118 if self.op.beparams:
6119 # fill the beparams dict
6120 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6122 def ExpandNames(self):
6123 self._ExpandAndLockInstance()
6125 def BuildHooksEnv(self):
6128 This runs on master, primary and secondary nodes of the instance.
6132 "FORCE": self.op.force,
6135 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6139 def BuildHooksNodes(self):
6140 """Build hooks nodes.
6143 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6146 def CheckPrereq(self):
6147 """Check prerequisites.
6149 This checks that the instance is in the cluster.
6152 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6153 assert self.instance is not None, \
6154 "Cannot retrieve locked instance %s" % self.op.instance_name
6157 if self.op.hvparams:
6158 # check hypervisor parameter syntax (locally)
6159 cluster = self.cfg.GetClusterInfo()
6160 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6161 filled_hvp = cluster.FillHV(instance)
6162 filled_hvp.update(self.op.hvparams)
6163 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6164 hv_type.CheckParameterSyntax(filled_hvp)
6165 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
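# Parameter resolution sketch: cluster.FillHV(instance) yields the effective
# cluster+instance hypervisor parameters, the per-opcode overrides from
# self.op.hvparams are applied on top, and only then are the syntax and
# node-level checks run on the combined result.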
6167 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6169 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6171 if self.primary_offline and self.op.ignore_offline_nodes:
6172 self.proc.LogWarning("Ignoring offline primary node")
6174 if self.op.hvparams or self.op.beparams:
6175 self.proc.LogWarning("Overridden parameters are ignored")
6177 _CheckNodeOnline(self, instance.primary_node)
6179 bep = self.cfg.GetClusterInfo().FillBE(instance)
6181 # check bridges existence
6182 _CheckInstanceBridgesExist(self, instance)
6184 remote_info = self.rpc.call_instance_info(instance.primary_node,
6186 instance.hypervisor)
6187 remote_info.Raise("Error checking node %s" % instance.primary_node,
6188 prereq=True, ecode=errors.ECODE_ENVIRON)
6189 if not remote_info.payload: # not running already
6190 _CheckNodeFreeMemory(self, instance.primary_node,
6191 "starting instance %s" % instance.name,
6192 bep[constants.BE_MEMORY], instance.hypervisor)
6194 def Exec(self, feedback_fn):
6195 """Start the instance.
6198 instance = self.instance
6199 force = self.op.force
6201 if not self.op.no_remember:
6202 self.cfg.MarkInstanceUp(instance.name)
6204 if self.primary_offline:
6205 assert self.op.ignore_offline_nodes
6206 self.proc.LogInfo("Primary node offline, marked instance as started")
6208 node_current = instance.primary_node
6210 _StartInstanceDisks(self, instance, force)
6213 self.rpc.call_instance_start(node_current,
6214 (instance, self.op.hvparams,
6216 self.op.startup_paused)
6217 msg = result.fail_msg
6219 _ShutdownInstanceDisks(self, instance)
6220 raise errors.OpExecError("Could not start instance: %s" % msg)
6223 class LUInstanceReboot(LogicalUnit):
6224 """Reboot an instance.
6227 HPATH = "instance-reboot"
6228 HTYPE = constants.HTYPE_INSTANCE
6231 def ExpandNames(self):
6232 self._ExpandAndLockInstance()
6234 def BuildHooksEnv(self):
6237 This runs on master, primary and secondary nodes of the instance.
6241 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6242 "REBOOT_TYPE": self.op.reboot_type,
6243 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6246 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6250 def BuildHooksNodes(self):
6251 """Build hooks nodes.
6254 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6257 def CheckPrereq(self):
6258 """Check prerequisites.
6260 This checks that the instance is in the cluster.
6263 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264 assert self.instance is not None, \
6265 "Cannot retrieve locked instance %s" % self.op.instance_name
6266 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6267 _CheckNodeOnline(self, instance.primary_node)
6269 # check bridges existence
6270 _CheckInstanceBridgesExist(self, instance)
6272 def Exec(self, feedback_fn):
6273 """Reboot the instance.
6276 instance = self.instance
6277 ignore_secondaries = self.op.ignore_secondaries
6278 reboot_type = self.op.reboot_type
6280 remote_info = self.rpc.call_instance_info(instance.primary_node,
6282 instance.hypervisor)
6283 remote_info.Raise("Error checking node %s" % instance.primary_node)
6284 instance_running = bool(remote_info.payload)
6286 node_current = instance.primary_node
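# Soft and hard reboots are delegated to the hypervisor on the primary node;
# a full reboot is emulated below by shutting the instance down, deactivating
# its disks and starting it again.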
6288 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6289 constants.INSTANCE_REBOOT_HARD]:
6290 for disk in instance.disks:
6291 self.cfg.SetDiskID(disk, node_current)
6292 result = self.rpc.call_instance_reboot(node_current, instance,
6294 self.op.shutdown_timeout)
6295 result.Raise("Could not reboot instance")
6297 if instance_running:
6298 result = self.rpc.call_instance_shutdown(node_current, instance,
6299 self.op.shutdown_timeout)
6300 result.Raise("Could not shutdown instance for full reboot")
6301 _ShutdownInstanceDisks(self, instance)
6303 self.LogInfo("Instance %s was already stopped, starting now",
6305 _StartInstanceDisks(self, instance, ignore_secondaries)
6306 result = self.rpc.call_instance_start(node_current,
6307 (instance, None, None), False)
6308 msg = result.fail_msg
6310 _ShutdownInstanceDisks(self, instance)
6311 raise errors.OpExecError("Could not start instance for"
6312 " full reboot: %s" % msg)
6314 self.cfg.MarkInstanceUp(instance.name)
6317 class LUInstanceShutdown(LogicalUnit):
6318 """Shutdown an instance.
6321 HPATH = "instance-stop"
6322 HTYPE = constants.HTYPE_INSTANCE
6325 def ExpandNames(self):
6326 self._ExpandAndLockInstance()
6328 def BuildHooksEnv(self):
6331 This runs on master, primary and secondary nodes of the instance.
6334 env = _BuildInstanceHookEnvByObject(self, self.instance)
6335 env["TIMEOUT"] = self.op.timeout
6338 def BuildHooksNodes(self):
6339 """Build hooks nodes.
6342 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6345 def CheckPrereq(self):
6346 """Check prerequisites.
6348 This checks that the instance is in the cluster.
6351 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6352 assert self.instance is not None, \
6353 "Cannot retrieve locked instance %s" % self.op.instance_name
6355 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6357 self.primary_offline = \
6358 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6360 if self.primary_offline and self.op.ignore_offline_nodes:
6361 self.proc.LogWarning("Ignoring offline primary node")
6363 _CheckNodeOnline(self, self.instance.primary_node)
6365 def Exec(self, feedback_fn):
6366 """Shutdown the instance.
6369 instance = self.instance
6370 node_current = instance.primary_node
6371 timeout = self.op.timeout
6373 if not self.op.no_remember:
6374 self.cfg.MarkInstanceDown(instance.name)
6376 if self.primary_offline:
6377 assert self.op.ignore_offline_nodes
6378 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6380 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6381 msg = result.fail_msg
6383 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6385 _ShutdownInstanceDisks(self, instance)
6388 class LUInstanceReinstall(LogicalUnit):
6389 """Reinstall an instance.
6392 HPATH = "instance-reinstall"
6393 HTYPE = constants.HTYPE_INSTANCE
6396 def ExpandNames(self):
6397 self._ExpandAndLockInstance()
6399 def BuildHooksEnv(self):
6402 This runs on master, primary and secondary nodes of the instance.
6405 return _BuildInstanceHookEnvByObject(self, self.instance)
6407 def BuildHooksNodes(self):
6408 """Build hooks nodes.
6411 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6414 def CheckPrereq(self):
6415 """Check prerequisites.
6417 This checks that the instance is in the cluster and is not running.
6420 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6421 assert instance is not None, \
6422 "Cannot retrieve locked instance %s" % self.op.instance_name
6423 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6424 " offline, cannot reinstall")
6425 for node in instance.secondary_nodes:
6426 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6427 " cannot reinstall")
6429 if instance.disk_template == constants.DT_DISKLESS:
6430 raise errors.OpPrereqError("Instance '%s' has no disks" %
6431 self.op.instance_name,
6433 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6435 if self.op.os_type is not None:
6437 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6438 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6439 instance_os = self.op.os_type
6441 instance_os = instance.os
6443 nodelist = list(instance.all_nodes)
6445 if self.op.osparams:
6446 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6447 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6448 self.os_inst = i_osdict # the new dict (without defaults)
6452 self.instance = instance
6454 def Exec(self, feedback_fn):
6455 """Reinstall the instance.
6458 inst = self.instance
6460 if self.op.os_type is not None:
6461 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6462 inst.os = self.op.os_type
6463 # Write to configuration
6464 self.cfg.Update(inst, feedback_fn)
6466 _StartInstanceDisks(self, inst, None)
6468 feedback_fn("Running the instance OS create scripts...")
6469 # FIXME: pass debug option from opcode to backend
6470 result = self.rpc.call_instance_os_add(inst.primary_node,
6471 (inst, self.os_inst), True,
6472 self.op.debug_level)
6473 result.Raise("Could not install OS for instance %s on node %s" %
6474 (inst.name, inst.primary_node))
6476 _ShutdownInstanceDisks(self, inst)
6479 class LUInstanceRecreateDisks(LogicalUnit):
6480 """Recreate an instance's missing disks.
6483 HPATH = "instance-recreate-disks"
6484 HTYPE = constants.HTYPE_INSTANCE
6487 def CheckArguments(self):
6488 # normalise the disk list
6489 self.op.disks = sorted(frozenset(self.op.disks))
6491 def ExpandNames(self):
6492 self._ExpandAndLockInstance()
6493 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6495 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6496 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6498 self.needed_locks[locking.LEVEL_NODE] = []
6500 def DeclareLocks(self, level):
6501 if level == locking.LEVEL_NODE:
6502 # if we replace the nodes, we only need to lock the old primary,
6503 # otherwise we need to lock all nodes for disk re-creation
6504 primary_only = bool(self.op.nodes)
6505 self._LockInstancesNodes(primary_only=primary_only)
6506 elif level == locking.LEVEL_NODE_RES:
6508 self.needed_locks[locking.LEVEL_NODE_RES] = \
6509 self.needed_locks[locking.LEVEL_NODE][:]
6511 def BuildHooksEnv(self):
6514 This runs on master, primary and secondary nodes of the instance.
6517 return _BuildInstanceHookEnvByObject(self, self.instance)
6519 def BuildHooksNodes(self):
6520 """Build hooks nodes.
6523 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6526 def CheckPrereq(self):
6527 """Check prerequisites.
6529 This checks that the instance is in the cluster and is not running.
6532 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6533 assert instance is not None, \
6534 "Cannot retrieve locked instance %s" % self.op.instance_name
6536 if len(self.op.nodes) != len(instance.all_nodes):
6537 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6538 " %d replacement nodes were specified" %
6539 (instance.name, len(instance.all_nodes),
6540 len(self.op.nodes)),
6542 assert instance.disk_template != constants.DT_DRBD8 or \
6543 len(self.op.nodes) == 2
6544 assert instance.disk_template != constants.DT_PLAIN or \
6545 len(self.op.nodes) == 1
6546 primary_node = self.op.nodes[0]
6548 primary_node = instance.primary_node
6549 _CheckNodeOnline(self, primary_node)
6551 if instance.disk_template == constants.DT_DISKLESS:
6552 raise errors.OpPrereqError("Instance '%s' has no disks" %
6553 self.op.instance_name, errors.ECODE_INVAL)
6554 # if we replace nodes *and* the old primary is offline, we don't check the instance state
6556 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6557 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6558 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6559 if not (self.op.nodes and old_pnode.offline):
6560 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6561 msg="cannot recreate disks")
6563 if not self.op.disks:
6564 self.op.disks = range(len(instance.disks))
6566 for idx in self.op.disks:
6567 if idx >= len(instance.disks):
6568 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6570 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6571 raise errors.OpPrereqError("Can't recreate disks partially and"
6572 " change the nodes at the same time",
6574 self.instance = instance
6576 def Exec(self, feedback_fn):
6577 """Recreate the disks.
6580 instance = self.instance
6582 assert (self.owned_locks(locking.LEVEL_NODE) ==
6583 self.owned_locks(locking.LEVEL_NODE_RES))
6586 mods = [] # keeps track of needed logical_id changes
6588 for idx, disk in enumerate(instance.disks):
6589 if idx not in self.op.disks: # disk idx has not been passed in
6592 # update secondaries for disks, if needed
6594 if disk.dev_type == constants.LD_DRBD8:
6595 # need to update the nodes and minors
6596 assert len(self.op.nodes) == 2
6597 assert len(disk.logical_id) == 6 # otherwise disk internals
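# For DRBD8 the logical_id is a 6-tuple laid out as
#   (node_a, node_b, port, minor_a, minor_b, secret)
# which is why only the nodes and the minors are replaced below while the
# port and the shared secret are preserved.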
6599 (_, _, old_port, _, _, old_secret) = disk.logical_id
6600 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6601 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6602 new_minors[0], new_minors[1], old_secret)
6603 assert len(disk.logical_id) == len(new_id)
6604 mods.append((idx, new_id))
6606 # now that we have passed all asserts above, we can apply the mods
6607 # in a single run (to avoid partial changes)
6608 for idx, new_id in mods:
6609 instance.disks[idx].logical_id = new_id
6611 # change primary node, if needed
6613 instance.primary_node = self.op.nodes[0]
6614 self.LogWarning("Changing the instance's nodes, you will have to"
6615 " remove any disks left on the older nodes manually")
6618 self.cfg.Update(instance, feedback_fn)
6620 _CreateDisks(self, instance, to_skip=to_skip)
6623 class LUInstanceRename(LogicalUnit):
6624 """Rename an instance.
6627 HPATH = "instance-rename"
6628 HTYPE = constants.HTYPE_INSTANCE
6630 def CheckArguments(self):
6634 if self.op.ip_check and not self.op.name_check:
6635 # TODO: make the ip check more flexible and not depend on the name check
6636 raise errors.OpPrereqError("IP address check requires a name check",
6639 def BuildHooksEnv(self):
6642 This runs on master, primary and secondary nodes of the instance.
6645 env = _BuildInstanceHookEnvByObject(self, self.instance)
6646 env["INSTANCE_NEW_NAME"] = self.op.new_name
6649 def BuildHooksNodes(self):
6650 """Build hooks nodes.
6653 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6656 def CheckPrereq(self):
6657 """Check prerequisites.
6659 This checks that the instance is in the cluster and is not running.
6662 self.op.instance_name = _ExpandInstanceName(self.cfg,
6663 self.op.instance_name)
6664 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6665 assert instance is not None
6666 _CheckNodeOnline(self, instance.primary_node)
6667 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6668 msg="cannot rename")
6669 self.instance = instance
6671 new_name = self.op.new_name
6672 if self.op.name_check:
6673 hostname = netutils.GetHostname(name=new_name)
6674 if hostname != new_name:
6675 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6677 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6678 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6679 " same as given hostname '%s'") %
6680 (hostname.name, self.op.new_name),
6682 new_name = self.op.new_name = hostname.name
6683 if (self.op.ip_check and
6684 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6685 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6686 (hostname.ip, new_name),
6687 errors.ECODE_NOTUNIQUE)
6689 instance_list = self.cfg.GetInstanceList()
6690 if new_name in instance_list and new_name != instance.name:
6691 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6692 new_name, errors.ECODE_EXISTS)
6694 def Exec(self, feedback_fn):
6695 """Rename the instance.
6698 inst = self.instance
6699 old_name = inst.name
6701 rename_file_storage = False
6702 if (inst.disk_template in constants.DTS_FILEBASED and
6703 self.op.new_name != inst.name):
6704 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6705 rename_file_storage = True
6707 self.cfg.RenameInstance(inst.name, self.op.new_name)
6708 # Change the instance lock. This is definitely safe while we hold the BGL.
6709 # Otherwise the new lock would have to be added in acquired mode.
6711 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6712 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6714 # re-read the instance from the configuration after rename
6715 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6717 if rename_file_storage:
6718 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6719 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6720 old_file_storage_dir,
6721 new_file_storage_dir)
6722 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6723 " (but the instance has been renamed in Ganeti)" %
6724 (inst.primary_node, old_file_storage_dir,
6725 new_file_storage_dir))
6727 _StartInstanceDisks(self, inst, None)
6729 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6730 old_name, self.op.debug_level)
6731 msg = result.fail_msg
6733 msg = ("Could not run OS rename script for instance %s on node %s"
6734 " (but the instance has been renamed in Ganeti): %s" %
6735 (inst.name, inst.primary_node, msg))
6736 self.proc.LogWarning(msg)
6738 _ShutdownInstanceDisks(self, inst)
6743 class LUInstanceRemove(LogicalUnit):
6744 """Remove an instance.
6747 HPATH = "instance-remove"
6748 HTYPE = constants.HTYPE_INSTANCE
6751 def ExpandNames(self):
6752 self._ExpandAndLockInstance()
6753 self.needed_locks[locking.LEVEL_NODE] = []
6754 self.needed_locks[locking.LEVEL_NODE_RES] = []
6755 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6757 def DeclareLocks(self, level):
6758 if level == locking.LEVEL_NODE:
6759 self._LockInstancesNodes()
6760 elif level == locking.LEVEL_NODE_RES:
6762 self.needed_locks[locking.LEVEL_NODE_RES] = \
6763 self.needed_locks[locking.LEVEL_NODE][:]
6765 def BuildHooksEnv(self):
6768 This runs on master, primary and secondary nodes of the instance.
6771 env = _BuildInstanceHookEnvByObject(self, self.instance)
6772 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6775 def BuildHooksNodes(self):
6776 """Build hooks nodes.
6779 nl = [self.cfg.GetMasterNode()]
6780 nl_post = list(self.instance.all_nodes) + nl
6781 return (nl, nl_post)
6783 def CheckPrereq(self):
6784 """Check prerequisites.
6786 This checks that the instance is in the cluster.
6789 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6790 assert self.instance is not None, \
6791 "Cannot retrieve locked instance %s" % self.op.instance_name
6793 def Exec(self, feedback_fn):
6794 """Remove the instance.
6797 instance = self.instance
6798 logging.info("Shutting down instance %s on node %s",
6799 instance.name, instance.primary_node)
6801 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6802 self.op.shutdown_timeout)
6803 msg = result.fail_msg
6805 if self.op.ignore_failures:
6806 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6808 raise errors.OpExecError("Could not shutdown instance %s on"
6810 (instance.name, instance.primary_node, msg))
6812 assert (self.owned_locks(locking.LEVEL_NODE) ==
6813 self.owned_locks(locking.LEVEL_NODE_RES))
6814 assert not (set(instance.all_nodes) -
6815 self.owned_locks(locking.LEVEL_NODE)), \
6816 "Not owning correct locks"
6818 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6821 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6822 """Utility function to remove an instance.
6825 logging.info("Removing block devices for instance %s", instance.name)
6827 if not _RemoveDisks(lu, instance):
6828 if not ignore_failures:
6829 raise errors.OpExecError("Can't remove instance's disks")
6830 feedback_fn("Warning: can't remove instance's disks")
6832 logging.info("Removing instance %s out of cluster config", instance.name)
6834 lu.cfg.RemoveInstance(instance.name)
6836 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6837 "Instance lock removal conflict"
6839 # Remove lock for the instance
6840 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6843 class LUInstanceQuery(NoHooksLU):
6844 """Logical unit for querying instances.
6847 # pylint: disable=W0142
6850 def CheckArguments(self):
6851 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6852 self.op.output_fields, self.op.use_locking)
6854 def ExpandNames(self):
6855 self.iq.ExpandNames(self)
6857 def DeclareLocks(self, level):
6858 self.iq.DeclareLocks(self, level)
6860 def Exec(self, feedback_fn):
6861 return self.iq.OldStyleQuery(self)
6864 class LUInstanceFailover(LogicalUnit):
6865 """Failover an instance.
6868 HPATH = "instance-failover"
6869 HTYPE = constants.HTYPE_INSTANCE
6872 def CheckArguments(self):
6873 """Check the arguments.
6876 self.iallocator = getattr(self.op, "iallocator", None)
6877 self.target_node = getattr(self.op, "target_node", None)
6879 def ExpandNames(self):
6880 self._ExpandAndLockInstance()
6882 if self.op.target_node is not None:
6883 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6885 self.needed_locks[locking.LEVEL_NODE] = []
6886 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6888 ignore_consistency = self.op.ignore_consistency
6889 shutdown_timeout = self.op.shutdown_timeout
6890 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6893 ignore_consistency=ignore_consistency,
6894 shutdown_timeout=shutdown_timeout)
6895 self.tasklets = [self._migrater]
6897 def DeclareLocks(self, level):
6898 if level == locking.LEVEL_NODE:
6899 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6900 if instance.disk_template in constants.DTS_EXT_MIRROR:
6901 if self.op.target_node is None:
6902 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6904 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6905 self.op.target_node]
6906 del self.recalculate_locks[locking.LEVEL_NODE]
6908 self._LockInstancesNodes()
6910 def BuildHooksEnv(self):
6913 This runs on master, primary and secondary nodes of the instance.
6916 instance = self._migrater.instance
6917 source_node = instance.primary_node
6918 target_node = self.op.target_node
6920 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6921 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6922 "OLD_PRIMARY": source_node,
6923 "NEW_PRIMARY": target_node,
6926 if instance.disk_template in constants.DTS_INT_MIRROR:
6927 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6928 env["NEW_SECONDARY"] = source_node
6930 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6932 env.update(_BuildInstanceHookEnvByObject(self, instance))
6936 def BuildHooksNodes(self):
6937 """Build hooks nodes.
6940 instance = self._migrater.instance
6941 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6942 return (nl, nl + [instance.primary_node])
6945 class LUInstanceMigrate(LogicalUnit):
6946 """Migrate an instance.
6948 This is migration without shutting down, compared to the failover,
6949 which is done with shutdown.
6952 HPATH = "instance-migrate"
6953 HTYPE = constants.HTYPE_INSTANCE
6956 def ExpandNames(self):
6957 self._ExpandAndLockInstance()
6959 if self.op.target_node is not None:
6960 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6962 self.needed_locks[locking.LEVEL_NODE] = []
6963 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6965 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6966 cleanup=self.op.cleanup,
6968 fallback=self.op.allow_failover)
6969 self.tasklets = [self._migrater]
6971 def DeclareLocks(self, level):
6972 if level == locking.LEVEL_NODE:
6973 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6974 if instance.disk_template in constants.DTS_EXT_MIRROR:
6975 if self.op.target_node is None:
6976 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6978 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6979 self.op.target_node]
6980 del self.recalculate_locks[locking.LEVEL_NODE]
6982 self._LockInstancesNodes()
6984 def BuildHooksEnv(self):
6987 This runs on master, primary and secondary nodes of the instance.
6990 instance = self._migrater.instance
6991 source_node = instance.primary_node
6992 target_node = self.op.target_node
6993 env = _BuildInstanceHookEnvByObject(self, instance)
6995 "MIGRATE_LIVE": self._migrater.live,
6996 "MIGRATE_CLEANUP": self.op.cleanup,
6997 "OLD_PRIMARY": source_node,
6998 "NEW_PRIMARY": target_node,
7001 if instance.disk_template in constants.DTS_INT_MIRROR:
7002 env["OLD_SECONDARY"] = target_node
7003 env["NEW_SECONDARY"] = source_node
7005 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7009 def BuildHooksNodes(self):
7010 """Build hooks nodes.
7013 instance = self._migrater.instance
7014 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7015 return (nl, nl + [instance.primary_node])
7018 class LUInstanceMove(LogicalUnit):
7019 """Move an instance by data-copying.
7022 HPATH = "instance-move"
7023 HTYPE = constants.HTYPE_INSTANCE
7026 def ExpandNames(self):
7027 self._ExpandAndLockInstance()
7028 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7029 self.op.target_node = target_node
7030 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7031 self.needed_locks[locking.LEVEL_NODE_RES] = []
7032 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7034 def DeclareLocks(self, level):
7035 if level == locking.LEVEL_NODE:
7036 self._LockInstancesNodes(primary_only=True)
7037 elif level == locking.LEVEL_NODE_RES:
7039 self.needed_locks[locking.LEVEL_NODE_RES] = \
7040 self.needed_locks[locking.LEVEL_NODE][:]
7042 def BuildHooksEnv(self):
7045 This runs on master, primary and secondary nodes of the instance.
7049 "TARGET_NODE": self.op.target_node,
7050 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7052 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7055 def BuildHooksNodes(self):
7056 """Build hooks nodes.
7060 self.cfg.GetMasterNode(),
7061 self.instance.primary_node,
7062 self.op.target_node,
7066 def CheckPrereq(self):
7067 """Check prerequisites.
7069 This checks that the instance is in the cluster.
7072 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7073 assert self.instance is not None, \
7074 "Cannot retrieve locked instance %s" % self.op.instance_name
7076 node = self.cfg.GetNodeInfo(self.op.target_node)
7077 assert node is not None, \
7078 "Cannot retrieve locked node %s" % self.op.target_node
7080 self.target_node = target_node = node.name
7082 if target_node == instance.primary_node:
7083 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7084 (instance.name, target_node),
7087 bep = self.cfg.GetClusterInfo().FillBE(instance)
7089 for idx, dsk in enumerate(instance.disks):
7090 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7091 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7092 " cannot copy" % idx, errors.ECODE_STATE)
7094 _CheckNodeOnline(self, target_node)
7095 _CheckNodeNotDrained(self, target_node)
7096 _CheckNodeVmCapable(self, target_node)
7098 if instance.admin_state == constants.ADMINST_UP:
7099 # check memory requirements on the secondary node
7100 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7101 instance.name, bep[constants.BE_MEMORY],
7102 instance.hypervisor)
7104 self.LogInfo("Not checking memory on the secondary node as"
7105 " instance will not be started")
7107 # check bridge existence
7108 _CheckInstanceBridgesExist(self, instance, node=target_node)
7110 def Exec(self, feedback_fn):
7111 """Move an instance.
7113 The move is done by shutting it down on its present node, copying
7114 the data over (slow) and starting it on the new node.
7117 instance = self.instance
7119 source_node = instance.primary_node
7120 target_node = self.target_node
7122 self.LogInfo("Shutting down instance %s on source node %s",
7123 instance.name, source_node)
7125 assert (self.owned_locks(locking.LEVEL_NODE) ==
7126 self.owned_locks(locking.LEVEL_NODE_RES))
7128 result = self.rpc.call_instance_shutdown(source_node, instance,
7129 self.op.shutdown_timeout)
7130 msg = result.fail_msg
7132 if self.op.ignore_consistency:
7133 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7134 " Proceeding anyway. Please make sure node"
7135 " %s is down. Error details: %s",
7136 instance.name, source_node, source_node, msg)
7138 raise errors.OpExecError("Could not shutdown instance %s on"
7140 (instance.name, source_node, msg))
7142 # create the target disks
7144 _CreateDisks(self, instance, target_node=target_node)
7145 except errors.OpExecError:
7146 self.LogWarning("Device creation failed, reverting...")
7148 _RemoveDisks(self, instance, target_node=target_node)
7150 self.cfg.ReleaseDRBDMinors(instance.name)
7153 cluster_name = self.cfg.GetClusterInfo().cluster_name
7156 # activate, get path, copy the data over
7157 for idx, disk in enumerate(instance.disks):
7158 self.LogInfo("Copying data for disk %d", idx)
7159 result = self.rpc.call_blockdev_assemble(target_node, disk,
7160 instance.name, True, idx)
7162 self.LogWarning("Can't assemble newly created disk %d: %s",
7163 idx, result.fail_msg)
7164 errs.append(result.fail_msg)
7166 dev_path = result.payload
7167 result = self.rpc.call_blockdev_export(source_node, disk,
7168 target_node, dev_path,
7171 self.LogWarning("Can't copy data over for disk %d: %s",
7172 idx, result.fail_msg)
7173 errs.append(result.fail_msg)
7177 self.LogWarning("Some disks failed to copy, aborting")
7179 _RemoveDisks(self, instance, target_node=target_node)
7181 self.cfg.ReleaseDRBDMinors(instance.name)
7182 raise errors.OpExecError("Errors during disk copy: %s" %
7185 instance.primary_node = target_node
7186 self.cfg.Update(instance, feedback_fn)
7188 self.LogInfo("Removing the disks on the original node")
7189 _RemoveDisks(self, instance, target_node=source_node)
7191 # Only start the instance if it's marked as up
7192 if instance.admin_state == constants.ADMINST_UP:
7193 self.LogInfo("Starting instance %s on node %s",
7194 instance.name, target_node)
7196 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7197 ignore_secondaries=True)
7199 _ShutdownInstanceDisks(self, instance)
7200 raise errors.OpExecError("Can't activate the instance's disks")
7202 result = self.rpc.call_instance_start(target_node,
7203 (instance, None, None), False)
7204 msg = result.fail_msg
7206 _ShutdownInstanceDisks(self, instance)
7207 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7208 (instance.name, target_node, msg))
7211 class LUNodeMigrate(LogicalUnit):
7212 """Migrate all instances from a node.
7215 HPATH = "node-migrate"
7216 HTYPE = constants.HTYPE_NODE
7219 def CheckArguments(self):
7222 def ExpandNames(self):
7223 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7225 self.share_locks = _ShareAll()
7226 self.needed_locks = {
7227 locking.LEVEL_NODE: [self.op.node_name],
7230 def BuildHooksEnv(self):
7233 This runs on the master, the primary and all the secondaries.
7237 "NODE_NAME": self.op.node_name,
7240 def BuildHooksNodes(self):
7241 """Build hooks nodes.
7244 nl = [self.cfg.GetMasterNode()]
7247 def CheckPrereq(self):
7250 def Exec(self, feedback_fn):
7251 # Prepare jobs for migration instances
7253 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7256 iallocator=self.op.iallocator,
7257 target_node=self.op.target_node)]
7258 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7261 # TODO: Run iallocator in this opcode and pass correct placement options to
7262 # OpInstanceMigrate. Since other jobs can modify the cluster between
7263 # running the iallocator and the actual migration, a good consistency model
7264 # will have to be found.
7266 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7267 frozenset([self.op.node_name]))
7269 return ResultWithJobs(jobs)
7272 class TLMigrateInstance(Tasklet):
7273 """Tasklet class for instance migration.
7276 @ivar live: whether the migration will be done live or non-live;
7277 this variable is initialized only after CheckPrereq has run
7278 @type cleanup: boolean
7279 @ivar cleanup: Whether we clean up from a failed migration
7280 @type iallocator: string
7281 @ivar iallocator: The iallocator used to determine target_node
7282 @type target_node: string
7283 @ivar target_node: If given, the target_node to reallocate the instance to
7284 @type failover: boolean
7285 @ivar failover: Whether operation results in failover or migration
7286 @type fallback: boolean
7287 @ivar fallback: Whether fallback to failover is allowed if migration not
7289 @type ignore_consistency: boolean
7290 @ivar ignore_consistency: Whether we should ignore consistency between source
7292 @type shutdown_timeout: int
7293 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
7298 _MIGRATION_POLL_INTERVAL = 1 # seconds
7299 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7301 def __init__(self, lu, instance_name, cleanup=False,
7302 failover=False, fallback=False,
7303 ignore_consistency=False,
7304 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7305 """Initializes this class.
7308 Tasklet.__init__(self, lu)
7311 self.instance_name = instance_name
7312 self.cleanup = cleanup
7313 self.live = False # will be overridden later
7314 self.failover = failover
7315 self.fallback = fallback
7316 self.ignore_consistency = ignore_consistency
7317 self.shutdown_timeout = shutdown_timeout
7319 def CheckPrereq(self):
7320 """Check prerequisites.
7322 This checks that the instance is in the cluster.
7325 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7326 instance = self.cfg.GetInstanceInfo(instance_name)
7327 assert instance is not None
7328 self.instance = instance
7330 if (not self.cleanup and
7331 not instance.admin_state == constants.ADMINST_UP and
7332 not self.failover and self.fallback):
7333 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7334 " switching to failover")
7335 self.failover = True
7337 if instance.disk_template not in constants.DTS_MIRRORED:
7342 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7343 " %s" % (instance.disk_template, text),
7346 if instance.disk_template in constants.DTS_EXT_MIRROR:
7347 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7349 if self.lu.op.iallocator:
7350 self._RunAllocator()
7352 # We set self.target_node as it is required by
7354 self.target_node = self.lu.op.target_node
7356 # self.target_node is already populated, either directly or by the
7358 target_node = self.target_node
7359 if self.target_node == instance.primary_node:
7360 raise errors.OpPrereqError("Cannot migrate instance %s"
7361 " to its primary (%s)" %
7362 (instance.name, instance.primary_node))
7364 if len(self.lu.tasklets) == 1:
7365 # It is safe to release locks only when we're the only tasklet
7367 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7368 keep=[instance.primary_node, self.target_node])
7371 secondary_nodes = instance.secondary_nodes
7372 if not secondary_nodes:
7373 raise errors.ConfigurationError("No secondary node but using"
7374 " %s disk template" %
7375 instance.disk_template)
7376 target_node = secondary_nodes[0]
7377 if self.lu.op.iallocator or (self.lu.op.target_node and
7378 self.lu.op.target_node != target_node):
7380 text = "failed over"
7383 raise errors.OpPrereqError("Instances with disk template %s cannot"
7384 " be %s to arbitrary nodes"
7385 " (neither an iallocator nor a target"
7386 " node can be passed)" %
7387 (instance.disk_template, text),
7390 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7392 # check memory requirements on the secondary node
7393 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7394 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7395 instance.name, i_be[constants.BE_MEMORY],
7396 instance.hypervisor)
7398 self.lu.LogInfo("Not checking memory on the secondary node as"
7399 " instance will not be started")
7401 # check bridge existence
7402 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7404 if not self.cleanup:
7405 _CheckNodeNotDrained(self.lu, target_node)
7406 if not self.failover:
7407 result = self.rpc.call_instance_migratable(instance.primary_node,
7409 if result.fail_msg and self.fallback:
7410 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7412 self.failover = True
7414 result.Raise("Can't migrate, please use failover",
7415 prereq=True, ecode=errors.ECODE_STATE)
7417 assert not (self.failover and self.cleanup)
7419 if not self.failover:
7420 if self.lu.op.live is not None and self.lu.op.mode is not None:
7421 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7422 " parameters are accepted",
7424 if self.lu.op.live is not None:
7426 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7428 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7429 # reset the 'live' parameter to None so that repeated
7430 # invocations of CheckPrereq do not raise an exception
7431 self.lu.op.live = None
7432 elif self.lu.op.mode is None:
7433 # read the default value from the hypervisor
7434 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7436 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7438 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
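# Precedence sketch: an explicit 'live' flag is translated into the
# corresponding 'mode'; if neither was given, the hypervisor's
# HV_MIGRATION_MODE default decides. Passing both 'live' and 'mode' is
# rejected above.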
7440 # Failover is never live
7443 def _RunAllocator(self):
7444 """Run the allocator based on input opcode.
7447 ial = IAllocator(self.cfg, self.rpc,
7448 mode=constants.IALLOCATOR_MODE_RELOC,
7449 name=self.instance_name,
7450 # TODO See why hail breaks with a single node below
7451 relocate_from=[self.instance.primary_node,
7452 self.instance.primary_node],
7455 ial.Run(self.lu.op.iallocator)
7458 raise errors.OpPrereqError("Can't compute nodes using"
7459 " iallocator '%s': %s" %
7460 (self.lu.op.iallocator, ial.info),
7462 if len(ial.result) != ial.required_nodes:
7463 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7464 " of nodes (%s), required %s" %
7465 (self.lu.op.iallocator, len(ial.result),
7466 ial.required_nodes), errors.ECODE_FAULT)
7467 self.target_node = ial.result[0]
7468 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7469 self.instance_name, self.lu.op.iallocator,
7470 utils.CommaJoin(ial.result))
7472 def _WaitUntilSync(self):
7473 """Poll with custom rpc for disk sync.
7475 This uses our own step-based rpc call.
7478 self.feedback_fn("* wait until resync is done")
7482 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7484 self.instance.disks)
7486 for node, nres in result.items():
7487 nres.Raise("Cannot resync disks on node %s" % node)
7488 node_done, node_percent = nres.payload
7489 all_done = all_done and node_done
7490 if node_percent is not None:
7491 min_percent = min(min_percent, node_percent)
7493 if min_percent < 100:
7494 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7497 def _EnsureSecondary(self, node):
7498 """Demote a node to secondary.
7501 self.feedback_fn("* switching node %s to secondary mode" % node)
7503 for dev in self.instance.disks:
7504 self.cfg.SetDiskID(dev, node)
7506 result = self.rpc.call_blockdev_close(node, self.instance.name,
7507 self.instance.disks)
7508 result.Raise("Cannot change disk to secondary on node %s" % node)
7510 def _GoStandalone(self):
7511 """Disconnect from the network.
7514 self.feedback_fn("* changing into standalone mode")
7515 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7516 self.instance.disks)
7517 for node, nres in result.items():
7518 nres.Raise("Cannot disconnect disks node %s" % node)
7520 def _GoReconnect(self, multimaster):
7521 """Reconnect to the network.
7527 msg = "single-master"
7528 self.feedback_fn("* changing disks into %s mode" % msg)
7529 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7530 self.instance.disks,
7531 self.instance.name, multimaster)
7532 for node, nres in result.items():
7533 nres.Raise("Cannot change disks config on node %s" % node)
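# For a live migration the disks are first reconnected with multimaster=True
# (dual-primary), so that both the source and the target node can access
# them during the handover; afterwards they are switched back to
# single-master mode (see _ExecMigration below).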
7535 def _ExecCleanup(self):
7536 """Try to cleanup after a failed migration.
7538 The cleanup is done by:
7539 - check that the instance is running only on one node
7540 (and update the config if needed)
7541 - change disks on its secondary node to secondary
7542 - wait until disks are fully synchronized
7543 - disconnect from the network
7544 - change disks into single-master mode
7545 - wait again until disks are fully synchronized
7548 instance = self.instance
7549 target_node = self.target_node
7550 source_node = self.source_node
7552 # check running on only one node
7553 self.feedback_fn("* checking where the instance actually runs"
7554 " (if this hangs, the hypervisor might be in"
7556 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7557 for node, result in ins_l.items():
7558 result.Raise("Can't contact node %s" % node)
7560 runningon_source = instance.name in ins_l[source_node].payload
7561 runningon_target = instance.name in ins_l[target_node].payload
7563 if runningon_source and runningon_target:
7564 raise errors.OpExecError("Instance seems to be running on two nodes,"
7565 " or the hypervisor is confused; you will have"
7566 " to ensure manually that it runs only on one"
7567 " and restart this operation")
7569 if not (runningon_source or runningon_target):
7570 raise errors.OpExecError("Instance does not seem to be running at all;"
7571 " in this case it's safer to repair by"
7572 " running 'gnt-instance stop' to ensure disk"
7573 " shutdown, and then restarting it")
7575 if runningon_target:
7576 # the migration has actually succeeded, we need to update the config
7577 self.feedback_fn("* instance running on secondary node (%s),"
7578 " updating config" % target_node)
7579 instance.primary_node = target_node
7580 self.cfg.Update(instance, self.feedback_fn)
7581 demoted_node = source_node
7583 self.feedback_fn("* instance confirmed to be running on its"
7584 " primary node (%s)" % source_node)
7585 demoted_node = target_node
7587 if instance.disk_template in constants.DTS_INT_MIRROR:
7588 self._EnsureSecondary(demoted_node)
7590 self._WaitUntilSync()
7591 except errors.OpExecError:
7592 # we ignore errors here, since if the device is standalone, it
7593 # won't be able to sync
7595 self._GoStandalone()
7596 self._GoReconnect(False)
7597 self._WaitUntilSync()
7599 self.feedback_fn("* done")
7601 def _RevertDiskStatus(self):
7602 """Try to revert the disk status after a failed migration.
7605 target_node = self.target_node
7606 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7610 self._EnsureSecondary(target_node)
7611 self._GoStandalone()
7612 self._GoReconnect(False)
7613 self._WaitUntilSync()
7614 except errors.OpExecError, err:
7615 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7616 " please try to recover the instance manually;"
7617 " error '%s'" % str(err))
7619 def _AbortMigration(self):
7620 """Call the hypervisor code to abort a started migration.
7623 instance = self.instance
7624 target_node = self.target_node
7625 source_node = self.source_node
7626 migration_info = self.migration_info
7628 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7632 abort_msg = abort_result.fail_msg
7634 logging.error("Aborting migration failed on target node %s: %s",
7635 target_node, abort_msg)
7636 # Don't raise an exception here, as we still have to try to revert the
7637 # disk status, even if this step failed.
7639 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7640 instance, False, self.live)
7641 abort_msg = abort_result.fail_msg
7643 logging.error("Aborting migration failed on source node %s: %s",
7644 source_node, abort_msg)
7646 def _ExecMigration(self):
7647 """Migrate an instance.
7649 The migration is done by:
7650 - change the disks into dual-master mode
7651 - wait until disks are fully synchronized again
7652 - migrate the instance
7653 - change disks on the new secondary node (the old primary) to secondary
7654 - wait until disks are fully synchronized
7655 - change disks into single-master mode
7658 instance = self.instance
7659 target_node = self.target_node
7660 source_node = self.source_node
7662 # Check for hypervisor version mismatch and warn the user.
7663 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7664 None, self.instance.hypervisor)
7665 src_info = nodeinfo[source_node]
7666 dst_info = nodeinfo[target_node]
7668 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7669 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7670 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7671 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7672 if src_version != dst_version:
7673 self.feedback_fn("* warning: hypervisor version mismatch between"
7674 " source (%s) and target (%s) node" %
7675 (src_version, dst_version))
7677 self.feedback_fn("* checking disk consistency between source and target")
7678 for dev in instance.disks:
7679 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7680 raise errors.OpExecError("Disk %s is degraded or not fully"
7681 " synchronized on target node,"
7682 " aborting migration" % dev.iv_name)
7684 # First get the migration information from the remote node
7685 result = self.rpc.call_migration_info(source_node, instance)
7686 msg = result.fail_msg
7688 log_err = ("Failed fetching source migration information from %s: %s" %
7690 logging.error(log_err)
7691 raise errors.OpExecError(log_err)
7693 self.migration_info = migration_info = result.payload
7695 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7696 # Then switch the disks to master/master mode
7697 self._EnsureSecondary(target_node)
7698 self._GoStandalone()
7699 self._GoReconnect(True)
7700 self._WaitUntilSync()
7702 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7703 result = self.rpc.call_accept_instance(target_node,
7706 self.nodes_ip[target_node])
7708 msg = result.fail_msg
7710 logging.error("Instance pre-migration failed, trying to revert"
7711 " disk status: %s", msg)
7712 self.feedback_fn("Pre-migration failed, aborting")
7713 self._AbortMigration()
7714 self._RevertDiskStatus()
7715 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7716 (instance.name, msg))
7718 self.feedback_fn("* migrating instance to %s" % target_node)
7719 result = self.rpc.call_instance_migrate(source_node, instance,
7720 self.nodes_ip[target_node],
7722 msg = result.fail_msg
7724 logging.error("Instance migration failed, trying to revert"
7725 " disk status: %s", msg)
7726 self.feedback_fn("Migration failed, aborting")
7727 self._AbortMigration()
7728 self._RevertDiskStatus()
7729 raise errors.OpExecError("Could not migrate instance %s: %s" %
7730 (instance.name, msg))
7732 self.feedback_fn("* starting memory transfer")
7733 last_feedback = time.time()
7735 result = self.rpc.call_instance_get_migration_status(source_node,
7737 msg = result.fail_msg
7738 ms = result.payload # MigrationStatus instance
7739 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7740 logging.error("Instance migration failed, trying to revert"
7741 " disk status: %s", msg)
7742 self.feedback_fn("Migration failed, aborting")
7743 self._AbortMigration()
7744 self._RevertDiskStatus()
7745 raise errors.OpExecError("Could not migrate instance %s: %s" %
7746 (instance.name, msg))
7748 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7749 self.feedback_fn("* memory transfer complete")
7752 if (utils.TimeoutExpired(last_feedback,
7753 self._MIGRATION_FEEDBACK_INTERVAL) and
7754 ms.transferred_ram is not None):
7755 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7756 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7757 last_feedback = time.time()
7759 time.sleep(self._MIGRATION_POLL_INTERVAL)
7761 result = self.rpc.call_instance_finalize_migration_src(source_node,
7765 msg = result.fail_msg
7767 logging.error("Instance migration succeeded, but finalization failed"
7768 " on the source node: %s", msg)
7769 raise errors.OpExecError("Could not finalize instance migration: %s" %
7772 instance.primary_node = target_node
7774 # distribute new instance config to the other nodes
7775 self.cfg.Update(instance, self.feedback_fn)
7777 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7781 msg = result.fail_msg
7783 logging.error("Instance migration succeeded, but finalization failed"
7784 " on the target node: %s", msg)
7785 raise errors.OpExecError("Could not finalize instance migration: %s" %
7788 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7789 self._EnsureSecondary(source_node)
7790 self._WaitUntilSync()
7791 self._GoStandalone()
7792 self._GoReconnect(False)
7793 self._WaitUntilSync()
7795 self.feedback_fn("* done")
7797 def _ExecFailover(self):
7798 """Failover an instance.
7800 The failover is done by shutting it down on its present node and
7801 starting it on the secondary.
7804 instance = self.instance
7805 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7807 source_node = instance.primary_node
7808 target_node = self.target_node
7810 if instance.admin_state == constants.ADMINST_UP:
7811 self.feedback_fn("* checking disk consistency between source and target")
7812 for dev in instance.disks:
7813 # for drbd, these are drbd over lvm
7814 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7815 if primary_node.offline:
7816 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7818 (primary_node.name, dev.iv_name, target_node))
7819 elif not self.ignore_consistency:
7820 raise errors.OpExecError("Disk %s is degraded on target node,"
7821 " aborting failover" % dev.iv_name)
7823 self.feedback_fn("* not checking disk consistency as instance is not running")
7826 self.feedback_fn("* shutting down instance on source node")
7827 logging.info("Shutting down instance %s on node %s",
7828 instance.name, source_node)
7830 result = self.rpc.call_instance_shutdown(source_node, instance,
7831 self.shutdown_timeout)
7832 msg = result.fail_msg
7834 if self.ignore_consistency or primary_node.offline:
7835 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7836 " proceeding anyway; please make sure node"
7837 " %s is down; error details: %s",
7838 instance.name, source_node, source_node, msg)
7840 raise errors.OpExecError("Could not shutdown instance %s on"
7842 (instance.name, source_node, msg))
7844 self.feedback_fn("* deactivating the instance's disks on source node")
7845 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7846 raise errors.OpExecError("Can't shut down the instance's disks")
7848 instance.primary_node = target_node
7849 # distribute new instance config to the other nodes
7850 self.cfg.Update(instance, self.feedback_fn)
7852 # Only start the instance if it's marked as up
7853 if instance.admin_state == constants.ADMINST_UP:
7854 self.feedback_fn("* activating the instance's disks on target node %s" %
7856 logging.info("Starting instance %s on node %s",
7857 instance.name, target_node)
7859 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7860 ignore_secondaries=True)
7862 _ShutdownInstanceDisks(self.lu, instance)
7863 raise errors.OpExecError("Can't activate the instance's disks")
7865 self.feedback_fn("* starting the instance on the target node %s" %
7867 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7869 msg = result.fail_msg
7871 _ShutdownInstanceDisks(self.lu, instance)
7872 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7873 (instance.name, target_node, msg))
7875 def Exec(self, feedback_fn):
7876 """Perform the migration.
7879 self.feedback_fn = feedback_fn
7880 self.source_node = self.instance.primary_node
7882 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7883 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7884 self.target_node = self.instance.secondary_nodes[0]
7885 # Otherwise self.target_node has been populated either
7886 # directly, or through an iallocator.
7888 self.all_nodes = [self.source_node, self.target_node]
7889 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7890 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7893 feedback_fn("Failover instance %s" % self.instance.name)
7894 self._ExecFailover()
7896 feedback_fn("Migrating instance %s" % self.instance.name)
7899 return self._ExecCleanup()
7901 return self._ExecMigration()
7904 def _CreateBlockDev(lu, node, instance, device, force_create,
7906 """Create a tree of block devices on a given node.
7908 If this device type has to be created on secondaries, create it and all its children.
7911 If not, just recurse to children keeping the same 'force' value.
7913 @param lu: the lu on whose behalf we execute
7914 @param node: the node on which to create the device
7915 @type instance: L{objects.Instance}
7916 @param instance: the instance which owns the device
7917 @type device: L{objects.Disk}
7918 @param device: the device to create
7919 @type force_create: boolean
7920 @param force_create: whether to force creation of this device; this
7921 will be changed to True whenever we find a device which has the
7922 CreateOnSecondary() attribute
7923 @param info: the extra 'metadata' we should attach to the device
7924 (this will be represented as a LVM tag)
7925 @type force_open: boolean
7926 @param force_open: this parameter will be passed to the
7927 L{backend.BlockdevCreate} function where it specifies
7928 whether we run on primary or not, and it affects both
7929 the child assembly and the device's own Open() execution
7932 if device.CreateOnSecondary():
7936 for child in device.children:
7937 _CreateBlockDev(lu, node, instance, child, force_create,
7940 if not force_create:
7943 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
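# Added note (descriptive sketch of the recursion above): for a mirrored
# device such as DRBD8, force_create is switched on once a device reporting
# CreateOnSecondary() is reached, so its children (e.g. the data and meta LVs)
# are created on the secondary node as well; purely local device types are
# only created where creation is explicitly forced.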
7946 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7947 """Create a single block device on a given node.
7949 This will not recurse over children of the device, so they must be created in advance.
7952 @param lu: the lu on whose behalf we execute
7953 @param node: the node on which to create the device
7954 @type instance: L{objects.Instance}
7955 @param instance: the instance which owns the device
7956 @type device: L{objects.Disk}
7957 @param device: the device to create
7958 @param info: the extra 'metadata' we should attach to the device
7959 (this will be represented as a LVM tag)
7960 @type force_open: boolean
7961 @param force_open: this parameter will be passed to the
7962 L{backend.BlockdevCreate} function where it specifies
7963 whether we run on primary or not, and it affects both
7964 the child assembly and the device's own Open() execution
7967 lu.cfg.SetDiskID(device, node)
7968 result = lu.rpc.call_blockdev_create(node, device, device.size,
7969 instance.name, force_open, info)
7970 result.Raise("Can't create block device %s on"
7971 " node %s for instance %s" % (device, node, instance.name))
7972 if device.physical_id is None:
7973 device.physical_id = result.payload
7976 def _GenerateUniqueNames(lu, exts):
7977 """Generate a suitable LV name.
7979 This will generate a logical volume name for the given instance.
7984 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7985 results.append("%s%s" % (new_id, val))
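# Added note (illustrative, hypothetical value): each generated name is a
# cluster-unique id followed by the requested suffix, e.g. something like
# "d2b0e5cb-....disk0" for the extension ".disk0"; the exact id format is
# whatever ConfigWriter.GenerateUniqueID produces.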
7989 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7990 iv_name, p_minor, s_minor):
7991 """Generate a drbd8 device complete with its children.
7994 assert len(vgnames) == len(names) == 2
7995 port = lu.cfg.AllocatePort()
7996 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7997 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7998 logical_id=(vgnames[0], names[0]))
7999 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8000 logical_id=(vgnames[1], names[1]))
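# Added note (descriptive): the DRBD8 device assembled below takes the data LV
# and the fixed-size metadata LV as its two children, while its logical_id
# identifies the endpoints of the mirror: the primary and secondary node
# names, the allocated port, the two minors and the shared secret.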
8001 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8002 logical_id=(primary, secondary, port,
8005 children=[dev_data, dev_meta],
8010 def _GenerateDiskTemplate(lu, template_name,
8011 instance_name, primary_node,
8012 secondary_nodes, disk_info,
8013 file_storage_dir, file_driver,
8014 base_index, feedback_fn):
8015 """Generate the entire disk layout for a given template type.
8018 # TODO: compute space requirements
8020 vgname = lu.cfg.GetVGName()
8021 disk_count = len(disk_info)
8023 if template_name == constants.DT_DISKLESS:
8025 elif template_name == constants.DT_PLAIN:
8026 if len(secondary_nodes) != 0:
8027 raise errors.ProgrammerError("Wrong template configuration")
8029 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8030 for i in range(disk_count)])
8031 for idx, disk in enumerate(disk_info):
8032 disk_index = idx + base_index
8033 vg = disk.get(constants.IDISK_VG, vgname)
8034 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8035 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8036 size=disk[constants.IDISK_SIZE],
8037 logical_id=(vg, names[idx]),
8038 iv_name="disk/%d" % disk_index,
8039 mode=disk[constants.IDISK_MODE])
8040 disks.append(disk_dev)
8041 elif template_name == constants.DT_DRBD8:
8042 if len(secondary_nodes) != 1:
8043 raise errors.ProgrammerError("Wrong template configuration")
8044 remote_node = secondary_nodes[0]
8045 minors = lu.cfg.AllocateDRBDMinor(
8046 [primary_node, remote_node] * len(disk_info), instance_name)
8049 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8050 for i in range(disk_count)]):
8051 names.append(lv_prefix + "_data")
8052 names.append(lv_prefix + "_meta")
8053 for idx, disk in enumerate(disk_info):
8054 disk_index = idx + base_index
8055 data_vg = disk.get(constants.IDISK_VG, vgname)
8056 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8057 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8058 disk[constants.IDISK_SIZE],
8060 names[idx * 2:idx * 2 + 2],
8061 "disk/%d" % disk_index,
8062 minors[idx * 2], minors[idx * 2 + 1])
8063 disk_dev.mode = disk[constants.IDISK_MODE]
8064 disks.append(disk_dev)
8065 elif template_name == constants.DT_FILE:
8066 if len(secondary_nodes) != 0:
8067 raise errors.ProgrammerError("Wrong template configuration")
8069 opcodes.RequireFileStorage()
8071 for idx, disk in enumerate(disk_info):
8072 disk_index = idx + base_index
8073 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8074 size=disk[constants.IDISK_SIZE],
8075 iv_name="disk/%d" % disk_index,
8076 logical_id=(file_driver,
8077 "%s/disk%d" % (file_storage_dir,
8079 mode=disk[constants.IDISK_MODE])
8080 disks.append(disk_dev)
8081 elif template_name == constants.DT_SHARED_FILE:
8082 if len(secondary_nodes) != 0:
8083 raise errors.ProgrammerError("Wrong template configuration")
8085 opcodes.RequireSharedFileStorage()
8087 for idx, disk in enumerate(disk_info):
8088 disk_index = idx + base_index
8089 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8090 size=disk[constants.IDISK_SIZE],
8091 iv_name="disk/%d" % disk_index,
8092 logical_id=(file_driver,
8093 "%s/disk%d" % (file_storage_dir,
8095 mode=disk[constants.IDISK_MODE])
8096 disks.append(disk_dev)
8097 elif template_name == constants.DT_BLOCK:
8098 if len(secondary_nodes) != 0:
8099 raise errors.ProgrammerError("Wrong template configuration")
8101 for idx, disk in enumerate(disk_info):
8102 disk_index = idx + base_index
8103 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8104 size=disk[constants.IDISK_SIZE],
8105 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8106 disk[constants.IDISK_ADOPT]),
8107 iv_name="disk/%d" % disk_index,
8108 mode=disk[constants.IDISK_MODE])
8109 disks.append(disk_dev)
8112 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8116 def _GetInstanceInfoText(instance):
8117 """Compute that text that should be added to the disk's metadata.
8120 return "originstname+%s" % instance.name
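# Added note (illustrative, hypothetical name): this is the string that
# _CreateBlockDev passes as the 'info' argument and that ends up attached to
# the volumes (as an LVM tag for LV-based disks), e.g.
# "originstname+instance1.example.com" for an instance of that name.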
8123 def _CalcEta(time_taken, written, total_size):
8124 """Calculates the ETA based on size written and total size.
8126 @param time_taken: The time taken so far
8127 @param written: amount written so far
8128 @param total_size: The total size of data to be written
8129 @return: The remaining time in seconds
8132 avg_time = time_taken / float(written)
8133 return (total_size - written) * avg_time
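# Added note (worked example): if 2048 MiB out of 8192 MiB have been written
# in 120 seconds, avg_time is 120 / 2048 seconds per MiB and the returned ETA
# is (8192 - 2048) * 120 / 2048 = 360 seconds.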
8136 def _WipeDisks(lu, instance):
8137 """Wipes instance disks.
8139 @type lu: L{LogicalUnit}
8140 @param lu: the logical unit on whose behalf we execute
8141 @type instance: L{objects.Instance}
8142 @param instance: the instance whose disks we should wipe
8143 @return: the success of the wipe
8146 node = instance.primary_node
8148 for device in instance.disks:
8149 lu.cfg.SetDiskID(device, node)
8151 logging.info("Pause sync of instance %s disks", instance.name)
8152 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8154 for idx, success in enumerate(result.payload):
8156 logging.warn("pause-sync of instance %s for disk %d failed",
8160 for idx, device in enumerate(instance.disks):
8161 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8162 # but at most MAX_WIPE_CHUNK
8163 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8164 constants.MIN_WIPE_CHUNK_PERCENT)
8165 # we _must_ make this an int, otherwise rounding errors will occur
8167 wipe_chunk_size = int(wipe_chunk_size)
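# Added note (illustrative, assumes the usual values of the two constants,
# 10 for MIN_WIPE_CHUNK_PERCENT and 1024 MiB for MAX_WIPE_CHUNK): a 5000 MiB
# disk would then be wiped in 500 MiB chunks, while disks larger than about
# 10 GiB are capped at 1024 MiB chunks.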
8169 lu.LogInfo("* Wiping disk %d", idx)
8170 logging.info("Wiping disk %d for instance %s, node %s using"
8171 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8176 start_time = time.time()
8178 while offset < size:
8179 wipe_size = min(wipe_chunk_size, size - offset)
8180 logging.debug("Wiping disk %d, offset %s, chunk %s",
8181 idx, offset, wipe_size)
8182 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8183 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8184 (idx, offset, wipe_size))
8187 if now - last_output >= 60:
8188 eta = _CalcEta(now - start_time, offset, size)
8189 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8190 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8193 logging.info("Resume sync of instance %s disks", instance.name)
8195 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8197 for idx, success in enumerate(result.payload):
8199 lu.LogWarning("Resume sync of disk %d failed, please have a"
8200 " look at the status and troubleshoot the issue", idx)
8201 logging.warn("resume-sync of instance %s for disk %d failed",
8205 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8206 """Create all disks for an instance.
8208 This abstracts away some work from AddInstance.
8210 @type lu: L{LogicalUnit}
8211 @param lu: the logical unit on whose behalf we execute
8212 @type instance: L{objects.Instance}
8213 @param instance: the instance whose disks we should create
8215 @param to_skip: list of indices to skip
8216 @type target_node: string
8217 @param target_node: if passed, overrides the target node for creation
8219 @return: the success of the creation
8222 info = _GetInstanceInfoText(instance)
8223 if target_node is None:
8224 pnode = instance.primary_node
8225 all_nodes = instance.all_nodes
8230 if instance.disk_template in constants.DTS_FILEBASED:
8231 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8232 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8234 result.Raise("Failed to create directory '%s' on"
8235 " node %s" % (file_storage_dir, pnode))
8237 # Note: this needs to be kept in sync with adding of disks in
8238 # LUInstanceSetParams
8239 for idx, device in enumerate(instance.disks):
8240 if to_skip and idx in to_skip:
8242 logging.info("Creating volume %s for instance %s",
8243 device.iv_name, instance.name)
8245 for node in all_nodes:
8246 f_create = node == pnode
8247 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8250 def _RemoveDisks(lu, instance, target_node=None):
8251 """Remove all disks for an instance.
8253 This abstracts away some work from `AddInstance()` and
8254 `RemoveInstance()`. Note that in case some of the devices couldn't
8255 be removed, the removal will continue with the other ones (compare
8256 with `_CreateDisks()`).
8258 @type lu: L{LogicalUnit}
8259 @param lu: the logical unit on whose behalf we execute
8260 @type instance: L{objects.Instance}
8261 @param instance: the instance whose disks we should remove
8262 @type target_node: string
8263 @param target_node: used to override the node on which to remove the disks
8265 @return: the success of the removal
8268 logging.info("Removing block devices for instance %s", instance.name)
8271 for device in instance.disks:
8273 edata = [(target_node, device)]
8275 edata = device.ComputeNodeTree(instance.primary_node)
8276 for node, disk in edata:
8277 lu.cfg.SetDiskID(disk, node)
8278 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8280 lu.LogWarning("Could not remove block device %s on node %s,"
8281 " continuing anyway: %s", device.iv_name, node, msg)
8284 if instance.disk_template == constants.DT_FILE:
8285 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8289 tgt = instance.primary_node
8290 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8292 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8293 file_storage_dir, instance.primary_node, result.fail_msg)
8299 def _ComputeDiskSizePerVG(disk_template, disks):
8300 """Compute disk size requirements in the volume group
8303 def _compute(disks, payload):
8304 """Universal algorithm.
8309 vgs[disk[constants.IDISK_VG]] = \
8310 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8314 # Required free disk space as a function of disk and swap space
8316 constants.DT_DISKLESS: {},
8317 constants.DT_PLAIN: _compute(disks, 0),
8318 # 128 MB are added for drbd metadata for each disk
8319 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8320 constants.DT_FILE: {},
8321 constants.DT_SHARED_FILE: {},
8324 if disk_template not in req_size_dict:
8325 raise errors.ProgrammerError("Disk template '%s' size requirement"
8326 " is unknown" % disk_template)
8328 return req_size_dict[disk_template]
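# Added note (worked example): for two DRBD8 disks of 1024 MiB and 2048 MiB,
# both in a volume group named "xenvg" (example name), the result is
# {"xenvg": 3328}, i.e. the sum of the disk sizes plus DRBD_META_SIZE
# (128 MiB) for each disk.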
8331 def _ComputeDiskSize(disk_template, disks):
8332 """Compute disk size requirements in the volume group
8335 # Required free disk space as a function of disk and swap space
8337 constants.DT_DISKLESS: None,
8338 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8339 # 128 MB are added for drbd metadata for each disk
8341 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8342 constants.DT_FILE: None,
8343 constants.DT_SHARED_FILE: 0,
8344 constants.DT_BLOCK: 0,
8347 if disk_template not in req_size_dict:
8348 raise errors.ProgrammerError("Disk template '%s' size requirement"
8349 " is unknown" % disk_template)
8351 return req_size_dict[disk_template]
8354 def _FilterVmNodes(lu, nodenames):
8355 """Filters out non-vm_capable nodes from a list.
8357 @type lu: L{LogicalUnit}
8358 @param lu: the logical unit for which we check
8359 @type nodenames: list
8360 @param nodenames: the list of nodes on which we should check
8362 @return: the list of vm-capable nodes
8365 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8366 return [name for name in nodenames if name not in non_vm_nodes]
8369 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8370 """Hypervisor parameter validation.
8372 This function abstracts the hypervisor parameter validation to be
8373 used in both instance create and instance modify.
8375 @type lu: L{LogicalUnit}
8376 @param lu: the logical unit for which we check
8377 @type nodenames: list
8378 @param nodenames: the list of nodes on which we should check
8379 @type hvname: string
8380 @param hvname: the name of the hypervisor we should use
8381 @type hvparams: dict
8382 @param hvparams: the parameters which we need to check
8383 @raise errors.OpPrereqError: if the parameters are not valid
8386 nodenames = _FilterVmNodes(lu, nodenames)
8388 cluster = lu.cfg.GetClusterInfo()
8389 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8391 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8392 for node in nodenames:
8396 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8399 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8400 """OS parameters validation.
8402 @type lu: L{LogicalUnit}
8403 @param lu: the logical unit for which we check
8404 @type required: boolean
8405 @param required: whether the validation should fail if the OS is not found
8407 @type nodenames: list
8408 @param nodenames: the list of nodes on which we should check
8409 @type osname: string
8410 @param osname: the name of the OS we should check
8411 @type osparams: dict
8412 @param osparams: the parameters which we need to check
8413 @raise errors.OpPrereqError: if the parameters are not valid
8416 nodenames = _FilterVmNodes(lu, nodenames)
8417 result = lu.rpc.call_os_validate(nodenames, required, osname,
8418 [constants.OS_VALIDATE_PARAMETERS],
8420 for node, nres in result.items():
8421 # we don't check for offline cases since this should be run only
8422 # against the master node and/or an instance's nodes
8423 nres.Raise("OS Parameters validation failed on node %s" % node)
8424 if not nres.payload:
8425 lu.LogInfo("OS %s not found on node %s, validation skipped",
8429 class LUInstanceCreate(LogicalUnit):
8430 """Create an instance.
8433 HPATH = "instance-add"
8434 HTYPE = constants.HTYPE_INSTANCE
8437 def CheckArguments(self):
8441 # do not require name_check to ease forward/backward compatibility
8443 if self.op.no_install and self.op.start:
8444 self.LogInfo("No-installation mode selected, disabling startup")
8445 self.op.start = False
8446 # validate/normalize the instance name
8447 self.op.instance_name = \
8448 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8450 if self.op.ip_check and not self.op.name_check:
8451 # TODO: make the ip check more flexible and not depend on the name check
8452 raise errors.OpPrereqError("Cannot do IP address check without a name"
8453 " check", errors.ECODE_INVAL)
8455 # check nics' parameter names
8456 for nic in self.op.nics:
8457 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8459 # check disks: parameter names and consistent adopt/no-adopt strategy
8460 has_adopt = has_no_adopt = False
8461 for disk in self.op.disks:
8462 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8463 if constants.IDISK_ADOPT in disk:
8467 if has_adopt and has_no_adopt:
8468 raise errors.OpPrereqError("Either all disks are adopted or none is",
8471 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8472 raise errors.OpPrereqError("Disk adoption is not supported for the"
8473 " '%s' disk template" %
8474 self.op.disk_template,
8476 if self.op.iallocator is not None:
8477 raise errors.OpPrereqError("Disk adoption not allowed with an"
8478 " iallocator script", errors.ECODE_INVAL)
8479 if self.op.mode == constants.INSTANCE_IMPORT:
8480 raise errors.OpPrereqError("Disk adoption not allowed for"
8481 " instance import", errors.ECODE_INVAL)
8483 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8484 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8485 " but no 'adopt' parameter given" %
8486 self.op.disk_template,
8489 self.adopt_disks = has_adopt
8491 # instance name verification
8492 if self.op.name_check:
8493 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8494 self.op.instance_name = self.hostname1.name
8495 # used in CheckPrereq for ip ping check
8496 self.check_ip = self.hostname1.ip
8498 self.check_ip = None
8500 # file storage checks
8501 if (self.op.file_driver and
8502 self.op.file_driver not in constants.FILE_DRIVER):
8503 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8504 self.op.file_driver, errors.ECODE_INVAL)
8506 if self.op.disk_template == constants.DT_FILE:
8507 opcodes.RequireFileStorage()
8508 elif self.op.disk_template == constants.DT_SHARED_FILE:
8509 opcodes.RequireSharedFileStorage()
8511 ### Node/iallocator related checks
8512 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8514 if self.op.pnode is not None:
8515 if self.op.disk_template in constants.DTS_INT_MIRROR:
8516 if self.op.snode is None:
8517 raise errors.OpPrereqError("The networked disk templates need"
8518 " a mirror node", errors.ECODE_INVAL)
8520 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8522 self.op.snode = None
8524 self._cds = _GetClusterDomainSecret()
8526 if self.op.mode == constants.INSTANCE_IMPORT:
8527 # On import force_variant must be True, because if we forced it at
8528 # initial install, our only chance when importing it back is that it works again
8530 self.op.force_variant = True
8532 if self.op.no_install:
8533 self.LogInfo("No-installation mode has no effect during import")
8535 elif self.op.mode == constants.INSTANCE_CREATE:
8536 if self.op.os_type is None:
8537 raise errors.OpPrereqError("No guest OS specified",
8539 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8540 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8541 " installation" % self.op.os_type,
8543 if self.op.disk_template is None:
8544 raise errors.OpPrereqError("No disk template specified",
8547 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8548 # Check handshake to ensure both clusters have the same domain secret
8549 src_handshake = self.op.source_handshake
8550 if not src_handshake:
8551 raise errors.OpPrereqError("Missing source handshake",
8554 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8557 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8560 # Load and check source CA
8561 self.source_x509_ca_pem = self.op.source_x509_ca
8562 if not self.source_x509_ca_pem:
8563 raise errors.OpPrereqError("Missing source X509 CA",
8567 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8569 except OpenSSL.crypto.Error, err:
8570 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8571 (err, ), errors.ECODE_INVAL)
8573 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8574 if errcode is not None:
8575 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8578 self.source_x509_ca = cert
8580 src_instance_name = self.op.source_instance_name
8581 if not src_instance_name:
8582 raise errors.OpPrereqError("Missing source instance name",
8585 self.source_instance_name = \
8586 netutils.GetHostname(name=src_instance_name).name
8589 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8590 self.op.mode, errors.ECODE_INVAL)
8592 def ExpandNames(self):
8593 """ExpandNames for CreateInstance.
8595 Figure out the right locks for instance creation.
8598 self.needed_locks = {}
8600 instance_name = self.op.instance_name
8601 # this is just a preventive check, but someone might still add this
8602 # instance in the meantime, and creation will fail at lock-add time
8603 if instance_name in self.cfg.GetInstanceList():
8604 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8605 instance_name, errors.ECODE_EXISTS)
8607 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8609 if self.op.iallocator:
8610 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8611 # specifying a group on instance creation and then selecting nodes from that group
8613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8614 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8616 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8617 nodelist = [self.op.pnode]
8618 if self.op.snode is not None:
8619 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8620 nodelist.append(self.op.snode)
8621 self.needed_locks[locking.LEVEL_NODE] = nodelist
8622 # Lock resources of instance's primary and secondary nodes (copy to
8623 # prevent accidental modification)
8624 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8626 # in case of import lock the source node too
8627 if self.op.mode == constants.INSTANCE_IMPORT:
8628 src_node = self.op.src_node
8629 src_path = self.op.src_path
8631 if src_path is None:
8632 self.op.src_path = src_path = self.op.instance_name
8634 if src_node is None:
8635 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8636 self.op.src_node = None
8637 if os.path.isabs(src_path):
8638 raise errors.OpPrereqError("Importing an instance from a path"
8639 " requires a source node option",
8642 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8643 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8644 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8645 if not os.path.isabs(src_path):
8646 self.op.src_path = src_path = \
8647 utils.PathJoin(constants.EXPORT_DIR, src_path)
8649 def _RunAllocator(self):
8650 """Run the allocator based on input opcode.
8653 nics = [n.ToDict() for n in self.nics]
8654 ial = IAllocator(self.cfg, self.rpc,
8655 mode=constants.IALLOCATOR_MODE_ALLOC,
8656 name=self.op.instance_name,
8657 disk_template=self.op.disk_template,
8660 vcpus=self.be_full[constants.BE_VCPUS],
8661 memory=self.be_full[constants.BE_MEMORY],
8664 hypervisor=self.op.hypervisor,
8667 ial.Run(self.op.iallocator)
8670 raise errors.OpPrereqError("Can't compute nodes using"
8671 " iallocator '%s': %s" %
8672 (self.op.iallocator, ial.info),
8674 if len(ial.result) != ial.required_nodes:
8675 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8676 " of nodes (%s), required %s" %
8677 (self.op.iallocator, len(ial.result),
8678 ial.required_nodes), errors.ECODE_FAULT)
8679 self.op.pnode = ial.result[0]
8680 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8681 self.op.instance_name, self.op.iallocator,
8682 utils.CommaJoin(ial.result))
8683 if ial.required_nodes == 2:
8684 self.op.snode = ial.result[1]
8686 def BuildHooksEnv(self):
8689 This runs on master, primary and secondary nodes of the instance.
8693 "ADD_MODE": self.op.mode,
8695 if self.op.mode == constants.INSTANCE_IMPORT:
8696 env["SRC_NODE"] = self.op.src_node
8697 env["SRC_PATH"] = self.op.src_path
8698 env["SRC_IMAGES"] = self.src_images
8700 env.update(_BuildInstanceHookEnv(
8701 name=self.op.instance_name,
8702 primary_node=self.op.pnode,
8703 secondary_nodes=self.secondaries,
8704 status=self.op.start,
8705 os_type=self.op.os_type,
8706 memory=self.be_full[constants.BE_MEMORY],
8707 vcpus=self.be_full[constants.BE_VCPUS],
8708 nics=_NICListToTuple(self, self.nics),
8709 disk_template=self.op.disk_template,
8710 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8711 for d in self.disks],
8714 hypervisor_name=self.op.hypervisor,
8720 def BuildHooksNodes(self):
8721 """Build hooks nodes.
8724 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8727 def _ReadExportInfo(self):
8728 """Reads the export information from disk.
8730 It will override the opcode source node and path with the actual
8731 information, if these two were not specified before.
8733 @return: the export information
8736 assert self.op.mode == constants.INSTANCE_IMPORT
8738 src_node = self.op.src_node
8739 src_path = self.op.src_path
8741 if src_node is None:
8742 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8743 exp_list = self.rpc.call_export_list(locked_nodes)
8745 for node in exp_list:
8746 if exp_list[node].fail_msg:
8748 if src_path in exp_list[node].payload:
8750 self.op.src_node = src_node = node
8751 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8755 raise errors.OpPrereqError("No export found for relative path %s" %
8756 src_path, errors.ECODE_INVAL)
8758 _CheckNodeOnline(self, src_node)
8759 result = self.rpc.call_export_info(src_node, src_path)
8760 result.Raise("No export or invalid export found in dir %s" % src_path)
8762 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8763 if not export_info.has_section(constants.INISECT_EXP):
8764 raise errors.ProgrammerError("Corrupted export config",
8765 errors.ECODE_ENVIRON)
8767 ei_version = export_info.get(constants.INISECT_EXP, "version")
8768 if (int(ei_version) != constants.EXPORT_VERSION):
8769 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8770 (ei_version, constants.EXPORT_VERSION),
8771 errors.ECODE_ENVIRON)
8774 def _ReadExportParams(self, einfo):
8775 """Use export parameters as defaults.
8777 In case the opcode doesn't specify (as in override) some instance
8778 parameters, then try to use them from the export information, if it declares them.
8782 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8784 if self.op.disk_template is None:
8785 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8786 self.op.disk_template = einfo.get(constants.INISECT_INS,
8788 if self.op.disk_template not in constants.DISK_TEMPLATES:
8789 raise errors.OpPrereqError("Disk template specified in configuration"
8790 " file is not one of the allowed values:"
8791 " %s" % " ".join(constants.DISK_TEMPLATES))
8793 raise errors.OpPrereqError("No disk template specified and the export"
8794 " is missing the disk_template information",
8797 if not self.op.disks:
8799 # TODO: import the disk iv_name too
8800 for idx in range(constants.MAX_DISKS):
8801 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8802 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8803 disks.append({constants.IDISK_SIZE: disk_sz})
8804 self.op.disks = disks
8805 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8806 raise errors.OpPrereqError("No disk info specified and the export"
8807 " is missing the disk information",
8810 if not self.op.nics:
8812 for idx in range(constants.MAX_NICS):
8813 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8815 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8816 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8823 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8824 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8826 if (self.op.hypervisor is None and
8827 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8828 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8830 if einfo.has_section(constants.INISECT_HYP):
8831 # use the export parameters but do not override the ones
8832 # specified by the user
8833 for name, value in einfo.items(constants.INISECT_HYP):
8834 if name not in self.op.hvparams:
8835 self.op.hvparams[name] = value
8837 if einfo.has_section(constants.INISECT_BEP):
8838 # use the parameters, without overriding
8839 for name, value in einfo.items(constants.INISECT_BEP):
8840 if name not in self.op.beparams:
8841 self.op.beparams[name] = value
8843 # try to read the parameters old style, from the main section
8844 for name in constants.BES_PARAMETERS:
8845 if (name not in self.op.beparams and
8846 einfo.has_option(constants.INISECT_INS, name)):
8847 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8849 if einfo.has_section(constants.INISECT_OSP):
8850 # use the parameters, without overriding
8851 for name, value in einfo.items(constants.INISECT_OSP):
8852 if name not in self.op.osparams:
8853 self.op.osparams[name] = value
8855 def _RevertToDefaults(self, cluster):
8856 """Revert the instance parameters to the default values.
8860 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8861 for name in self.op.hvparams.keys():
8862 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8863 del self.op.hvparams[name]
8865 be_defs = cluster.SimpleFillBE({})
8866 for name in self.op.beparams.keys():
8867 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8868 del self.op.beparams[name]
8870 nic_defs = cluster.SimpleFillNIC({})
8871 for nic in self.op.nics:
8872 for name in constants.NICS_PARAMETERS:
8873 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8876 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8877 for name in self.op.osparams.keys():
8878 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8879 del self.op.osparams[name]
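# Added note (descriptive): dropping parameters that are identical to the
# cluster defaults, as done above when identify_defaults is requested, means
# the new instance keeps following future changes to those cluster defaults
# instead of pinning the values that happen to be current now.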
8881 def _CalculateFileStorageDir(self):
8882 """Calculate final instance file storage dir.
8885 # file storage dir calculation/check
8886 self.instance_file_storage_dir = None
8887 if self.op.disk_template in constants.DTS_FILEBASED:
8888 # build the full file storage dir path
8891 if self.op.disk_template == constants.DT_SHARED_FILE:
8892 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8894 get_fsd_fn = self.cfg.GetFileStorageDir
8896 cfg_storagedir = get_fsd_fn()
8897 if not cfg_storagedir:
8898 raise errors.OpPrereqError("Cluster file storage dir not defined")
8899 joinargs.append(cfg_storagedir)
8901 if self.op.file_storage_dir is not None:
8902 joinargs.append(self.op.file_storage_dir)
8904 joinargs.append(self.op.instance_name)
8906 # pylint: disable=W0142
8907 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
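# Added note (illustrative, hypothetical paths): the resulting directory is
# <cluster (shared) file storage dir>[/<op.file_storage_dir>]/<instance name>,
# for example something like /srv/ganeti/file-storage/mydir/instance1.example.com.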
8909 def CheckPrereq(self):
8910 """Check prerequisites.
8913 self._CalculateFileStorageDir()
8915 if self.op.mode == constants.INSTANCE_IMPORT:
8916 export_info = self._ReadExportInfo()
8917 self._ReadExportParams(export_info)
8919 if (not self.cfg.GetVGName() and
8920 self.op.disk_template not in constants.DTS_NOT_LVM):
8921 raise errors.OpPrereqError("Cluster does not support lvm-based"
8922 " instances", errors.ECODE_STATE)
8924 if (self.op.hypervisor is None or
8925 self.op.hypervisor == constants.VALUE_AUTO):
8926 self.op.hypervisor = self.cfg.GetHypervisorType()
8928 cluster = self.cfg.GetClusterInfo()
8929 enabled_hvs = cluster.enabled_hypervisors
8930 if self.op.hypervisor not in enabled_hvs:
8931 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8932 " cluster (%s)" % (self.op.hypervisor,
8933 ",".join(enabled_hvs)),
8936 # Check tag validity
8937 for tag in self.op.tags:
8938 objects.TaggableObject.ValidateTag(tag)
8940 # check hypervisor parameter syntax (locally)
8941 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8942 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8944 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8945 hv_type.CheckParameterSyntax(filled_hvp)
8946 self.hv_full = filled_hvp
8947 # check that we don't specify global parameters on an instance
8948 _CheckGlobalHvParams(self.op.hvparams)
8950 # fill and remember the beparams dict
8951 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8952 for param, value in self.op.beparams.iteritems():
8953 if value == constants.VALUE_AUTO:
8954 self.op.beparams[param] = default_beparams[param]
8955 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8956 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8958 # build os parameters
8959 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8961 # now that hvp/bep are in final format, let's reset to defaults, if told to
8963 if self.op.identify_defaults:
8964 self._RevertToDefaults(cluster)
8968 for idx, nic in enumerate(self.op.nics):
8969 nic_mode_req = nic.get(constants.INIC_MODE, None)
8970 nic_mode = nic_mode_req
8971 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8972 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8974 # in routed mode, for the first nic, the default ip is 'auto'
8975 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8976 default_ip_mode = constants.VALUE_AUTO
8978 default_ip_mode = constants.VALUE_NONE
8980 # ip validity checks
8981 ip = nic.get(constants.INIC_IP, default_ip_mode)
8982 if ip is None or ip.lower() == constants.VALUE_NONE:
8984 elif ip.lower() == constants.VALUE_AUTO:
8985 if not self.op.name_check:
8986 raise errors.OpPrereqError("IP address set to auto but name checks"
8987 " have been skipped",
8989 nic_ip = self.hostname1.ip
8991 if not netutils.IPAddress.IsValid(ip):
8992 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8996 # TODO: check the ip address for uniqueness
8997 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8998 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9001 # MAC address verification
9002 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9003 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9004 mac = utils.NormalizeAndValidateMac(mac)
9007 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9008 except errors.ReservationError:
9009 raise errors.OpPrereqError("MAC address %s already in use"
9010 " in cluster" % mac,
9011 errors.ECODE_NOTUNIQUE)
9013 # Build nic parameters
9014 link = nic.get(constants.INIC_LINK, None)
9015 if link == constants.VALUE_AUTO:
9016 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9019 nicparams[constants.NIC_MODE] = nic_mode
9021 nicparams[constants.NIC_LINK] = link
9023 check_params = cluster.SimpleFillNIC(nicparams)
9024 objects.NIC.CheckParameterSyntax(check_params)
9025 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9027 # disk checks/pre-build
9028 default_vg = self.cfg.GetVGName()
9030 for disk in self.op.disks:
9031 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9032 if mode not in constants.DISK_ACCESS_SET:
9033 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9034 mode, errors.ECODE_INVAL)
9035 size = disk.get(constants.IDISK_SIZE, None)
9037 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9040 except (TypeError, ValueError):
9041 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9044 data_vg = disk.get(constants.IDISK_VG, default_vg)
9046 constants.IDISK_SIZE: size,
9047 constants.IDISK_MODE: mode,
9048 constants.IDISK_VG: data_vg,
9049 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9051 if constants.IDISK_ADOPT in disk:
9052 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9053 self.disks.append(new_disk)
9055 if self.op.mode == constants.INSTANCE_IMPORT:
9057 for idx in range(len(self.disks)):
9058 option = "disk%d_dump" % idx
9059 if export_info.has_option(constants.INISECT_INS, option):
9060 # FIXME: are the old os-es, disk sizes, etc. useful?
9061 export_name = export_info.get(constants.INISECT_INS, option)
9062 image = utils.PathJoin(self.op.src_path, export_name)
9063 disk_images.append(image)
9065 disk_images.append(False)
9067 self.src_images = disk_images
9069 old_name = export_info.get(constants.INISECT_INS, "name")
9070 if self.op.instance_name == old_name:
9071 for idx, nic in enumerate(self.nics):
9072 if nic.mac == constants.VALUE_AUTO:
9073 nic_mac_ini = "nic%d_mac" % idx
9074 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9076 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9078 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9079 if self.op.ip_check:
9080 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9081 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9082 (self.check_ip, self.op.instance_name),
9083 errors.ECODE_NOTUNIQUE)
9085 #### mac address generation
9086 # By generating the mac address here, both the allocator and the hooks get
9087 # the real, final mac address rather than the 'auto' or 'generate' value.
9088 # There is a race condition between the generation and the instance object
9089 # creation, which means that we know the mac is valid now, but we're not
9090 # sure it will be when we actually add the instance. If things go bad
9091 # adding the instance will abort because of a duplicate mac, and the
9092 # creation job will fail.
9093 for nic in self.nics:
9094 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9095 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9099 if self.op.iallocator is not None:
9100 self._RunAllocator()
9102 #### node related checks
9104 # check primary node
9105 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9106 assert self.pnode is not None, \
9107 "Cannot retrieve locked node %s" % self.op.pnode
9109 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9110 pnode.name, errors.ECODE_STATE)
9112 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9113 pnode.name, errors.ECODE_STATE)
9114 if not pnode.vm_capable:
9115 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9116 " '%s'" % pnode.name, errors.ECODE_STATE)
9118 self.secondaries = []
9120 # mirror node verification
9121 if self.op.disk_template in constants.DTS_INT_MIRROR:
9122 if self.op.snode == pnode.name:
9123 raise errors.OpPrereqError("The secondary node cannot be the"
9124 " primary node", errors.ECODE_INVAL)
9125 _CheckNodeOnline(self, self.op.snode)
9126 _CheckNodeNotDrained(self, self.op.snode)
9127 _CheckNodeVmCapable(self, self.op.snode)
9128 self.secondaries.append(self.op.snode)
9130 nodenames = [pnode.name] + self.secondaries
9132 if not self.adopt_disks:
9133 # Check lv size requirements, if not adopting
9134 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9135 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9137 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9138 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9139 disk[constants.IDISK_ADOPT])
9140 for disk in self.disks])
9141 if len(all_lvs) != len(self.disks):
9142 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9144 for lv_name in all_lvs:
9146 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9147 # to ReserveLV use the same syntax
9148 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9149 except errors.ReservationError:
9150 raise errors.OpPrereqError("LV named %s used by another instance" %
9151 lv_name, errors.ECODE_NOTUNIQUE)
9153 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9154 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9156 node_lvs = self.rpc.call_lv_list([pnode.name],
9157 vg_names.payload.keys())[pnode.name]
9158 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9159 node_lvs = node_lvs.payload
9161 delta = all_lvs.difference(node_lvs.keys())
9163 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9164 utils.CommaJoin(delta),
9166 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9168 raise errors.OpPrereqError("Online logical volumes found, cannot"
9169 " adopt: %s" % utils.CommaJoin(online_lvs),
9171 # update the disk sizes based on what was found
9172 for dsk in self.disks:
9173 dsk[constants.IDISK_SIZE] = \
9174 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9175 dsk[constants.IDISK_ADOPT])][0]))
9177 elif self.op.disk_template == constants.DT_BLOCK:
9178 # Normalize and de-duplicate device paths
9179 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9180 for disk in self.disks])
9181 if len(all_disks) != len(self.disks):
9182 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9184 baddisks = [d for d in all_disks
9185 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9187 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9188 " cannot be adopted" %
9189 (", ".join(baddisks),
9190 constants.ADOPTABLE_BLOCKDEV_ROOT),
9193 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9194 list(all_disks))[pnode.name]
9195 node_disks.Raise("Cannot get block device information from node %s" %
9197 node_disks = node_disks.payload
9198 delta = all_disks.difference(node_disks.keys())
9200 raise errors.OpPrereqError("Missing block device(s): %s" %
9201 utils.CommaJoin(delta),
9203 for dsk in self.disks:
9204 dsk[constants.IDISK_SIZE] = \
9205 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9207 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9209 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9210 # check OS parameters (remotely)
9211 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9213 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9215 # memory check on primary node
9217 _CheckNodeFreeMemory(self, self.pnode.name,
9218 "creating instance %s" % self.op.instance_name,
9219 self.be_full[constants.BE_MEMORY],
9222 self.dry_run_result = list(nodenames)
9224 def Exec(self, feedback_fn):
9225 """Create and add the instance to the cluster.
9228 instance = self.op.instance_name
9229 pnode_name = self.pnode.name
9231 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9232 self.owned_locks(locking.LEVEL_NODE)), \
9233 "Node locks differ from node resource locks"
9235 ht_kind = self.op.hypervisor
9236 if ht_kind in constants.HTS_REQ_PORT:
9237 network_port = self.cfg.AllocatePort()
9241 disks = _GenerateDiskTemplate(self,
9242 self.op.disk_template,
9243 instance, pnode_name,
9246 self.instance_file_storage_dir,
9247 self.op.file_driver,
9251 iobj = objects.Instance(name=instance, os=self.op.os_type,
9252 primary_node=pnode_name,
9253 nics=self.nics, disks=disks,
9254 disk_template=self.op.disk_template,
9255 admin_state=constants.ADMINST_DOWN,
9256 network_port=network_port,
9257 beparams=self.op.beparams,
9258 hvparams=self.op.hvparams,
9259 hypervisor=self.op.hypervisor,
9260 osparams=self.op.osparams,
9264 for tag in self.op.tags:
9267 if self.adopt_disks:
9268 if self.op.disk_template == constants.DT_PLAIN:
9269 # rename LVs to the newly-generated names; we need to construct
9270 # 'fake' LV disks with the old data, plus the new unique_id
9271 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9273 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9274 rename_to.append(t_dsk.logical_id)
9275 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9276 self.cfg.SetDiskID(t_dsk, pnode_name)
9277 result = self.rpc.call_blockdev_rename(pnode_name,
9278 zip(tmp_disks, rename_to))
9279 result.Raise("Failed to rename adopted LVs")
9281 feedback_fn("* creating instance disks...")
9283 _CreateDisks(self, iobj)
9284 except errors.OpExecError:
9285 self.LogWarning("Device creation failed, reverting...")
9287 _RemoveDisks(self, iobj)
9289 self.cfg.ReleaseDRBDMinors(instance)
9292 feedback_fn("adding instance %s to cluster config" % instance)
9294 self.cfg.AddInstance(iobj, self.proc.GetECId())
9296 # Declare that we don't want to remove the instance lock anymore, as we've
9297 # added the instance to the config
9298 del self.remove_locks[locking.LEVEL_INSTANCE]
9300 if self.op.mode == constants.INSTANCE_IMPORT:
9301 # Release unused nodes
9302 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9305 _ReleaseLocks(self, locking.LEVEL_NODE)
9308 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9309 feedback_fn("* wiping instance disks...")
9311 _WipeDisks(self, iobj)
9312 except errors.OpExecError, err:
9313 logging.exception("Wiping disks failed")
9314 self.LogWarning("Wiping instance disks failed (%s)", err)
9318 # Something is already wrong with the disks, don't do anything else
9320 elif self.op.wait_for_sync:
9321 disk_abort = not _WaitForSync(self, iobj)
9322 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9323 # make sure the disks are not degraded (still sync-ing is ok)
9324 feedback_fn("* checking mirrors status")
9325 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9330 _RemoveDisks(self, iobj)
9331 self.cfg.RemoveInstance(iobj.name)
9332 # Make sure the instance lock gets removed
9333 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9334 raise errors.OpExecError("There are some degraded disks for"
9337 # Release all node resource locks
9338 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9340 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9341 if self.op.mode == constants.INSTANCE_CREATE:
9342 if not self.op.no_install:
9343 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9344 not self.op.wait_for_sync)
9346 feedback_fn("* pausing disk sync to install instance OS")
9347 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9349 for idx, success in enumerate(result.payload):
9351 logging.warn("pause-sync of instance %s for disk %d failed",
9354 feedback_fn("* running the instance OS create scripts...")
9355 # FIXME: pass debug option from opcode to backend
9357 os_add_result = self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9358 self.op.debug_level)
9360 feedback_fn("* resuming disk sync")
9361 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9363 for idx, success in enumerate(result.payload):
9365 logging.warn("resume-sync of instance %s for disk %d failed",
9368 os_add_result.Raise("Could not add os for instance %s"
9369 " on node %s" % (instance, pnode_name))
9371 elif self.op.mode == constants.INSTANCE_IMPORT:
9372 feedback_fn("* running the instance OS import scripts...")
9376 for idx, image in enumerate(self.src_images):
9380 # FIXME: pass debug option from opcode to backend
9381 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9382 constants.IEIO_FILE, (image, ),
9383 constants.IEIO_SCRIPT,
9384 (iobj.disks[idx], idx),
9386 transfers.append(dt)
9389 masterd.instance.TransferInstanceData(self, feedback_fn,
9390 self.op.src_node, pnode_name,
9391 self.pnode.secondary_ip,
9393 if not compat.all(import_result):
9394 self.LogWarning("Some disks for instance %s on node %s were not"
9395 " imported successfully" % (instance, pnode_name))
9397 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9398 feedback_fn("* preparing remote import...")
9399 # The source cluster will stop the instance before attempting to make a
9400 # connection. In some cases stopping an instance can take a long time,
9401 # hence the shutdown timeout is added to the connection timeout.
9402 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9403 self.op.source_shutdown_timeout)
9404 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9406 assert iobj.primary_node == self.pnode.name
9408 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9409 self.source_x509_ca,
9410 self._cds, timeouts)
9411 if not compat.all(disk_results):
9412 # TODO: Should the instance still be started, even if some disks
9413 # failed to import (valid for local imports, too)?
9414 self.LogWarning("Some disks for instance %s on node %s were not"
9415 " imported successfully" % (instance, pnode_name))
9417 # Run rename script on newly imported instance
9418 assert iobj.name == instance
9419 feedback_fn("Running rename script for %s" % instance)
9420 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9421 self.source_instance_name,
9422 self.op.debug_level)
9424 self.LogWarning("Failed to run rename script for %s on node"
9425 " %s: %s" % (instance, pnode_name, result.fail_msg))
9428 # also checked in the prereq part
9429 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9432 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9435 iobj.admin_state = constants.ADMINST_UP
9436 self.cfg.Update(iobj, feedback_fn)
9437 logging.info("Starting instance %s on node %s", instance, pnode_name)
9438 feedback_fn("* starting instance...")
9439 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9441 result.Raise("Could not start instance")
9443 return list(iobj.all_nodes)
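# Illustrative sketch (not part of the original module): the create path above
# pauses DRBD synchronisation while the OS install scripts run, but only for
# internally mirrored templates and only when the caller did not ask to wait
# for the sync to finish. The helper and template set below are hypothetical
# names used purely for this example.
def _ExampleShouldPauseSync(disk_template, wait_for_sync,
                            mirrored_templates=frozenset(["drbd"])):
  """Returns True if disk sync should be paused during OS installation."""
  return disk_template in mirrored_templates and not wait_for_sync

# Example: _ExampleShouldPauseSync("drbd", False) evaluates to True, while
# _ExampleShouldPauseSync("plain", False) evaluates to False.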
9446 class LUInstanceConsole(NoHooksLU):
9447 """Connect to an instance's console.
9449 This is somewhat special in that it returns the command line that
9450 you need to run on the master node in order to connect to the console.
9456 def ExpandNames(self):
9457 self.share_locks = _ShareAll()
9458 self._ExpandAndLockInstance()
9460 def CheckPrereq(self):
9461 """Check prerequisites.
9463 This checks that the instance is in the cluster.
9466 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9467 assert self.instance is not None, \
9468 "Cannot retrieve locked instance %s" % self.op.instance_name
9469 _CheckNodeOnline(self, self.instance.primary_node)
9471 def Exec(self, feedback_fn):
9472 """Connect to the console of an instance
9475 instance = self.instance
9476 node = instance.primary_node
9478 node_insts = self.rpc.call_instance_list([node],
9479 [instance.hypervisor])[node]
9480 node_insts.Raise("Can't get node information from %s" % node)
9482 if instance.name not in node_insts.payload:
9483 if instance.admin_state == constants.ADMINST_UP:
9484 state = constants.INSTST_ERRORDOWN
9485 elif instance.admin_state == constants.ADMINST_DOWN:
9486 state = constants.INSTST_ADMINDOWN
9488 state = constants.INSTST_ADMINOFFLINE
9489 raise errors.OpExecError("Instance %s is not running (state %s)" %
9490 (instance.name, state))
9492 logging.debug("Connecting to console of %s on %s", instance.name, node)
9494 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
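# Illustrative sketch of the state classification above: when the hypervisor
# does not report the instance as running, the error shown to the user depends
# only on the configured admin state. Hypothetical helper with plain strings
# standing in for the real constants.
def _ExampleClassifyStoppedInstance(admin_state):
  """Maps an admin state to the state reported for a non-running instance."""
  if admin_state == "up":
    return "ERROR_down"        # should be running, but is not
  elif admin_state == "down":
    return "ADMIN_down"        # stopped on purpose
  else:
    return "ADMIN_offline"     # marked offline by the administrator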
9497 def _GetInstanceConsole(cluster, instance):
9498 """Returns console information for an instance.
9500 @type cluster: L{objects.Cluster}
9501 @type instance: L{objects.Instance}
9505 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9506 # beparams and hvparams are passed separately, to avoid editing the
9507 # instance and then saving the defaults in the instance itself.
9508 hvparams = cluster.FillHV(instance)
9509 beparams = cluster.FillBE(instance)
9510 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9512 assert console.instance == instance.name
9513 assert console.Validate()
9515 return console.ToDict()
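# Illustrative sketch of the parameter filling mentioned in the comment above:
# cluster-level defaults are merged with per-instance overrides into a new
# dictionary, so the instance object itself is never modified. Hypothetical
# helper and values, shown only as an example.
def _ExampleFillParams(cluster_defaults, instance_params):
  """Returns cluster defaults overridden by instance-specific values."""
  filled = dict(cluster_defaults)
  filled.update(instance_params)
  return filled

# Example: _ExampleFillParams({"serial_console": True}, {}) keeps the cluster
# default while the instance-level dict stays untouched.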
9518 class LUInstanceReplaceDisks(LogicalUnit):
9519 """Replace the disks of an instance.
9522 HPATH = "mirrors-replace"
9523 HTYPE = constants.HTYPE_INSTANCE
9526 def CheckArguments(self):
9527 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9530 def ExpandNames(self):
9531 self._ExpandAndLockInstance()
9533 assert locking.LEVEL_NODE not in self.needed_locks
9534 assert locking.LEVEL_NODE_RES not in self.needed_locks
9535 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9537 assert self.op.iallocator is None or self.op.remote_node is None, \
9538 "Conflicting options"
9540 if self.op.remote_node is not None:
9541 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9543 # Warning: do not remove the locking of the new secondary here
9544 # unless DRBD8.AddChildren is changed to work in parallel;
9545 # currently it doesn't since parallel invocations of
9546 # FindUnusedMinor will conflict
9547 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9548 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9550 self.needed_locks[locking.LEVEL_NODE] = []
9551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9553 if self.op.iallocator is not None:
9554 # iallocator will select a new node in the same group
9555 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9557 self.needed_locks[locking.LEVEL_NODE_RES] = []
9559 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9560 self.op.iallocator, self.op.remote_node,
9561 self.op.disks, False, self.op.early_release)
9563 self.tasklets = [self.replacer]
9565 def DeclareLocks(self, level):
9566 if level == locking.LEVEL_NODEGROUP:
9567 assert self.op.remote_node is None
9568 assert self.op.iallocator is not None
9569 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9571 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9572 # Lock all groups used by instance optimistically; this requires going
9573 # via the node before it's locked, requiring verification later on
9574 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9575 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9577 elif level == locking.LEVEL_NODE:
9578 if self.op.iallocator is not None:
9579 assert self.op.remote_node is None
9580 assert not self.needed_locks[locking.LEVEL_NODE]
9582 # Lock member nodes of all locked groups
9583 self.needed_locks[locking.LEVEL_NODE] = [node_name
9584 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9585 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9587 self._LockInstancesNodes()
9588 elif level == locking.LEVEL_NODE_RES:
9590 self.needed_locks[locking.LEVEL_NODE_RES] = \
9591 self.needed_locks[locking.LEVEL_NODE]
9593 def BuildHooksEnv(self):
9596 This runs on the master, the primary and all the secondaries.
9599 instance = self.replacer.instance
9601 "MODE": self.op.mode,
9602 "NEW_SECONDARY": self.op.remote_node,
9603 "OLD_SECONDARY": instance.secondary_nodes[0],
9605 env.update(_BuildInstanceHookEnvByObject(self, instance))
9608 def BuildHooksNodes(self):
9609 """Build hooks nodes.
9612 instance = self.replacer.instance
9614 self.cfg.GetMasterNode(),
9615 instance.primary_node,
9617 if self.op.remote_node is not None:
9618 nl.append(self.op.remote_node)
9621 def CheckPrereq(self):
9622 """Check prerequisites.
9625 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9626 self.op.iallocator is None)
9628 # Verify if node group locks are still correct
9629 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9631 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9633 return LogicalUnit.CheckPrereq(self)
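# Illustrative sketch of the optimistic locking pattern used above: node
# groups are locked before the instance's node list is final, so CheckPrereq
# re-reads the groups and aborts if they changed in the meantime. Hypothetical,
# self-contained helper raising a plain ValueError instead of OpPrereqError.
def _ExampleVerifyOwnedGroups(owned_groups, current_groups):
  """Fails when the optimistically acquired group locks are stale."""
  if set(owned_groups) != set(current_groups):
    raise ValueError("node groups changed since locks were acquired:"
                     " owned %s, current %s" %
                     (sorted(owned_groups), sorted(current_groups)))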
9636 class TLReplaceDisks(Tasklet):
9637 """Replaces disks for an instance.
9639 Note: Locking is not within the scope of this class.
9642 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9643 disks, delay_iallocator, early_release):
9644 """Initializes this class.
9647 Tasklet.__init__(self, lu)
9650 self.instance_name = instance_name
9652 self.iallocator_name = iallocator_name
9653 self.remote_node = remote_node
9655 self.delay_iallocator = delay_iallocator
9656 self.early_release = early_release
9659 self.instance = None
9660 self.new_node = None
9661 self.target_node = None
9662 self.other_node = None
9663 self.remote_node_info = None
9664 self.node_secondary_ip = None
9667 def CheckArguments(mode, remote_node, iallocator):
9668 """Helper function for users of this class.
9671 # check for valid parameter combination
9672 if mode == constants.REPLACE_DISK_CHG:
9673 if remote_node is None and iallocator is None:
9674 raise errors.OpPrereqError("When changing the secondary either an"
9675 " iallocator script must be used or the"
9676 " new node given", errors.ECODE_INVAL)
9678 if remote_node is not None and iallocator is not None:
9679 raise errors.OpPrereqError("Give either the iallocator or the new"
9680 " secondary, not both", errors.ECODE_INVAL)
9682 elif remote_node is not None or iallocator is not None:
9683 # Not replacing the secondary
9684 raise errors.OpPrereqError("The iallocator and new node options can"
9685 " only be used when changing the"
9686 " secondary node", errors.ECODE_INVAL)
9689 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9690 """Compute a new secondary node using an IAllocator.
9693 ial = IAllocator(lu.cfg, lu.rpc,
9694 mode=constants.IALLOCATOR_MODE_RELOC,
9696 relocate_from=list(relocate_from))
9698 ial.Run(iallocator_name)
9701 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9702 " %s" % (iallocator_name, ial.info),
9705 if len(ial.result) != ial.required_nodes:
9706 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9707 " of nodes (%s), required %s" %
9709 len(ial.result), ial.required_nodes),
9712 remote_node_name = ial.result[0]
9714 lu.LogInfo("Selected new secondary for instance '%s': %s",
9715 instance_name, remote_node_name)
9717 return remote_node_name
9719 def _FindFaultyDisks(self, node_name):
9720 """Wrapper for L{_FindFaultyInstanceDisks}.
9723 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9726 def _CheckDisksActivated(self, instance):
9727 """Checks if the instance disks are activated.
9729 @param instance: The instance whose disks to check
9730 @return: True if they are activated, False otherwise
9733 nodes = instance.all_nodes
9735 for idx, dev in enumerate(instance.disks):
9737 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9738 self.cfg.SetDiskID(dev, node)
9740 result = self.rpc.call_blockdev_find(node, dev)
9744 elif result.fail_msg or not result.payload:
9749 def CheckPrereq(self):
9750 """Check prerequisites.
9752 This checks that the instance is in the cluster.
9755 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9756 assert instance is not None, \
9757 "Cannot retrieve locked instance %s" % self.instance_name
9759 if instance.disk_template != constants.DT_DRBD8:
9760 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9761 " instances", errors.ECODE_INVAL)
9763 if len(instance.secondary_nodes) != 1:
9764 raise errors.OpPrereqError("The instance has a strange layout,"
9765 " expected one secondary but found %d" %
9766 len(instance.secondary_nodes),
9769 if not self.delay_iallocator:
9770 self._CheckPrereq2()
9772 def _CheckPrereq2(self):
9773 """Check prerequisites, second part.
9775 This function should always be part of CheckPrereq. It was separated and is
9776 now called from Exec because during node evacuation iallocator was only
9777 called with an unmodified cluster model, not taking planned changes into account.
9781 instance = self.instance
9782 secondary_node = instance.secondary_nodes[0]
9784 if self.iallocator_name is None:
9785 remote_node = self.remote_node
9787 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9788 instance.name, instance.secondary_nodes)
9790 if remote_node is None:
9791 self.remote_node_info = None
9793 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9794 "Remote node '%s' is not locked" % remote_node
9796 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9797 assert self.remote_node_info is not None, \
9798 "Cannot retrieve locked node %s" % remote_node
9800 if remote_node == self.instance.primary_node:
9801 raise errors.OpPrereqError("The specified node is the primary node of"
9802 " the instance", errors.ECODE_INVAL)
9804 if remote_node == secondary_node:
9805 raise errors.OpPrereqError("The specified node is already the"
9806 " secondary node of the instance",
9809 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9810 constants.REPLACE_DISK_CHG):
9811 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9814 if self.mode == constants.REPLACE_DISK_AUTO:
9815 if not self._CheckDisksActivated(instance):
9816 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9817 " first" % self.instance_name,
9819 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9820 faulty_secondary = self._FindFaultyDisks(secondary_node)
9822 if faulty_primary and faulty_secondary:
9823 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9824 " one node and can not be repaired"
9825 " automatically" % self.instance_name,
9829 self.disks = faulty_primary
9830 self.target_node = instance.primary_node
9831 self.other_node = secondary_node
9832 check_nodes = [self.target_node, self.other_node]
9833 elif faulty_secondary:
9834 self.disks = faulty_secondary
9835 self.target_node = secondary_node
9836 self.other_node = instance.primary_node
9837 check_nodes = [self.target_node, self.other_node]
9843 # Non-automatic modes
9844 if self.mode == constants.REPLACE_DISK_PRI:
9845 self.target_node = instance.primary_node
9846 self.other_node = secondary_node
9847 check_nodes = [self.target_node, self.other_node]
9849 elif self.mode == constants.REPLACE_DISK_SEC:
9850 self.target_node = secondary_node
9851 self.other_node = instance.primary_node
9852 check_nodes = [self.target_node, self.other_node]
9854 elif self.mode == constants.REPLACE_DISK_CHG:
9855 self.new_node = remote_node
9856 self.other_node = instance.primary_node
9857 self.target_node = secondary_node
9858 check_nodes = [self.new_node, self.other_node]
9860 _CheckNodeNotDrained(self.lu, remote_node)
9861 _CheckNodeVmCapable(self.lu, remote_node)
9863 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9864 assert old_node_info is not None
9865 if old_node_info.offline and not self.early_release:
9866 # doesn't make sense to delay the release
9867 self.early_release = True
9868 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9869 " early-release mode", secondary_node)
9872 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9875 # If not specified, all disks should be replaced
9877 self.disks = range(len(self.instance.disks))
9879 for node in check_nodes:
9880 _CheckNodeOnline(self.lu, node)
9882 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9885 if node_name is not None)
9887 # Release unneeded node and node resource locks
9888 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9889 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9891 # Release any owned node group
9892 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9893 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9895 # Check whether disks are valid
9896 for disk_idx in self.disks:
9897 instance.FindDisk(disk_idx)
9899 # Get secondary node IP addresses
9900 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9901 in self.cfg.GetMultiNodeInfo(touched_nodes))
9903 def Exec(self, feedback_fn):
9904 """Execute disk replacement.
9906 This dispatches the disk replacement to the appropriate handler.
9909 if self.delay_iallocator:
9910 self._CheckPrereq2()
9913 # Verify owned locks before starting operation
9914 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9915 assert set(owned_nodes) == set(self.node_secondary_ip), \
9916 ("Incorrect node locks, owning %s, expected %s" %
9917 (owned_nodes, self.node_secondary_ip.keys()))
9918 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9919 self.lu.owned_locks(locking.LEVEL_NODE_RES))
9921 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9922 assert list(owned_instances) == [self.instance_name], \
9923 "Instance '%s' not locked" % self.instance_name
9925 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9926 "Should not own any node group lock at this point"
9929 feedback_fn("No disks need replacement")
9932 feedback_fn("Replacing disk(s) %s for %s" %
9933 (utils.CommaJoin(self.disks), self.instance.name))
9935 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9937 # Activate the instance disks if we're replacing them on a down instance
9939 _StartInstanceDisks(self.lu, self.instance, True)
9942 # Should we replace the secondary node?
9943 if self.new_node is not None:
9944 fn = self._ExecDrbd8Secondary
9946 fn = self._ExecDrbd8DiskOnly
9948 result = fn(feedback_fn)
9950 # Deactivate the instance disks if we're replacing them on a
9953 _SafeShutdownInstanceDisks(self.lu, self.instance)
9955 assert not self.lu.owned_locks(locking.LEVEL_NODE)
9958 # Verify owned locks
9959 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9960 nodes = frozenset(self.node_secondary_ip)
9961 assert ((self.early_release and not owned_nodes) or
9962 (not self.early_release and not (set(owned_nodes) - nodes))), \
9963 ("Not owning the correct locks, early_release=%s, owned=%r,"
9964 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9968 def _CheckVolumeGroup(self, nodes):
9969 self.lu.LogInfo("Checking volume groups")
9971 vgname = self.cfg.GetVGName()
9973 # Make sure volume group exists on all involved nodes
9974 results = self.rpc.call_vg_list(nodes)
9976 raise errors.OpExecError("Can't list volume groups on the nodes")
9980 res.Raise("Error checking node %s" % node)
9981 if vgname not in res.payload:
9982 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9985 def _CheckDisksExistence(self, nodes):
9986 # Check disk existence
9987 for idx, dev in enumerate(self.instance.disks):
9988 if idx not in self.disks:
9992 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9993 self.cfg.SetDiskID(dev, node)
9995 result = self.rpc.call_blockdev_find(node, dev)
9997 msg = result.fail_msg
9998 if msg or not result.payload:
10000 msg = "disk not found"
10001 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10004 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10005 for idx, dev in enumerate(self.instance.disks):
10006 if idx not in self.disks:
10009 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10012 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10014 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10015 " replace disks for instance %s" %
10016 (node_name, self.instance.name))
10018 def _CreateNewStorage(self, node_name):
10019 """Create new storage on the primary or secondary node.
10021 This is only used for same-node replaces, not for changing the
10022 secondary node, hence we don't want to modify the existing disk.
10027 for idx, dev in enumerate(self.instance.disks):
10028 if idx not in self.disks:
10031 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10033 self.cfg.SetDiskID(dev, node_name)
10035 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10036 names = _GenerateUniqueNames(self.lu, lv_names)
10038 vg_data = dev.children[0].logical_id[0]
10039 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10040 logical_id=(vg_data, names[0]))
10041 vg_meta = dev.children[1].logical_id[0]
10042 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10043 logical_id=(vg_meta, names[1]))
10045 new_lvs = [lv_data, lv_meta]
10046 old_lvs = [child.Copy() for child in dev.children]
10047 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10049 # we pass force_create=True to force the LVM creation
10050 for new_lv in new_lvs:
10051 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10052 _GetInstanceInfoText(self.instance), False)
10056 def _CheckDevices(self, node_name, iv_names):
10057 for name, (dev, _, _) in iv_names.iteritems():
10058 self.cfg.SetDiskID(dev, node_name)
10060 result = self.rpc.call_blockdev_find(node_name, dev)
10062 msg = result.fail_msg
10063 if msg or not result.payload:
10065 msg = "disk not found"
10066 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10069 if result.payload.is_degraded:
10070 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10072 def _RemoveOldStorage(self, node_name, iv_names):
10073 for name, (_, old_lvs, _) in iv_names.iteritems():
10074 self.lu.LogInfo("Remove logical volumes for %s" % name)
10077 self.cfg.SetDiskID(lv, node_name)
10079 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10081 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10082 hint="remove unused LVs manually")
10084 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10085 """Replace a disk on the primary or secondary for DRBD 8.
10087 The algorithm for replace is quite complicated:
10089 1. for each disk to be replaced:
10091 1. create new LVs on the target node with unique names
10092 1. detach old LVs from the drbd device
10093 1. rename old LVs to name_replaced.<time_t>
10094 1. rename new LVs to old LVs
10095 1. attach the new LVs (with the old names now) to the drbd device
10097 1. wait for sync across all devices
10099 1. for each modified disk:
10101 1. remove old LVs (which have the name name_replaced.<time_t>)
10103 Failures are not very well handled.
10108 # Step: check device activation
10109 self.lu.LogStep(1, steps_total, "Check device existence")
10110 self._CheckDisksExistence([self.other_node, self.target_node])
10111 self._CheckVolumeGroup([self.target_node, self.other_node])
10113 # Step: check other node consistency
10114 self.lu.LogStep(2, steps_total, "Check peer consistency")
10115 self._CheckDisksConsistency(self.other_node,
10116 self.other_node == self.instance.primary_node,
10119 # Step: create new storage
10120 self.lu.LogStep(3, steps_total, "Allocate new storage")
10121 iv_names = self._CreateNewStorage(self.target_node)
10123 # Step: for each lv, detach+rename*2+attach
10124 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10125 for dev, old_lvs, new_lvs in iv_names.itervalues():
10126 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10128 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10130 result.Raise("Can't detach drbd from local storage on node"
10131 " %s for device %s" % (self.target_node, dev.iv_name))
10133 #cfg.Update(instance)
10135 # ok, we created the new LVs, so now we know we have the needed
10136 # storage; as such, we proceed on the target node to rename
10137 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10138 # using the assumption that logical_id == physical_id (which in
10139 # turn is the unique_id on that node)
10141 # FIXME(iustin): use a better name for the replaced LVs
10142 temp_suffix = int(time.time())
10143 ren_fn = lambda d, suff: (d.physical_id[0],
10144 d.physical_id[1] + "_replaced-%s" % suff)
10146 # Build the rename list based on what LVs exist on the node
10147 rename_old_to_new = []
10148 for to_ren in old_lvs:
10149 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10150 if not result.fail_msg and result.payload:
10152 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10154 self.lu.LogInfo("Renaming the old LVs on the target node")
10155 result = self.rpc.call_blockdev_rename(self.target_node,
10157 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10159 # Now we rename the new LVs to the old LVs
10160 self.lu.LogInfo("Renaming the new LVs on the target node")
10161 rename_new_to_old = [(new, old.physical_id)
10162 for old, new in zip(old_lvs, new_lvs)]
10163 result = self.rpc.call_blockdev_rename(self.target_node,
10165 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10167 # Intermediate steps of in-memory modifications
10168 for old, new in zip(old_lvs, new_lvs):
10169 new.logical_id = old.logical_id
10170 self.cfg.SetDiskID(new, self.target_node)
10172 # We need to modify old_lvs so that removal later removes the
10173 # right LVs, not the newly added ones; note that old_lvs is a copy
10175 for disk in old_lvs:
10176 disk.logical_id = ren_fn(disk, temp_suffix)
10177 self.cfg.SetDiskID(disk, self.target_node)
10179 # Now that the new lvs have the old name, we can add them to the device
10180 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10181 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10183 msg = result.fail_msg
10185 for new_lv in new_lvs:
10186 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10189 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10190 hint=("cleanup manually the unused logical"
10192 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10195 if self.early_release:
10196 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10198 self._RemoveOldStorage(self.target_node, iv_names)
10199 # TODO: Check if releasing locks early still makes sense
10200 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10202 # Release all resource locks except those used by the instance
10203 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10204 keep=self.node_secondary_ip.keys())
10206 # Release all node locks while waiting for sync
10207 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10209 # TODO: Can the instance lock be downgraded here? Take the optional disk
10210 # shutdown in the caller into consideration.
10213 # This can fail as the old devices are degraded and _WaitForSync
10214 # does a combined result over all disks, so we don't check its return value
10215 self.lu.LogStep(cstep, steps_total, "Sync devices")
10217 _WaitForSync(self.lu, self.instance)
10219 # Check all devices manually
10220 self._CheckDevices(self.instance.primary_node, iv_names)
10222 # Step: remove old storage
10223 if not self.early_release:
10224 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10226 self._RemoveOldStorage(self.target_node, iv_names)
10228 def _ExecDrbd8Secondary(self, feedback_fn):
10229 """Replace the secondary node for DRBD 8.
10231 The algorithm for replace is quite complicated:
10232 - for all disks of the instance:
10233 - create new LVs on the new node with same names
10234 - shutdown the drbd device on the old secondary
10235 - disconnect the drbd network on the primary
10236 - create the drbd device on the new secondary
10237 - network attach the drbd on the primary, using an artifice:
10238 the drbd code for Attach() will connect to the network if it
10239 finds a device which is connected to the good local disks but
10240 not network enabled
10241 - wait for sync across all devices
10242 - remove all disks from the old secondary
10244 Failures are not very well handled.
10249 pnode = self.instance.primary_node
10251 # Step: check device activation
10252 self.lu.LogStep(1, steps_total, "Check device existence")
10253 self._CheckDisksExistence([self.instance.primary_node])
10254 self._CheckVolumeGroup([self.instance.primary_node])
10256 # Step: check other node consistency
10257 self.lu.LogStep(2, steps_total, "Check peer consistency")
10258 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10260 # Step: create new storage
10261 self.lu.LogStep(3, steps_total, "Allocate new storage")
10262 for idx, dev in enumerate(self.instance.disks):
10263 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10264 (self.new_node, idx))
10265 # we pass force_create=True to force LVM creation
10266 for new_lv in dev.children:
10267 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10268 _GetInstanceInfoText(self.instance), False)
10270 # Step 4: drbd minors and drbd setup changes
10271 # after this, we must manually remove the drbd minors on both the
10272 # error and the success paths
10273 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10274 minors = self.cfg.AllocateDRBDMinor([self.new_node
10275 for dev in self.instance.disks],
10276 self.instance.name)
10277 logging.debug("Allocated minors %r", minors)
10280 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10281 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10282 (self.new_node, idx))
10283 # create new devices on new_node; note that we create two IDs:
10284 # one without port, so the drbd will be activated without
10285 # networking information on the new node at this stage, and one
10286 # with network, for the later activation in step 4
10287 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10288 if self.instance.primary_node == o_node1:
10291 assert self.instance.primary_node == o_node2, "Three-node instance?"
10294 new_alone_id = (self.instance.primary_node, self.new_node, None,
10295 p_minor, new_minor, o_secret)
10296 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10297 p_minor, new_minor, o_secret)
10299 iv_names[idx] = (dev, dev.children, new_net_id)
10300 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10302 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10303 logical_id=new_alone_id,
10304 children=dev.children,
10307 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10308 _GetInstanceInfoText(self.instance), False)
10309 except errors.GenericError:
10310 self.cfg.ReleaseDRBDMinors(self.instance.name)
10313 # We have new devices, shutdown the drbd on the old secondary
10314 for idx, dev in enumerate(self.instance.disks):
10315 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10316 self.cfg.SetDiskID(dev, self.target_node)
10317 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10319 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10320 "node: %s" % (idx, msg),
10321 hint=("Please cleanup this device manually as"
10322 " soon as possible"))
10324 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10325 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10326 self.instance.disks)[pnode]
10328 msg = result.fail_msg
10330 # detaches didn't succeed (unlikely)
10331 self.cfg.ReleaseDRBDMinors(self.instance.name)
10332 raise errors.OpExecError("Can't detach the disks from the network on"
10333 " old node: %s" % (msg,))
10335 # if we managed to detach at least one, we update all the disks of
10336 # the instance to point to the new secondary
10337 self.lu.LogInfo("Updating instance configuration")
10338 for dev, _, new_logical_id in iv_names.itervalues():
10339 dev.logical_id = new_logical_id
10340 self.cfg.SetDiskID(dev, self.instance.primary_node)
10342 self.cfg.Update(self.instance, feedback_fn)
10344 # Release all node locks (the configuration has been updated)
10345 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10347 # and now perform the drbd attach
10348 self.lu.LogInfo("Attaching primary drbds to new secondary"
10349 " (standalone => connected)")
10350 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10352 self.node_secondary_ip,
10353 self.instance.disks,
10354 self.instance.name,
10356 for to_node, to_result in result.items():
10357 msg = to_result.fail_msg
10359 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10361 hint=("please do a gnt-instance info to see the"
10362 " status of disks"))
10364 if self.early_release:
10365 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10367 self._RemoveOldStorage(self.target_node, iv_names)
10368 # TODO: Check if releasing locks early still makes sense
10369 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10371 # Release all resource locks except those used by the instance
10372 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10373 keep=self.node_secondary_ip.keys())
10375 # TODO: Can the instance lock be downgraded here? Take the optional disk
10376 # shutdown in the caller into consideration.
10379 # This can fail as the old devices are degraded and _WaitForSync
10380 # does a combined result over all disks, so we don't check its return value
10381 self.lu.LogStep(cstep, steps_total, "Sync devices")
10383 _WaitForSync(self.lu, self.instance)
10385 # Check all devices manually
10386 self._CheckDevices(self.instance.primary_node, iv_names)
10388 # Step: remove old storage
10389 if not self.early_release:
10390 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10391 self._RemoveOldStorage(self.target_node, iv_names)
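# Illustrative sketch of the logical_id rewrite done in _ExecDrbd8Secondary
# above: the replacement DRBD device is first described without a network port
# (so it comes up standalone on the new secondary) and only later with the
# full, networked id. Hypothetical helper; a DRBD8 logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret).
def _ExampleNewDrbdIds(old_logical_id, primary_node, new_node, new_minor):
  """Returns (standalone_id, networked_id) for a replaced secondary."""
  (node1, node2, port, minor1, minor2, secret) = old_logical_id
  if primary_node == node1:
    p_minor = minor1
  else:
    p_minor = minor2
  standalone_id = (primary_node, new_node, None, p_minor, new_minor, secret)
  networked_id = (primary_node, new_node, port, p_minor, new_minor, secret)
  return (standalone_id, networked_id)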
10394 class LURepairNodeStorage(NoHooksLU):
10395 """Repairs the volume group on a node.
10400 def CheckArguments(self):
10401 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10403 storage_type = self.op.storage_type
10405 if (constants.SO_FIX_CONSISTENCY not in
10406 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10407 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10408 " repaired" % storage_type,
10409 errors.ECODE_INVAL)
10411 def ExpandNames(self):
10412 self.needed_locks = {
10413 locking.LEVEL_NODE: [self.op.node_name],
10416 def _CheckFaultyDisks(self, instance, node_name):
10417 """Ensure faulty disks abort the opcode or at least warn."""
10419 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10421 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10422 " node '%s'" % (instance.name, node_name),
10423 errors.ECODE_STATE)
10424 except errors.OpPrereqError, err:
10425 if self.op.ignore_consistency:
10426 self.proc.LogWarning(str(err.args[0]))
10430 def CheckPrereq(self):
10431 """Check prerequisites.
10434 # Check whether any instance on this node has faulty disks
10435 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10436 if inst.admin_state != constants.ADMINST_UP:
10438 check_nodes = set(inst.all_nodes)
10439 check_nodes.discard(self.op.node_name)
10440 for inst_node_name in check_nodes:
10441 self._CheckFaultyDisks(inst, inst_node_name)
10443 def Exec(self, feedback_fn):
10444 feedback_fn("Repairing storage unit '%s' on %s ..." %
10445 (self.op.name, self.op.node_name))
10447 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10448 result = self.rpc.call_storage_execute(self.op.node_name,
10449 self.op.storage_type, st_args,
10451 constants.SO_FIX_CONSISTENCY)
10452 result.Raise("Failed to repair storage unit '%s' on %s" %
10453 (self.op.name, self.op.node_name))
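# Illustrative sketch of the capability check done in CheckArguments above: a
# storage unit can only be repaired when its type advertises the consistency
# fixing operation. The mapping and strings below are hypothetical
# placeholders for the real constants.
_EXAMPLE_VALID_STORAGE_OPERATIONS = {
  "lvm-vg": frozenset(["fix-consistency"]),
  "file": frozenset([]),
}

def _ExampleCanRepair(storage_type):
  """Returns True if the given storage type supports consistency repair."""
  ops = _EXAMPLE_VALID_STORAGE_OPERATIONS.get(storage_type, frozenset())
  return "fix-consistency" in ops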
10456 class LUNodeEvacuate(NoHooksLU):
10457 """Evacuates instances off a list of nodes.
10462 def CheckArguments(self):
10463 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10465 def ExpandNames(self):
10466 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10468 if self.op.remote_node is not None:
10469 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10470 assert self.op.remote_node
10472 if self.op.remote_node == self.op.node_name:
10473 raise errors.OpPrereqError("Can not use evacuated node as a new"
10474 " secondary node", errors.ECODE_INVAL)
10476 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10477 raise errors.OpPrereqError("Without the use of an iallocator only"
10478 " secondary instances can be evacuated",
10479 errors.ECODE_INVAL)
10482 self.share_locks = _ShareAll()
10483 self.needed_locks = {
10484 locking.LEVEL_INSTANCE: [],
10485 locking.LEVEL_NODEGROUP: [],
10486 locking.LEVEL_NODE: [],
10489 if self.op.remote_node is None:
10490 # Iallocator will choose any node(s) in the same group
10491 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10493 group_nodes = frozenset([self.op.remote_node])
10495 # Determine nodes to be locked
10496 self.lock_nodes = set([self.op.node_name]) | group_nodes
10498 def _DetermineInstances(self):
10499 """Builds list of instances to operate on.
10502 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10504 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10505 # Primary instances only
10506 inst_fn = _GetNodePrimaryInstances
10507 assert self.op.remote_node is None, \
10508 "Evacuating primary instances requires iallocator"
10509 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10510 # Secondary instances only
10511 inst_fn = _GetNodeSecondaryInstances
10514 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10515 inst_fn = _GetNodeInstances
10517 return inst_fn(self.cfg, self.op.node_name)
10519 def DeclareLocks(self, level):
10520 if level == locking.LEVEL_INSTANCE:
10521 # Lock instances optimistically, needs verification once node and group
10522 # locks have been acquired
10523 self.needed_locks[locking.LEVEL_INSTANCE] = \
10524 set(i.name for i in self._DetermineInstances())
10526 elif level == locking.LEVEL_NODEGROUP:
10527 # Lock node groups optimistically, needs verification once nodes have been acquired
10529 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10530 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10532 elif level == locking.LEVEL_NODE:
10533 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10535 def CheckPrereq(self):
10537 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10538 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10539 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10541 assert owned_nodes == self.lock_nodes
10543 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10544 if owned_groups != wanted_groups:
10545 raise errors.OpExecError("Node groups changed since locks were acquired,"
10546 " current groups are '%s', used to be '%s'" %
10547 (utils.CommaJoin(wanted_groups),
10548 utils.CommaJoin(owned_groups)))
10550 # Determine affected instances
10551 self.instances = self._DetermineInstances()
10552 self.instance_names = [i.name for i in self.instances]
10554 if set(self.instance_names) != owned_instances:
10555 raise errors.OpExecError("Instances on node '%s' changed since locks"
10556 " were acquired, current instances are '%s',"
10557 " used to be '%s'" %
10558 (self.op.node_name,
10559 utils.CommaJoin(self.instance_names),
10560 utils.CommaJoin(owned_instances)))
10562 if self.instance_names:
10563 self.LogInfo("Evacuating instances from node '%s': %s",
10565 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10567 self.LogInfo("No instances to evacuate from node '%s'",
10570 if self.op.remote_node is not None:
10571 for i in self.instances:
10572 if i.primary_node == self.op.remote_node:
10573 raise errors.OpPrereqError("Node %s is the primary node of"
10574 " instance %s, cannot use it as"
10576 (self.op.remote_node, i.name),
10577 errors.ECODE_INVAL)
10579 def Exec(self, feedback_fn):
10580 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10582 if not self.instance_names:
10583 # No instances to evacuate
10586 elif self.op.iallocator is not None:
10587 # TODO: Implement relocation to other group
10588 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10589 evac_mode=self.op.mode,
10590 instances=list(self.instance_names))
10592 ial.Run(self.op.iallocator)
10594 if not ial.success:
10595 raise errors.OpPrereqError("Can't compute node evacuation using"
10596 " iallocator '%s': %s" %
10597 (self.op.iallocator, ial.info),
10598 errors.ECODE_NORES)
10600 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10602 elif self.op.remote_node is not None:
10603 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10605 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10606 remote_node=self.op.remote_node,
10608 mode=constants.REPLACE_DISK_CHG,
10609 early_release=self.op.early_release)]
10610 for instance_name in self.instance_names
10614 raise errors.ProgrammerError("No iallocator or remote node")
10616 return ResultWithJobs(jobs)
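# Illustrative sketch of the job list built above when an explicit remote node
# is given: secondary instances are evacuated via one single-opcode
# replace-disks job per instance, which the master can then run in parallel.
# The plain dictionaries below are hypothetical stand-ins for the real opcodes.
def _ExampleBuildEvacJobs(instance_names, remote_node, early_release):
  """Returns a list of jobs, each containing one replace-disks operation."""
  return [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS",
            "instance_name": name,
            "remote_node": remote_node,
            "mode": "replace_new_secondary",
            "early_release": early_release}]
          for name in instance_names]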
10619 def _SetOpEarlyRelease(early_release, op):
10620 """Sets C{early_release} flag on opcodes if available.
10624 op.early_release = early_release
10625 except AttributeError:
10626 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10631 def _NodeEvacDest(use_nodes, group, nodes):
10632 """Returns group or nodes depending on caller's choice.
10636 return utils.CommaJoin(nodes)
10641 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10642 """Unpacks the result of change-group and node-evacuate iallocator requests.
10644 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10645 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10647 @type lu: L{LogicalUnit}
10648 @param lu: Logical unit instance
10649 @type alloc_result: tuple/list
10650 @param alloc_result: Result from iallocator
10651 @type early_release: bool
10652 @param early_release: Whether to release locks early if possible
10653 @type use_nodes: bool
10654 @param use_nodes: Whether to display node names instead of groups
10657 (moved, failed, jobs) = alloc_result
10660 lu.LogWarning("Unable to evacuate instances %s",
10661 utils.CommaJoin("%s (%s)" % (name, reason)
10662 for (name, reason) in failed))
10665 lu.LogInfo("Instances to be moved: %s",
10666 utils.CommaJoin("%s (to %s)" %
10667 (name, _NodeEvacDest(use_nodes, group, nodes))
10668 for (name, group, nodes) in moved))
10670 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10671 map(opcodes.OpCode.LoadOpCode, ops))
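# Illustrative sketch of the result unpacking performed by _LoadNodeEvacResult
# above: the iallocator answer is a (moved, failed, jobs) triple, where failed
# entries become warnings, moved entries become informational messages and the
# job definitions are handed back for submission. Hypothetical helper.
def _ExampleSummarizeEvacResult(alloc_result):
  """Splits a node-evacuate iallocator result into messages and jobs."""
  (moved, failed, jobs) = alloc_result
  warnings = ["%s (%s)" % (name, reason) for (name, reason) in failed]
  moves = ["%s (to %s)" % (name, group) for (name, group, _) in moved]
  return (warnings, moves, jobs)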
10675 class LUInstanceGrowDisk(LogicalUnit):
10676 """Grow a disk of an instance.
10679 HPATH = "disk-grow"
10680 HTYPE = constants.HTYPE_INSTANCE
10683 def ExpandNames(self):
10684 self._ExpandAndLockInstance()
10685 self.needed_locks[locking.LEVEL_NODE] = []
10686 self.needed_locks[locking.LEVEL_NODE_RES] = []
10687 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10689 def DeclareLocks(self, level):
10690 if level == locking.LEVEL_NODE:
10691 self._LockInstancesNodes()
10692 elif level == locking.LEVEL_NODE_RES:
10694 self.needed_locks[locking.LEVEL_NODE_RES] = \
10695 self.needed_locks[locking.LEVEL_NODE][:]
10697 def BuildHooksEnv(self):
10698 """Build hooks env.
10700 This runs on the master, the primary and all the secondaries.
10704 "DISK": self.op.disk,
10705 "AMOUNT": self.op.amount,
10707 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10710 def BuildHooksNodes(self):
10711 """Build hooks nodes.
10714 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10717 def CheckPrereq(self):
10718 """Check prerequisites.
10720 This checks that the instance is in the cluster.
10723 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10724 assert instance is not None, \
10725 "Cannot retrieve locked instance %s" % self.op.instance_name
10726 nodenames = list(instance.all_nodes)
10727 for node in nodenames:
10728 _CheckNodeOnline(self, node)
10730 self.instance = instance
10732 if instance.disk_template not in constants.DTS_GROWABLE:
10733 raise errors.OpPrereqError("Instance's disk layout does not support"
10734 " growing", errors.ECODE_INVAL)
10736 self.disk = instance.FindDisk(self.op.disk)
10738 if instance.disk_template not in (constants.DT_FILE,
10739 constants.DT_SHARED_FILE):
10740 # TODO: check the free disk space for file, when that feature will be implemented
10742 _CheckNodesFreeDiskPerVG(self, nodenames,
10743 self.disk.ComputeGrowth(self.op.amount))
10745 def Exec(self, feedback_fn):
10746 """Execute disk grow.
10749 instance = self.instance
10752 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10753 assert (self.owned_locks(locking.LEVEL_NODE) ==
10754 self.owned_locks(locking.LEVEL_NODE_RES))
10756 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10758 raise errors.OpExecError("Cannot activate block device to grow")
10760 feedback_fn("Growing disk %s of instance '%s' by %s" %
10761 (self.op.disk, instance.name,
10762 utils.FormatUnit(self.op.amount, "h")))
10764 # First run all grow ops in dry-run mode
10765 for node in instance.all_nodes:
10766 self.cfg.SetDiskID(disk, node)
10767 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10768 result.Raise("Grow request failed to node %s" % node)
10770 # We know that (as far as we can test) operations across different
10771 # nodes will succeed, time to run it for real
10772 for node in instance.all_nodes:
10773 self.cfg.SetDiskID(disk, node)
10774 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10775 result.Raise("Grow request failed to node %s" % node)
10777 # TODO: Rewrite code to work properly
10778 # DRBD goes into sync mode for a short amount of time after executing the
10779 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10780 # calling "resize" in sync mode fails. Sleeping for a short amount of
10781 # time is a work-around.
10784 disk.RecordGrow(self.op.amount)
10785 self.cfg.Update(instance, feedback_fn)
10787 # Changes have been recorded, release node lock
10788 _ReleaseLocks(self, locking.LEVEL_NODE)
10790 # Downgrade lock while waiting for sync
10791 self.glm.downgrade(locking.LEVEL_INSTANCE)
10793 if self.op.wait_for_sync:
10794 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10796 self.proc.LogWarning("Disk sync-ing has not returned a good"
10797 " status; please check the instance")
10798 if instance.admin_state != constants.ADMINST_UP:
10799 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10800 elif instance.admin_state != constants.ADMINST_UP:
10801 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10802 " not supposed to be running because no wait for"
10803 " sync mode was requested")
10805 assert self.owned_locks(locking.LEVEL_NODE_RES)
10806 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
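# Illustrative sketch of the two-pass grow used in Exec above: every node
# first receives a dry-run grow request, and only when all of them succeed is
# the real grow submitted, so a mid-way failure cannot leave the mirrored
# disks with different sizes. The callable below is a hypothetical stand-in
# for the blockdev grow RPC.
def _ExampleGrowOnAllNodes(nodes, amount, grow_fn):
  """Grows a disk on all nodes, dry-run first, then for real."""
  for node in nodes:
    grow_fn(node, amount, True)    # dry-run pass
  for node in nodes:
    grow_fn(node, amount, False)   # the actual resize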
10809 class LUInstanceQueryData(NoHooksLU):
10810 """Query runtime instance data.
10815 def ExpandNames(self):
10816 self.needed_locks = {}
10818 # Use locking if requested or when non-static information is wanted
10819 if not (self.op.static or self.op.use_locking):
10820 self.LogWarning("Non-static data requested, locks need to be acquired")
10821 self.op.use_locking = True
10823 if self.op.instances or not self.op.use_locking:
10824 # Expand instance names right here
10825 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10827 # Will use acquired locks
10828 self.wanted_names = None
10830 if self.op.use_locking:
10831 self.share_locks = _ShareAll()
10833 if self.wanted_names is None:
10834 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10836 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10838 self.needed_locks[locking.LEVEL_NODE] = []
10839 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10841 def DeclareLocks(self, level):
10842 if self.op.use_locking and level == locking.LEVEL_NODE:
10843 self._LockInstancesNodes()
10845 def CheckPrereq(self):
10846 """Check prerequisites.
10848 This only checks the optional instance list against the existing names.
10851 if self.wanted_names is None:
10852 assert self.op.use_locking, "Locking was not used"
10853 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10855 self.wanted_instances = \
10856 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10858 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10859 """Returns the status of a block device
10862 if self.op.static or not node:
10865 self.cfg.SetDiskID(dev, node)
10867 result = self.rpc.call_blockdev_find(node, dev)
10871 result.Raise("Can't compute disk status for %s" % instance_name)
10873 status = result.payload
10877 return (status.dev_path, status.major, status.minor,
10878 status.sync_percent, status.estimated_time,
10879 status.is_degraded, status.ldisk_status)
10881 def _ComputeDiskStatus(self, instance, snode, dev):
10882 """Compute block device status.
10885 if dev.dev_type in constants.LDS_DRBD:
10886 # we change the snode then (otherwise we use the one passed in)
10887 if dev.logical_id[0] == instance.primary_node:
10888 snode = dev.logical_id[1]
10890 snode = dev.logical_id[0]
10892 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10893 instance.name, dev)
10894 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10897 dev_children = map(compat.partial(self._ComputeDiskStatus,
10904 "iv_name": dev.iv_name,
10905 "dev_type": dev.dev_type,
10906 "logical_id": dev.logical_id,
10907 "physical_id": dev.physical_id,
10908 "pstatus": dev_pstatus,
10909 "sstatus": dev_sstatus,
10910 "children": dev_children,
10915 def Exec(self, feedback_fn):
10916 """Gather and return data"""
10919 cluster = self.cfg.GetClusterInfo()
10921 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10922 for i in self.wanted_instances)
10923 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10924 if self.op.static or pnode.offline:
10925 remote_state = None
10927 self.LogWarning("Primary node %s is marked offline, returning static"
10928 " information only for instance %s" %
10929 (pnode.name, instance.name))
10931 remote_info = self.rpc.call_instance_info(instance.primary_node,
10933 instance.hypervisor)
10934 remote_info.Raise("Error checking node %s" % instance.primary_node)
10935 remote_info = remote_info.payload
10936 if remote_info and "state" in remote_info:
10937 remote_state = "up"
10939 if instance.admin_state == constants.ADMINST_UP:
10940 remote_state = "down"
10942 remote_state = instance.admin_state
10944 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10947 result[instance.name] = {
10948 "name": instance.name,
10949 "config_state": instance.admin_state,
10950 "run_state": remote_state,
10951 "pnode": instance.primary_node,
10952 "snodes": instance.secondary_nodes,
10954 # this happens to be the same format used for hooks
10955 "nics": _NICListToTuple(self, instance.nics),
10956 "disk_template": instance.disk_template,
10958 "hypervisor": instance.hypervisor,
10959 "network_port": instance.network_port,
10960 "hv_instance": instance.hvparams,
10961 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10962 "be_instance": instance.beparams,
10963 "be_actual": cluster.FillBE(instance),
10964 "os_instance": instance.osparams,
10965 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10966 "serial_no": instance.serial_no,
10967 "mtime": instance.mtime,
10968 "ctime": instance.ctime,
10969 "uuid": instance.uuid,
10975 class LUInstanceSetParams(LogicalUnit):
10976 """Modifies an instances's parameters.
10979 HPATH = "instance-modify"
10980 HTYPE = constants.HTYPE_INSTANCE
10983 def CheckArguments(self):
10984 if not (self.op.nics or self.op.disks or self.op.disk_template or
10985 self.op.hvparams or self.op.beparams or self.op.os_name or
10986 self.op.online_inst or self.op.offline_inst):
10987 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10989 if self.op.hvparams:
10990 _CheckGlobalHvParams(self.op.hvparams)
10994 for disk_op, disk_dict in self.op.disks:
10995 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10996 if disk_op == constants.DDM_REMOVE:
10997 disk_addremove += 1
10999 elif disk_op == constants.DDM_ADD:
11000 disk_addremove += 1
11002 if not isinstance(disk_op, int):
11003 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11004 if not isinstance(disk_dict, dict):
11005 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11006 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11008 if disk_op == constants.DDM_ADD:
11009 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11010 if mode not in constants.DISK_ACCESS_SET:
11011 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11012 errors.ECODE_INVAL)
11013 size = disk_dict.get(constants.IDISK_SIZE, None)
11015 raise errors.OpPrereqError("Required disk parameter size missing",
11016 errors.ECODE_INVAL)
11019 except (TypeError, ValueError), err:
11020 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11021 str(err), errors.ECODE_INVAL)
11022 disk_dict[constants.IDISK_SIZE] = size
11024 # modification of disk
11025 if constants.IDISK_SIZE in disk_dict:
11026 raise errors.OpPrereqError("Disk size change not possible, use"
11027 " grow-disk", errors.ECODE_INVAL)
11029 if disk_addremove > 1:
11030 raise errors.OpPrereqError("Only one disk add or remove operation"
11031 " supported at a time", errors.ECODE_INVAL)
11033 if self.op.disks and self.op.disk_template is not None:
11034 raise errors.OpPrereqError("Disk template conversion and other disk"
11035 " changes not supported at the same time",
11036 errors.ECODE_INVAL)
11038 if (self.op.disk_template and
11039 self.op.disk_template in constants.DTS_INT_MIRROR and
11040 self.op.remote_node is None):
11041 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11042 " one requires specifying a secondary node",
11043 errors.ECODE_INVAL)
11047 for nic_op, nic_dict in self.op.nics:
11048 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11049 if nic_op == constants.DDM_REMOVE:
11052 elif nic_op == constants.DDM_ADD:
11055 if not isinstance(nic_op, int):
11056 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11057 if not isinstance(nic_dict, dict):
11058 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11059 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11061 # nic_dict should be a dict
11062 nic_ip = nic_dict.get(constants.INIC_IP, None)
11063 if nic_ip is not None:
11064 if nic_ip.lower() == constants.VALUE_NONE:
11065 nic_dict[constants.INIC_IP] = None
11067 if not netutils.IPAddress.IsValid(nic_ip):
11068 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11069 errors.ECODE_INVAL)
11071 nic_bridge = nic_dict.get("bridge", None)
11072 nic_link = nic_dict.get(constants.INIC_LINK, None)
11073 if nic_bridge and nic_link:
11074 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11075 " at the same time", errors.ECODE_INVAL)
11076 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11077 nic_dict["bridge"] = None
11078 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11079 nic_dict[constants.INIC_LINK] = None
11081 if nic_op == constants.DDM_ADD:
11082 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11083 if nic_mac is None:
11084 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11086 if constants.INIC_MAC in nic_dict:
11087 nic_mac = nic_dict[constants.INIC_MAC]
11088 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11089 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11091 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11092 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11093 " modifying an existing nic",
11094 errors.ECODE_INVAL)
11096 if nic_addremove > 1:
11097 raise errors.OpPrereqError("Only one NIC add or remove operation"
11098 " supported at a time", errors.ECODE_INVAL)
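# Note: self.op.disks and self.op.nics are lists of (op, params) pairs, where
# op is constants.DDM_ADD, constants.DDM_REMOVE or the integer index of an
# existing device. A hypothetical example (values made up for illustration):
#   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]
#   nics=[(0, {constants.INIC_IP: "none"})]
# At most one add/remove per device type is accepted in a single operation,
# and size changes must go through grow-disk instead.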
11100 def ExpandNames(self):
11101 self._ExpandAndLockInstance()
11102 # Can't even acquire node locks in shared mode as upcoming changes in
11103 # Ganeti 2.6 will start to modify the node object on disk conversion
11104 self.needed_locks[locking.LEVEL_NODE] = []
11105 self.needed_locks[locking.LEVEL_NODE_RES] = []
11106 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11108 def DeclareLocks(self, level):
11109 if level == locking.LEVEL_NODE:
11110 self._LockInstancesNodes()
11111 if self.op.disk_template and self.op.remote_node:
11112 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11113 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11114 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11116 self.needed_locks[locking.LEVEL_NODE_RES] = \
11117 self.needed_locks[locking.LEVEL_NODE][:]
11119 def BuildHooksEnv(self):
11120 """Build hooks env.
11122 This runs on the master, primary and secondaries.
11126 if constants.BE_MEMORY in self.be_new:
11127 args["memory"] = self.be_new[constants.BE_MEMORY]
11128 if constants.BE_VCPUS in self.be_new:
11129 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11130 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11131 # information at all.
11134 nic_override = dict(self.op.nics)
11135 for idx, nic in enumerate(self.instance.nics):
11136 if idx in nic_override:
11137 this_nic_override = nic_override[idx]
11139 this_nic_override = {}
11140 if constants.INIC_IP in this_nic_override:
11141 ip = this_nic_override[constants.INIC_IP]
11144 if constants.INIC_MAC in this_nic_override:
11145 mac = this_nic_override[constants.INIC_MAC]
11148 if idx in self.nic_pnew:
11149 nicparams = self.nic_pnew[idx]
11151 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11152 mode = nicparams[constants.NIC_MODE]
11153 link = nicparams[constants.NIC_LINK]
11154 args["nics"].append((ip, mac, mode, link))
11155 if constants.DDM_ADD in nic_override:
11156 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11157 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11158 nicparams = self.nic_pnew[constants.DDM_ADD]
11159 mode = nicparams[constants.NIC_MODE]
11160 link = nicparams[constants.NIC_LINK]
11161 args["nics"].append((ip, mac, mode, link))
11162 elif constants.DDM_REMOVE in nic_override:
11163 del args["nics"][-1]
11165 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11166 if self.op.disk_template:
11167 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11171 def BuildHooksNodes(self):
11172 """Build hooks nodes.
11175 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11178 def CheckPrereq(self):
11179 """Check prerequisites.
11181 This only checks the instance list against the existing names.
11184 # checking the new params on the primary/secondary nodes
11186 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11187 cluster = self.cluster = self.cfg.GetClusterInfo()
11188 assert self.instance is not None, \
11189 "Cannot retrieve locked instance %s" % self.op.instance_name
11190 pnode = instance.primary_node
11191 nodelist = list(instance.all_nodes)
11194 if self.op.os_name and not self.op.force:
11195 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11196 self.op.force_variant)
11197 instance_os = self.op.os_name
11199 instance_os = instance.os
11201 if self.op.disk_template:
11202 if instance.disk_template == self.op.disk_template:
11203 raise errors.OpPrereqError("Instance already has disk template %s" %
11204 instance.disk_template, errors.ECODE_INVAL)
11206 if (instance.disk_template,
11207 self.op.disk_template) not in self._DISK_CONVERSIONS:
11208 raise errors.OpPrereqError("Unsupported disk template conversion from"
11209 " %s to %s" % (instance.disk_template,
11210 self.op.disk_template),
11211 errors.ECODE_INVAL)
11212 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11213 msg="cannot change disk template")
11214 if self.op.disk_template in constants.DTS_INT_MIRROR:
11215 if self.op.remote_node == pnode:
11216 raise errors.OpPrereqError("Given new secondary node %s is the same"
11217 " as the primary node of the instance" %
11218 self.op.remote_node, errors.ECODE_STATE)
11219 _CheckNodeOnline(self, self.op.remote_node)
11220 _CheckNodeNotDrained(self, self.op.remote_node)
11221 # FIXME: here we assume that the old instance type is DT_PLAIN
11222 assert instance.disk_template == constants.DT_PLAIN
11223 disks = [{constants.IDISK_SIZE: d.size,
11224 constants.IDISK_VG: d.logical_id[0]}
11225 for d in instance.disks]
11226 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11227 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11229 # hvparams processing
11230 if self.op.hvparams:
11231 hv_type = instance.hypervisor
11232 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11233 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11234 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11237 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11238 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11239 self.hv_proposed = self.hv_new = hv_new # the new actual values
11240 self.hv_inst = i_hvdict # the new dict (without defaults)
11242 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11244 self.hv_new = self.hv_inst = {}
11246 # beparams processing
11247 if self.op.beparams:
11248 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11250 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11251 be_new = cluster.SimpleFillBE(i_bedict)
11252 self.be_proposed = self.be_new = be_new # the new actual values
11253 self.be_inst = i_bedict # the new dict (without defaults)
11255 self.be_new = self.be_inst = {}
11256 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11257 be_old = cluster.FillBE(instance)
11259 # CPU param validation -- checking every time a parameter is
11260 # changed to cover all cases where either CPU mask or vcpus have
11261 # changed
11262 if (constants.BE_VCPUS in self.be_proposed and
11263 constants.HV_CPU_MASK in self.hv_proposed):
11264 cpu_list = \
11265 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11266 # Verify mask is consistent with number of vCPUs. Can skip this
11267 # test if only 1 entry in the CPU mask, which means same mask
11268 # is applied to all vCPUs.
11269 if (len(cpu_list) > 1 and
11270 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11271 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11272 " CPU mask [%s]" %
11273 (self.be_proposed[constants.BE_VCPUS],
11274 self.hv_proposed[constants.HV_CPU_MASK]),
11275 errors.ECODE_INVAL)
11277 # Only perform this test if a new CPU mask is given
11278 if constants.HV_CPU_MASK in self.hv_new:
11279 # Calculate the largest CPU number requested
11280 max_requested_cpu = max(map(max, cpu_list))
11281 # Check that all of the instance's nodes have enough physical CPUs to
11282 # satisfy the requested CPU mask
11283 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11284 max_requested_cpu + 1, instance.hypervisor)
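# Example (hypothetical values, assuming the colon-separated multi-mask
# syntax): with BE_VCPUS=2, a mask of "1-3:4-6" passes the per-vCPU count
# check above (two entries for two vCPUs) while "1:2:3" is rejected; a
# single-entry mask such as "0-3" is always accepted since it applies to all
# vCPUs. The physical-CPU check then requires every instance node to have at
# least max_requested_cpu + 1 CPUs (7 in the "1-3:4-6" case).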
11286 # osparams processing
11287 if self.op.osparams:
11288 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11289 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11290 self.os_inst = i_osdict # the new dict (without defaults)
11296 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11297 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11298 mem_check_list = [pnode]
11299 if be_new[constants.BE_AUTO_BALANCE]:
11300 # either we changed auto_balance to yes or it was from before
11301 mem_check_list.extend(instance.secondary_nodes)
11302 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11303 instance.hypervisor)
11304 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11305 instance.hypervisor)
11306 pninfo = nodeinfo[pnode]
11307 msg = pninfo.fail_msg
11308 if msg:
11309 # Assume the primary node is unreachable and go ahead
11310 self.warn.append("Can't get info from primary node %s: %s" %
11311 (pnode, msg))
11312 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11313 self.warn.append("Node data from primary node %s doesn't contain"
11314 " free memory information" % pnode)
11315 elif instance_info.fail_msg:
11316 self.warn.append("Can't get instance runtime information: %s" %
11317 instance_info.fail_msg)
11318 else:
11319 if instance_info.payload:
11320 current_mem = int(instance_info.payload["memory"])
11321 else:
11322 # Assume instance not running
11323 # (there is a slight race condition here, but it's not very probable,
11324 # and we have no other way to check)
11325 current_mem = 0
11326 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11327 pninfo.payload["memory_free"])
11328 if miss_mem > 0:
11329 raise errors.OpPrereqError("This change will prevent the instance"
11330 " from starting, due to %d MB of memory"
11331 " missing on its primary node" % miss_mem,
11332 errors.ECODE_NORES)
11334 if be_new[constants.BE_AUTO_BALANCE]:
11335 for node, nres in nodeinfo.items():
11336 if node not in instance.secondary_nodes:
11338 nres.Raise("Can't get info from secondary node %s" % node,
11339 prereq=True, ecode=errors.ECODE_STATE)
11340 if not isinstance(nres.payload.get("memory_free", None), int):
11341 raise errors.OpPrereqError("Secondary node %s didn't return free"
11342 " memory information" % node,
11343 errors.ECODE_STATE)
11344 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11345 raise errors.OpPrereqError("This change will prevent the instance"
11346 " from failover to its secondary node"
11347 " %s, due to not enough memory" % node,
11348 errors.ECODE_STATE)
11352 self.nic_pinst = {}
11353 for nic_op, nic_dict in self.op.nics:
11354 if nic_op == constants.DDM_REMOVE:
11355 if not instance.nics:
11356 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11357 errors.ECODE_INVAL)
11358 continue
11359 if nic_op != constants.DDM_ADD:
11361 if not instance.nics:
11362 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11363 " no NICs" % nic_op,
11364 errors.ECODE_INVAL)
11365 if nic_op < 0 or nic_op >= len(instance.nics):
11366 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11367 " are 0 to %d" %
11368 (nic_op, len(instance.nics) - 1),
11369 errors.ECODE_INVAL)
11370 old_nic_params = instance.nics[nic_op].nicparams
11371 old_nic_ip = instance.nics[nic_op].ip
11372 else:
11373 old_nic_params = {}
11374 old_nic_ip = None
11376 update_params_dict = dict([(key, nic_dict[key])
11377 for key in constants.NICS_PARAMETERS
11378 if key in nic_dict])
11380 if "bridge" in nic_dict:
11381 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11383 new_nic_params = _GetUpdatedParams(old_nic_params,
11384 update_params_dict)
11385 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11386 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11387 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11388 self.nic_pinst[nic_op] = new_nic_params
11389 self.nic_pnew[nic_op] = new_filled_nic_params
11390 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11392 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11393 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11394 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11395 if msg:
11396 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11397 if self.op.force:
11398 self.warn.append(msg)
11399 else:
11400 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11401 if new_nic_mode == constants.NIC_MODE_ROUTED:
11402 if constants.INIC_IP in nic_dict:
11403 nic_ip = nic_dict[constants.INIC_IP]
11404 else:
11405 nic_ip = old_nic_ip
11406 if nic_ip is None:
11407 raise errors.OpPrereqError("Cannot set the nic ip to None"
11408 " on a routed nic", errors.ECODE_INVAL)
11409 if constants.INIC_MAC in nic_dict:
11410 nic_mac = nic_dict[constants.INIC_MAC]
11411 if nic_mac is None:
11412 raise errors.OpPrereqError("Cannot set the nic mac to None",
11413 errors.ECODE_INVAL)
11414 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11415 # otherwise generate the mac
11416 nic_dict[constants.INIC_MAC] = \
11417 self.cfg.GenerateMAC(self.proc.GetECId())
11418 else:
11419 # or validate/reserve the current one
11420 try:
11421 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11422 except errors.ReservationError:
11423 raise errors.OpPrereqError("MAC address %s already in use"
11424 " in cluster" % nic_mac,
11425 errors.ECODE_NOTUNIQUE)
11428 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11429 raise errors.OpPrereqError("Disk operations not supported for"
11430 " diskless instances",
11431 errors.ECODE_INVAL)
11432 for disk_op, _ in self.op.disks:
11433 if disk_op == constants.DDM_REMOVE:
11434 if len(instance.disks) == 1:
11435 raise errors.OpPrereqError("Cannot remove the last disk of"
11436 " an instance", errors.ECODE_INVAL)
11437 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11438 msg="cannot remove disks")
11440 if (disk_op == constants.DDM_ADD and
11441 len(instance.disks) >= constants.MAX_DISKS):
11442 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11443 " add more" % constants.MAX_DISKS,
11444 errors.ECODE_STATE)
11445 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11447 if disk_op < 0 or disk_op >= len(instance.disks):
11448 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11449 " are 0 to %d" %
11450 (disk_op, len(instance.disks)),
11451 errors.ECODE_INVAL)
11453 # disabling the instance
11454 if self.op.offline_inst:
11455 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11456 msg="cannot change instance state to offline")
11458 # enabling the instance
11459 if self.op.online_inst:
11460 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11461 msg="cannot make instance go online")
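# Note: the two checks above only gate administrative state transitions:
# taking an instance offline requires it to already be administratively down,
# and bringing it back online requires it to currently be offline; neither
# check starts or stops the instance itself.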
11463 def _ConvertPlainToDrbd(self, feedback_fn):
11464 """Converts an instance from plain to drbd.
11467 feedback_fn("Converting template to drbd")
11468 instance = self.instance
11469 pnode = instance.primary_node
11470 snode = self.op.remote_node
11472 assert instance.disk_template == constants.DT_PLAIN
11474 # create a fake disk info for _GenerateDiskTemplate
11475 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11476 constants.IDISK_VG: d.logical_id[0]}
11477 for d in instance.disks]
11478 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11479 instance.name, pnode, [snode],
11480 disk_info, None, None, 0, feedback_fn)
11481 info = _GetInstanceInfoText(instance)
11482 feedback_fn("Creating additional volumes...")
11483 # first, create the missing data and meta devices
11484 for disk in new_disks:
11485 # unfortunately this is... not too nice
11486 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11487 info, True)
11488 for child in disk.children:
11489 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11490 # at this stage, all new LVs have been created, we can rename the
11491 # old ones
11492 feedback_fn("Renaming original volumes...")
11493 rename_list = [(o, n.children[0].logical_id)
11494 for (o, n) in zip(instance.disks, new_disks)]
11495 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11496 result.Raise("Failed to rename original LVs")
11498 feedback_fn("Initializing DRBD devices...")
11499 # all child devices are in place, we can now create the DRBD devices
11500 for disk in new_disks:
11501 for node in [pnode, snode]:
11502 f_create = node == pnode
11503 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11505 # at this point, the instance has been modified
11506 instance.disk_template = constants.DT_DRBD8
11507 instance.disks = new_disks
11508 self.cfg.Update(instance, feedback_fn)
11510 # Release node locks while waiting for sync
11511 _ReleaseLocks(self, locking.LEVEL_NODE)
11513 # disks are created, waiting for sync
11514 disk_abort = not _WaitForSync(self, instance,
11515 oneshot=not self.op.wait_for_sync)
11516 if disk_abort:
11517 raise errors.OpExecError("There are some degraded disks for"
11518 " this instance, please cleanup manually")
11520 # Node resource locks will be released by caller
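# Note, summarizing the conversion above: new DRBD disk objects are generated
# from the existing LVs' sizes, the missing data/meta volumes are created on
# the primary and the new secondary, the original LVs are renamed to become
# the DRBD data children, the DRBD devices are assembled on both nodes, and
# the LU then waits for (or, without wait_for_sync, only samples) the resync.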
11522 def _ConvertDrbdToPlain(self, feedback_fn):
11523 """Converts an instance from drbd to plain.
11526 instance = self.instance
11528 assert len(instance.secondary_nodes) == 1
11529 assert instance.disk_template == constants.DT_DRBD8
11531 pnode = instance.primary_node
11532 snode = instance.secondary_nodes[0]
11533 feedback_fn("Converting template to plain")
11535 old_disks = instance.disks
11536 new_disks = [d.children[0] for d in old_disks]
11538 # copy over size and mode
11539 for parent, child in zip(old_disks, new_disks):
11540 child.size = parent.size
11541 child.mode = parent.mode
11543 # update instance structure
11544 instance.disks = new_disks
11545 instance.disk_template = constants.DT_PLAIN
11546 self.cfg.Update(instance, feedback_fn)
11548 # Release locks in case removing disks takes a while
11549 _ReleaseLocks(self, locking.LEVEL_NODE)
11551 feedback_fn("Removing volumes on the secondary node...")
11552 for disk in old_disks:
11553 self.cfg.SetDiskID(disk, snode)
11554 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11555 if msg:
11556 self.LogWarning("Could not remove block device %s on node %s,"
11557 " continuing anyway: %s", disk.iv_name, snode, msg)
11559 feedback_fn("Removing unneeded volumes on the primary node...")
11560 for idx, disk in enumerate(old_disks):
11561 meta = disk.children[1]
11562 self.cfg.SetDiskID(meta, pnode)
11563 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11564 if msg:
11565 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11566 " continuing anyway: %s", idx, pnode, msg)
11568 # Node resource locks will be released by caller
11570 def Exec(self, feedback_fn):
11571 """Modifies an instance.
11573 All parameters take effect only at the next restart of the instance.
11576 # Process here the warnings from CheckPrereq, as we don't have a
11577 # feedback_fn there.
11578 for warn in self.warn:
11579 feedback_fn("WARNING: %s" % warn)
11581 assert ((self.op.disk_template is None) ^
11582 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11583 "Not owning any node resource locks"
11585 result = []
11586 instance = self.instance
11588 for disk_op, disk_dict in self.op.disks:
11589 if disk_op == constants.DDM_REMOVE:
11590 # remove the last disk
11591 device = instance.disks.pop()
11592 device_idx = len(instance.disks)
11593 for node, disk in device.ComputeNodeTree(instance.primary_node):
11594 self.cfg.SetDiskID(disk, node)
11595 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11596 if msg:
11597 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11598 " continuing anyway", device_idx, node, msg)
11599 result.append(("disk/%d" % device_idx, "remove"))
11600 elif disk_op == constants.DDM_ADD:
11602 if instance.disk_template in (constants.DT_FILE,
11603 constants.DT_SHARED_FILE):
11604 file_driver, file_path = instance.disks[0].logical_id
11605 file_path = os.path.dirname(file_path)
11606 else:
11607 file_driver = file_path = None
11608 disk_idx_base = len(instance.disks)
11609 new_disk = _GenerateDiskTemplate(self,
11610 instance.disk_template,
11611 instance.name, instance.primary_node,
11612 instance.secondary_nodes,
11613 [disk_dict],
11614 file_path,
11615 file_driver,
11616 disk_idx_base, feedback_fn)[0]
11617 instance.disks.append(new_disk)
11618 info = _GetInstanceInfoText(instance)
11620 logging.info("Creating volume %s for instance %s",
11621 new_disk.iv_name, instance.name)
11622 # Note: this needs to be kept in sync with _CreateDisks
11624 for node in instance.all_nodes:
11625 f_create = node == instance.primary_node
11626 try:
11627 _CreateBlockDev(self, node, instance, new_disk,
11628 f_create, info, f_create)
11629 except errors.OpExecError, err:
11630 self.LogWarning("Failed to create volume %s (%s) on"
11631 " node %s: %s",
11632 new_disk.iv_name, new_disk, node, err)
11633 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11634 (new_disk.size, new_disk.mode)))
11635 else:
11636 # change a given disk
11637 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11638 result.append(("disk.mode/%d" % disk_op,
11639 disk_dict[constants.IDISK_MODE]))
11641 if self.op.disk_template:
11643 check_nodes = set(instance.all_nodes)
11644 if self.op.remote_node:
11645 check_nodes.add(self.op.remote_node)
11646 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11647 owned = self.owned_locks(level)
11648 assert not (check_nodes - owned), \
11649 ("Not owning the correct locks, owning %r, expected at least %r" %
11650 (owned, check_nodes))
11652 r_shut = _ShutdownInstanceDisks(self, instance)
11653 if not r_shut:
11654 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11655 " proceed with disk template conversion")
11656 mode = (instance.disk_template, self.op.disk_template)
11657 try:
11658 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11659 except:
11660 self.cfg.ReleaseDRBDMinors(instance.name)
11661 raise
11662 result.append(("disk_template", self.op.disk_template))
11664 assert instance.disk_template == self.op.disk_template, \
11665 ("Expected disk template '%s', found '%s'" %
11666 (self.op.disk_template, instance.disk_template))
11668 # Release node and resource locks if there are any (they might already have
11669 # been released during disk conversion)
11670 _ReleaseLocks(self, locking.LEVEL_NODE)
11671 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11674 for nic_op, nic_dict in self.op.nics:
11675 if nic_op == constants.DDM_REMOVE:
11676 # remove the last nic
11677 del instance.nics[-1]
11678 result.append(("nic.%d" % len(instance.nics), "remove"))
11679 elif nic_op == constants.DDM_ADD:
11680 # mac and bridge should be set by now
11681 mac = nic_dict[constants.INIC_MAC]
11682 ip = nic_dict.get(constants.INIC_IP, None)
11683 nicparams = self.nic_pinst[constants.DDM_ADD]
11684 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11685 instance.nics.append(new_nic)
11686 result.append(("nic.%d" % (len(instance.nics) - 1),
11687 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11688 (new_nic.mac, new_nic.ip,
11689 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11690 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11691 )))
11692 else:
11693 for key in (constants.INIC_MAC, constants.INIC_IP):
11694 if key in nic_dict:
11695 setattr(instance.nics[nic_op], key, nic_dict[key])
11696 if nic_op in self.nic_pinst:
11697 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11698 for key, val in nic_dict.iteritems():
11699 result.append(("nic.%s/%d" % (key, nic_op), val))
11702 if self.op.hvparams:
11703 instance.hvparams = self.hv_inst
11704 for key, val in self.op.hvparams.iteritems():
11705 result.append(("hv/%s" % key, val))
11708 if self.op.beparams:
11709 instance.beparams = self.be_inst
11710 for key, val in self.op.beparams.iteritems():
11711 result.append(("be/%s" % key, val))
11714 if self.op.os_name:
11715 instance.os = self.op.os_name
11718 if self.op.osparams:
11719 instance.osparams = self.os_inst
11720 for key, val in self.op.osparams.iteritems():
11721 result.append(("os/%s" % key, val))
11723 # online/offline instance
11724 if self.op.online_inst:
11725 self.cfg.MarkInstanceDown(instance.name)
11726 result.append(("admin_state", constants.ADMINST_DOWN))
11727 if self.op.offline_inst:
11728 self.cfg.MarkInstanceOffline(instance.name)
11729 result.append(("admin_state", constants.ADMINST_OFFLINE))
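# Note: "online" here only moves an offline instance back to the
# administratively-down state (ADMINST_DOWN) in the configuration; it does not
# start it. Likewise "offline" merely records ADMINST_OFFLINE for an already
# stopped instance.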
11731 self.cfg.Update(instance, feedback_fn)
11733 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11734 self.owned_locks(locking.LEVEL_NODE)), \
11735 "All node locks should have been released by now"
11737 return result
11739 _DISK_CONVERSIONS = {
11740 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11741 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11742 }
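# Note: Exec() dispatches disk template conversions through this table, keyed
# by (current_template, requested_template); CheckPrereq rejects any pair not
# listed here, so only plain<->drbd8 conversions are supported.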
11745 class LUInstanceChangeGroup(LogicalUnit):
11746 HPATH = "instance-change-group"
11747 HTYPE = constants.HTYPE_INSTANCE
11750 def ExpandNames(self):
11751 self.share_locks = _ShareAll()
11752 self.needed_locks = {
11753 locking.LEVEL_NODEGROUP: [],
11754 locking.LEVEL_NODE: [],
11755 }
11757 self._ExpandAndLockInstance()
11759 if self.op.target_groups:
11760 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11761 self.op.target_groups)
11762 else:
11763 self.req_target_uuids = None
11765 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11767 def DeclareLocks(self, level):
11768 if level == locking.LEVEL_NODEGROUP:
11769 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11771 if self.req_target_uuids:
11772 lock_groups = set(self.req_target_uuids)
11774 # Lock all groups used by instance optimistically; this requires going
11775 # via the node before it's locked, requiring verification later on
11776 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11777 lock_groups.update(instance_groups)
11778 else:
11779 # No target groups, need to lock all of them
11780 lock_groups = locking.ALL_SET
11782 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11784 elif level == locking.LEVEL_NODE:
11785 if self.req_target_uuids:
11786 # Lock all nodes used by instances
11787 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11788 self._LockInstancesNodes()
11790 # Lock all nodes in all potential target groups
11791 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11792 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11793 member_nodes = [node_name
11794 for group in lock_groups
11795 for node_name in self.cfg.GetNodeGroup(group).members]
11796 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11797 else:
11798 # Lock all nodes as all groups are potential targets
11799 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11801 def CheckPrereq(self):
11802 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11803 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11804 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11806 assert (self.req_target_uuids is None or
11807 owned_groups.issuperset(self.req_target_uuids))
11808 assert owned_instances == set([self.op.instance_name])
11810 # Get instance information
11811 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11813 # Check if node groups for locked instance are still correct
11814 assert owned_nodes.issuperset(self.instance.all_nodes), \
11815 ("Instance %s's nodes changed while we kept the lock" %
11816 self.op.instance_name)
11818 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11819 owned_groups)
11821 if self.req_target_uuids:
11822 # User requested specific target groups
11823 self.target_uuids = self.req_target_uuids
11825 # All groups except those used by the instance are potential targets
11826 self.target_uuids = owned_groups - inst_groups
11828 conflicting_groups = self.target_uuids & inst_groups
11829 if conflicting_groups:
11830 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11831 " used by the instance '%s'" %
11832 (utils.CommaJoin(conflicting_groups),
11833 self.op.instance_name),
11834 errors.ECODE_INVAL)
11836 if not self.target_uuids:
11837 raise errors.OpPrereqError("There are no possible target groups",
11838 errors.ECODE_INVAL)
11840 def BuildHooksEnv(self):
11841 """Build hooks env.
11844 assert self.target_uuids
11846 env = {
11847 "TARGET_GROUPS": " ".join(self.target_uuids),
11848 }
11850 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11852 return env
11854 def BuildHooksNodes(self):
11855 """Build hooks nodes.
11858 mn = self.cfg.GetMasterNode()
11859 return ([mn], [mn])
11861 def Exec(self, feedback_fn):
11862 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11864 assert instances == [self.op.instance_name], "Instance not locked"
11866 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11867 instances=instances, target_groups=list(self.target_uuids))
11869 ial.Run(self.op.iallocator)
11871 if not ial.success:
11872 raise errors.OpPrereqError("Can't compute solution for changing group of"
11873 " instance '%s' using iallocator '%s': %s" %
11874 (self.op.instance_name, self.op.iallocator,
11875 ial.info),
11876 errors.ECODE_NORES)
11878 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11880 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11881 " instance '%s'", len(jobs), self.op.instance_name)
11883 return ResultWithJobs(jobs)
11886 class LUBackupQuery(NoHooksLU):
11887 """Query the exports list
11892 def ExpandNames(self):
11893 self.needed_locks = {}
11894 self.share_locks[locking.LEVEL_NODE] = 1
11895 if not self.op.nodes:
11896 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11898 self.needed_locks[locking.LEVEL_NODE] = \
11899 _GetWantedNodes(self, self.op.nodes)
11901 def Exec(self, feedback_fn):
11902 """Compute the list of all the exported system images.
11905 @return: a dictionary with the structure node->(export-list)
11906 where export-list is a list of the instances exported on
11907 that node.
11909 """
11910 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11911 rpcresult = self.rpc.call_export_list(self.nodes)
11912 result = {}
11913 for node in rpcresult:
11914 if rpcresult[node].fail_msg:
11915 result[node] = False
11916 else:
11917 result[node] = rpcresult[node].payload
11919 return result
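# Example of the returned mapping (hypothetical node and instance names): one
# entry per queried node, either False when the node could not be contacted or
# that node's export list, e.g.
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}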
11922 class LUBackupPrepare(NoHooksLU):
11923 """Prepares an instance for an export and returns useful information.
11928 def ExpandNames(self):
11929 self._ExpandAndLockInstance()
11931 def CheckPrereq(self):
11932 """Check prerequisites.
11935 instance_name = self.op.instance_name
11937 self.instance = self.cfg.GetInstanceInfo(instance_name)
11938 assert self.instance is not None, \
11939 "Cannot retrieve locked instance %s" % self.op.instance_name
11940 _CheckNodeOnline(self, self.instance.primary_node)
11942 self._cds = _GetClusterDomainSecret()
11944 def Exec(self, feedback_fn):
11945 """Prepares an instance for an export.
11948 instance = self.instance
11950 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11951 salt = utils.GenerateSecret(8)
11953 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11954 result = self.rpc.call_x509_cert_create(instance.primary_node,
11955 constants.RIE_CERT_VALIDITY)
11956 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11958 (name, cert_pem) = result.payload
11960 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11964 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11965 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11967 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11973 class LUBackupExport(LogicalUnit):
11974 """Export an instance to an image in the cluster.
11977 HPATH = "instance-export"
11978 HTYPE = constants.HTYPE_INSTANCE
11981 def CheckArguments(self):
11982 """Check the arguments.
11985 self.x509_key_name = self.op.x509_key_name
11986 self.dest_x509_ca_pem = self.op.destination_x509_ca
11988 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11989 if not self.x509_key_name:
11990 raise errors.OpPrereqError("Missing X509 key name for encryption",
11991 errors.ECODE_INVAL)
11993 if not self.dest_x509_ca_pem:
11994 raise errors.OpPrereqError("Missing destination X509 CA",
11995 errors.ECODE_INVAL)
11997 def ExpandNames(self):
11998 self._ExpandAndLockInstance()
12000 # Lock all nodes for local exports
12001 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12002 # FIXME: lock only instance primary and destination node
12004 # Sad but true, for now we have to lock all nodes, as we don't know where
12005 # the previous export might be, and in this LU we search for it and
12006 # remove it from its current node. In the future we could fix this by:
12007 # - making a tasklet to search (share-lock all), then create the
12008 # new one, then one to remove, after
12009 # - removing the removal operation altogether
12010 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12012 def DeclareLocks(self, level):
12013 """Last minute lock declaration."""
12014 # All nodes are locked anyway, so nothing to do here.
12016 def BuildHooksEnv(self):
12017 """Build hooks env.
12019 This will run on the master, primary node and target node.
12023 "EXPORT_MODE": self.op.mode,
12024 "EXPORT_NODE": self.op.target_node,
12025 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12026 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12027 # TODO: Generic function for boolean env variables
12028 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12031 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12035 def BuildHooksNodes(self):
12036 """Build hooks nodes.
12039 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12041 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12042 nl.append(self.op.target_node)
12046 def CheckPrereq(self):
12047 """Check prerequisites.
12049 This checks that the instance and node names are valid.
12052 instance_name = self.op.instance_name
12054 self.instance = self.cfg.GetInstanceInfo(instance_name)
12055 assert self.instance is not None, \
12056 "Cannot retrieve locked instance %s" % self.op.instance_name
12057 _CheckNodeOnline(self, self.instance.primary_node)
12059 if (self.op.remove_instance and
12060 self.instance.admin_state == constants.ADMINST_UP and
12061 not self.op.shutdown):
12062 raise errors.OpPrereqError("Can not remove instance without shutting it"
12065 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12066 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12067 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12068 assert self.dst_node is not None
12070 _CheckNodeOnline(self, self.dst_node.name)
12071 _CheckNodeNotDrained(self, self.dst_node.name)
12074 self.dest_disk_info = None
12075 self.dest_x509_ca = None
12077 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12078 self.dst_node = None
12080 if len(self.op.target_node) != len(self.instance.disks):
12081 raise errors.OpPrereqError(("Received destination information for %s"
12082 " disks, but instance %s has %s disks") %
12083 (len(self.op.target_node), instance_name,
12084 len(self.instance.disks)),
12085 errors.ECODE_INVAL)
12087 cds = _GetClusterDomainSecret()
12089 # Check X509 key name
12091 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12092 except (TypeError, ValueError), err:
12093 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12095 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12096 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12097 errors.ECODE_INVAL)
12099 # Load and verify CA
12101 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12102 except OpenSSL.crypto.Error, err:
12103 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12104 (err, ), errors.ECODE_INVAL)
12106 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12107 if errcode is not None:
12108 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12109 (msg, ), errors.ECODE_INVAL)
12111 self.dest_x509_ca = cert
12113 # Verify target information
12115 for idx, disk_data in enumerate(self.op.target_node):
12117 (host, port, magic) = \
12118 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12119 except errors.GenericError, err:
12120 raise errors.OpPrereqError("Target info for disk %s: %s" %
12121 (idx, err), errors.ECODE_INVAL)
12123 disk_info.append((host, port, magic))
12125 assert len(disk_info) == len(self.op.target_node)
12126 self.dest_disk_info = disk_info
12129 raise errors.ProgrammerError("Unhandled export mode %r" %
12130 self.op.mode)
12132 # instance disk type verification
12133 # TODO: Implement export support for file-based disks
12134 for disk in self.instance.disks:
12135 if disk.dev_type == constants.LD_FILE:
12136 raise errors.OpPrereqError("Export not supported for instances with"
12137 " file-based disks", errors.ECODE_INVAL)
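# Note: for EXPORT_MODE_LOCAL, self.op.target_node is a single node name and
# dst_node is resolved from it; for EXPORT_MODE_REMOTE it is instead a list
# with one (host, port, magic) entry per instance disk, each verified above
# against the cluster domain secret.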
12139 def _CleanupExports(self, feedback_fn):
12140 """Removes exports of current instance from all other nodes.
12142 If an instance in a cluster with nodes A..D was exported to node C, its
12143 exports will be removed from the nodes A, B and D.
12146 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12148 nodelist = self.cfg.GetNodeList()
12149 nodelist.remove(self.dst_node.name)
12151 # on one-node clusters nodelist will be empty after the removal
12152 # if we proceed the backup would be removed because OpBackupQuery
12153 # substitutes an empty list with the full cluster node list.
12154 iname = self.instance.name
12155 if nodelist:
12156 feedback_fn("Removing old exports for instance %s" % iname)
12157 exportlist = self.rpc.call_export_list(nodelist)
12158 for node in exportlist:
12159 if exportlist[node].fail_msg:
12160 continue
12161 if iname in exportlist[node].payload:
12162 msg = self.rpc.call_export_remove(node, iname).fail_msg
12163 if msg:
12164 self.LogWarning("Could not remove older export for instance %s"
12165 " on node %s: %s", iname, node, msg)
12167 def Exec(self, feedback_fn):
12168 """Export an instance to an image in the cluster.
12171 assert self.op.mode in constants.EXPORT_MODES
12173 instance = self.instance
12174 src_node = instance.primary_node
12176 if self.op.shutdown:
12177 # shutdown the instance, but not the disks
12178 feedback_fn("Shutting down instance %s" % instance.name)
12179 result = self.rpc.call_instance_shutdown(src_node, instance,
12180 self.op.shutdown_timeout)
12181 # TODO: Maybe ignore failures if ignore_remove_failures is set
12182 result.Raise("Could not shutdown instance %s on"
12183 " node %s" % (instance.name, src_node))
12185 # set the disks ID correctly since call_instance_start needs the
12186 # correct drbd minor to create the symlinks
12187 for disk in instance.disks:
12188 self.cfg.SetDiskID(disk, src_node)
12190 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12192 if activate_disks:
12193 # Activate the instance disks if we're exporting a stopped instance
12194 feedback_fn("Activating disks for %s" % instance.name)
12195 _StartInstanceDisks(self, instance, None)
12198 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12199 instance)
12201 helper.CreateSnapshots()
12203 if (self.op.shutdown and
12204 instance.admin_state == constants.ADMINST_UP and
12205 not self.op.remove_instance):
12206 assert not activate_disks
12207 feedback_fn("Starting instance %s" % instance.name)
12208 result = self.rpc.call_instance_start(src_node,
12209 (instance, None, None), False)
12210 msg = result.fail_msg
12211 if msg:
12212 feedback_fn("Failed to start instance: %s" % msg)
12213 _ShutdownInstanceDisks(self, instance)
12214 raise errors.OpExecError("Could not start instance: %s" % msg)
12216 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12217 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12218 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12219 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12220 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12222 (key_name, _, _) = self.x509_key_name
12224 dest_ca_pem = \
12225 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12226 self.dest_x509_ca)
12228 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12229 key_name, dest_ca_pem,
12234 # Check for backwards compatibility
12235 assert len(dresults) == len(instance.disks)
12236 assert compat.all(isinstance(i, bool) for i in dresults), \
12237 "Not all results are boolean: %r" % dresults
12241 feedback_fn("Deactivating disks for %s" % instance.name)
12242 _ShutdownInstanceDisks(self, instance)
12244 if not (compat.all(dresults) and fin_resu):
12245 failures = []
12246 if not fin_resu:
12247 failures.append("export finalization")
12248 if not compat.all(dresults):
12249 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12250 if not dsk)
12251 failures.append("disk export: disk(s) %s" % fdsk)
12253 raise errors.OpExecError("Export failed, errors in %s" %
12254 utils.CommaJoin(failures))
12256 # At this point, the export was successful, we can cleanup/finish
12258 # Remove instance if requested
12259 if self.op.remove_instance:
12260 feedback_fn("Removing instance %s" % instance.name)
12261 _RemoveInstance(self, feedback_fn, instance,
12262 self.op.ignore_remove_failures)
12264 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12265 self._CleanupExports(feedback_fn)
12267 return fin_resu, dresults
12270 class LUBackupRemove(NoHooksLU):
12271 """Remove exports related to the named instance.
12276 def ExpandNames(self):
12277 self.needed_locks = {}
12278 # We need all nodes to be locked in order for RemoveExport to work, but we
12279 # don't need to lock the instance itself, as nothing will happen to it (and
12280 # we can remove exports also for a removed instance)
12281 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12283 def Exec(self, feedback_fn):
12284 """Remove any export.
12287 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12288 # If the instance was not found we'll try with the name that was passed in.
12289 # This will only work if it was an FQDN, though.
12290 fqdn_warn = False
12291 if not instance_name:
12292 fqdn_warn = True
12293 instance_name = self.op.instance_name
12295 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12296 exportlist = self.rpc.call_export_list(locked_nodes)
12297 found = False
12298 for node in exportlist:
12299 msg = exportlist[node].fail_msg
12300 if msg:
12301 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12302 continue
12303 if instance_name in exportlist[node].payload:
12304 found = True
12305 result = self.rpc.call_export_remove(node, instance_name)
12306 msg = result.fail_msg
12307 if msg:
12308 logging.error("Could not remove export for instance %s"
12309 " on node %s: %s", instance_name, node, msg)
12311 if fqdn_warn and not found:
12312 feedback_fn("Export not found. If trying to remove an export belonging"
12313 " to a deleted instance please use its Fully Qualified"
12314 " Domain Name.")
12317 class LUGroupAdd(LogicalUnit):
12318 """Logical unit for creating node groups.
12321 HPATH = "group-add"
12322 HTYPE = constants.HTYPE_GROUP
12325 def ExpandNames(self):
12326 # We need the new group's UUID here so that we can create and acquire the
12327 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12328 # that it should not check whether the UUID exists in the configuration.
12329 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12330 self.needed_locks = {}
12331 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12333 def CheckPrereq(self):
12334 """Check prerequisites.
12336 This checks that the given group name is not an existing node group
12337 already.
12339 """
12340 try:
12341 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12342 except errors.OpPrereqError:
12343 pass
12344 else:
12345 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12346 " node group (UUID: %s)" %
12347 (self.op.group_name, existing_uuid),
12348 errors.ECODE_EXISTS)
12350 if self.op.ndparams:
12351 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12353 def BuildHooksEnv(self):
12354 """Build hooks env.
12358 "GROUP_NAME": self.op.group_name,
12361 def BuildHooksNodes(self):
12362 """Build hooks nodes.
12365 mn = self.cfg.GetMasterNode()
12366 return ([mn], [mn])
12368 def Exec(self, feedback_fn):
12369 """Add the node group to the cluster.
12372 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12373 uuid=self.group_uuid,
12374 alloc_policy=self.op.alloc_policy,
12375 ndparams=self.op.ndparams)
12377 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12378 del self.remove_locks[locking.LEVEL_NODEGROUP]
12381 class LUGroupAssignNodes(NoHooksLU):
12382 """Logical unit for assigning nodes to groups.
12387 def ExpandNames(self):
12388 # These raise errors.OpPrereqError on their own:
12389 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12390 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12392 # We want to lock all the affected nodes and groups. We have readily
12393 # available the list of nodes, and the *destination* group. To gather the
12394 # list of "source" groups, we need to fetch node information later on.
12395 self.needed_locks = {
12396 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12397 locking.LEVEL_NODE: self.op.nodes,
12400 def DeclareLocks(self, level):
12401 if level == locking.LEVEL_NODEGROUP:
12402 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12404 # Try to get all affected nodes' groups without having the group or node
12405 # lock yet. Needs verification later in the code flow.
12406 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12408 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12410 def CheckPrereq(self):
12411 """Check prerequisites.
12414 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12415 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12416 frozenset(self.op.nodes))
12418 expected_locks = (set([self.group_uuid]) |
12419 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12420 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12421 if actual_locks != expected_locks:
12422 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12423 " current groups are '%s', used to be '%s'" %
12424 (utils.CommaJoin(expected_locks),
12425 utils.CommaJoin(actual_locks)))
12427 self.node_data = self.cfg.GetAllNodesInfo()
12428 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12429 instance_data = self.cfg.GetAllInstancesInfo()
12431 if self.group is None:
12432 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12433 (self.op.group_name, self.group_uuid))
12435 (new_splits, previous_splits) = \
12436 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12437 for node in self.op.nodes],
12438 self.node_data, instance_data)
12440 if new_splits:
12441 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12443 if not self.op.force:
12444 raise errors.OpExecError("The following instances get split by this"
12445 " change and --force was not given: %s" %
12446 fmt_new_splits)
12447 else:
12448 self.LogWarning("This operation will split the following instances: %s",
12449 fmt_new_splits)
12451 if previous_splits:
12452 self.LogWarning("In addition, these already-split instances continue"
12453 " to be split across groups: %s",
12454 utils.CommaJoin(utils.NiceSort(previous_splits)))
12456 def Exec(self, feedback_fn):
12457 """Assign nodes to a new group.
12460 for node in self.op.nodes:
12461 self.node_data[node].group = self.group_uuid
12463 # FIXME: Depends on side-effects of modifying the result of
12464 # C{cfg.GetAllNodesInfo}
12466 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12468 @staticmethod
12469 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12470 """Check for split instances after a node assignment.
12472 This method considers a series of node assignments as an atomic operation,
12473 and returns information about split instances after applying the set of
12476 In particular, it returns information about newly split instances, and
12477 instances that were already split, and remain so after the change.
12479 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12480 considered.
12482 @type changes: list of (node_name, new_group_uuid) pairs.
12483 @param changes: list of node assignments to consider.
12484 @param node_data: a dict with data for all nodes
12485 @param instance_data: a dict with all instances to consider
12486 @rtype: a two-tuple
12487 @return: a list of instances that were previously okay and become split as a
12488 consequence of this change, and a list of instances that were previously
12489 split and that this change does not fix.
12492 changed_nodes = dict((node, group) for node, group in changes
12493 if node_data[node].group != group)
12495 all_split_instances = set()
12496 previously_split_instances = set()
12498 def InstanceNodes(instance):
12499 return [instance.primary_node] + list(instance.secondary_nodes)
12501 for inst in instance_data.values():
12502 if inst.disk_template not in constants.DTS_INT_MIRROR:
12505 instance_nodes = InstanceNodes(inst)
12507 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12508 previously_split_instances.add(inst.name)
12510 if len(set(changed_nodes.get(node, node_data[node].group)
12511 for node in instance_nodes)) > 1:
12512 all_split_instances.add(inst.name)
12514 return (list(all_split_instances - previously_split_instances),
12515 list(previously_split_instances & all_split_instances))
12518 class _GroupQuery(_QueryBase):
12519 FIELDS = query.GROUP_FIELDS
12521 def ExpandNames(self, lu):
12522 lu.needed_locks = {}
12524 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12525 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12527 if not self.names:
12528 self.wanted = [name_to_uuid[name]
12529 for name in utils.NiceSort(name_to_uuid.keys())]
12530 else:
12531 # Accept names to be either names or UUIDs.
12532 missing = []
12533 self.wanted = []
12534 all_uuid = frozenset(self._all_groups.keys())
12536 for name in self.names:
12537 if name in all_uuid:
12538 self.wanted.append(name)
12539 elif name in name_to_uuid:
12540 self.wanted.append(name_to_uuid[name])
12542 missing.append(name)
12544 if missing:
12545 raise errors.OpPrereqError("Some groups do not exist: %s" %
12546 utils.CommaJoin(missing),
12547 errors.ECODE_NOENT)
12549 def DeclareLocks(self, lu, level):
12550 pass
12552 def _GetQueryData(self, lu):
12553 """Computes the list of node groups and their attributes.
12556 do_nodes = query.GQ_NODE in self.requested_data
12557 do_instances = query.GQ_INST in self.requested_data
12559 group_to_nodes = None
12560 group_to_instances = None
12562 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12563 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12564 # latter GetAllInstancesInfo() is not enough, for we have to go through
12565 # instance->node. Hence, we will need to process nodes even if we only need
12566 # instance information.
12567 if do_nodes or do_instances:
12568 all_nodes = lu.cfg.GetAllNodesInfo()
12569 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12570 node_to_group = {}
12572 for node in all_nodes.values():
12573 if node.group in group_to_nodes:
12574 group_to_nodes[node.group].append(node.name)
12575 node_to_group[node.name] = node.group
12577 if do_instances:
12578 all_instances = lu.cfg.GetAllInstancesInfo()
12579 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12581 for instance in all_instances.values():
12582 node = instance.primary_node
12583 if node in node_to_group:
12584 group_to_instances[node_to_group[node]].append(instance.name)
12586 if not do_nodes:
12587 # Do not pass on node information if it was not requested.
12588 group_to_nodes = None
12590 return query.GroupQueryData([self._all_groups[uuid]
12591 for uuid in self.wanted],
12592 group_to_nodes, group_to_instances)
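# Note: group_to_nodes maps each wanted group UUID to the names of its member
# nodes, and group_to_instances maps it to the instances whose primary node
# lies in that group; either mapping stays None when the corresponding data
# (GQ_NODE / GQ_INST) was not requested.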
12595 class LUGroupQuery(NoHooksLU):
12596 """Logical unit for querying node groups.
12601 def CheckArguments(self):
12602 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12603 self.op.output_fields, False)
12605 def ExpandNames(self):
12606 self.gq.ExpandNames(self)
12608 def DeclareLocks(self, level):
12609 self.gq.DeclareLocks(self, level)
12611 def Exec(self, feedback_fn):
12612 return self.gq.OldStyleQuery(self)
12615 class LUGroupSetParams(LogicalUnit):
12616 """Modifies the parameters of a node group.
12619 HPATH = "group-modify"
12620 HTYPE = constants.HTYPE_GROUP
12623 def CheckArguments(self):
12624 all_changes = [
12625 self.op.ndparams,
12626 self.op.alloc_policy,
12627 ]
12629 if all_changes.count(None) == len(all_changes):
12630 raise errors.OpPrereqError("Please pass at least one modification",
12631 errors.ECODE_INVAL)
12633 def ExpandNames(self):
12634 # This raises errors.OpPrereqError on its own:
12635 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12637 self.needed_locks = {
12638 locking.LEVEL_NODEGROUP: [self.group_uuid],
12639 }
12641 def CheckPrereq(self):
12642 """Check prerequisites.
12645 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12647 if self.group is None:
12648 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12649 (self.op.group_name, self.group_uuid))
12651 if self.op.ndparams:
12652 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12653 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12654 self.new_ndparams = new_ndparams
12656 def BuildHooksEnv(self):
12657 """Build hooks env.
12661 "GROUP_NAME": self.op.group_name,
12662 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12665 def BuildHooksNodes(self):
12666 """Build hooks nodes.
12669 mn = self.cfg.GetMasterNode()
12670 return ([mn], [mn])
12672 def Exec(self, feedback_fn):
12673 """Modifies the node group.
12678 if self.op.ndparams:
12679 self.group.ndparams = self.new_ndparams
12680 result.append(("ndparams", str(self.group.ndparams)))
12682 if self.op.alloc_policy:
12683 self.group.alloc_policy = self.op.alloc_policy
12685 self.cfg.Update(self.group, feedback_fn)
12689 class LUGroupRemove(LogicalUnit):
12690 HPATH = "group-remove"
12691 HTYPE = constants.HTYPE_GROUP
12694 def ExpandNames(self):
12695 # This raises errors.OpPrereqError on its own:
12696 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12697 self.needed_locks = {
12698 locking.LEVEL_NODEGROUP: [self.group_uuid],
12699 }
12701 def CheckPrereq(self):
12702 """Check prerequisites.
12704 This checks that the given group name exists as a node group, that it is
12705 empty (i.e., contains no nodes), and that it is not the last group of the
12706 cluster.
12708 """
12709 # Verify that the group is empty.
12710 group_nodes = [node.name
12711 for node in self.cfg.GetAllNodesInfo().values()
12712 if node.group == self.group_uuid]
12714 if group_nodes:
12715 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12716 " nodes: %s" %
12717 (self.op.group_name,
12718 utils.CommaJoin(utils.NiceSort(group_nodes))),
12719 errors.ECODE_STATE)
12721 # Verify the cluster would not be left group-less.
12722 if len(self.cfg.GetNodeGroupList()) == 1:
12723 raise errors.OpPrereqError("Group '%s' is the only group,"
12724 " cannot be removed" %
12725 self.op.group_name,
12726 errors.ECODE_STATE)
12728 def BuildHooksEnv(self):
12729 """Build hooks env.
12733 "GROUP_NAME": self.op.group_name,
12736 def BuildHooksNodes(self):
12737 """Build hooks nodes.
12740 mn = self.cfg.GetMasterNode()
12741 return ([mn], [mn])
12743 def Exec(self, feedback_fn):
12744 """Remove the node group.
12748 self.cfg.RemoveNodeGroup(self.group_uuid)
12749 except errors.ConfigurationError:
12750 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12751 (self.op.group_name, self.group_uuid))
12753 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12756 class LUGroupRename(LogicalUnit):
12757 HPATH = "group-rename"
12758 HTYPE = constants.HTYPE_GROUP
12761 def ExpandNames(self):
12762 # This raises errors.OpPrereqError on its own:
12763 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12765 self.needed_locks = {
12766 locking.LEVEL_NODEGROUP: [self.group_uuid],
12767 }
12769 def CheckPrereq(self):
12770 """Check prerequisites.
12772 Ensures requested new name is not yet used.
12774 """
12775 try:
12776 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12777 except errors.OpPrereqError:
12778 pass
12779 else:
12780 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12781 " node group (UUID: %s)" %
12782 (self.op.new_name, new_name_uuid),
12783 errors.ECODE_EXISTS)
12785 def BuildHooksEnv(self):
12786 """Build hooks env.
12790 "OLD_NAME": self.op.group_name,
12791 "NEW_NAME": self.op.new_name,
12794 def BuildHooksNodes(self):
12795 """Build hooks nodes.
12798 mn = self.cfg.GetMasterNode()
12800 all_nodes = self.cfg.GetAllNodesInfo()
12801 all_nodes.pop(mn, None)
12803 run_nodes = [mn]
12804 run_nodes.extend(node.name for node in all_nodes.values()
12805 if node.group == self.group_uuid)
12807 return (run_nodes, run_nodes)
12809 def Exec(self, feedback_fn):
12810 """Rename the node group.
12813 group = self.cfg.GetNodeGroup(self.group_uuid)
12816 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12817 (self.op.group_name, self.group_uuid))
12819 group.name = self.op.new_name
12820 self.cfg.Update(group, feedback_fn)
12822 return self.op.new_name
12825 class LUGroupEvacuate(LogicalUnit):
12826 HPATH = "group-evacuate"
12827 HTYPE = constants.HTYPE_GROUP
12830 def ExpandNames(self):
12831 # This raises errors.OpPrereqError on its own:
12832 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12834 if self.op.target_groups:
12835 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12836 self.op.target_groups)
12838 self.req_target_uuids = []
12840 if self.group_uuid in self.req_target_uuids:
12841 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12842 " as a target group (targets are %s)" %
12844 utils.CommaJoin(self.req_target_uuids)),
12845 errors.ECODE_INVAL)
12847 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12849 self.share_locks = _ShareAll()
12850 self.needed_locks = {
12851 locking.LEVEL_INSTANCE: [],
12852 locking.LEVEL_NODEGROUP: [],
12853 locking.LEVEL_NODE: [],
12856 def DeclareLocks(self, level):
12857 if level == locking.LEVEL_INSTANCE:
12858 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12860 # Lock instances optimistically, needs verification once node and group
12861 # locks have been acquired
12862 self.needed_locks[locking.LEVEL_INSTANCE] = \
12863 self.cfg.GetNodeGroupInstances(self.group_uuid)
12865 elif level == locking.LEVEL_NODEGROUP:
12866 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12868 if self.req_target_uuids:
12869 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12871 # Lock all groups used by instances optimistically; this requires going
12872 # via the node before it's locked, requiring verification later on
12873 lock_groups.update(group_uuid
12874 for instance_name in
12875 self.owned_locks(locking.LEVEL_INSTANCE)
12877 self.cfg.GetInstanceNodeGroups(instance_name))
12879 # No target groups, need to lock all of them
12880 lock_groups = locking.ALL_SET
12882 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12884 elif level == locking.LEVEL_NODE:
12885 # This will only lock the nodes in the group to be evacuated which
12886 # contain actual instances
12887 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12888 self._LockInstancesNodes()
12890 # Lock all nodes in group to be evacuated and target groups
12891 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12892 assert self.group_uuid in owned_groups
12893 member_nodes = [node_name
12894 for group in owned_groups
12895 for node_name in self.cfg.GetNodeGroup(group).members]
12896 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12898 def CheckPrereq(self):
12899 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12900 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12901 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12903 assert owned_groups.issuperset(self.req_target_uuids)
12904 assert self.group_uuid in owned_groups
12906 # Check if locked instances are still correct
12907 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12909 # Get instance information
12910 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12912 # Check if node groups for locked instances are still correct
12913 for instance_name in owned_instances:
12914 inst = self.instances[instance_name]
12915 assert owned_nodes.issuperset(inst.all_nodes), \
12916 "Instance %s's nodes changed while we kept the lock" % instance_name
12918 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12921 assert self.group_uuid in inst_groups, \
12922 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12924 if self.req_target_uuids:
12925 # User requested specific target groups
12926 self.target_uuids = self.req_target_uuids
12928 # All groups except the one to be evacuated are potential targets
12929 self.target_uuids = [group_uuid for group_uuid in owned_groups
12930 if group_uuid != self.group_uuid]
12932 if not self.target_uuids:
12933 raise errors.OpPrereqError("There are no possible target groups",
12934 errors.ECODE_INVAL)
12936 def BuildHooksEnv(self):
12937 """Build hooks env.
12941 "GROUP_NAME": self.op.group_name,
12942 "TARGET_GROUPS": " ".join(self.target_uuids),
12945 def BuildHooksNodes(self):
12946 """Build hooks nodes.
12949 mn = self.cfg.GetMasterNode()
12951 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12953 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12955 return (run_nodes, run_nodes)
12957 def Exec(self, feedback_fn):
12958 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12960 assert self.group_uuid not in self.target_uuids
12962 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12963 instances=instances, target_groups=self.target_uuids)
12965 ial.Run(self.op.iallocator)
12967 if not ial.success:
12968 raise errors.OpPrereqError("Can't compute group evacuation using"
12969 " iallocator '%s': %s" %
12970 (self.op.iallocator, ial.info),
12971 errors.ECODE_NORES)
12973 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12975 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12976 len(jobs), self.op.group_name)
12978 return ResultWithJobs(jobs)
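# Illustrative sketch (hypothetical instance names): "jobs" is a list of
# lists of opcodes, one inner list per job to submit, e.g.
#   [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#    [opcodes.OpInstanceReplaceDisks(instance_name="inst2.example.com",
#                                    mode=constants.REPLACE_DISK_CHG)]]
# Only failover, migrate and replace-disks opcodes are expected here (see
# the _JOB_LIST definition in the IAllocator class below).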
12981 class TagsLU(NoHooksLU): # pylint: disable=W0223
12982 """Generic tags LU.
12984 This is an abstract class which is the parent of all the other tags LUs.
12987 def ExpandNames(self):
12988 self.group_uuid = None
12989 self.needed_locks = {}
12990 if self.op.kind == constants.TAG_NODE:
12991 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12992 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12993 elif self.op.kind == constants.TAG_INSTANCE:
12994 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12995 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12996 elif self.op.kind == constants.TAG_NODEGROUP:
12997 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12999 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13000 # not possible to acquire the BGL based on opcode parameters)
13002 def CheckPrereq(self):
13003 """Check prerequisites.
13006 if self.op.kind == constants.TAG_CLUSTER:
13007 self.target = self.cfg.GetClusterInfo()
13008 elif self.op.kind == constants.TAG_NODE:
13009 self.target = self.cfg.GetNodeInfo(self.op.name)
13010 elif self.op.kind == constants.TAG_INSTANCE:
13011 self.target = self.cfg.GetInstanceInfo(self.op.name)
13012 elif self.op.kind == constants.TAG_NODEGROUP:
13013 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13015 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13016 str(self.op.kind), errors.ECODE_INVAL)
13019 class LUTagsGet(TagsLU):
13020 """Returns the tags of a given object.
13025 def ExpandNames(self):
13026 TagsLU.ExpandNames(self)
13028 # Share locks as this is only a read operation
13029 self.share_locks = _ShareAll()
13031 def Exec(self, feedback_fn):
13032 """Returns the tag list.
13035 return list(self.target.GetTags())
13038 class LUTagsSearch(NoHooksLU):
13039 """Searches the tags for a given pattern.
13044 def ExpandNames(self):
13045 self.needed_locks = {}
13047 def CheckPrereq(self):
13048 """Check prerequisites.
13050 This checks the pattern passed for validity by compiling it.
13054 self.re = re.compile(self.op.pattern)
13055 except re.error, err:
13056 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13057 (self.op.pattern, err), errors.ECODE_INVAL)
13059 def Exec(self, feedback_fn):
13060 """Returns the tag list.
13064 tgts = [("/cluster", cfg.GetClusterInfo())]
13065 ilist = cfg.GetAllInstancesInfo().values()
13066 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13067 nlist = cfg.GetAllNodesInfo().values()
13068 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13069 tgts.extend(("/nodegroup/%s" % n.name, n)
13070 for n in cfg.GetAllNodeGroupsInfo().values())
13072 for path, target in tgts:
13073 for tag in target.GetTags():
13074 if self.re.search(tag):
13075 results.append((path, tag))
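# Illustrative example (hypothetical names and tags): for a pattern such as
# r"^web", the list built above could end up as
#   [("/cluster", "webfarm"),
#    ("/instances/inst1.example.com", "webserver"),
#    ("/nodes/node1.example.com", "web-capable")]
# i.e. one (path, tag) pair per object tag matching self.op.pattern.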
13079 class LUTagsSet(TagsLU):
13080 """Sets a tag on a given object.
13085 def CheckPrereq(self):
13086 """Check prerequisites.
13088 This checks the type and length of the tag name and value.
13091 TagsLU.CheckPrereq(self)
13092 for tag in self.op.tags:
13093 objects.TaggableObject.ValidateTag(tag)
13095 def Exec(self, feedback_fn):
13100 for tag in self.op.tags:
13101 self.target.AddTag(tag)
13102 except errors.TagError, err:
13103 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13104 self.cfg.Update(self.target, feedback_fn)
13107 class LUTagsDel(TagsLU):
13108 """Delete a list of tags from a given object.
13113 def CheckPrereq(self):
13114 """Check prerequisites.
13116 This checks that we have the given tag.
13119 TagsLU.CheckPrereq(self)
13120 for tag in self.op.tags:
13121 objects.TaggableObject.ValidateTag(tag)
13122 del_tags = frozenset(self.op.tags)
13123 cur_tags = self.target.GetTags()
13125 diff_tags = del_tags - cur_tags
13127 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13128 raise errors.OpPrereqError("Tag(s) %s not found" %
13129 (utils.CommaJoin(diff_names), ),
13130 errors.ECODE_NOENT)
13132 def Exec(self, feedback_fn):
13133 """Remove the tag from the object.
13136 for tag in self.op.tags:
13137 self.target.RemoveTag(tag)
13138 self.cfg.Update(self.target, feedback_fn)
13141 class LUTestDelay(NoHooksLU):
13142 """Sleep for a specified amount of time.
13144 This LU sleeps on the master and/or nodes for a specified amount of
13150 def ExpandNames(self):
13151 """Expand names and set required locks.
13153 This expands the node list, if any.
13156 self.needed_locks = {}
13157 if self.op.on_nodes:
13158 # _GetWantedNodes can be used here, but is not always appropriate to use
13159 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13160 # more information.
13161 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13162 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13164 def _TestDelay(self):
13165 """Do the actual sleep.
13168 if self.op.on_master:
13169 if not utils.TestDelay(self.op.duration):
13170 raise errors.OpExecError("Error during master delay test")
13171 if self.op.on_nodes:
13172 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13173 for node, node_result in result.items():
13174 node_result.Raise("Failure during rpc call to node %s" % node)
13176 def Exec(self, feedback_fn):
13177 """Execute the test delay opcode, with the wanted repetitions.
13180 if self.op.repeat == 0:
13183 top_value = self.op.repeat - 1
13184 for i in range(self.op.repeat):
13185 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13189 class LUTestJqueue(NoHooksLU):
13190 """Utility LU to test some aspects of the job queue.
13195 # Must be lower than default timeout for WaitForJobChange to see whether it
13196 # notices changed jobs
13197 _CLIENT_CONNECT_TIMEOUT = 20.0
13198 _CLIENT_CONFIRM_TIMEOUT = 60.0
13201 def _NotifyUsingSocket(cls, cb, errcls):
13202 """Opens a Unix socket and waits for another program to connect.
13205 @param cb: Callback to send socket name to client
13206 @type errcls: class
13207 @param errcls: Exception class to use for errors
13210 # Using a temporary directory as there's no easy way to create temporary
13211 # sockets without writing a custom loop around tempfile.mktemp and
13213 tmpdir = tempfile.mkdtemp()
13215 tmpsock = utils.PathJoin(tmpdir, "sock")
13217 logging.debug("Creating temporary socket at %s", tmpsock)
13218 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13223 # Send details to client
13226 # Wait for client to connect before continuing
13227 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13229 (conn, _) = sock.accept()
13230 except socket.error, err:
13231 raise errcls("Client didn't connect in time (%s)" % err)
13235 # Remove as soon as client is connected
13236 shutil.rmtree(tmpdir)
13238 # Wait for client to close
13241 # pylint: disable=E1101
13242 # Instance of '_socketobject' has no ... member
13243 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13245 except socket.error, err:
13246 raise errcls("Client failed to confirm notification (%s)" % err)
13250 def _SendNotification(self, test, arg, sockname):
13251 """Sends a notification to the client.
13254 @param test: Test name
13255 @param arg: Test argument (depends on test)
13256 @type sockname: string
13257 @param sockname: Socket path
13260 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13262 def _Notify(self, prereq, test, arg):
13263 """Notifies the client of a test.
13266 @param prereq: Whether this is a prereq-phase test
13268 @param test: Test name
13269 @param arg: Test argument (depends on test)
13273 errcls = errors.OpPrereqError
13275 errcls = errors.OpExecError
13277 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13281 def CheckArguments(self):
13282 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13283 self.expandnames_calls = 0
13285 def ExpandNames(self):
13286 checkargs_calls = getattr(self, "checkargs_calls", 0)
13287 if checkargs_calls < 1:
13288 raise errors.ProgrammerError("CheckArguments was not called")
13290 self.expandnames_calls += 1
13292 if self.op.notify_waitlock:
13293 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13295 self.LogInfo("Expanding names")
13297 # Get lock on master node (just to get a lock, not for a particular reason)
13298 self.needed_locks = {
13299 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13302 def Exec(self, feedback_fn):
13303 if self.expandnames_calls < 1:
13304 raise errors.ProgrammerError("ExpandNames was not called")
13306 if self.op.notify_exec:
13307 self._Notify(False, constants.JQT_EXEC, None)
13309 self.LogInfo("Executing")
13311 if self.op.log_messages:
13312 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13313 for idx, msg in enumerate(self.op.log_messages):
13314 self.LogInfo("Sending log message %s", idx + 1)
13315 feedback_fn(constants.JQT_MSGPREFIX + msg)
13316 # Report how many test messages have been sent
13317 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13320 raise errors.OpExecError("Opcode failure was requested")
13325 class IAllocator(object):
13326 """IAllocator framework.
13328 An IAllocator instance has four sets of attributes:
13329 - cfg that is needed to query the cluster
13330 - input data (all members of the _KEYS class attribute are required)
13331 - four buffer attributes (in|out_data|text), that represent the
13332 input (to the external script) in text and data structure format,
13333 and the output from it, again in two formats
13334 - the result variables from the script (success, info, nodes) for
13338 # pylint: disable=R0902
13339 # lots of instance attributes
13341 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13343 self.rpc = rpc_runner
13344 # init buffer variables
13345 self.in_text = self.out_text = self.in_data = self.out_data = None
13346 # init all input fields so that pylint is happy
13348 self.memory = self.disks = self.disk_template = None
13349 self.os = self.tags = self.nics = self.vcpus = None
13350 self.hypervisor = None
13351 self.relocate_from = None
13353 self.instances = None
13354 self.evac_mode = None
13355 self.target_groups = []
13357 self.required_nodes = None
13358 # init result fields
13359 self.success = self.info = self.result = None
13362 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13364 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13365 " IAllocator" % self.mode)
13367 keyset = [n for (n, _) in keydata]
13370 if key not in keyset:
13371 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13372 " IAllocator" % key)
13373 setattr(self, key, kwargs[key])
13376 if key not in kwargs:
13377 raise errors.ProgrammerError("Missing input parameter '%s' to"
13378 " IAllocator" % key)
13379 self._BuildInputData(compat.partial(fn, self), keydata)
13381 def _ComputeClusterData(self):
13382 """Compute the generic allocator input data.
13384 This is the data that is independent of the actual operation.
13388 cluster_info = cfg.GetClusterInfo()
13391 "version": constants.IALLOCATOR_VERSION,
13392 "cluster_name": cfg.GetClusterName(),
13393 "cluster_tags": list(cluster_info.GetTags()),
13394 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13395 # we don't have job IDs
13397 ninfo = cfg.GetAllNodesInfo()
13398 iinfo = cfg.GetAllInstancesInfo().values()
13399 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13402 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13404 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13405 hypervisor_name = self.hypervisor
13406 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13407 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13409 hypervisor_name = cluster_info.enabled_hypervisors[0]
13411 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13414 self.rpc.call_all_instances_info(node_list,
13415 cluster_info.enabled_hypervisors)
13417 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13419 config_ndata = self._ComputeBasicNodeData(ninfo)
13420 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13421 i_list, config_ndata)
13422 assert len(data["nodes"]) == len(ninfo), \
13423 "Incomplete node data computed"
13425 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13427 self.in_data = data
13430 def _ComputeNodeGroupData(cfg):
13431 """Compute node groups data.
13434 ng = dict((guuid, {
13435 "name": gdata.name,
13436 "alloc_policy": gdata.alloc_policy,
13438 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
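# Illustrative shape of the mapping built above (hypothetical UUID and name):
#   {"6d1e4c5a-...": {"name": "default",
#                     "alloc_policy": constants.ALLOC_POLICY_PREFERRED}}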
13443 def _ComputeBasicNodeData(node_cfg):
13444 """Compute global node data.
13447 @returns: a dict of name: (node dict, node config)
13450 # fill in static (config-based) values
13451 node_results = dict((ninfo.name, {
13452 "tags": list(ninfo.GetTags()),
13453 "primary_ip": ninfo.primary_ip,
13454 "secondary_ip": ninfo.secondary_ip,
13455 "offline": ninfo.offline,
13456 "drained": ninfo.drained,
13457 "master_candidate": ninfo.master_candidate,
13458 "group": ninfo.group,
13459 "master_capable": ninfo.master_capable,
13460 "vm_capable": ninfo.vm_capable,
13462 for ninfo in node_cfg.values())
13464 return node_results
13467 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13469 """Compute global node data.
13471 @param node_results: the basic node structures as filled from the config
13474 # make a copy of the current dict
13475 node_results = dict(node_results)
13476 for nname, nresult in node_data.items():
13477 assert nname in node_results, "Missing basic data for node %s" % nname
13478 ninfo = node_cfg[nname]
13480 if not (ninfo.offline or ninfo.drained):
13481 nresult.Raise("Can't get data for node %s" % nname)
13482 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13484 remote_info = nresult.payload
13486 for attr in ["memory_total", "memory_free", "memory_dom0",
13487 "vg_size", "vg_free", "cpu_total"]:
13488 if attr not in remote_info:
13489 raise errors.OpExecError("Node '%s' didn't return attribute"
13490 " '%s'" % (nname, attr))
13491 if not isinstance(remote_info[attr], int):
13492 raise errors.OpExecError("Node '%s' returned invalid value"
13494 (nname, attr, remote_info[attr]))
13495 # compute memory used by primary instances
13496 i_p_mem = i_p_up_mem = 0
13497 for iinfo, beinfo in i_list:
13498 if iinfo.primary_node == nname:
13499 i_p_mem += beinfo[constants.BE_MEMORY]
13500 if iinfo.name not in node_iinfo[nname].payload:
13503 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13504 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13505 remote_info["memory_free"] -= max(0, i_mem_diff)
13507 if iinfo.admin_state == constants.ADMINST_UP:
13508 i_p_up_mem += beinfo[constants.BE_MEMORY]
13510 # compute memory used by instances
13512 "total_memory": remote_info["memory_total"],
13513 "reserved_memory": remote_info["memory_dom0"],
13514 "free_memory": remote_info["memory_free"],
13515 "total_disk": remote_info["vg_size"],
13516 "free_disk": remote_info["vg_free"],
13517 "total_cpus": remote_info["cpu_total"],
13518 "i_pri_memory": i_p_mem,
13519 "i_pri_up_memory": i_p_up_mem,
13521 pnr_dyn.update(node_results[nname])
13522 node_results[nname] = pnr_dyn
13524 return node_results
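# Illustrative sketch (hypothetical node name and numbers): after the merge
# above, an online vm_capable node carries both the static and the dynamic
# fields, roughly:
#   {"tags": [], "primary_ip": "192.0.2.10", "group": "<group uuid>",
#    "offline": False, "drained": False, "master_candidate": True,
#    "total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
#    "total_disk": 102400, "free_disk": 51200, "total_cpus": 8,
#    "i_pri_memory": 4096, "i_pri_up_memory": 2048, ...}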
13527 def _ComputeInstanceData(cluster_info, i_list):
13528 """Compute global instance data.
13532 for iinfo, beinfo in i_list:
13534 for nic in iinfo.nics:
13535 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13539 "mode": filled_params[constants.NIC_MODE],
13540 "link": filled_params[constants.NIC_LINK],
13542 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13543 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13544 nic_data.append(nic_dict)
13546 "tags": list(iinfo.GetTags()),
13547 "admin_state": iinfo.admin_state,
13548 "vcpus": beinfo[constants.BE_VCPUS],
13549 "memory": beinfo[constants.BE_MEMORY],
13551 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13553 "disks": [{constants.IDISK_SIZE: dsk.size,
13554 constants.IDISK_MODE: dsk.mode}
13555 for dsk in iinfo.disks],
13556 "disk_template": iinfo.disk_template,
13557 "hypervisor": iinfo.hypervisor,
13559 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13561 instance_data[iinfo.name] = pir
13563 return instance_data
13565 def _AddNewInstance(self):
13566 """Add new instance data to allocator structure.
13568 This, in combination with _ComputeClusterData, will create the
13569 correct structure needed as input for the allocator.
13571 The checks for the completeness of the opcode must have already been
13575 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13577 if self.disk_template in constants.DTS_INT_MIRROR:
13578 self.required_nodes = 2
13580 self.required_nodes = 1
13584 "disk_template": self.disk_template,
13587 "vcpus": self.vcpus,
13588 "memory": self.memory,
13589 "disks": self.disks,
13590 "disk_space_total": disk_space,
13592 "required_nodes": self.required_nodes,
13593 "hypervisor": self.hypervisor,
13598 def _AddRelocateInstance(self):
13599 """Add relocate instance data to allocator structure.
13601 This, in combination with _ComputeClusterData, will create the
13602 correct structure needed as input for the allocator.
13604 The checks for the completeness of the opcode must have already been
13608 instance = self.cfg.GetInstanceInfo(self.name)
13609 if instance is None:
13610 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13611 " IAllocator" % self.name)
13613 if instance.disk_template not in constants.DTS_MIRRORED:
13614 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13615 errors.ECODE_INVAL)
13617 if instance.disk_template in constants.DTS_INT_MIRROR and \
13618 len(instance.secondary_nodes) != 1:
13619 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13620 errors.ECODE_STATE)
13622 self.required_nodes = 1
13623 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13624 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13628 "disk_space_total": disk_space,
13629 "required_nodes": self.required_nodes,
13630 "relocate_from": self.relocate_from,
13634 def _AddNodeEvacuate(self):
13635 """Get data for node-evacuate requests.
13639 "instances": self.instances,
13640 "evac_mode": self.evac_mode,
13643 def _AddChangeGroup(self):
13644 """Get data for node-evacuate requests.
13648 "instances": self.instances,
13649 "target_groups": self.target_groups,
13652 def _BuildInputData(self, fn, keydata):
13653 """Build input data structures.
13656 self._ComputeClusterData()
13659 request["type"] = self.mode
13660 for keyname, keytype in keydata:
13661 if keyname not in request:
13662 raise errors.ProgrammerError("Request parameter %s is missing" %
13664 val = request[keyname]
13665 if not keytype(val):
13666 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13667 " validation, value %s, expected"
13668 " type %s" % (keyname, val, keytype))
13669 self.in_data["request"] = request
13671 self.in_text = serializer.Dump(self.in_data)
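# Illustrative sketch (hypothetical values): for an allocation request the
# structure serialized above looks roughly like
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 1024, "vcpus": 1, "disk_template": "drbd",
#                "disks": [{"size": 10240, "mode": "rw"}], ...}}
# The keys allowed in "request" are the per-mode keydata lists in _MODE_DATA
# below.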
13673 _STRING_LIST = ht.TListOf(ht.TString)
13674 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13675 # pylint: disable=E1101
13676 # Class '...' has no 'OP_ID' member
13677 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13678 opcodes.OpInstanceMigrate.OP_ID,
13679 opcodes.OpInstanceReplaceDisks.OP_ID])
13683 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13684 ht.TItems([ht.TNonEmptyString,
13685 ht.TNonEmptyString,
13686 ht.TListOf(ht.TNonEmptyString),
13689 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13690 ht.TItems([ht.TNonEmptyString,
13693 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13694 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
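# Illustrative sketch (hypothetical data): a node-evacuation result matching
# _NEVAC_RESULT above is a (moved, failed, jobs) triple, roughly:
#   ([("inst1.example.com", "<target group uuid>", ["node2", "node3"])],
#    [("inst2.example.com", "instance is offline")],
#    [[{"OP_ID": opcodes.OpInstanceMigrate.OP_ID, ...}]])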
13697 constants.IALLOCATOR_MODE_ALLOC:
13700 ("name", ht.TString),
13701 ("memory", ht.TInt),
13702 ("disks", ht.TListOf(ht.TDict)),
13703 ("disk_template", ht.TString),
13704 ("os", ht.TString),
13705 ("tags", _STRING_LIST),
13706 ("nics", ht.TListOf(ht.TDict)),
13707 ("vcpus", ht.TInt),
13708 ("hypervisor", ht.TString),
13710 constants.IALLOCATOR_MODE_RELOC:
13711 (_AddRelocateInstance,
13712 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13714 constants.IALLOCATOR_MODE_NODE_EVAC:
13715 (_AddNodeEvacuate, [
13716 ("instances", _STRING_LIST),
13717 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13719 constants.IALLOCATOR_MODE_CHG_GROUP:
13720 (_AddChangeGroup, [
13721 ("instances", _STRING_LIST),
13722 ("target_groups", _STRING_LIST),
13726 def Run(self, name, validate=True, call_fn=None):
13727 """Run an instance allocator and return the results.
13730 if call_fn is None:
13731 call_fn = self.rpc.call_iallocator_runner
13733 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13734 result.Raise("Failure while running the iallocator script")
13736 self.out_text = result.payload
13738 self._ValidateResult()
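# Illustrative usage from a logical unit (hypothetical names; compare
# LUGroupEvacuate.Exec above):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com", relocate_from=["node2"])
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError("Allocator failed: %s" % ial.info,
#                                errors.ECODE_NORES)
#   # ial.result now holds the validated "result" entry of the reply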
13740 def _ValidateResult(self):
13741 """Process the allocator results.
13743 This will process and, if successful, save the result in
13744 self.out_data and the other parameters.
13748 rdict = serializer.Load(self.out_text)
13749 except Exception, err:
13750 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13752 if not isinstance(rdict, dict):
13753 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13755 # TODO: remove backwards compatibility in later versions
13756 if "nodes" in rdict and "result" not in rdict:
13757 rdict["result"] = rdict["nodes"]
13760 for key in "success", "info", "result":
13761 if key not in rdict:
13762 raise errors.OpExecError("Can't parse iallocator results:"
13763 " missing key '%s'" % key)
13764 setattr(self, key, rdict[key])
13766 if not self._result_check(self.result):
13767 raise errors.OpExecError("Iallocator returned invalid result,"
13768 " expected %s, got %s" %
13769 (self._result_check, self.result))
13772 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13773 assert self.relocate_from is not None
13774 assert self.required_nodes == 1
13776 node2group = dict((name, ndata["group"])
13777 for (name, ndata) in self.in_data["nodes"].items())
13779 fn = compat.partial(self._NodesToGroups, node2group,
13780 self.in_data["nodegroups"])
13782 instance = self.cfg.GetInstanceInfo(self.name)
13783 request_groups = fn(self.relocate_from + [instance.primary_node])
13784 result_groups = fn(rdict["result"] + [instance.primary_node])
13786 if self.success and not set(result_groups).issubset(request_groups):
13787 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13788 " differ from original groups (%s)" %
13789 (utils.CommaJoin(result_groups),
13790 utils.CommaJoin(request_groups)))
13792 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13793 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13795 self.out_data = rdict
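# Illustrative sketch (hypothetical values): a minimal valid reply for an
# allocation request, once parsed, looks like
#   {"success": True,
#    "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# with "result" additionally having to satisfy the per-mode self._result_check.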
13798 def _NodesToGroups(node2group, groups, nodes):
13799 """Returns a list of unique group names for a list of nodes.
13801 @type node2group: dict
13802 @param node2group: Map from node name to group UUID
13804 @param groups: Group information
13806 @param nodes: Node names
13813 group_uuid = node2group[node]
13815 # Ignore unknown node
13819 group = groups[group_uuid]
13821 # Can't find group, let's use UUID
13822 group_name = group_uuid
13824 group_name = group["name"]
13826 result.add(group_name)
13828 return sorted(result)
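# Illustrative example (hypothetical data):
#   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                  {"uuid-a": {"name": "group1"}},
#                  ["node1", "node2", "node3"])
# returns ["group1", "uuid-b"]: unknown nodes are skipped and a group without
# an entry in the groups mapping falls back to its UUID.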
13831 class LUTestAllocator(NoHooksLU):
13832 """Run allocator tests.
13834 This LU runs the allocator tests
13837 def CheckPrereq(self):
13838 """Check prerequisites.
13840 This checks the opcode parameters depending on the test direction and mode.
13843 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13844 for attr in ["memory", "disks", "disk_template",
13845 "os", "tags", "nics", "vcpus"]:
13846 if not hasattr(self.op, attr):
13847 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13848 attr, errors.ECODE_INVAL)
13849 iname = self.cfg.ExpandInstanceName(self.op.name)
13850 if iname is not None:
13851 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13852 iname, errors.ECODE_EXISTS)
13853 if not isinstance(self.op.nics, list):
13854 raise errors.OpPrereqError("Invalid parameter 'nics'",
13855 errors.ECODE_INVAL)
13856 if not isinstance(self.op.disks, list):
13857 raise errors.OpPrereqError("Invalid parameter 'disks'",
13858 errors.ECODE_INVAL)
13859 for row in self.op.disks:
13860 if (not isinstance(row, dict) or
13861 constants.IDISK_SIZE not in row or
13862 not isinstance(row[constants.IDISK_SIZE], int) or
13863 constants.IDISK_MODE not in row or
13864 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13865 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13866 " parameter", errors.ECODE_INVAL)
13867 if self.op.hypervisor is None:
13868 self.op.hypervisor = self.cfg.GetHypervisorType()
13869 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13870 fname = _ExpandInstanceName(self.cfg, self.op.name)
13871 self.op.name = fname
13872 self.relocate_from = \
13873 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13874 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13875 constants.IALLOCATOR_MODE_NODE_EVAC):
13876 if not self.op.instances:
13877 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13878 self.op.instances = _GetWantedInstances(self, self.op.instances)
13880 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13881 self.op.mode, errors.ECODE_INVAL)
13883 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13884 if self.op.allocator is None:
13885 raise errors.OpPrereqError("Missing allocator name",
13886 errors.ECODE_INVAL)
13887 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13888 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13889 self.op.direction, errors.ECODE_INVAL)
13891 def Exec(self, feedback_fn):
13892 """Run the allocator test.
13895 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13896 ial = IAllocator(self.cfg, self.rpc,
13899 memory=self.op.memory,
13900 disks=self.op.disks,
13901 disk_template=self.op.disk_template,
13905 vcpus=self.op.vcpus,
13906 hypervisor=self.op.hypervisor,
13908 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13909 ial = IAllocator(self.cfg, self.rpc,
13912 relocate_from=list(self.relocate_from),
13914 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13915 ial = IAllocator(self.cfg, self.rpc,
13917 instances=self.op.instances,
13918 target_groups=self.op.target_groups)
13919 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13920 ial = IAllocator(self.cfg, self.rpc,
13922 instances=self.op.instances,
13923 evac_mode=self.op.evac_mode)
13925 raise errors.ProgrammerError("Uncatched mode %s in"
13926 " LUTestAllocator.Exec", self.op.mode)
13928 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13929 result = ial.in_text
13931 ial.Run(self.op.allocator, validate=False)
13932 result = ial.out_text
13936 #: Query type implementations
13938 constants.QR_INSTANCE: _InstanceQuery,
13939 constants.QR_NODE: _NodeQuery,
13940 constants.QR_GROUP: _GroupQuery,
13941 constants.QR_OS: _OsQuery,
13944 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13947 def _GetQueryImplementation(name):
13948 """Returns the implemtnation for a query type.
13950 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13954 return _QUERY_IMPL[name]
13956 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13957 errors.ECODE_INVAL)
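# Illustrative example: _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class from the mapping above, while an unknown resource name
# raises OpPrereqError with errors.ECODE_INVAL.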