4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
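A minimal usage sketch (the opcode and the extra keyword are illustrative
only)::

  # inside some LU's Exec(): submit one follow-up job and return extra data
  def Exec(self, feedback_fn):
    jobs = [[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]]
    # extra keyword arguments become additional values in the opcode result
    return ResultWithJobs(jobs, requested_by="illustrative-value")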
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
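As a rough illustration only (the LU name and its behaviour are made up), a
minimal logical unit could look like::

  class LUFrobnicate(NoHooksLU):   # hypothetical LU; NoHooksLU skips hooks
    REQ_BGL = False

    def ExpandNames(self):
      self.needed_locks = {}       # this example needs no locks

    def CheckPrereq(self):
      pass                         # nothing to verify in this sketch

    def Exec(self, feedback_fn):
      feedback_fn("frobnicating the cluster")
      return True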
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing it separately is better because:
173 - ExpandNames is left purely as a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods need not worry about missing parameters.
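For instance, a purely syntactic check could look like (the parameter name is
illustrative)::

  def CheckArguments(self):
    if self.op.size is not None and self.op.size < 0:
      raise errors.OpPrereqError("Size must not be negative",
                                 errors.ECODE_INVAL)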
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
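# Acquire all node locks, but in shared mode (a sketch of the lock sharing
# mechanism described above)
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
}
self.share_locks[locking.LEVEL_NODE] = 1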
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
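A typical implementation defers the node-level locks to this stage, mirroring
the pattern shown for L{_LockInstancesNodes} below::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()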
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If there are no nodes, an empty
309 list should be returned (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged, but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
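As an illustrative sketch only (not taken from any particular LU), a
post-phase handler could report hook failures without altering the result::

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    if phase == constants.HOOKS_PHASE_POST:
      for (node_name, node_result) in hook_results.items():
        if node_result.fail_msg:
          feedback_fn("Post-hook failed on %s: %s" %
                      (node_name, node_result.fail_msg))
    return lu_result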
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to lock just some instances' nodes, or
372 to lock just primary or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU;
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
577 """Checks if the owned node groups are still correct for an instance.
579 @type cfg: L{config.ConfigWriter}
580 @param cfg: The cluster configuration
581 @type instance_name: string
582 @param instance_name: Instance name
583 @type owned_groups: set or frozenset
584 @param owned_groups: List of currently owned node groups
587 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
589 if not owned_groups.issuperset(inst_groups):
590 raise errors.OpPrereqError("Instance %s's node groups changed since"
591 " locks were acquired, current groups are"
592 " are '%s', owning groups '%s'; retry the"
595 utils.CommaJoin(inst_groups),
596 utils.CommaJoin(owned_groups)),
602 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
603 """Checks if the instances in a node group are still correct.
605 @type cfg: L{config.ConfigWriter}
606 @param cfg: The cluster configuration
607 @type group_uuid: string
608 @param group_uuid: Node group UUID
609 @type owned_instances: set or frozenset
610 @param owned_instances: List of currently owned instances
613 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
614 if owned_instances != wanted_instances:
615 raise errors.OpPrereqError("Instances in node group '%s' changed since"
616 " locks were acquired, wanted '%s', have '%s';"
617 " retry the operation" %
619 utils.CommaJoin(wanted_instances),
620 utils.CommaJoin(owned_instances)),
623 return wanted_instances
626 def _SupportsOob(cfg, node):
627 """Tells if node supports OOB.
629 @type cfg: L{config.ConfigWriter}
630 @param cfg: The cluster configuration
631 @type node: L{objects.Node}
632 @param node: The node
633 @return: The OOB script if supported or an empty string otherwise
636 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
639 def _GetWantedNodes(lu, nodes):
640 """Returns list of checked and expanded node names.
642 @type lu: L{LogicalUnit}
643 @param lu: the logical unit on whose behalf we execute
645 @param nodes: list of node names or None for all nodes
647 @return: the list of nodes, sorted
648 @raise errors.ProgrammerError: if the nodes parameter is wrong type
652 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
654 return utils.NiceSort(lu.cfg.GetNodeList())
657 def _GetWantedInstances(lu, instances):
658 """Returns list of checked and expanded instance names.
660 @type lu: L{LogicalUnit}
661 @param lu: the logical unit on whose behalf we execute
662 @type instances: list
663 @param instances: list of instance names or None for all instances
665 @return: the list of instances, sorted
666 @raise errors.OpPrereqError: if the instances parameter is wrong type
667 @raise errors.OpPrereqError: if any of the passed instances is not found
671 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
673 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
677 def _GetUpdatedParams(old_params, update_dict,
678 use_default=True, use_none=False):
679 """Return the new version of a parameter dictionary.
681 @type old_params: dict
682 @param old_params: old parameters
683 @type update_dict: dict
684 @param update_dict: dict containing new parameter values, or
685 constants.VALUE_DEFAULT to reset the parameter to its default
687 @type use_default: boolean
688 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
689 values as 'to be deleted' values
690 @type use_none: boolean
691 @param use_none: whether to recognise C{None} values as 'to be
694 @return: the new parameter dictionary
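For example (values are illustrative; with use_default=True a
constants.VALUE_DEFAULT value removes the key)::

  old = {"vcpus": 2, "memory": 512}
  update = {"vcpus": 4, "memory": constants.VALUE_DEFAULT}
  _GetUpdatedParams(old, update)  # -> {"vcpus": 4}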
697 params_copy = copy.deepcopy(old_params)
698 for key, val in update_dict.iteritems():
699 if ((use_default and val == constants.VALUE_DEFAULT) or
700 (use_none and val is None)):
706 params_copy[key] = val
710 def _ReleaseLocks(lu, level, names=None, keep=None):
711 """Releases locks owned by an LU.
713 @type lu: L{LogicalUnit}
714 @param level: Lock level
715 @type names: list or None
716 @param names: Names of locks to release
717 @type keep: list or None
718 @param keep: Names of locks to retain
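Typical calls (node names are illustrative)::

  # keep only the instance's primary node lock, release all other node locks
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])

  # release exactly the two named node locks
  _ReleaseLocks(lu, locking.LEVEL_NODE,
                names=["node1.example.com", "node2.example.com"])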
721 assert not (keep is not None and names is not None), \
722 "Only one of the 'names' and the 'keep' parameters can be given"
724 if names is not None:
725 should_release = names.__contains__
727 should_release = lambda name: name not in keep
729 should_release = None
731 owned = lu.owned_locks(level)
733 # Not owning any lock at this level, do nothing
740 # Determine which locks to release
742 if should_release(name):
747 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
749 # Release just some locks
750 lu.glm.release(level, names=release)
752 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
755 lu.glm.release(level)
757 assert not lu.glm.is_owned(level), "No locks should be owned"
760 def _MapInstanceDisksToNodes(instances):
761 """Creates a map from (node, volume) to instance name.
763 @type instances: list of L{objects.Instance}
764 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
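Illustrative return value (all names are made up)::

  {
    ("node1.example.com", "xenvg/disk0"): "inst1.example.com",
    ("node2.example.com", "xenvg/disk0"): "inst1.example.com",
  }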
767 return dict(((node, vol), inst.name)
768 for inst in instances
769 for (node, vols) in inst.MapLVsByNode().items()
773 def _RunPostHook(lu, node_name):
774 """Runs the post-hook for an opcode on a single node.
777 hm = lu.proc.BuildHooksManager(lu)
779 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
781 # pylint: disable=W0702
782 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
785 def _CheckOutputFields(static, dynamic, selected):
786 """Checks whether all selected fields are valid.
788 @type static: L{utils.FieldSet}
789 @param static: static fields set
790 @type dynamic: L{utils.FieldSet}
791 @param dynamic: dynamic fields set
798 delta = f.NonMatching(selected)
800 raise errors.OpPrereqError("Unknown output fields selected: %s"
801 % ",".join(delta), errors.ECODE_INVAL)
804 def _CheckGlobalHvParams(params):
805 """Validates that given hypervisor params are not global ones.
807 This will ensure that instances don't get customised versions of
811 used_globals = constants.HVC_GLOBALS.intersection(params)
813 msg = ("The following hypervisor parameters are global and cannot"
814 " be customized at instance level, please modify them at"
815 " cluster level: %s" % utils.CommaJoin(used_globals))
816 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
819 def _CheckNodeOnline(lu, node, msg=None):
820 """Ensure that a given node is online.
822 @param lu: the LU on behalf of which we make the check
823 @param node: the node to check
824 @param msg: if passed, should be a message to replace the default one
825 @raise errors.OpPrereqError: if the node is offline
829 msg = "Can't use offline node"
830 if lu.cfg.GetNodeInfo(node).offline:
831 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
834 def _CheckNodeNotDrained(lu, node):
835 """Ensure that a given node is not drained.
837 @param lu: the LU on behalf of which we make the check
838 @param node: the node to check
839 @raise errors.OpPrereqError: if the node is drained
842 if lu.cfg.GetNodeInfo(node).drained:
843 raise errors.OpPrereqError("Can't use drained node %s" % node,
847 def _CheckNodeVmCapable(lu, node):
848 """Ensure that a given node is vm capable.
850 @param lu: the LU on behalf of which we make the check
851 @param node: the node to check
852 @raise errors.OpPrereqError: if the node is not vm capable
855 if not lu.cfg.GetNodeInfo(node).vm_capable:
856 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
860 def _CheckNodeHasOS(lu, node, os_name, force_variant):
861 """Ensure that a node supports a given OS.
863 @param lu: the LU on behalf of which we make the check
864 @param node: the node to check
865 @param os_name: the OS to query about
866 @param force_variant: whether to ignore variant errors
867 @raise errors.OpPrereqError: if the node is not supporting the OS
870 result = lu.rpc.call_os_get(node, os_name)
871 result.Raise("OS '%s' not in supported OS list for node %s" %
873 prereq=True, ecode=errors.ECODE_INVAL)
874 if not force_variant:
875 _CheckOSVariant(result.payload, os_name)
878 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
879 """Ensure that a node has the given secondary ip.
881 @type lu: L{LogicalUnit}
882 @param lu: the LU on behalf of which we make the check
884 @param node: the node to check
885 @type secondary_ip: string
886 @param secondary_ip: the ip to check
887 @type prereq: boolean
888 @param prereq: whether to throw a prerequisite or an execute error
889 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
890 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
893 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
894 result.Raise("Failure checking secondary ip on node %s" % node,
895 prereq=prereq, ecode=errors.ECODE_ENVIRON)
896 if not result.payload:
897 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
898 " please fix and re-run this command" % secondary_ip)
900 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
902 raise errors.OpExecError(msg)
905 def _GetClusterDomainSecret():
906 """Reads the cluster domain secret.
909 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
913 def _CheckInstanceState(lu, instance, req_states, msg=None):
914 """Ensure that an instance is in one of the required states.
916 @param lu: the LU on behalf of which we make the check
917 @param instance: the instance to check
918 @param msg: if passed, should be a message to replace the default one
919 @raise errors.OpPrereqError: if the instance is not in the required state
923 msg = "can't use instance from outside %s states" % ", ".join(req_states)
924 if instance.admin_state not in req_states:
925 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
926 (instance, instance.admin_state, msg),
929 if constants.ADMINST_UP not in req_states:
930 pnode = instance.primary_node
931 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
932 ins_l.Raise("Can't contact node %s for instance information" % pnode,
933 prereq=True, ecode=errors.ECODE_ENVIRON)
935 if instance.name in ins_l.payload:
936 raise errors.OpPrereqError("Instance %s is running, %s" %
937 (instance.name, msg), errors.ECODE_STATE)
940 def _ExpandItemName(fn, name, kind):
941 """Expand an item name.
943 @param fn: the function to use for expansion
944 @param name: requested item name
945 @param kind: text description ('Node' or 'Instance')
946 @return: the resolved (full) name
947 @raise errors.OpPrereqError: if the item is not found
951 if full_name is None:
952 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
957 def _ExpandNodeName(cfg, name):
958 """Wrapper over L{_ExpandItemName} for nodes."""
959 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
962 def _ExpandInstanceName(cfg, name):
963 """Wrapper over L{_ExpandItemName} for instance."""
964 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
967 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
968 memory, vcpus, nics, disk_template, disks,
969 bep, hvp, hypervisor_name, tags):
970 """Builds instance related env variables for hooks
972 This builds the hook environment from individual variables.
975 @param name: the name of the instance
976 @type primary_node: string
977 @param primary_node: the name of the instance's primary node
978 @type secondary_nodes: list
979 @param secondary_nodes: list of secondary nodes as strings
980 @type os_type: string
981 @param os_type: the name of the instance's OS
983 @param status: the desired status of the instance
985 @param memory: the memory size of the instance
987 @param vcpus: the count of VCPUs the instance has
989 @param nics: list of tuples (ip, mac, mode, link) representing
990 the NICs the instance has
991 @type disk_template: string
992 @param disk_template: the disk template of the instance
994 @param disks: the list of (size, mode) pairs
996 @param bep: the backend parameters for the instance
998 @param hvp: the hypervisor parameters for the instance
999 @type hypervisor_name: string
1000 @param hypervisor_name: the hypervisor for the instance
1002 @param tags: list of instance tags as strings
1004 @return: the hook environment for this instance
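A fragment of the resulting dictionary for a single-NIC instance (values are
illustrative) would look like::

  {
    "INSTANCE_NAME": "inst1.example.com",
    "INSTANCE_PRIMARY": "node1.example.com",
    "INSTANCE_NIC_COUNT": 1,
    "INSTANCE_NIC0_MODE": constants.NIC_MODE_BRIDGED,
    "INSTANCE_NIC0_BRIDGE": "xen-br0",
    # ... plus OS, memory, disk, BE_*/HV_* and tag variables
  }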
1009 "INSTANCE_NAME": name,
1010 "INSTANCE_PRIMARY": primary_node,
1011 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1012 "INSTANCE_OS_TYPE": os_type,
1013 "INSTANCE_STATUS": status,
1014 "INSTANCE_MEMORY": memory,
1015 "INSTANCE_VCPUS": vcpus,
1016 "INSTANCE_DISK_TEMPLATE": disk_template,
1017 "INSTANCE_HYPERVISOR": hypervisor_name,
1021 nic_count = len(nics)
1022 for idx, (ip, mac, mode, link) in enumerate(nics):
1025 env["INSTANCE_NIC%d_IP" % idx] = ip
1026 env["INSTANCE_NIC%d_MAC" % idx] = mac
1027 env["INSTANCE_NIC%d_MODE" % idx] = mode
1028 env["INSTANCE_NIC%d_LINK" % idx] = link
1029 if mode == constants.NIC_MODE_BRIDGED:
1030 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1034 env["INSTANCE_NIC_COUNT"] = nic_count
1037 disk_count = len(disks)
1038 for idx, (size, mode) in enumerate(disks):
1039 env["INSTANCE_DISK%d_SIZE" % idx] = size
1040 env["INSTANCE_DISK%d_MODE" % idx] = mode
1044 env["INSTANCE_DISK_COUNT"] = disk_count
1049 env["INSTANCE_TAGS"] = " ".join(tags)
1051 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1052 for key, value in source.items():
1053 env["INSTANCE_%s_%s" % (kind, key)] = value
1058 def _NICListToTuple(lu, nics):
1059 """Build a list of nic information tuples.
1061 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1062 value in LUInstanceQueryData.
1064 @type lu: L{LogicalUnit}
1065 @param lu: the logical unit on whose behalf we execute
1066 @type nics: list of L{objects.NIC}
1067 @param nics: list of nics to convert to hooks tuples
1071 cluster = lu.cfg.GetClusterInfo()
1075 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1076 mode = filled_params[constants.NIC_MODE]
1077 link = filled_params[constants.NIC_LINK]
1078 hooks_nics.append((ip, mac, mode, link))
1082 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1083 """Builds instance related env variables for hooks from an object.
1085 @type lu: L{LogicalUnit}
1086 @param lu: the logical unit on whose behalf we execute
1087 @type instance: L{objects.Instance}
1088 @param instance: the instance for which we should build the
1090 @type override: dict
1091 @param override: dictionary with key/values that will override
1094 @return: the hook environment dictionary
1097 cluster = lu.cfg.GetClusterInfo()
1098 bep = cluster.FillBE(instance)
1099 hvp = cluster.FillHV(instance)
1101 "name": instance.name,
1102 "primary_node": instance.primary_node,
1103 "secondary_nodes": instance.secondary_nodes,
1104 "os_type": instance.os,
1105 "status": instance.admin_state,
1106 "memory": bep[constants.BE_MEMORY],
1107 "vcpus": bep[constants.BE_VCPUS],
1108 "nics": _NICListToTuple(lu, instance.nics),
1109 "disk_template": instance.disk_template,
1110 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1113 "hypervisor_name": instance.hypervisor,
1114 "tags": instance.tags,
1117 args.update(override)
1118 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1121 def _AdjustCandidatePool(lu, exceptions):
1122 """Adjust the candidate pool after node operations.
1125 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1127 lu.LogInfo("Promoted nodes to master candidate role: %s",
1128 utils.CommaJoin(node.name for node in mod_list))
1129 for name in mod_list:
1130 lu.context.ReaddNode(name)
1131 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1133 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1137 def _DecideSelfPromotion(lu, exceptions=None):
1138 """Decide whether I should promote myself as a master candidate.
1141 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1142 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1143 # the new node will increase mc_max with one, so:
1144 mc_should = min(mc_should + 1, cp_size)
1145 return mc_now < mc_should
1148 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1149 """Check that the brigdes needed by a list of nics exist.
1152 cluster = lu.cfg.GetClusterInfo()
1153 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1154 brlist = [params[constants.NIC_LINK] for params in paramslist
1155 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1157 result = lu.rpc.call_bridges_exist(target_node, brlist)
1158 result.Raise("Error checking bridges on destination node '%s'" %
1159 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1162 def _CheckInstanceBridgesExist(lu, instance, node=None):
1163 """Check that the brigdes needed by an instance exist.
1167 node = instance.primary_node
1168 _CheckNicsBridgesExist(lu, instance.nics, node)
1171 def _CheckOSVariant(os_obj, name):
1172 """Check whether an OS name conforms to the os variants specification.
1174 @type os_obj: L{objects.OS}
1175 @param os_obj: OS object to check
1177 @param name: OS name passed by the user, to check for validity
1180 variant = objects.OS.GetVariant(name)
1181 if not os_obj.supported_variants:
1183 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1184 " passed)" % (os_obj.name, variant),
1188 raise errors.OpPrereqError("OS name must include a variant",
1191 if variant not in os_obj.supported_variants:
1192 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1195 def _GetNodeInstancesInner(cfg, fn):
1196 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1199 def _GetNodeInstances(cfg, node_name):
1200 """Returns a list of all primary and secondary instances on a node.
1204 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1207 def _GetNodePrimaryInstances(cfg, node_name):
1208 """Returns primary instances on a node.
1211 return _GetNodeInstancesInner(cfg,
1212 lambda inst: node_name == inst.primary_node)
1215 def _GetNodeSecondaryInstances(cfg, node_name):
1216 """Returns secondary instances on a node.
1219 return _GetNodeInstancesInner(cfg,
1220 lambda inst: node_name in inst.secondary_nodes)
1223 def _GetStorageTypeArgs(cfg, storage_type):
1224 """Returns the arguments for a storage type.
1227 # Special case for file storage
1228 if storage_type == constants.ST_FILE:
1229 # storage.FileStorage wants a list of storage directories
1230 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1235 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1238 for dev in instance.disks:
1239 cfg.SetDiskID(dev, node_name)
1241 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1242 result.Raise("Failed to get disk status from node %s" % node_name,
1243 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1245 for idx, bdev_status in enumerate(result.payload):
1246 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1252 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1253 """Check the sanity of iallocator and node arguments and use the
1254 cluster-wide iallocator if appropriate.
1256 Check that at most one of (iallocator, node) is specified. If none is
1257 specified, then the LU's opcode's iallocator slot is filled with the
1258 cluster-wide default iallocator.
1260 @type iallocator_slot: string
1261 @param iallocator_slot: the name of the opcode iallocator slot
1262 @type node_slot: string
1263 @param node_slot: the name of the opcode target node slot
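Typically called from an LU's CheckArguments; the slot names depend on the
opcode, e.g. (illustrative)::

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")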
1266 node = getattr(lu.op, node_slot, None)
1267 iallocator = getattr(lu.op, iallocator_slot, None)
1269 if node is not None and iallocator is not None:
1270 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1272 elif node is None and iallocator is None:
1273 default_iallocator = lu.cfg.GetDefaultIAllocator()
1274 if default_iallocator:
1275 setattr(lu.op, iallocator_slot, default_iallocator)
1277 raise errors.OpPrereqError("No iallocator or node given and no"
1278 " cluster-wide default iallocator found;"
1279 " please specify either an iallocator or a"
1280 " node, or set a cluster-wide default"
1284 def _GetDefaultIAllocator(cfg, iallocator):
1285 """Decides on which iallocator to use.
1287 @type cfg: L{config.ConfigWriter}
1288 @param cfg: Cluster configuration object
1289 @type iallocator: string or None
1290 @param iallocator: Iallocator specified in opcode
1292 @return: Iallocator name
1296 # Use default iallocator
1297 iallocator = cfg.GetDefaultIAllocator()
1300 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1301 " opcode nor as a cluster-wide default",
1307 class LUClusterPostInit(LogicalUnit):
1308 """Logical unit for running hooks after cluster initialization.
1311 HPATH = "cluster-init"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1326 return ([], [self.cfg.GetMasterNode()])
1328 def Exec(self, feedback_fn):
1335 class LUClusterDestroy(LogicalUnit):
1336 """Logical unit for destroying the cluster.
1339 HPATH = "cluster-destroy"
1340 HTYPE = constants.HTYPE_CLUSTER
1342 def BuildHooksEnv(self):
1347 "OP_TARGET": self.cfg.GetClusterName(),
1350 def BuildHooksNodes(self):
1351 """Build hooks nodes.
1356 def CheckPrereq(self):
1357 """Check prerequisites.
1359 This checks whether the cluster is empty.
1361 Any errors are signaled by raising errors.OpPrereqError.
1364 master = self.cfg.GetMasterNode()
1366 nodelist = self.cfg.GetNodeList()
1367 if len(nodelist) != 1 or nodelist[0] != master:
1368 raise errors.OpPrereqError("There are still %d node(s) in"
1369 " this cluster." % (len(nodelist) - 1),
1371 instancelist = self.cfg.GetInstanceList()
1373 raise errors.OpPrereqError("There are still %d instance(s) in"
1374 " this cluster." % len(instancelist),
1377 def Exec(self, feedback_fn):
1378 """Destroys the cluster.
1381 master_params = self.cfg.GetMasterNetworkParameters()
1383 # Run post hooks on master node before it's removed
1384 _RunPostHook(self, master_params.name)
1386 ems = self.cfg.GetUseExternalMipScript()
1387 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1389 result.Raise("Could not disable the master role")
1391 return master_params.name
1394 def _VerifyCertificate(filename):
1395 """Verifies a certificate for L{LUClusterVerifyConfig}.
1397 @type filename: string
1398 @param filename: Path to PEM file
1402 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1403 utils.ReadFile(filename))
1404 except Exception, err: # pylint: disable=W0703
1405 return (LUClusterVerifyConfig.ETYPE_ERROR,
1406 "Failed to load X509 certificate %s: %s" % (filename, err))
1409 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1410 constants.SSL_CERT_EXPIRATION_ERROR)
1413 fnamemsg = "While verifying %s: %s" % (filename, msg)
1418 return (None, fnamemsg)
1419 elif errcode == utils.CERT_WARNING:
1420 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1421 elif errcode == utils.CERT_ERROR:
1422 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1424 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1427 def _GetAllHypervisorParameters(cluster, instances):
1428 """Compute the set of all hypervisor parameters.
1430 @type cluster: L{objects.Cluster}
1431 @param cluster: the cluster object
1432 @type instances: list of L{objects.Instance}
1433 @param instances: additional instances from which to obtain parameters
1434 @rtype: list of (origin, hypervisor, parameters)
1435 @return: a list with all parameters found, indicating the hypervisor they
1436 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1441 for hv_name in cluster.enabled_hypervisors:
1442 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1444 for os_name, os_hvp in cluster.os_hvp.items():
1445 for hv_name, hv_params in os_hvp.items():
1447 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1448 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1450 # TODO: collapse identical parameter values in a single one
1451 for instance in instances:
1452 if instance.hvparams:
1453 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1454 cluster.FillHV(instance)))
1459 class _VerifyErrors(object):
1460 """Mix-in for cluster/group verify LUs.
1462 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1463 self.op and self._feedback_fn to be available.)
1467 ETYPE_FIELD = "code"
1468 ETYPE_ERROR = "ERROR"
1469 ETYPE_WARNING = "WARNING"
1471 def _Error(self, ecode, item, msg, *args, **kwargs):
1472 """Format an error message.
1474 Based on the opcode's error_codes parameter, either format a
1475 parseable error code, or a simpler error string.
1477 This must be called only from Exec and functions called from Exec.
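Illustrative output for the two modes (node name and message are made up)::

  # with the opcode's error_codes set (machine-parseable)
  ERROR:ENODESSH:node:node1.example.com:ssh communication failed
  # without error_codes (human-readable)
  ERROR: node node1.example.com: ssh communication failed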
1480 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1481 itype, etxt, _ = ecode
1482 # first complete the msg
1485 # then format the whole message
1486 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1487 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1493 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1494 # and finally report it via the feedback_fn
1495 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1497 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1498 """Log an error message if the passed condition is True.
1502 or self.op.debug_simulate_errors) # pylint: disable=E1101
1504 # If the error code is in the list of ignored errors, demote the error to a
1506 (_, etxt, _) = ecode
1507 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1508 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1511 self._Error(ecode, *args, **kwargs)
1513 # do not mark the operation as failed for cases that are only warnings
1514 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1515 self.bad = self.bad or cond
1518 class LUClusterVerify(NoHooksLU):
1519 """Submits all jobs necessary to verify the cluster.
1524 def ExpandNames(self):
1525 self.needed_locks = {}
1527 def Exec(self, feedback_fn):
1530 if self.op.group_name:
1531 groups = [self.op.group_name]
1532 depends_fn = lambda: None
1534 groups = self.cfg.GetNodeGroupList()
1536 # Verify global configuration
1538 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1541 # Always depend on global verification
1542 depends_fn = lambda: [(-len(jobs), [])]
1544 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1545 ignore_errors=self.op.ignore_errors,
1546 depends=depends_fn())]
1547 for group in groups)
1549 # Fix up all parameters
1550 for op in itertools.chain(*jobs): # pylint: disable=W0142
1551 op.debug_simulate_errors = self.op.debug_simulate_errors
1552 op.verbose = self.op.verbose
1553 op.error_codes = self.op.error_codes
1555 op.skip_checks = self.op.skip_checks
1556 except AttributeError:
1557 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1559 return ResultWithJobs(jobs)
1562 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1563 """Verifies the cluster config.
1568 def _VerifyHVP(self, hvp_data):
1569 """Verifies locally the syntax of the hypervisor parameters.
1572 for item, hv_name, hv_params in hvp_data:
1573 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1576 hv_class = hypervisor.GetHypervisor(hv_name)
1577 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1578 hv_class.CheckParameterSyntax(hv_params)
1579 except errors.GenericError, err:
1580 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1582 def ExpandNames(self):
1583 # Information can be safely retrieved as the BGL is acquired in exclusive
1585 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1586 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1587 self.all_node_info = self.cfg.GetAllNodesInfo()
1588 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1589 self.needed_locks = {}
1591 def Exec(self, feedback_fn):
1592 """Verify integrity of cluster, performing various test on nodes.
1596 self._feedback_fn = feedback_fn
1598 feedback_fn("* Verifying cluster config")
1600 for msg in self.cfg.VerifyConfig():
1601 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1603 feedback_fn("* Verifying cluster certificate files")
1605 for cert_filename in constants.ALL_CERT_FILES:
1606 (errcode, msg) = _VerifyCertificate(cert_filename)
1607 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1609 feedback_fn("* Verifying hypervisor parameters")
1611 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1612 self.all_inst_info.values()))
1614 feedback_fn("* Verifying all nodes belong to an existing group")
1616 # We do this verification here because, should this bogus circumstance
1617 # occur, it would never be caught by VerifyGroup, which only acts on
1618 # nodes/instances reachable from existing node groups.
1620 dangling_nodes = set(node.name for node in self.all_node_info.values()
1621 if node.group not in self.all_group_info)
1623 dangling_instances = {}
1624 no_node_instances = []
1626 for inst in self.all_inst_info.values():
1627 if inst.primary_node in dangling_nodes:
1628 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1629 elif inst.primary_node not in self.all_node_info:
1630 no_node_instances.append(inst.name)
1635 utils.CommaJoin(dangling_instances.get(node.name,
1637 for node in dangling_nodes]
1639 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1641 "the following nodes (and their instances) belong to a non"
1642 " existing group: %s", utils.CommaJoin(pretty_dangling))
1644 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1646 "the following instances have a non-existing primary-node:"
1647 " %s", utils.CommaJoin(no_node_instances))
1652 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1653 """Verifies the status of a node group.
1656 HPATH = "cluster-verify"
1657 HTYPE = constants.HTYPE_CLUSTER
1660 _HOOKS_INDENT_RE = re.compile("^", re.M)
1662 class NodeImage(object):
1663 """A class representing the logical and physical status of a node.
1666 @ivar name: the node name to which this object refers
1667 @ivar volumes: a structure as returned from
1668 L{ganeti.backend.GetVolumeList} (runtime)
1669 @ivar instances: a list of running instances (runtime)
1670 @ivar pinst: list of configured primary instances (config)
1671 @ivar sinst: list of configured secondary instances (config)
1672 @ivar sbp: dictionary of {primary-node: list of instances} for all
1673 instances for which this node is secondary (config)
1674 @ivar mfree: free memory, as reported by hypervisor (runtime)
1675 @ivar dfree: free disk, as reported by the node (runtime)
1676 @ivar offline: the offline status (config)
1677 @type rpc_fail: boolean
1678 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1679 not whether the individual keys were correct) (runtime)
1680 @type lvm_fail: boolean
1681 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1682 @type hyp_fail: boolean
1683 @ivar hyp_fail: whether the RPC call didn't return the instance list
1684 @type ghost: boolean
1685 @ivar ghost: whether this is a known node or not (config)
1686 @type os_fail: boolean
1687 @ivar os_fail: whether the RPC call didn't return valid OS data
1689 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1690 @type vm_capable: boolean
1691 @ivar vm_capable: whether the node can host instances
1694 def __init__(self, offline=False, name=None, vm_capable=True):
1703 self.offline = offline
1704 self.vm_capable = vm_capable
1705 self.rpc_fail = False
1706 self.lvm_fail = False
1707 self.hyp_fail = False
1709 self.os_fail = False
1712 def ExpandNames(self):
1713 # This raises errors.OpPrereqError on its own:
1714 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1716 # Get instances in node group; this is unsafe and needs verification later
1717 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1719 self.needed_locks = {
1720 locking.LEVEL_INSTANCE: inst_names,
1721 locking.LEVEL_NODEGROUP: [self.group_uuid],
1722 locking.LEVEL_NODE: [],
1725 self.share_locks = _ShareAll()
1727 def DeclareLocks(self, level):
1728 if level == locking.LEVEL_NODE:
1729 # Get members of node group; this is unsafe and needs verification later
1730 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1732 all_inst_info = self.cfg.GetAllInstancesInfo()
1734 # In Exec(), we warn about mirrored instances that have primary and
1735 # secondary living in separate node groups. To fully verify that
1736 # volumes for these instances are healthy, we will need to do an
1737 # extra call to their secondaries. We ensure here those nodes will
1739 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1740 # Important: access only the instances whose lock is owned
1741 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1742 nodes.update(all_inst_info[inst].secondary_nodes)
1744 self.needed_locks[locking.LEVEL_NODE] = nodes
1746 def CheckPrereq(self):
1747 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1748 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1750 group_nodes = set(self.group_info.members)
1751 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1754 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1756 unlocked_instances = \
1757 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1760 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1761 utils.CommaJoin(unlocked_nodes))
1763 if unlocked_instances:
1764 raise errors.OpPrereqError("Missing lock for instances: %s" %
1765 utils.CommaJoin(unlocked_instances))
1767 self.all_node_info = self.cfg.GetAllNodesInfo()
1768 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1770 self.my_node_names = utils.NiceSort(group_nodes)
1771 self.my_inst_names = utils.NiceSort(group_instances)
1773 self.my_node_info = dict((name, self.all_node_info[name])
1774 for name in self.my_node_names)
1776 self.my_inst_info = dict((name, self.all_inst_info[name])
1777 for name in self.my_inst_names)
1779 # We detect here the nodes that will need the extra RPC calls for verifying
1780 # split LV volumes; they should be locked.
1781 extra_lv_nodes = set()
1783 for inst in self.my_inst_info.values():
1784 if inst.disk_template in constants.DTS_INT_MIRROR:
1785 group = self.my_node_info[inst.primary_node].group
1786 for nname in inst.secondary_nodes:
1787 if self.all_node_info[nname].group != group:
1788 extra_lv_nodes.add(nname)
1790 unlocked_lv_nodes = \
1791 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1793 if unlocked_lv_nodes:
1794 raise errors.OpPrereqError("these nodes could be locked: %s" %
1795 utils.CommaJoin(unlocked_lv_nodes))
1796 self.extra_lv_nodes = list(extra_lv_nodes)
1798 def _VerifyNode(self, ninfo, nresult):
1799 """Perform some basic validation on data returned from a node.
1801 - check the result data structure is well formed and has all the
1803 - check ganeti version
1805 @type ninfo: L{objects.Node}
1806 @param ninfo: the node to check
1807 @param nresult: the results from the node
1809 @return: whether overall this call was successful (and we can expect
1810 reasonable values in the response)
1814 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1816 # main result, nresult should be a non-empty dict
1817 test = not nresult or not isinstance(nresult, dict)
1818 _ErrorIf(test, constants.CV_ENODERPC, node,
1819 "unable to verify node: no data returned")
1823 # compares ganeti version
1824 local_version = constants.PROTOCOL_VERSION
1825 remote_version = nresult.get("version", None)
1826 test = not (remote_version and
1827 isinstance(remote_version, (list, tuple)) and
1828 len(remote_version) == 2)
1829 _ErrorIf(test, constants.CV_ENODERPC, node,
1830 "connection to node returned invalid data")
1834 test = local_version != remote_version[0]
1835 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1836 "incompatible protocol versions: master %s,"
1837 " node %s", local_version, remote_version[0])
1841 # node seems compatible, we can actually try to look into its results
1843 # full package version
1844 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1845 constants.CV_ENODEVERSION, node,
1846 "software version mismatch: master %s, node %s",
1847 constants.RELEASE_VERSION, remote_version[1],
1848 code=self.ETYPE_WARNING)
1850 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1851 if ninfo.vm_capable and isinstance(hyp_result, dict):
1852 for hv_name, hv_result in hyp_result.iteritems():
1853 test = hv_result is not None
1854 _ErrorIf(test, constants.CV_ENODEHV, node,
1855 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1857 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1858 if ninfo.vm_capable and isinstance(hvp_result, list):
1859 for item, hv_name, hv_result in hvp_result:
1860 _ErrorIf(True, constants.CV_ENODEHV, node,
1861 "hypervisor %s parameter verify failure (source %s): %s",
1862 hv_name, item, hv_result)
1864 test = nresult.get(constants.NV_NODESETUP,
1865 ["Missing NODESETUP results"])
1866 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1871 def _VerifyNodeTime(self, ninfo, nresult,
1872 nvinfo_starttime, nvinfo_endtime):
1873 """Check the node time.
1875 @type ninfo: L{objects.Node}
1876 @param ninfo: the node to check
1877 @param nresult: the remote results for the node
1878 @param nvinfo_starttime: the start time of the RPC call
1879 @param nvinfo_endtime: the end time of the RPC call
1883 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1885 ntime = nresult.get(constants.NV_TIME, None)
1887 ntime_merged = utils.MergeTime(ntime)
1888 except (ValueError, TypeError):
1889 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1892 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1893 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1894 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1895 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1899 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1900 "Node time diverges by at least %s from master node time",
1903 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1904 """Check the node LVM results.
1906 @type ninfo: L{objects.Node}
1907 @param ninfo: the node to check
1908 @param nresult: the remote results for the node
1909 @param vg_name: the configured VG name
1916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1918 # checks vg existence and size > 20G
1919 vglist = nresult.get(constants.NV_VGLIST, None)
1921 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1923 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1924 constants.MIN_VG_SIZE)
1925 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1928 pvlist = nresult.get(constants.NV_PVLIST, None)
1929 test = pvlist is None
1930 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1932 # check that ':' is not present in PV names, since it's a
1933 # special character for lvcreate (denotes the range of PEs to
1935 for _, pvname, owner_vg in pvlist:
1936 test = ":" in pvname
1937 _ErrorIf(test, constants.CV_ENODELVM, node,
1938 "Invalid character ':' in PV '%s' of VG '%s'",
1941 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1942 """Check the node bridges.
1944 @type ninfo: L{objects.Node}
1945 @param ninfo: the node to check
1946 @param nresult: the remote results for the node
1947 @param bridges: the expected list of bridges
1954 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1956 missing = nresult.get(constants.NV_BRIDGES, None)
1957 test = not isinstance(missing, list)
1958 _ErrorIf(test, constants.CV_ENODENET, node,
1959 "did not return valid bridge information")
1961 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1962 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1964 def _VerifyNodeUserScripts(self, ninfo, nresult):
1965 """Check the results of user scripts presence and executability on the node
1967 @type ninfo: L{objects.Node}
1968 @param ninfo: the node to check
1969 @param nresult: the remote results for the node
1974 test = constants.NV_USERSCRIPTS not in nresult
1975 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1976 "did not return user scripts information")
1978 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1980 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1981 "user scripts not present or not executable: %s" %
1982 utils.CommaJoin(sorted(broken_scripts)))
1984 def _VerifyNodeNetwork(self, ninfo, nresult):
1985 """Check the node network connectivity results.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1993 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1995 test = constants.NV_NODELIST not in nresult
1996 _ErrorIf(test, constants.CV_ENODESSH, node,
1997 "node hasn't returned node ssh connectivity data")
1999 if nresult[constants.NV_NODELIST]:
2000 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2001 _ErrorIf(True, constants.CV_ENODESSH, node,
2002 "ssh communication with node '%s': %s", a_node, a_msg)
2004 test = constants.NV_NODENETTEST not in nresult
2005 _ErrorIf(test, constants.CV_ENODENET, node,
2006 "node hasn't returned node tcp connectivity data")
2008 if nresult[constants.NV_NODENETTEST]:
2009 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2011 _ErrorIf(True, constants.CV_ENODENET, node,
2012 "tcp communication with node '%s': %s",
2013 anode, nresult[constants.NV_NODENETTEST][anode])
2015 test = constants.NV_MASTERIP not in nresult
2016 _ErrorIf(test, constants.CV_ENODENET, node,
2017 "node hasn't returned node master IP reachability data")
2019 if not nresult[constants.NV_MASTERIP]:
2020 if node == self.master_node:
2021 msg = "the master node cannot reach the master IP (not configured?)"
2023 msg = "cannot reach the master IP"
2024 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2026 def _VerifyInstance(self, instance, instanceconfig, node_image,
2028 """Verify an instance.
2030 This function checks whether the required block devices are
2031 available on the instance's nodes.
2034 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2035 node_current = instanceconfig.primary_node
2037 node_vol_should = {}
2038 instanceconfig.MapLVsByNode(node_vol_should)
2040 for node in node_vol_should:
2041 n_img = node_image[node]
2042 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043 # ignore missing volumes on offline or broken nodes
2045 for volume in node_vol_should[node]:
2046 test = volume not in n_img.volumes
2047 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2048 "volume %s missing on node %s", volume, node)
2050 if instanceconfig.admin_state == constants.ADMINST_UP:
2051 pri_img = node_image[node_current]
2052 test = instance not in pri_img.instances and not pri_img.offline
2053 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2054 "instance not running on its primary node %s",
2057 diskdata = [(nname, success, status, idx)
2058 for (nname, disks) in diskstatus.items()
2059 for idx, (success, status) in enumerate(disks)]
2061 for nname, success, bdev_status, idx in diskdata:
2062 # the 'ghost node' construction in Exec() ensures that we have a
2064 snode = node_image[nname]
2065 bad_snode = snode.ghost or snode.offline
2066 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2067 not success and not bad_snode,
2068 constants.CV_EINSTANCEFAULTYDISK, instance,
2069 "couldn't retrieve status for disk/%s on %s: %s",
2070 idx, nname, bdev_status)
2071 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2072 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2073 constants.CV_EINSTANCEFAULTYDISK, instance,
2074 "disk/%s on %s is faulty", idx, nname)
2076 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2077 """Verify if there are any unknown volumes in the cluster.
2079 The .os, .swap and backup volumes are ignored. All other volumes are
2080 reported as unknown.
2082 @type reserved: L{ganeti.utils.FieldSet}
2083 @param reserved: a FieldSet of reserved volume names
2086 for node, n_img in node_image.items():
2087 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2088 # skip non-healthy nodes
2090 for volume in n_img.volumes:
2091 test = ((node not in node_vol_should or
2092 volume not in node_vol_should[node]) and
2093 not reserved.Matches(volume))
2094 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2095 "volume %s is unknown", volume)
2097 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2098 """Verify N+1 Memory Resilience.
2100 Check that if one single node dies we can still start all the
2101 instances it was primary for.
2104 cluster_info = self.cfg.GetClusterInfo()
2105 for node, n_img in node_image.items():
2106 # This code checks that every node which is now listed as
2107 # secondary has enough memory to host all instances it is
2108 # supposed to, should a single other node in the cluster fail.
2109 # FIXME: not ready for failover to an arbitrary node
2110 # FIXME: does not support file-backed instances
2111 # WARNING: we currently take into account down instances as well
2112 # as up ones, considering that even if they're down someone
2113 # might want to start them even in the event of a node failure.
2115 # we're skipping offline nodes from the N+1 warning, since
2116 # most likely we don't have good memory information from them;
2117 # we already list instances living on such nodes, and that's
2120 for prinode, instances in n_img.sbp.items():
2122 for instance in instances:
2123 bep = cluster_info.FillBE(instance_cfg[instance])
2124 if bep[constants.BE_AUTO_BALANCE]:
2125 needed_mem += bep[constants.BE_MEMORY]
2126 test = n_img.mfree < needed_mem
2127 self._ErrorIf(test, constants.CV_ENODEN1, node,
2128 "not enough memory to accomodate instance failovers"
2129 " should node %s fail (%dMiB needed, %dMiB available)",
2130 prinode, needed_mem, n_img.mfree)
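# Illustrative sketch (not part of the LU): the N+1 memory rule checked above,
# on plain dictionaries. "sbp" maps primary node -> instances this node is
# secondary for; "be" maps instance -> (auto_balance, memory_mib). These field
# shapes are assumptions made only for the example.
def _ExampleNPlusOne(mfree_mib, sbp, be):
  """Return the primary nodes whose failover would not fit on this node."""
  failing = []
  for prinode, instances in sbp.items():
    needed_mem = 0
    for inst in instances:
      (auto_balance, mem) = be[inst]
      if auto_balance:
        needed_mem += mem
    if mfree_mib < needed_mem:
      failing.append(prinode)
  return failing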
2133 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2134 (files_all, files_opt, files_mc, files_vm)):
2135 """Verifies file checksums collected from all nodes.
2137 @param errorif: Callback for reporting errors
2138 @param nodeinfo: List of L{objects.Node} objects
2139 @param master_node: Name of master node
2140 @param all_nvinfo: RPC results
2143 # Define functions determining which nodes to consider for a file
2146 (files_mc, lambda node: (node.master_candidate or
2147 node.name == master_node)),
2148 (files_vm, lambda node: node.vm_capable),
2151 # Build mapping from filename to list of nodes which should have the file
2153 for (files, fn) in files2nodefn:
2155 filenodes = nodeinfo
2157 filenodes = filter(fn, nodeinfo)
2158 nodefiles.update((filename,
2159 frozenset(map(operator.attrgetter("name"), filenodes)))
2160 for filename in files)
2162 assert set(nodefiles) == (files_all | files_mc | files_vm)
2164 fileinfo = dict((filename, {}) for filename in nodefiles)
2165 ignore_nodes = set()
2167 for node in nodeinfo:
2169 ignore_nodes.add(node.name)
2172 nresult = all_nvinfo[node.name]
2174 if nresult.fail_msg or not nresult.payload:
2177 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2179 test = not (node_files and isinstance(node_files, dict))
2180 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2181 "Node did not return file checksum data")
2183 ignore_nodes.add(node.name)
2186 # Build per-checksum mapping from filename to nodes having it
2187 for (filename, checksum) in node_files.items():
2188 assert filename in nodefiles
2189 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2191 for (filename, checksums) in fileinfo.items():
2192 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2194 # Nodes having the file
2195 with_file = frozenset(node_name
2196 for nodes in fileinfo[filename].values()
2197 for node_name in nodes) - ignore_nodes
2199 expected_nodes = nodefiles[filename] - ignore_nodes
2201 # Nodes missing file
2202 missing_file = expected_nodes - with_file
2204 if filename in files_opt:
2206 errorif(missing_file and missing_file != expected_nodes,
2207 constants.CV_ECLUSTERFILECHECK, None,
2208 "File %s is optional, but it must exist on all or no"
2209 " nodes (not found on %s)",
2210 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2212 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2213 "File %s is missing from node(s) %s", filename,
2214 utils.CommaJoin(utils.NiceSort(missing_file)))
2216 # Warn if a node has a file it shouldn't
2217 unexpected = with_file - expected_nodes
2219 constants.CV_ECLUSTERFILECHECK, None,
2220 "File %s should not exist on node(s) %s",
2221 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2223 # See if there are multiple versions of the file
2224 test = len(checksums) > 1
2226 variants = ["variant %s on %s" %
2227 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2228 for (idx, (checksum, nodes)) in
2229 enumerate(sorted(checksums.items()))]
2233 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2234 "File %s found with %s different checksums (%s)",
2235 filename, len(checksums), "; ".join(variants))
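# Illustrative sketch (not part of the LU): the per-file consistency rules
# applied in _VerifyFiles above, on plain sets (the ignore_nodes handling is
# left out for brevity). "checksums" maps checksum -> set of node names
# holding that version of the file.
def _ExampleFileIssues(checksums, expected_nodes, optional):
  """Return (missing_nodes, unexpected_nodes, variant_count) for one file."""
  with_file = set()
  for nodes in checksums.values():
    with_file.update(nodes)
  missing = expected_nodes - with_file
  if optional and (not missing or missing == expected_nodes):
    # optional files may legitimately be absent everywhere
    missing = set()
  unexpected = with_file - expected_nodes
  return (missing, unexpected, len(checksums))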
2237 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2239 """Verifies and the node DRBD status.
2241 @type ninfo: L{objects.Node}
2242 @param ninfo: the node to check
2243 @param nresult: the remote results for the node
2244 @param instanceinfo: the dict of instances
2245 @param drbd_helper: the configured DRBD usermode helper
2246 @param drbd_map: the DRBD map as returned by
2247 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2251 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2254 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2255 test = (helper_result is None)
2256 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2257 "no drbd usermode helper returned")
2259 status, payload = helper_result
2261 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2262 "drbd usermode helper check unsuccessful: %s", payload)
2263 test = status and (payload != drbd_helper)
2264 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2265 "wrong drbd usermode helper: %s", payload)
2267 # compute the DRBD minors
2269 for minor, instance in drbd_map[node].items():
2270 test = instance not in instanceinfo
2271 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2272 "ghost instance '%s' in temporary DRBD map", instance)
2273 # ghost instance should not be running, but otherwise we
2274 # don't give double warnings (both ghost instance and
2275 # unallocated minor in use)
2277 node_drbd[minor] = (instance, False)
2279 instance = instanceinfo[instance]
2280 node_drbd[minor] = (instance.name,
2281 instance.admin_state == constants.ADMINST_UP)
2283 # and now check them
2284 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2285 test = not isinstance(used_minors, (tuple, list))
2286 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2287 "cannot parse drbd status file: %s", str(used_minors))
2289 # we cannot check drbd status
2292 for minor, (iname, must_exist) in node_drbd.items():
2293 test = minor not in used_minors and must_exist
2294 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2295 "drbd minor %d of instance %s is not active", minor, iname)
2296 for minor in used_minors:
2297 test = minor not in node_drbd
2298 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2299 "unallocated drbd minor %d is in use", minor)
2301 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2302 """Builds the node OS structures.
2304 @type ninfo: L{objects.Node}
2305 @param ninfo: the node to check
2306 @param nresult: the remote results for the node
2307 @param nimg: the node image object
2311 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2313 remote_os = nresult.get(constants.NV_OSLIST, None)
2314 test = (not isinstance(remote_os, list) or
2315 not compat.all(isinstance(v, list) and len(v) == 7
2316 for v in remote_os))
2318 _ErrorIf(test, constants.CV_ENODEOS, node,
2319 "node hasn't returned valid OS data")
2328 for (name, os_path, status, diagnose,
2329 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2331 if name not in os_dict:
2334 # parameters is a list of lists instead of list of tuples due to
2335 # JSON lacking a real tuple type, fix it:
2336 parameters = [tuple(v) for v in parameters]
2337 os_dict[name].append((os_path, status, diagnose,
2338 set(variants), set(parameters), set(api_ver)))
2340 nimg.oslist = os_dict
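# Illustrative sketch (not part of the LU): how a NV_OSLIST-style payload of
# 7-element lists is folded into the nimg.oslist shape built above; this is a
# simplified rewrite, not the method itself.
def _ExampleBuildOsDict(payload):
  """Fold an OS list payload into {name: [(path, status, diag, sets...)]}."""
  os_dict = {}
  for (name, os_path, status, diagnose,
       variants, parameters, api_ver) in payload:
    parameters = [tuple(v) for v in parameters]  # JSON has no tuple type
    os_dict.setdefault(name, []).append(
      (os_path, status, diagnose,
       set(variants), set(parameters), set(api_ver)))
  return os_dict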
2342 def _VerifyNodeOS(self, ninfo, nimg, base):
2343 """Verifies the node OS list.
2345 @type ninfo: L{objects.Node}
2346 @param ninfo: the node to check
2347 @param nimg: the node image object
2348 @param base: the 'template' node we match against (e.g. from the master)
2352 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2354 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2356 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2357 for os_name, os_data in nimg.oslist.items():
2358 assert os_data, "Empty OS status for OS %s?!" % os_name
2359 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2360 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2361 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2362 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2363 "OS '%s' has multiple entries (first one shadows the rest): %s",
2364 os_name, utils.CommaJoin([v[0] for v in os_data]))
2365 # comparisons with the 'base' image
2366 test = os_name not in base.oslist
2367 _ErrorIf(test, constants.CV_ENODEOS, node,
2368 "Extra OS %s not present on reference node (%s)",
2372 assert base.oslist[os_name], "Base node has empty OS status?"
2373 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2375 # base OS is invalid, skipping
2377 for kind, a, b in [("API version", f_api, b_api),
2378 ("variants list", f_var, b_var),
2379 ("parameters", beautify_params(f_param),
2380 beautify_params(b_param))]:
2381 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2382 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2383 kind, os_name, base.name,
2384 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2386 # check any missing OSes
2387 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2388 _ErrorIf(missing, constants.CV_ENODEOS, node,
2389 "OSes present on reference node %s but missing on this node: %s",
2390 base.name, utils.CommaJoin(missing))
2392 def _VerifyOob(self, ninfo, nresult):
2393 """Verifies out of band functionality of a node.
2395 @type ninfo: L{objects.Node}
2396 @param ninfo: the node to check
2397 @param nresult: the remote results for the node
2401 # We just have to verify the paths on master and/or master candidates
2402 # as the oob helper is invoked on the master
2403 if ((ninfo.master_candidate or ninfo.master_capable) and
2404 constants.NV_OOB_PATHS in nresult):
2405 for path_result in nresult[constants.NV_OOB_PATHS]:
2406 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2408 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2409 """Verifies and updates the node volume data.
2411 This function will update a L{NodeImage}'s internal structures
2412 with data from the remote call.
2414 @type ninfo: L{objects.Node}
2415 @param ninfo: the node to check
2416 @param nresult: the remote results for the node
2417 @param nimg: the node image object
2418 @param vg_name: the configured VG name
2422 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2424 nimg.lvm_fail = True
2425 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2428 elif isinstance(lvdata, basestring):
2429 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2430 utils.SafeEncode(lvdata))
2431 elif not isinstance(lvdata, dict):
2432 _ErrorIf(True, constants.CV_ENODELVM, node,
2433 "rpc call to node failed (lvlist)")
2435 nimg.volumes = lvdata
2436 nimg.lvm_fail = False
2438 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2439 """Verifies and updates the node instance list.
2441 If the listing was successful, then updates this node's instance
2442 list. Otherwise, it marks the RPC call as failed for the instance
2445 @type ninfo: L{objects.Node}
2446 @param ninfo: the node to check
2447 @param nresult: the remote results for the node
2448 @param nimg: the node image object
2451 idata = nresult.get(constants.NV_INSTANCELIST, None)
2452 test = not isinstance(idata, list)
2453 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2454 "rpc call to node failed (instancelist): %s",
2455 utils.SafeEncode(str(idata)))
2457 nimg.hyp_fail = True
2459 nimg.instances = idata
2461 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2462 """Verifies and computes a node information map
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param nimg: the node image object
2468 @param vg_name: the configured VG name
2472 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2474 # try to read free memory (from the hypervisor)
2475 hv_info = nresult.get(constants.NV_HVINFO, None)
2476 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2477 _ErrorIf(test, constants.CV_ENODEHV, node,
2478 "rpc call to node failed (hvinfo)")
2481 nimg.mfree = int(hv_info["memory_free"])
2482 except (ValueError, TypeError):
2483 _ErrorIf(True, constants.CV_ENODERPC, node,
2484 "node returned invalid nodeinfo, check hypervisor")
2486 # FIXME: devise a free space model for file based instances as well
2487 if vg_name is not None:
2488 test = (constants.NV_VGLIST not in nresult or
2489 vg_name not in nresult[constants.NV_VGLIST])
2490 _ErrorIf(test, constants.CV_ENODELVM, node,
2491 "node didn't return data for the volume group '%s'"
2492 " - it is either missing or broken", vg_name)
2495 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496 except (ValueError, TypeError):
2497 _ErrorIf(True, constants.CV_ENODERPC, node,
2498 "node returned invalid LVM info, check LVM status")
2500 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501 """Gets per-disk status information for all instances.
2503 @type nodelist: list of strings
2504 @param nodelist: Node names
2505 @type node_image: dict of (name, L{objects.Node})
2506 @param node_image: Node objects
2507 @type instanceinfo: dict of (name, L{objects.Instance})
2508 @param instanceinfo: Instance objects
2509 @rtype: {instance: {node: [(success, payload)]}}
2510 @return: a dictionary of per-instance dictionaries with nodes as
2511 keys and disk information as values; the disk information is a
2512 list of tuples (success, payload)
2515 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2518 node_disks_devonly = {}
2519 diskless_instances = set()
2520 diskless = constants.DT_DISKLESS
2522 for nname in nodelist:
2523 node_instances = list(itertools.chain(node_image[nname].pinst,
2524 node_image[nname].sinst))
2525 diskless_instances.update(inst for inst in node_instances
2526 if instanceinfo[inst].disk_template == diskless)
2527 disks = [(inst, disk)
2528 for inst in node_instances
2529 for disk in instanceinfo[inst].disks]
2532 # No need to collect data
2535 node_disks[nname] = disks
2537 # Creating copies as SetDiskID below will modify the objects and that can
2538 # lead to incorrect data returned from nodes
2539 devonly = [dev.Copy() for (_, dev) in disks]
2542 self.cfg.SetDiskID(dev, nname)
2544 node_disks_devonly[nname] = devonly
2546 assert len(node_disks) == len(node_disks_devonly)
2548 # Collect data from all nodes with disks
2549 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2552 assert len(result) == len(node_disks)
2556 for (nname, nres) in result.items():
2557 disks = node_disks[nname]
2560 # No data from this node
2561 data = len(disks) * [(False, "node offline")]
2564 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2565 "while getting disk information: %s", msg)
2567 # No data from this node
2568 data = len(disks) * [(False, msg)]
2571 for idx, i in enumerate(nres.payload):
2572 if isinstance(i, (tuple, list)) and len(i) == 2:
2575 logging.warning("Invalid result from node %s, entry %d: %s",
2577 data.append((False, "Invalid result from the remote node"))
2579 for ((inst, _), status) in zip(disks, data):
2580 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2582 # Add empty entries for diskless instances.
2583 for inst in diskless_instances:
2584 assert inst not in instdisk
2587 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589 compat.all(isinstance(s, (tuple, list)) and
2590 len(s) == 2 for s in statuses)
2591 for inst, nnames in instdisk.items()
2592 for nname, statuses in nnames.items())
2593 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
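# Illustrative sketch (not part of the LU): folding one node's reply into the
# {instance: {node: [(success, payload), ...]}} shape that _CollectDiskInfo
# returns above; "disks" is the (instance, disk) list and "data" the matching
# per-disk status list for node "nname".
def _ExampleFoldDiskStatus(disks, data, nname):
  """Return the per-instance, per-node disk status mapping."""
  instdisk = {}
  for ((inst, _), status) in zip(disks, data):
    instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
  return instdisk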
2598 def _SshNodeSelector(group_uuid, all_nodes):
2599 """Create endless iterators for all potential SSH check hosts.
2602 nodes = [node for node in all_nodes
2603 if (node.group != group_uuid and
2605 keyfunc = operator.attrgetter("group")
2607 return map(itertools.cycle,
2608 [sorted(map(operator.attrgetter("name"), names))
2609 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2613 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614 """Choose which nodes should talk to which other nodes.
2616 We will make nodes contact all nodes in their group, and one node from
2619 @warning: This algorithm has a known issue if one node group is much
2620 smaller than others (e.g. just one node). In such a case all other
2621 nodes will talk to the single node.
2624 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2627 return (online_nodes,
2628 dict((name, sorted([i.next() for i in sel]))
2629 for name in online_nodes))
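# Illustrative sketch (not part of the LU): the round-robin selection used by
# the two helpers above, on plain (name, group) tuples instead of node
# objects; itertools and operator are assumed to be imported at module level
# in the full file, as the surrounding code already uses them.
def _ExampleSshCyclers(group_uuid, all_nodes):
  """Return one endless name cycler per foreign node group."""
  others = sorted((n for n in all_nodes if n[1] != group_uuid),
                  key=operator.itemgetter(1))
  return [itertools.cycle(sorted(name for (name, _) in members))
          for _, members in itertools.groupby(others,
                                              operator.itemgetter(1))]
# Each online node of the verified group then draws one name from every
# cycler, so SSH probes to foreign groups are spread over their members.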
2631 def BuildHooksEnv(self):
2634 Cluster-Verify hooks run only in the post phase; if they fail, their
2635 output is logged in the verify output and the verification fails.
2639 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2642 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643 for node in self.my_node_info.values())
2647 def BuildHooksNodes(self):
2648 """Build hooks nodes.
2651 return ([], self.my_node_names)
2653 def Exec(self, feedback_fn):
2654 """Verify integrity of the node group, performing various test on nodes.
2657 # This method has too many local variables. pylint: disable=R0914
2658 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2660 if not self.my_node_names:
2662 feedback_fn("* Empty node group, skipping verification")
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667 verbose = self.op.verbose
2668 self._feedback_fn = feedback_fn
2670 vg_name = self.cfg.GetVGName()
2671 drbd_helper = self.cfg.GetDRBDHelper()
2672 cluster = self.cfg.GetClusterInfo()
2673 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674 hypervisors = cluster.enabled_hypervisors
2675 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2677 i_non_redundant = [] # Non redundant instances
2678 i_non_a_balanced = [] # Non auto-balanced instances
2679 i_offline = 0 # Count of offline instances
2680 n_offline = 0 # Count of offline nodes
2681 n_drained = 0 # Count of nodes being drained
2682 node_vol_should = {}
2684 # FIXME: verify OS list
2687 filemap = _ComputeAncillaryFiles(cluster, False)
2689 # do local checksums
2690 master_node = self.master_node = self.cfg.GetMasterNode()
2691 master_ip = self.cfg.GetMasterIP()
2693 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2696 if self.cfg.GetUseExternalMipScript():
2697 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2699 node_verify_param = {
2700 constants.NV_FILELIST:
2701 utils.UniqueSequence(filename
2702 for files in filemap
2703 for filename in files),
2704 constants.NV_NODELIST:
2705 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2706 self.all_node_info.values()),
2707 constants.NV_HYPERVISOR: hypervisors,
2708 constants.NV_HVPARAMS:
2709 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2710 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2711 for node in node_data_list
2712 if not node.offline],
2713 constants.NV_INSTANCELIST: hypervisors,
2714 constants.NV_VERSION: None,
2715 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2716 constants.NV_NODESETUP: None,
2717 constants.NV_TIME: None,
2718 constants.NV_MASTERIP: (master_node, master_ip),
2719 constants.NV_OSLIST: None,
2720 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2721 constants.NV_USERSCRIPTS: user_scripts,
2724 if vg_name is not None:
2725 node_verify_param[constants.NV_VGLIST] = None
2726 node_verify_param[constants.NV_LVLIST] = vg_name
2727 node_verify_param[constants.NV_PVLIST] = [vg_name]
2728 node_verify_param[constants.NV_DRBDLIST] = None
2731 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2734 # FIXME: this needs to be changed per node-group, not cluster-wide
2736 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2737 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2738 bridges.add(default_nicpp[constants.NIC_LINK])
2739 for instance in self.my_inst_info.values():
2740 for nic in instance.nics:
2741 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2742 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2743 bridges.add(full_nic[constants.NIC_LINK])
2746 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2748 # Build our expected cluster state
2749 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2751 vm_capable=node.vm_capable))
2752 for node in node_data_list)
2756 for node in self.all_node_info.values():
2757 path = _SupportsOob(self.cfg, node)
2758 if path and path not in oob_paths:
2759 oob_paths.append(path)
2762 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2764 for instance in self.my_inst_names:
2765 inst_config = self.my_inst_info[instance]
2767 for nname in inst_config.all_nodes:
2768 if nname not in node_image:
2769 gnode = self.NodeImage(name=nname)
2770 gnode.ghost = (nname not in self.all_node_info)
2771 node_image[nname] = gnode
2773 inst_config.MapLVsByNode(node_vol_should)
2775 pnode = inst_config.primary_node
2776 node_image[pnode].pinst.append(instance)
2778 for snode in inst_config.secondary_nodes:
2779 nimg = node_image[snode]
2780 nimg.sinst.append(instance)
2781 if pnode not in nimg.sbp:
2782 nimg.sbp[pnode] = []
2783 nimg.sbp[pnode].append(instance)
2785 # At this point, we have the in-memory data structures complete,
2786 # except for the runtime information, which we'll gather next
2788 # Due to the way our RPC system works, exact response times cannot be
2789 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2790 # time before and after executing the request, we can at least have a time
2792 nvinfo_starttime = time.time()
2793 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2795 self.cfg.GetClusterName())
2796 nvinfo_endtime = time.time()
2798 if self.extra_lv_nodes and vg_name is not None:
2800 self.rpc.call_node_verify(self.extra_lv_nodes,
2801 {constants.NV_LVLIST: vg_name},
2802 self.cfg.GetClusterName())
2804 extra_lv_nvinfo = {}
2806 all_drbd_map = self.cfg.ComputeDRBDMap()
2808 feedback_fn("* Gathering disk information (%s nodes)" %
2809 len(self.my_node_names))
2810 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2813 feedback_fn("* Verifying configuration file consistency")
2815 # If not all nodes are being checked, we need to make sure the master node
2816 # and a non-checked vm_capable node are in the list.
2817 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2819 vf_nvinfo = all_nvinfo.copy()
2820 vf_node_info = list(self.my_node_info.values())
2821 additional_nodes = []
2822 if master_node not in self.my_node_info:
2823 additional_nodes.append(master_node)
2824 vf_node_info.append(self.all_node_info[master_node])
2825 # Add the first vm_capable node we find which is not included
2826 for node in absent_nodes:
2827 nodeinfo = self.all_node_info[node]
2828 if nodeinfo.vm_capable and not nodeinfo.offline:
2829 additional_nodes.append(node)
2830 vf_node_info.append(self.all_node_info[node])
2832 key = constants.NV_FILELIST
2833 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2834 {key: node_verify_param[key]},
2835 self.cfg.GetClusterName()))
2837 vf_nvinfo = all_nvinfo
2838 vf_node_info = self.my_node_info.values()
2840 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2842 feedback_fn("* Verifying node status")
2846 for node_i in node_data_list:
2848 nimg = node_image[node]
2852 feedback_fn("* Skipping offline node %s" % (node,))
2856 if node == master_node:
2858 elif node_i.master_candidate:
2859 ntype = "master candidate"
2860 elif node_i.drained:
2866 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2868 msg = all_nvinfo[node].fail_msg
2869 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2872 nimg.rpc_fail = True
2875 nresult = all_nvinfo[node].payload
2877 nimg.call_ok = self._VerifyNode(node_i, nresult)
2878 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2879 self._VerifyNodeNetwork(node_i, nresult)
2880 self._VerifyNodeUserScripts(node_i, nresult)
2881 self._VerifyOob(node_i, nresult)
2884 self._VerifyNodeLVM(node_i, nresult, vg_name)
2885 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2888 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2889 self._UpdateNodeInstances(node_i, nresult, nimg)
2890 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2891 self._UpdateNodeOS(node_i, nresult, nimg)
2893 if not nimg.os_fail:
2894 if refos_img is None:
2896 self._VerifyNodeOS(node_i, nimg, refos_img)
2897 self._VerifyNodeBridges(node_i, nresult, bridges)
2899 # Check whether all running instances are primary for the node. (This
2900 # can no longer be done from _VerifyInstance below, since some of the
2901 # wrong instances could be from other node groups.)
2902 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2904 for inst in non_primary_inst:
2905 # FIXME: investigate best way to handle offline insts
2906 if inst.admin_state == constants.ADMINST_OFFLINE:
2908 feedback_fn("* Skipping offline instance %s" % inst.name)
2911 test = inst in self.all_inst_info
2912 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2913 "instance should not run on node %s", node_i.name)
2914 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2915 "node is running unknown instance %s", inst)
2917 for node, result in extra_lv_nvinfo.items():
2918 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2919 node_image[node], vg_name)
2921 feedback_fn("* Verifying instance status")
2922 for instance in self.my_inst_names:
2924 feedback_fn("* Verifying instance %s" % instance)
2925 inst_config = self.my_inst_info[instance]
2926 self._VerifyInstance(instance, inst_config, node_image,
2928 inst_nodes_offline = []
2930 pnode = inst_config.primary_node
2931 pnode_img = node_image[pnode]
2932 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2933 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2934 " primary node failed", instance)
2936 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2938 constants.CV_EINSTANCEBADNODE, instance,
2939 "instance is marked as running and lives on offline node %s",
2940 inst_config.primary_node)
2942 # If the instance is non-redundant we cannot survive losing its primary
2943 # node, so we are not N+1 compliant. On the other hand we have no disk
2944 # templates with more than one secondary so that situation is not well
2946 # FIXME: does not support file-backed instances
2947 if not inst_config.secondary_nodes:
2948 i_non_redundant.append(instance)
2950 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2951 constants.CV_EINSTANCELAYOUT,
2952 instance, "instance has multiple secondary nodes: %s",
2953 utils.CommaJoin(inst_config.secondary_nodes),
2954 code=self.ETYPE_WARNING)
2956 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2957 pnode = inst_config.primary_node
2958 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2959 instance_groups = {}
2961 for node in instance_nodes:
2962 instance_groups.setdefault(self.all_node_info[node].group,
2966 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2967 # Sort so that we always list the primary node first.
2968 for group, nodes in sorted(instance_groups.items(),
2969 key=lambda (_, nodes): pnode in nodes,
2972 self._ErrorIf(len(instance_groups) > 1,
2973 constants.CV_EINSTANCESPLITGROUPS,
2974 instance, "instance has primary and secondary nodes in"
2975 " different groups: %s", utils.CommaJoin(pretty_list),
2976 code=self.ETYPE_WARNING)
2978 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2979 i_non_a_balanced.append(instance)
2981 for snode in inst_config.secondary_nodes:
2982 s_img = node_image[snode]
2983 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2984 snode, "instance %s, connection to secondary node failed",
2988 inst_nodes_offline.append(snode)
2990 # warn that the instance lives on offline nodes
2991 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2992 "instance has offline secondary node(s) %s",
2993 utils.CommaJoin(inst_nodes_offline))
2994 # ... or ghost/non-vm_capable nodes
2995 for node in inst_config.all_nodes:
2996 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2997 instance, "instance lives on ghost node %s", node)
2998 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2999 instance, "instance lives on non-vm_capable node %s", node)
3001 feedback_fn("* Verifying orphan volumes")
3002 reserved = utils.FieldSet(*cluster.reserved_lvs)
3004 # We will get spurious "unknown volume" warnings if any node of this group
3005 # is secondary for an instance whose primary is in another group. To avoid
3006 # them, we find these instances and add their volumes to node_vol_should.
3007 for inst in self.all_inst_info.values():
3008 for secondary in inst.secondary_nodes:
3009 if (secondary in self.my_node_info
3010 and inst.name not in self.my_inst_info):
3011 inst.MapLVsByNode(node_vol_should)
3014 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3016 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3017 feedback_fn("* Verifying N+1 Memory redundancy")
3018 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3020 feedback_fn("* Other Notes")
3022 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3023 % len(i_non_redundant))
3025 if i_non_a_balanced:
3026 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3027 % len(i_non_a_balanced))
3030 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3033 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3036 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3040 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3041 """Analyze the post-hooks' result
3043 This method analyses the hook result, handles it, and sends some
3044 nicely-formatted feedback back to the user.
3046 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3047 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3048 @param hooks_results: the results of the multi-node hooks rpc call
3049 @param feedback_fn: function used to send feedback back to the caller
3050 @param lu_result: previous Exec result
3051 @return: the new Exec result, based on the previous result
3055 # We only really run POST phase hooks, only for non-empty groups,
3056 # and are only interested in their results
3057 if not self.my_node_names:
3060 elif phase == constants.HOOKS_PHASE_POST:
3061 # Used to change hooks' output to proper indentation
3062 feedback_fn("* Hooks Results")
3063 assert hooks_results, "invalid result from hooks"
3065 for node_name in hooks_results:
3066 res = hooks_results[node_name]
3068 test = msg and not res.offline
3069 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3070 "Communication failure in hooks execution: %s", msg)
3071 if res.offline or msg:
3072 # No need to investigate payload if node is offline or gave
3075 for script, hkr, output in res.payload:
3076 test = hkr == constants.HKR_FAIL
3077 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3078 "Script %s failed, output:", script)
3080 output = self._HOOKS_INDENT_RE.sub(" ", output)
3081 feedback_fn("%s" % output)
3087 class LUClusterVerifyDisks(NoHooksLU):
3088 """Verifies the cluster disks status.
3093 def ExpandNames(self):
3094 self.share_locks = _ShareAll()
3095 self.needed_locks = {
3096 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3099 def Exec(self, feedback_fn):
3100 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3102 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3103 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3104 for group in group_names])
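# Illustrative sketch (not part of the LU): with two node groups named
# "default" and "rack2" (names made up), the value returned above would be
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="rack2")]])
# and each inner opcode list is submitted as a separate job.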
3107 class LUGroupVerifyDisks(NoHooksLU):
3108 """Verifies the status of all disks in a node group.
3113 def ExpandNames(self):
3114 # Raises errors.OpPrereqError on its own if group can't be found
3115 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3117 self.share_locks = _ShareAll()
3118 self.needed_locks = {
3119 locking.LEVEL_INSTANCE: [],
3120 locking.LEVEL_NODEGROUP: [],
3121 locking.LEVEL_NODE: [],
3124 def DeclareLocks(self, level):
3125 if level == locking.LEVEL_INSTANCE:
3126 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3128 # Lock instances optimistically, needs verification once node and group
3129 # locks have been acquired
3130 self.needed_locks[locking.LEVEL_INSTANCE] = \
3131 self.cfg.GetNodeGroupInstances(self.group_uuid)
3133 elif level == locking.LEVEL_NODEGROUP:
3134 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3136 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3137 set([self.group_uuid] +
3138 # Lock all groups used by instances optimistically; this requires
3139 # going via the node before it's locked, requiring verification
3142 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3143 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3145 elif level == locking.LEVEL_NODE:
3146 # This will only lock the nodes in the group to be verified which contain
3148 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3149 self._LockInstancesNodes()
3151 # Lock all nodes in group to be verified
3152 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3153 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3154 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3156 def CheckPrereq(self):
3157 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3158 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3159 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3161 assert self.group_uuid in owned_groups
3163 # Check if locked instances are still correct
3164 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3166 # Get instance information
3167 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3169 # Check if node groups for locked instances are still correct
3170 for (instance_name, inst) in self.instances.items():
3171 assert owned_nodes.issuperset(inst.all_nodes), \
3172 "Instance %s's nodes changed while we kept the lock" % instance_name
3174 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3177 assert self.group_uuid in inst_groups, \
3178 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3180 def Exec(self, feedback_fn):
3181 """Verify integrity of cluster disks.
3183 @rtype: tuple of three items
3184 @return: a tuple of (dict of node-to-node_error, list of instances
3185 which need activate-disks, dict of instance: (node, volume) for
3190 res_instances = set()
3193 nv_dict = _MapInstanceDisksToNodes([inst
3194 for inst in self.instances.values()
3195 if inst.admin_state == constants.ADMINST_UP])
3198 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3199 set(self.cfg.GetVmCapableNodeList()))
3201 node_lvs = self.rpc.call_lv_list(nodes, [])
3203 for (node, node_res) in node_lvs.items():
3204 if node_res.offline:
3207 msg = node_res.fail_msg
3209 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3210 res_nodes[node] = msg
3213 for lv_name, (_, _, lv_online) in node_res.payload.items():
3214 inst = nv_dict.pop((node, lv_name), None)
3215 if not (lv_online or inst is None):
3216 res_instances.add(inst)
3218 # any leftover items in nv_dict are missing LVs, let's arrange the data
3220 for key, inst in nv_dict.iteritems():
3221 res_missing.setdefault(inst, []).append(list(key))
3223 return (res_nodes, list(res_instances), res_missing)
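# Illustrative sketch (not part of the LU): the LV classification done above
# on plain data. "nv_dict" maps (node, lv_name) -> instance; only the third
# element of each payload value (the online flag) matters here, as in the
# code above.
def _ExampleClassifyLvs(nv_dict, node, payload):
  """Return instances on "node" whose LVs exist but are offline."""
  needs_activation = set()
  for lv_name, (_, _, lv_online) in payload.items():
    inst = nv_dict.pop((node, lv_name), None)
    if inst is not None and not lv_online:
      needs_activation.add(inst)
  # whatever is still left in nv_dict afterwards is a missing LV
  return needs_activation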
3226 class LUClusterRepairDiskSizes(NoHooksLU):
3227 """Verifies the cluster disks sizes.
3232 def ExpandNames(self):
3233 if self.op.instances:
3234 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3235 self.needed_locks = {
3236 locking.LEVEL_NODE_RES: [],
3237 locking.LEVEL_INSTANCE: self.wanted_names,
3239 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3241 self.wanted_names = None
3242 self.needed_locks = {
3243 locking.LEVEL_NODE_RES: locking.ALL_SET,
3244 locking.LEVEL_INSTANCE: locking.ALL_SET,
3246 self.share_locks = {
3247 locking.LEVEL_NODE_RES: 1,
3248 locking.LEVEL_INSTANCE: 0,
3251 def DeclareLocks(self, level):
3252 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3253 self._LockInstancesNodes(primary_only=True, level=level)
3255 def CheckPrereq(self):
3256 """Check prerequisites.
3258 This only checks the optional instance list against the existing names.
3261 if self.wanted_names is None:
3262 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3264 self.wanted_instances = \
3265 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3267 def _EnsureChildSizes(self, disk):
3268 """Ensure children of the disk have the needed disk size.
3270 This is valid mainly for DRBD8 and fixes an issue where the
3271 children have a smaller disk size.
3273 @param disk: an L{ganeti.objects.Disk} object
3276 if disk.dev_type == constants.LD_DRBD8:
3277 assert disk.children, "Empty children for DRBD8?"
3278 fchild = disk.children[0]
3279 mismatch = fchild.size < disk.size
3281 self.LogInfo("Child disk has size %d, parent %d, fixing",
3282 fchild.size, disk.size)
3283 fchild.size = disk.size
3285 # and we recurse on this child only, not on the metadev
3286 return self._EnsureChildSizes(fchild) or mismatch
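# Worked example (values made up): for a 1024 MiB DRBD8 disk whose first child
# (the data device) reports only 1020 MiB, _EnsureChildSizes grows the child
# to 1024 MiB in place and returns True, which tells Exec() below that the
# updated configuration has to be written back.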
3290 def Exec(self, feedback_fn):
3291 """Verify the size of cluster disks.
3294 # TODO: check child disks too
3295 # TODO: check differences in size between primary/secondary nodes
3297 for instance in self.wanted_instances:
3298 pnode = instance.primary_node
3299 if pnode not in per_node_disks:
3300 per_node_disks[pnode] = []
3301 for idx, disk in enumerate(instance.disks):
3302 per_node_disks[pnode].append((instance, idx, disk))
3304 assert not (frozenset(per_node_disks.keys()) -
3305 self.owned_locks(locking.LEVEL_NODE_RES)), \
3306 "Not owning correct locks"
3307 assert not self.owned_locks(locking.LEVEL_NODE)
3310 for node, dskl in per_node_disks.items():
3311 newl = [v[2].Copy() for v in dskl]
3313 self.cfg.SetDiskID(dsk, node)
3314 result = self.rpc.call_blockdev_getsize(node, newl)
3316 self.LogWarning("Failure in blockdev_getsize call to node"
3317 " %s, ignoring", node)
3319 if len(result.payload) != len(dskl):
3320 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3321 " result.payload=%s", node, len(dskl), result.payload)
3322 self.LogWarning("Invalid result from node %s, ignoring node results",
3325 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3327 self.LogWarning("Disk %d of instance %s did not return size"
3328 " information, ignoring", idx, instance.name)
3330 if not isinstance(size, (int, long)):
3331 self.LogWarning("Disk %d of instance %s did not return valid"
3332 " size information, ignoring", idx, instance.name)
3335 if size != disk.size:
3336 self.LogInfo("Disk %d of instance %s has mismatched size,"
3337 " correcting: recorded %d, actual %d", idx,
3338 instance.name, disk.size, size)
3340 self.cfg.Update(instance, feedback_fn)
3341 changed.append((instance.name, idx, size))
3342 if self._EnsureChildSizes(disk):
3343 self.cfg.Update(instance, feedback_fn)
3344 changed.append((instance.name, idx, disk.size))
3348 class LUClusterRename(LogicalUnit):
3349 """Rename the cluster.
3352 HPATH = "cluster-rename"
3353 HTYPE = constants.HTYPE_CLUSTER
3355 def BuildHooksEnv(self):
3360 "OP_TARGET": self.cfg.GetClusterName(),
3361 "NEW_NAME": self.op.name,
3364 def BuildHooksNodes(self):
3365 """Build hooks nodes.
3368 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3370 def CheckPrereq(self):
3371 """Verify that the passed name is a valid one.
3374 hostname = netutils.GetHostname(name=self.op.name,
3375 family=self.cfg.GetPrimaryIPFamily())
3377 new_name = hostname.name
3378 self.ip = new_ip = hostname.ip
3379 old_name = self.cfg.GetClusterName()
3380 old_ip = self.cfg.GetMasterIP()
3381 if new_name == old_name and new_ip == old_ip:
3382 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3383 " cluster has changed",
3385 if new_ip != old_ip:
3386 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3387 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3388 " reachable on the network" %
3389 new_ip, errors.ECODE_NOTUNIQUE)
3391 self.op.name = new_name
3393 def Exec(self, feedback_fn):
3394 """Rename the cluster.
3397 clustername = self.op.name
3400 # shutdown the master IP
3401 master_params = self.cfg.GetMasterNetworkParameters()
3402 ems = self.cfg.GetUseExternalMipScript()
3403 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3405 result.Raise("Could not disable the master role")
3408 cluster = self.cfg.GetClusterInfo()
3409 cluster.cluster_name = clustername
3410 cluster.master_ip = new_ip
3411 self.cfg.Update(cluster, feedback_fn)
3413 # update the known hosts file
3414 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3415 node_list = self.cfg.GetOnlineNodeList()
3417 node_list.remove(master_params.name)
3420 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3422 master_params.ip = new_ip
3423 result = self.rpc.call_node_activate_master_ip(master_params.name,
3425 msg = result.fail_msg
3427 self.LogWarning("Could not re-enable the master role on"
3428 " the master, please restart manually: %s", msg)
3433 def _ValidateNetmask(cfg, netmask):
3434 """Checks if a netmask is valid.
3436 @type cfg: L{config.ConfigWriter}
3437 @param cfg: The cluster configuration
3439 @param netmask: the netmask to be verified
3440 @raise errors.OpPrereqError: if the validation fails
3443 ip_family = cfg.GetPrimaryIPFamily()
3445 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3446 except errors.ProgrammerError:
3447 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3449 if not ipcls.ValidateNetmask(netmask):
3450 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3454 class LUClusterSetParams(LogicalUnit):
3455 """Change the parameters of the cluster.
3458 HPATH = "cluster-modify"
3459 HTYPE = constants.HTYPE_CLUSTER
3462 def CheckArguments(self):
3466 if self.op.uid_pool:
3467 uidpool.CheckUidPool(self.op.uid_pool)
3469 if self.op.add_uids:
3470 uidpool.CheckUidPool(self.op.add_uids)
3472 if self.op.remove_uids:
3473 uidpool.CheckUidPool(self.op.remove_uids)
3475 if self.op.master_netmask is not None:
3476 _ValidateNetmask(self.cfg, self.op.master_netmask)
3478 def ExpandNames(self):
3479 # FIXME: in the future maybe other cluster params won't require checking on
3480 # all nodes to be modified.
3481 self.needed_locks = {
3482 locking.LEVEL_NODE: locking.ALL_SET,
3484 self.share_locks[locking.LEVEL_NODE] = 1
3486 def BuildHooksEnv(self):
3491 "OP_TARGET": self.cfg.GetClusterName(),
3492 "NEW_VG_NAME": self.op.vg_name,
3495 def BuildHooksNodes(self):
3496 """Build hooks nodes.
3499 mn = self.cfg.GetMasterNode()
3502 def CheckPrereq(self):
3503 """Check prerequisites.
3505 This checks whether the given params don't conflict and
3506 if the given volume group is valid.
3509 if self.op.vg_name is not None and not self.op.vg_name:
3510 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3511 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3512 " instances exist", errors.ECODE_INVAL)
3514 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3515 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3516 raise errors.OpPrereqError("Cannot disable drbd helper while"
3517 " drbd-based instances exist",
3520 node_list = self.owned_locks(locking.LEVEL_NODE)
3522 # if vg_name not None, checks given volume group on all nodes
3524 vglist = self.rpc.call_vg_list(node_list)
3525 for node in node_list:
3526 msg = vglist[node].fail_msg
3528 # ignoring down node
3529 self.LogWarning("Error while gathering data on node %s"
3530 " (ignoring node): %s", node, msg)
3532 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3534 constants.MIN_VG_SIZE)
3536 raise errors.OpPrereqError("Error on node '%s': %s" %
3537 (node, vgstatus), errors.ECODE_ENVIRON)
3539 if self.op.drbd_helper:
3540 # checks given drbd helper on all nodes
3541 helpers = self.rpc.call_drbd_helper(node_list)
3542 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3544 self.LogInfo("Not checking drbd helper on offline node %s", node)
3546 msg = helpers[node].fail_msg
3548 raise errors.OpPrereqError("Error checking drbd helper on node"
3549 " '%s': %s" % (node, msg),
3550 errors.ECODE_ENVIRON)
3551 node_helper = helpers[node].payload
3552 if node_helper != self.op.drbd_helper:
3553 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3554 (node, node_helper), errors.ECODE_ENVIRON)
3556 self.cluster = cluster = self.cfg.GetClusterInfo()
3557 # validate params changes
3558 if self.op.beparams:
3559 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3560 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3562 if self.op.ndparams:
3563 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3564 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3566 # TODO: we need a more general way to handle resetting
3567 # cluster-level parameters to default values
3568 if self.new_ndparams["oob_program"] == "":
3569 self.new_ndparams["oob_program"] = \
3570 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3572 if self.op.nicparams:
3573 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3574 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3575 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3578 # check all instances for consistency
3579 for instance in self.cfg.GetAllInstancesInfo().values():
3580 for nic_idx, nic in enumerate(instance.nics):
3581 params_copy = copy.deepcopy(nic.nicparams)
3582 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3584 # check parameter syntax
3586 objects.NIC.CheckParameterSyntax(params_filled)
3587 except errors.ConfigurationError, err:
3588 nic_errors.append("Instance %s, nic/%d: %s" %
3589 (instance.name, nic_idx, err))
3591 # if we're moving instances to routed, check that they have an ip
3592 target_mode = params_filled[constants.NIC_MODE]
3593 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3594 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3595 " address" % (instance.name, nic_idx))
3597 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3598 "\n".join(nic_errors))
3600 # hypervisor list/parameters
3601 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3602 if self.op.hvparams:
3603 for hv_name, hv_dict in self.op.hvparams.items():
3604 if hv_name not in self.new_hvparams:
3605 self.new_hvparams[hv_name] = hv_dict
3607 self.new_hvparams[hv_name].update(hv_dict)
3609 # os hypervisor parameters
3610 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3612 for os_name, hvs in self.op.os_hvp.items():
3613 if os_name not in self.new_os_hvp:
3614 self.new_os_hvp[os_name] = hvs
3616 for hv_name, hv_dict in hvs.items():
3617 if hv_name not in self.new_os_hvp[os_name]:
3618 self.new_os_hvp[os_name][hv_name] = hv_dict
3620 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3623 self.new_osp = objects.FillDict(cluster.osparams, {})
3624 if self.op.osparams:
3625 for os_name, osp in self.op.osparams.items():
3626 if os_name not in self.new_osp:
3627 self.new_osp[os_name] = {}
3629 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3632 if not self.new_osp[os_name]:
3633 # we removed all parameters
3634 del self.new_osp[os_name]
3636 # check the parameter validity (remote check)
3637 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3638 os_name, self.new_osp[os_name])
3640 # changes to the hypervisor list
3641 if self.op.enabled_hypervisors is not None:
3642 self.hv_list = self.op.enabled_hypervisors
3643 for hv in self.hv_list:
3644 # if the hypervisor doesn't already exist in the cluster
3645 # hvparams, we initialize it to empty, and then (in both
3646 # cases) we make sure to fill the defaults, as we might not
3647 # have a complete defaults list if the hypervisor wasn't
3649 if hv not in new_hvp:
3651 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3652 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3654 self.hv_list = cluster.enabled_hypervisors
3656 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3657 # either the enabled list has changed, or the parameters have, validate
3658 for hv_name, hv_params in self.new_hvparams.items():
3659 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3660 (self.op.enabled_hypervisors and
3661 hv_name in self.op.enabled_hypervisors)):
3662 # either this is a new hypervisor, or its parameters have changed
3663 hv_class = hypervisor.GetHypervisor(hv_name)
3664 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3665 hv_class.CheckParameterSyntax(hv_params)
3666 _CheckHVParams(self, node_list, hv_name, hv_params)
3669 # no need to check any newly-enabled hypervisors, since the
3670 # defaults have already been checked in the above code-block
3671 for os_name, os_hvp in self.new_os_hvp.items():
3672 for hv_name, hv_params in os_hvp.items():
3673 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3674 # we need to fill in the new os_hvp on top of the actual hv_p
3675 cluster_defaults = self.new_hvparams.get(hv_name, {})
3676 new_osp = objects.FillDict(cluster_defaults, hv_params)
3677 hv_class = hypervisor.GetHypervisor(hv_name)
3678 hv_class.CheckParameterSyntax(new_osp)
3679 _CheckHVParams(self, node_list, hv_name, new_osp)
3681 if self.op.default_iallocator:
3682 alloc_script = utils.FindFile(self.op.default_iallocator,
3683 constants.IALLOCATOR_SEARCH_PATH,
3685 if alloc_script is None:
3686 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3687 " specified" % self.op.default_iallocator,
3690 def Exec(self, feedback_fn):
3691 """Change the parameters of the cluster.
3694 if self.op.vg_name is not None:
3695 new_volume = self.op.vg_name
3698 if new_volume != self.cfg.GetVGName():
3699 self.cfg.SetVGName(new_volume)
3701 feedback_fn("Cluster LVM configuration already in desired"
3702 " state, not changing")
3703 if self.op.drbd_helper is not None:
3704 new_helper = self.op.drbd_helper
3707 if new_helper != self.cfg.GetDRBDHelper():
3708 self.cfg.SetDRBDHelper(new_helper)
3710 feedback_fn("Cluster DRBD helper already in desired state,"
3712 if self.op.hvparams:
3713 self.cluster.hvparams = self.new_hvparams
3715 self.cluster.os_hvp = self.new_os_hvp
3716 if self.op.enabled_hypervisors is not None:
3717 self.cluster.hvparams = self.new_hvparams
3718 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3719 if self.op.beparams:
3720 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3721 if self.op.nicparams:
3722 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3723 if self.op.osparams:
3724 self.cluster.osparams = self.new_osp
3725 if self.op.ndparams:
3726 self.cluster.ndparams = self.new_ndparams
3728 if self.op.candidate_pool_size is not None:
3729 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3730 # we need to update the pool size here, otherwise the save will fail
3731 _AdjustCandidatePool(self, [])
3733 if self.op.maintain_node_health is not None:
3734 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3735 feedback_fn("Note: CONFD was disabled at build time, node health"
3736 " maintenance is not useful (still enabling it)")
3737 self.cluster.maintain_node_health = self.op.maintain_node_health
3739 if self.op.prealloc_wipe_disks is not None:
3740 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3742 if self.op.add_uids is not None:
3743 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3745 if self.op.remove_uids is not None:
3746 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3748 if self.op.uid_pool is not None:
3749 self.cluster.uid_pool = self.op.uid_pool
3751 if self.op.default_iallocator is not None:
3752 self.cluster.default_iallocator = self.op.default_iallocator
3754 if self.op.reserved_lvs is not None:
3755 self.cluster.reserved_lvs = self.op.reserved_lvs
3757 if self.op.use_external_mip_script is not None:
3758 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3760 def helper_os(aname, mods, desc):
3762 lst = getattr(self.cluster, aname)
3763 for key, val in mods:
3764 if key == constants.DDM_ADD:
3766 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3769 elif key == constants.DDM_REMOVE:
3773 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3775 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3777 if self.op.hidden_os:
3778 helper_os("hidden_os", self.op.hidden_os, "hidden")
3780 if self.op.blacklisted_os:
3781 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3783 if self.op.master_netdev:
3784 master_params = self.cfg.GetMasterNetworkParameters()
3785 ems = self.cfg.GetUseExternalMipScript()
3786 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3787 self.cluster.master_netdev)
3788 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3790 result.Raise("Could not disable the master ip")
3791 feedback_fn("Changing master_netdev from %s to %s" %
3792 (master_params.netdev, self.op.master_netdev))
3793 self.cluster.master_netdev = self.op.master_netdev
3795 if self.op.master_netmask:
3796 master_params = self.cfg.GetMasterNetworkParameters()
3797 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3798 result = self.rpc.call_node_change_master_netmask(master_params.name,
3799 master_params.netmask,
3800 self.op.master_netmask,
3802 master_params.netdev)
3804 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3807 self.cluster.master_netmask = self.op.master_netmask
3809 self.cfg.Update(self.cluster, feedback_fn)
3811 if self.op.master_netdev:
3812 master_params = self.cfg.GetMasterNetworkParameters()
3813 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3814 self.op.master_netdev)
3815 ems = self.cfg.GetUseExternalMipScript()
3816 result = self.rpc.call_node_activate_master_ip(master_params.name,
3819 self.LogWarning("Could not re-enable the master ip on"
3820 " the master, please restart manually: %s",
3824 def _UploadHelper(lu, nodes, fname):
3825 """Helper for uploading a file and showing warnings.
3828 if os.path.exists(fname):
3829 result = lu.rpc.call_upload_file(nodes, fname)
3830 for to_node, to_result in result.items():
3831 msg = to_result.fail_msg
3833 msg = ("Copy of file %s to node %s failed: %s" %
3834 (fname, to_node, msg))
3835 lu.proc.LogWarning(msg)
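# A typical invocation of the helper above (illustrative only): push one
# ancillary file to a list of node names and let the per-node warnings be
# logged by the LU processor.
#
#   _UploadHelper(lu, online_nodes, constants.ETC_HOSTS)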
3838 def _ComputeAncillaryFiles(cluster, redist):
3839 """Compute files external to Ganeti which need to be consistent.
3841 @type redist: boolean
3842 @param redist: Whether to include files which need to be redistributed
3844 """
3845 # Compute files for all nodes
3846 files_all = set([
3847 constants.SSH_KNOWN_HOSTS_FILE,
3848 constants.CONFD_HMAC_KEY,
3849 constants.CLUSTER_DOMAIN_SECRET_FILE,
3850 constants.SPICE_CERT_FILE,
3851 constants.SPICE_CACERT_FILE,
3852 constants.RAPI_USERS_FILE,
3853 ])
3855 if not redist:
3856 files_all.update(constants.ALL_CERT_FILES)
3857 files_all.update(ssconf.SimpleStore().GetFileList())
3858 else:
3859 # we need to ship at least the RAPI certificate
3860 files_all.add(constants.RAPI_CERT_FILE)
3862 if cluster.modify_etc_hosts:
3863 files_all.add(constants.ETC_HOSTS)
3865 # Files which are optional, these must:
3866 # - be present in one other category as well
3867 # - either exist or not exist on all nodes of that category (mc, vm all)
3868 files_opt = set([
3869 constants.RAPI_USERS_FILE,
3870 ])
3872 # Files which should only be on master candidates
3873 files_mc = set()
3875 if not redist:
3876 files_mc.add(constants.CLUSTER_CONF_FILE)
3878 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3879 # replication
3880 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3882 # Files which should only be on VM-capable nodes
3883 files_vm = set(filename
3884 for hv_name in cluster.enabled_hypervisors
3885 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3887 files_opt |= set(filename
3888 for hv_name in cluster.enabled_hypervisors
3889 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3891 # Filenames in each category must be unique
3892 all_files_set = files_all | files_mc | files_vm
3893 assert (len(all_files_set) ==
3894 sum(map(len, [files_all, files_mc, files_vm]))), \
3895 "Found file listed in more than one file list"
3897 # Optional files must be present in one other category
3898 assert all_files_set.issuperset(files_opt), \
3899 "Optional file not in a different required list"
3901 return (files_all, files_opt, files_mc, files_vm)
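# Callers are expected to unpack the returned tuple positionally, e.g. (this
# mirrors the use in _RedistributeAncillaryFiles below):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)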
3904 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3905 """Distribute additional files which are part of the cluster configuration.
3907 ConfigWriter takes care of distributing the config and ssconf files, but
3908 there are more files which should be distributed to all nodes. This function
3909 makes sure those are copied.
3911 @param lu: calling logical unit
3912 @param additional_nodes: list of nodes not in the config to distribute to
3913 @type additional_vm: boolean
3914 @param additional_vm: whether the additional nodes are vm-capable or not
3917 # Gather target nodes
3918 cluster = lu.cfg.GetClusterInfo()
3919 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3921 online_nodes = lu.cfg.GetOnlineNodeList()
3922 vm_nodes = lu.cfg.GetVmCapableNodeList()
3924 if additional_nodes is not None:
3925 online_nodes.extend(additional_nodes)
3926 if additional_vm:
3927 vm_nodes.extend(additional_nodes)
3929 # Never distribute to master node
3930 for nodelist in [online_nodes, vm_nodes]:
3931 if master_info.name in nodelist:
3932 nodelist.remove(master_info.name)
3935 (files_all, _, files_mc, files_vm) = \
3936 _ComputeAncillaryFiles(cluster, True)
3938 # Never re-distribute configuration file from here
3939 assert not (constants.CLUSTER_CONF_FILE in files_all or
3940 constants.CLUSTER_CONF_FILE in files_vm)
3941 assert not files_mc, "Master candidates not handled in this function"
3943 filemap = [
3944 (online_nodes, files_all),
3945 (vm_nodes, files_vm),
3946 ]
3949 for (node_list, files) in filemap:
3950 for fname in files:
3951 _UploadHelper(lu, node_list, fname)
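# Usage sketch: other LUs call this helper after changing node membership or
# cluster-wide files, optionally naming nodes that are not yet in the
# configuration (see LUNodeAdd further down); "node_name" below is only a
# placeholder:
#
#   _RedistributeAncillaryFiles(lu)
#   _RedistributeAncillaryFiles(lu, additional_nodes=[node_name],
#                               additional_vm=True)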
3954 class LUClusterRedistConf(NoHooksLU):
3955 """Force the redistribution of cluster configuration.
3957 This is a very simple LU.
3962 def ExpandNames(self):
3963 self.needed_locks = {
3964 locking.LEVEL_NODE: locking.ALL_SET,
3966 self.share_locks[locking.LEVEL_NODE] = 1
3968 def Exec(self, feedback_fn):
3969 """Redistribute the configuration.
3972 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3973 _RedistributeAncillaryFiles(self)
3976 class LUClusterActivateMasterIp(NoHooksLU):
3977 """Activate the master IP on the master node.
3980 def Exec(self, feedback_fn):
3981 """Activate the master IP.
3984 master_params = self.cfg.GetMasterNetworkParameters()
3985 ems = self.cfg.GetUseExternalMipScript()
3986 result = self.rpc.call_node_activate_master_ip(master_params.name,
3987 master_params, ems)
3988 result.Raise("Could not activate the master IP")
3991 class LUClusterDeactivateMasterIp(NoHooksLU):
3992 """Deactivate the master IP on the master node.
3995 def Exec(self, feedback_fn):
3996 """Deactivate the master IP.
3999 master_params = self.cfg.GetMasterNetworkParameters()
4000 ems = self.cfg.GetUseExternalMipScript()
4001 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4002 master_params, ems)
4003 result.Raise("Could not deactivate the master IP")
4006 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4007 """Sleep and poll for an instance's disk to sync.
4010 if not instance.disks or disks is not None and not disks:
4013 disks = _ExpandCheckDisks(instance, disks)
4016 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4018 node = instance.primary_node
4021 lu.cfg.SetDiskID(dev, node)
4023 # TODO: Convert to utils.Retry
4026 degr_retries = 10 # in seconds, as we sleep 1 second each time
4030 cumul_degraded = False
4031 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4032 msg = rstats.fail_msg
4034 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4037 raise errors.RemoteError("Can't contact node %s for mirror data,"
4038 " aborting." % node)
4041 rstats = rstats.payload
4043 for i, mstat in enumerate(rstats):
4045 lu.LogWarning("Can't compute data for node %s/%s",
4046 node, disks[i].iv_name)
4049 cumul_degraded = (cumul_degraded or
4050 (mstat.is_degraded and mstat.sync_percent is None))
4051 if mstat.sync_percent is not None:
4053 if mstat.estimated_time is not None:
4054 rem_time = ("%s remaining (estimated)" %
4055 utils.FormatSeconds(mstat.estimated_time))
4056 max_time = mstat.estimated_time
4058 rem_time = "no time estimate"
4059 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4060 (disks[i].iv_name, mstat.sync_percent, rem_time))
4062 # if we're done but degraded, let's do a few small retries, to
4063 # make sure we see a stable and not transient situation; therefore
4064 # we force restart of the loop
4065 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4066 logging.info("Degraded disks found, %d retries left", degr_retries)
4074 time.sleep(min(60, max_time))
4077 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4078 return not cumul_degraded
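# Illustrative use of the synchronisation helper above: callers typically
# treat a False return value (still degraded after the retries) as a fatal
# condition for the instance's disks, along the lines of:
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# (the error message is only an example, not the exact wording used elsewhere
# in this module)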
4081 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4082 """Check that mirrors are not degraded.
4084 The ldisk parameter, if True, will change the test from the
4085 is_degraded attribute (which represents overall non-ok status for
4086 the device(s)) to the ldisk (representing the local storage status).
4088 """
4089 lu.cfg.SetDiskID(dev, node)
4091 result = True
4093 if on_primary or dev.AssembleOnSecondary():
4094 rstats = lu.rpc.call_blockdev_find(node, dev)
4095 msg = rstats.fail_msg
4096 if msg:
4097 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4098 result = False
4099 elif not rstats.payload:
4100 lu.LogWarning("Can't find disk on node %s", node)
4101 result = False
4102 else:
4103 if ldisk:
4104 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4105 else:
4106 result = result and not rstats.payload.is_degraded
4108 if dev.children:
4109 for child in dev.children:
4110 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4112 return result
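# Example (hedged, "secondary_node" is only a placeholder name): when
# verifying a DRBD mirror from the secondary side, ldisk=True restricts the
# check to the local storage status instead of the overall is_degraded flag:
#
#   ok = _CheckDiskConsistency(lu, dev, secondary_node, False, ldisk=True)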
4115 class LUOobCommand(NoHooksLU):
4116 """Logical unit for OOB handling.
4120 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4122 def ExpandNames(self):
4123 """Gather locks we need.
4126 if self.op.node_names:
4127 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4128 lock_names = self.op.node_names
4130 lock_names = locking.ALL_SET
4132 self.needed_locks = {
4133 locking.LEVEL_NODE: lock_names,
4136 def CheckPrereq(self):
4137 """Check prerequisites.
4140 - the node exists in the configuration
4143 Any errors are signaled by raising errors.OpPrereqError.
4147 self.master_node = self.cfg.GetMasterNode()
4149 assert self.op.power_delay >= 0.0
4151 if self.op.node_names:
4152 if (self.op.command in self._SKIP_MASTER and
4153 self.master_node in self.op.node_names):
4154 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4155 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4157 if master_oob_handler:
4158 additional_text = ("run '%s %s %s' if you want to operate on the"
4159 " master regardless") % (master_oob_handler,
4160 self.op.command,
4161 self.master_node)
4162 else:
4163 additional_text = "it does not support out-of-band operations"
4165 raise errors.OpPrereqError(("Operating on the master node %s is not"
4166 " allowed for %s; %s") %
4167 (self.master_node, self.op.command,
4168 additional_text), errors.ECODE_INVAL)
4169 else:
4170 self.op.node_names = self.cfg.GetNodeList()
4171 if self.op.command in self._SKIP_MASTER:
4172 self.op.node_names.remove(self.master_node)
4174 if self.op.command in self._SKIP_MASTER:
4175 assert self.master_node not in self.op.node_names
4177 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4178 if node is None:
4179 raise errors.OpPrereqError("Node %s not found" % node_name,
4180 errors.ECODE_NOENT)
4181 else:
4182 self.nodes.append(node)
4184 if (not self.op.ignore_status and
4185 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4186 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4187 " not marked offline") % node_name,
4188 errors.ECODE_STATE)
4190 def Exec(self, feedback_fn):
4191 """Execute OOB and return result if we expect any.
4193 """
4194 master_node = self.master_node
4195 ret = []
4197 for idx, node in enumerate(utils.NiceSort(self.nodes,
4198 key=lambda node: node.name)):
4199 node_entry = [(constants.RS_NORMAL, node.name)]
4200 ret.append(node_entry)
4202 oob_program = _SupportsOob(self.cfg, node)
4204 if not oob_program:
4205 node_entry.append((constants.RS_UNAVAIL, None))
4206 continue
4208 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4209 self.op.command, oob_program, node.name)
4210 result = self.rpc.call_run_oob(master_node, oob_program,
4211 self.op.command, node.name,
4215 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4216 node.name, result.fail_msg)
4217 node_entry.append((constants.RS_NODATA, None))
4220 self._CheckPayload(result)
4221 except errors.OpExecError, err:
4222 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4224 node_entry.append((constants.RS_NODATA, None))
4226 if self.op.command == constants.OOB_HEALTH:
4227 # For health we should log important events
4228 for item, status in result.payload:
4229 if status in [constants.OOB_STATUS_WARNING,
4230 constants.OOB_STATUS_CRITICAL]:
4231 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4232 item, node.name, status)
4234 if self.op.command == constants.OOB_POWER_ON:
4235 node.powered = True
4236 elif self.op.command == constants.OOB_POWER_OFF:
4237 node.powered = False
4238 elif self.op.command == constants.OOB_POWER_STATUS:
4239 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4240 if powered != node.powered:
4241 logging.warning(("Recorded power state (%s) of node '%s' does not"
4242 " match actual power state (%s)"), node.powered,
4243 node.name, powered)
4245 # For configuration changing commands we should update the node
4246 if self.op.command in (constants.OOB_POWER_ON,
4247 constants.OOB_POWER_OFF):
4248 self.cfg.Update(node, feedback_fn)
4250 node_entry.append((constants.RS_NORMAL, result.payload))
4252 if (self.op.command == constants.OOB_POWER_ON and
4253 idx < len(self.nodes) - 1):
4254 time.sleep(self.op.power_delay)
4256 return ret
4258 def _CheckPayload(self, result):
4259 """Checks if the payload is valid.
4261 @param result: RPC result
4262 @raises errors.OpExecError: If payload is not valid
4264 """
4265 errs = []
4266 if self.op.command == constants.OOB_HEALTH:
4267 if not isinstance(result.payload, list):
4268 errs.append("command 'health' is expected to return a list but got %s" %
4269 type(result.payload))
4270 else:
4271 for item, status in result.payload:
4272 if status not in constants.OOB_STATUSES:
4273 errs.append("health item '%s' has invalid status '%s'" %
4274 (item, status))
4276 if self.op.command == constants.OOB_POWER_STATUS:
4277 if not isinstance(result.payload, dict):
4278 errs.append("power-status is expected to return a dict but got %s" %
4279 type(result.payload))
4281 if self.op.command in [
4282 constants.OOB_POWER_ON,
4283 constants.OOB_POWER_OFF,
4284 constants.OOB_POWER_CYCLE,
4286 if result.payload is not None:
4287 errs.append("%s is expected to not return payload but got '%s'" %
4288 (self.op.command, result.payload))
4290 if errs:
4291 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4292 utils.CommaJoin(errs))
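# For reference, each entry appended to "ret" in Exec above is a list of
# (status, data) tuples, one per step, e.g. for a successful power-status
# query on a single node ("node1.example.com" is a made-up name used only
# for illustration):
#
#   [(constants.RS_NORMAL, "node1.example.com"),
#    (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})]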
4295 class _OsQuery(_QueryBase):
4296 FIELDS = query.OS_FIELDS
4298 def ExpandNames(self, lu):
4299 # Lock all nodes in shared mode
4300 # Temporary removal of locks, should be reverted later
4301 # TODO: reintroduce locks when they are lighter-weight
4302 lu.needed_locks = {}
4303 #self.share_locks[locking.LEVEL_NODE] = 1
4304 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4306 # The following variables interact with _QueryBase._GetNames
4307 if self.names:
4308 self.wanted = self.names
4309 else:
4310 self.wanted = locking.ALL_SET
4312 self.do_locking = self.use_locking
4314 def DeclareLocks(self, lu, level):
4318 def _DiagnoseByOS(rlist):
4319 """Remaps a per-node return list into an a per-os per-node dictionary
4321 @param rlist: a map with node names as keys and OS objects as values
4324 @return: a dictionary with osnames as keys and as value another
4325 map, with nodes as keys and tuples of (path, status, diagnose,
4326 variants, parameters, api_versions) as values, eg::
4328 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4329 (/srv/..., False, "invalid api")],
4330 "node2": [(/srv/..., True, "", [], [])]}
4335 # we build here the list of nodes that didn't fail the RPC (at RPC
4336 # level), so that nodes with a non-responding node daemon don't
4337 # make all OSes invalid
4338 good_nodes = [node_name for node_name in rlist
4339 if not rlist[node_name].fail_msg]
4340 for node_name, nr in rlist.items():
4341 if nr.fail_msg or not nr.payload:
4342 continue
4343 for (name, path, status, diagnose, variants,
4344 params, api_versions) in nr.payload:
4345 if name not in all_os:
4346 # build a list of nodes for this os containing empty lists
4347 # for each node in node_list
4348 all_os[name] = {}
4349 for nname in good_nodes:
4350 all_os[name][nname] = []
4351 # convert params from [name, help] to (name, help)
4352 params = [tuple(v) for v in params]
4353 all_os[name][node_name].append((path, status, diagnose,
4354 variants, params, api_versions))
4356 return all_os
4357 def _GetQueryData(self, lu):
4358 """Computes the list of nodes and their attributes.
4361 # Locking is not used
4362 assert not (compat.any(lu.glm.is_owned(level)
4363 for level in locking.LEVELS
4364 if level != locking.LEVEL_CLUSTER) or
4365 self.do_locking or self.use_locking)
4367 valid_nodes = [node.name
4368 for node in lu.cfg.GetAllNodesInfo().values()
4369 if not node.offline and node.vm_capable]
4370 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4371 cluster = lu.cfg.GetClusterInfo()
4373 data = {}
4375 for (os_name, os_data) in pol.items():
4376 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4377 hidden=(os_name in cluster.hidden_os),
4378 blacklisted=(os_name in cluster.blacklisted_os))
4380 variants = set()
4381 parameters = set()
4382 api_versions = set()
4384 for idx, osl in enumerate(os_data.values()):
4385 info.valid = bool(info.valid and osl and osl[0][1])
4386 if not info.valid:
4387 break
4389 (node_variants, node_params, node_api) = osl[0][3:6]
4390 if idx == 0:
4391 # First entry
4392 variants.update(node_variants)
4393 parameters.update(node_params)
4394 api_versions.update(node_api)
4395 else:
4396 # Filter out inconsistent values
4397 variants.intersection_update(node_variants)
4398 parameters.intersection_update(node_params)
4399 api_versions.intersection_update(node_api)
4401 info.variants = list(variants)
4402 info.parameters = list(parameters)
4403 info.api_versions = list(api_versions)
4405 data[os_name] = info
4407 # Prepare data in requested order
4408 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4409 if name in data]
4412 class LUOsDiagnose(NoHooksLU):
4413 """Logical unit for OS diagnose/query.
4419 def _BuildFilter(fields, names):
4420 """Builds a filter for querying OSes.
4423 name_filter = qlang.MakeSimpleFilter("name", names)
4425 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4426 # respective field is not requested
4427 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4428 for fname in ["hidden", "blacklisted"]
4429 if fname not in fields]
4430 if "valid" not in fields:
4431 status_filter.append([qlang.OP_TRUE, "valid"])
4434 status_filter.insert(0, qlang.OP_AND)
4436 status_filter = None
4438 if name_filter and status_filter:
4439 return [qlang.OP_AND, name_filter, status_filter]
4443 return status_filter
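# Illustrative result of the filter builder above: for a field list that does
# not request "hidden", "blacklisted" or "valid", and with no name arguments,
# the returned query filter reduces to the legacy visibility filter, roughly:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]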
4445 def CheckArguments(self):
4446 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4447 self.op.output_fields, False)
4449 def ExpandNames(self):
4450 self.oq.ExpandNames(self)
4452 def Exec(self, feedback_fn):
4453 return self.oq.OldStyleQuery(self)
4456 class LUNodeRemove(LogicalUnit):
4457 """Logical unit for removing a node.
4460 HPATH = "node-remove"
4461 HTYPE = constants.HTYPE_NODE
4463 def BuildHooksEnv(self):
4466 This doesn't run on the target node in the pre phase as a failed
4467 node would then be impossible to remove.
4471 "OP_TARGET": self.op.node_name,
4472 "NODE_NAME": self.op.node_name,
4475 def BuildHooksNodes(self):
4476 """Build hooks nodes.
4479 all_nodes = self.cfg.GetNodeList()
4480 try:
4481 all_nodes.remove(self.op.node_name)
4482 except ValueError:
4483 logging.warning("Node '%s', which is about to be removed, was not found"
4484 " in the list of all nodes", self.op.node_name)
4485 return (all_nodes, all_nodes)
4487 def CheckPrereq(self):
4488 """Check prerequisites.
4491 - the node exists in the configuration
4492 - it does not have primary or secondary instances
4493 - it's not the master
4495 Any errors are signaled by raising errors.OpPrereqError.
4498 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4499 node = self.cfg.GetNodeInfo(self.op.node_name)
4500 assert node is not None
4502 masternode = self.cfg.GetMasterNode()
4503 if node.name == masternode:
4504 raise errors.OpPrereqError("Node is the master node, failover to another"
4505 " node is required", errors.ECODE_INVAL)
4507 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4508 if node.name in instance.all_nodes:
4509 raise errors.OpPrereqError("Instance %s is still running on the node,"
4510 " please remove first" % instance_name,
4512 self.op.node_name = node.name
4515 def Exec(self, feedback_fn):
4516 """Removes the node from the cluster.
4520 logging.info("Stopping the node daemon and removing configs from node %s",
4523 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4525 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4528 # Promote nodes to master candidate as needed
4529 _AdjustCandidatePool(self, exceptions=[node.name])
4530 self.context.RemoveNode(node.name)
4532 # Run post hooks on the node before it's removed
4533 _RunPostHook(self, node.name)
4535 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4536 msg = result.fail_msg
4537 if msg:
4538 self.LogWarning("Errors encountered on the remote node while leaving"
4539 " the cluster: %s", msg)
4541 # Remove node from our /etc/hosts
4542 if self.cfg.GetClusterInfo().modify_etc_hosts:
4543 master_node = self.cfg.GetMasterNode()
4544 result = self.rpc.call_etc_hosts_modify(master_node,
4545 constants.ETC_HOSTS_REMOVE,
4547 result.Raise("Can't update hosts file with new host data")
4548 _RedistributeAncillaryFiles(self)
4551 class _NodeQuery(_QueryBase):
4552 FIELDS = query.NODE_FIELDS
4554 def ExpandNames(self, lu):
4555 lu.needed_locks = {}
4556 lu.share_locks = _ShareAll()
4558 if self.names:
4559 self.wanted = _GetWantedNodes(lu, self.names)
4560 else:
4561 self.wanted = locking.ALL_SET
4563 self.do_locking = (self.use_locking and
4564 query.NQ_LIVE in self.requested_data)
4566 if self.do_locking:
4567 # If any non-static field is requested we need to lock the nodes
4568 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4570 def DeclareLocks(self, lu, level):
4573 def _GetQueryData(self, lu):
4574 """Computes the list of nodes and their attributes.
4577 all_info = lu.cfg.GetAllNodesInfo()
4579 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4581 # Gather data as requested
4582 if query.NQ_LIVE in self.requested_data:
4583 # filter out non-vm_capable nodes
4584 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4586 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4587 lu.cfg.GetHypervisorType())
4588 live_data = dict((name, nresult.payload)
4589 for (name, nresult) in node_data.items()
4590 if not nresult.fail_msg and nresult.payload)
4594 if query.NQ_INST in self.requested_data:
4595 node_to_primary = dict([(name, set()) for name in nodenames])
4596 node_to_secondary = dict([(name, set()) for name in nodenames])
4598 inst_data = lu.cfg.GetAllInstancesInfo()
4600 for inst in inst_data.values():
4601 if inst.primary_node in node_to_primary:
4602 node_to_primary[inst.primary_node].add(inst.name)
4603 for secnode in inst.secondary_nodes:
4604 if secnode in node_to_secondary:
4605 node_to_secondary[secnode].add(inst.name)
4607 node_to_primary = None
4608 node_to_secondary = None
4610 if query.NQ_OOB in self.requested_data:
4611 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4612 for name, node in all_info.iteritems())
4616 if query.NQ_GROUP in self.requested_data:
4617 groups = lu.cfg.GetAllNodeGroupsInfo()
4621 return query.NodeQueryData([all_info[name] for name in nodenames],
4622 live_data, lu.cfg.GetMasterNode(),
4623 node_to_primary, node_to_secondary, groups,
4624 oob_support, lu.cfg.GetClusterInfo())
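# Note on the requested_data flags handled above (summarised from the code,
# not an exhaustive contract): NQ_LIVE triggers the node_info RPC against
# vm_capable nodes, NQ_INST builds the primary/secondary instance maps,
# NQ_OOB computes out-of-band support and NQ_GROUP fetches the node group
# objects; everything else comes straight from the configuration.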
4627 class LUNodeQuery(NoHooksLU):
4628 """Logical unit for querying nodes.
4631 # pylint: disable=W0142
4634 def CheckArguments(self):
4635 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4636 self.op.output_fields, self.op.use_locking)
4638 def ExpandNames(self):
4639 self.nq.ExpandNames(self)
4641 def DeclareLocks(self, level):
4642 self.nq.DeclareLocks(self, level)
4644 def Exec(self, feedback_fn):
4645 return self.nq.OldStyleQuery(self)
4648 class LUNodeQueryvols(NoHooksLU):
4649 """Logical unit for getting volumes on node(s).
4653 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4654 _FIELDS_STATIC = utils.FieldSet("node")
4656 def CheckArguments(self):
4657 _CheckOutputFields(static=self._FIELDS_STATIC,
4658 dynamic=self._FIELDS_DYNAMIC,
4659 selected=self.op.output_fields)
4661 def ExpandNames(self):
4662 self.share_locks = _ShareAll()
4663 self.needed_locks = {}
4665 if not self.op.nodes:
4666 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4668 self.needed_locks[locking.LEVEL_NODE] = \
4669 _GetWantedNodes(self, self.op.nodes)
4671 def Exec(self, feedback_fn):
4672 """Computes the list of nodes and their attributes.
4675 nodenames = self.owned_locks(locking.LEVEL_NODE)
4676 volumes = self.rpc.call_node_volumes(nodenames)
4678 ilist = self.cfg.GetAllInstancesInfo()
4679 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4681 output = []
4682 for node in nodenames:
4683 nresult = volumes[node]
4684 if nresult.offline:
4685 continue
4686 msg = nresult.fail_msg
4687 if msg:
4688 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4689 continue
4691 node_vols = sorted(nresult.payload,
4692 key=operator.itemgetter("dev"))
4694 for vol in node_vols:
4695 node_output = []
4696 for field in self.op.output_fields:
4697 if field == "node":
4698 val = node
4699 elif field == "phys":
4700 val = vol["dev"]
4701 elif field == "vg":
4702 val = vol["vg"]
4703 elif field == "name":
4704 val = vol["name"]
4705 elif field == "size":
4706 val = int(float(vol["size"]))
4707 elif field == "instance":
4708 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4709 else:
4710 raise errors.ParameterError(field)
4711 node_output.append(str(val))
4713 output.append(node_output)
4715 return output
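# Example output row (illustrative values only) for
# output_fields=["node", "phys", "vg", "name", "size", "instance"]:
#
#   ["node1.example.com", "/dev/sda5", "xenvg", "lv-0123", "10240", "-"]
#
# All values are stringified by the loop above; "-" means the volume is not
# mapped to any instance disk.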
4718 class LUNodeQueryStorage(NoHooksLU):
4719 """Logical unit for getting information on storage units on node(s).
4722 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4725 def CheckArguments(self):
4726 _CheckOutputFields(static=self._FIELDS_STATIC,
4727 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4728 selected=self.op.output_fields)
4730 def ExpandNames(self):
4731 self.share_locks = _ShareAll()
4732 self.needed_locks = {}
4735 self.needed_locks[locking.LEVEL_NODE] = \
4736 _GetWantedNodes(self, self.op.nodes)
4738 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4740 def Exec(self, feedback_fn):
4741 """Computes the list of nodes and their attributes.
4744 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4746 # Always get name to sort by
4747 if constants.SF_NAME in self.op.output_fields:
4748 fields = self.op.output_fields[:]
4750 fields = [constants.SF_NAME] + self.op.output_fields
4752 # Never ask for node or type as it's only known to the LU
4753 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4754 while extra in fields:
4755 fields.remove(extra)
4757 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4758 name_idx = field_idx[constants.SF_NAME]
4760 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4761 data = self.rpc.call_storage_list(self.nodes,
4762 self.op.storage_type, st_args,
4763 self.op.name, fields)
4767 for node in utils.NiceSort(self.nodes):
4768 nresult = data[node]
4772 msg = nresult.fail_msg
4774 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4777 rows = dict([(row[name_idx], row) for row in nresult.payload])
4779 for name in utils.NiceSort(rows.keys()):
4784 for field in self.op.output_fields:
4785 if field == constants.SF_NODE:
4787 elif field == constants.SF_TYPE:
4788 val = self.op.storage_type
4789 elif field in field_idx:
4790 val = row[field_idx[field]]
4792 raise errors.ParameterError(field)
4801 class _InstanceQuery(_QueryBase):
4802 FIELDS = query.INSTANCE_FIELDS
4804 def ExpandNames(self, lu):
4805 lu.needed_locks = {}
4806 lu.share_locks = _ShareAll()
4808 if self.names:
4809 self.wanted = _GetWantedInstances(lu, self.names)
4810 else:
4811 self.wanted = locking.ALL_SET
4813 self.do_locking = (self.use_locking and
4814 query.IQ_LIVE in self.requested_data)
4815 if self.do_locking:
4816 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4817 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4818 lu.needed_locks[locking.LEVEL_NODE] = []
4819 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4821 self.do_grouplocks = (self.do_locking and
4822 query.IQ_NODES in self.requested_data)
4824 def DeclareLocks(self, lu, level):
4826 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4827 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4829 # Lock all groups used by instances optimistically; this requires going
4830 # via the node before it's locked, requiring verification later on
4831 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4833 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4834 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4835 elif level == locking.LEVEL_NODE:
4836 lu._LockInstancesNodes() # pylint: disable=W0212
4839 def _CheckGroupLocks(lu):
4840 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4841 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4843 # Check if node groups for locked instances are still correct
4844 for instance_name in owned_instances:
4845 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4847 def _GetQueryData(self, lu):
4848 """Computes the list of instances and their attributes.
4851 if self.do_grouplocks:
4852 self._CheckGroupLocks(lu)
4854 cluster = lu.cfg.GetClusterInfo()
4855 all_info = lu.cfg.GetAllInstancesInfo()
4857 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4859 instance_list = [all_info[name] for name in instance_names]
4860 nodes = frozenset(itertools.chain(*(inst.all_nodes
4861 for inst in instance_list)))
4862 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4863 bad_nodes = []
4864 offline_nodes = []
4865 wrongnode_inst = set()
4867 # Gather data as requested
4868 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4869 live_data = {}
4870 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4871 for name in nodes:
4872 result = node_data[name]
4873 if result.offline:
4874 # offline nodes will be in both lists
4875 assert result.fail_msg
4876 offline_nodes.append(name)
4877 if result.fail_msg:
4878 bad_nodes.append(name)
4879 elif result.payload:
4880 for inst in result.payload:
4881 if inst in all_info:
4882 if all_info[inst].primary_node == name:
4883 live_data.update(result.payload)
4884 else:
4885 wrongnode_inst.add(inst)
4886 else:
4887 # orphan instance; we don't list it here as we don't
4888 # handle this case yet in the output of instance listing
4889 logging.warning("Orphan instance '%s' found on node %s",
4890 inst, name)
4891 # else no instance is alive
4892 else:
4893 live_data = {}
4895 if query.IQ_DISKUSAGE in self.requested_data:
4896 disk_usage = dict((inst.name,
4897 _ComputeDiskSize(inst.disk_template,
4898 [{constants.IDISK_SIZE: disk.size}
4899 for disk in inst.disks]))
4900 for inst in instance_list)
4901 else:
4902 disk_usage = None
4904 if query.IQ_CONSOLE in self.requested_data:
4905 consinfo = {}
4906 for inst in instance_list:
4907 if inst.name in live_data:
4908 # Instance is running
4909 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4910 else:
4911 consinfo[inst.name] = None
4912 assert set(consinfo.keys()) == set(instance_names)
4913 else:
4914 consinfo = None
4916 if query.IQ_NODES in self.requested_data:
4917 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4918 instance_list)))
4919 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4920 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4921 for uuid in set(map(operator.attrgetter("group"),
4922 nodes.values())))
4923 else:
4924 nodes = None
4925 groups = None
4927 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4928 disk_usage, offline_nodes, bad_nodes,
4929 live_data, wrongnode_inst, consinfo,
4930 nodes, groups)
4933 class LUQuery(NoHooksLU):
4934 """Query for resources/items of a certain kind.
4937 # pylint: disable=W0142
4940 def CheckArguments(self):
4941 qcls = _GetQueryImplementation(self.op.what)
4943 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4945 def ExpandNames(self):
4946 self.impl.ExpandNames(self)
4948 def DeclareLocks(self, level):
4949 self.impl.DeclareLocks(self, level)
4951 def Exec(self, feedback_fn):
4952 return self.impl.NewStyleQuery(self)
4955 class LUQueryFields(NoHooksLU):
4956 """Query for resources/items of a certain kind.
4959 # pylint: disable=W0142
4962 def CheckArguments(self):
4963 self.qcls = _GetQueryImplementation(self.op.what)
4965 def ExpandNames(self):
4966 self.needed_locks = {}
4968 def Exec(self, feedback_fn):
4969 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4972 class LUNodeModifyStorage(NoHooksLU):
4973 """Logical unit for modifying a storage volume on a node.
4978 def CheckArguments(self):
4979 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4981 storage_type = self.op.storage_type
4983 try:
4984 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4985 except KeyError:
4986 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4987 " modified" % storage_type,
4988 errors.ECODE_INVAL)
4990 diff = set(self.op.changes.keys()) - modifiable
4991 if diff:
4992 raise errors.OpPrereqError("The following fields can not be modified for"
4993 " storage units of type '%s': %r" %
4994 (storage_type, list(diff)),
4995 errors.ECODE_INVAL)
4997 def ExpandNames(self):
4998 self.needed_locks = {
4999 locking.LEVEL_NODE: self.op.node_name,
5002 def Exec(self, feedback_fn):
5003 """Computes the list of nodes and their attributes.
5006 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5007 result = self.rpc.call_storage_modify(self.op.node_name,
5008 self.op.storage_type, st_args,
5009 self.op.name, self.op.changes)
5010 result.Raise("Failed to modify storage unit '%s' on %s" %
5011 (self.op.name, self.op.node_name))
5014 class LUNodeAdd(LogicalUnit):
5015 """Logical unit for adding node to the cluster.
5019 HTYPE = constants.HTYPE_NODE
5020 _NFLAGS = ["master_capable", "vm_capable"]
5022 def CheckArguments(self):
5023 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5024 # validate/normalize the node name
5025 self.hostname = netutils.GetHostname(name=self.op.node_name,
5026 family=self.primary_ip_family)
5027 self.op.node_name = self.hostname.name
5029 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5030 raise errors.OpPrereqError("Cannot readd the master node",
5033 if self.op.readd and self.op.group:
5034 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5035 " being readded", errors.ECODE_INVAL)
5037 def BuildHooksEnv(self):
5040 This will run on all nodes before, and on all nodes + the new node after.
5044 "OP_TARGET": self.op.node_name,
5045 "NODE_NAME": self.op.node_name,
5046 "NODE_PIP": self.op.primary_ip,
5047 "NODE_SIP": self.op.secondary_ip,
5048 "MASTER_CAPABLE": str(self.op.master_capable),
5049 "VM_CAPABLE": str(self.op.vm_capable),
5052 def BuildHooksNodes(self):
5053 """Build hooks nodes.
5056 # Exclude added node
5057 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5058 post_nodes = pre_nodes + [self.op.node_name, ]
5060 return (pre_nodes, post_nodes)
5062 def CheckPrereq(self):
5063 """Check prerequisites.
5066 - the new node is not already in the config
5068 - its parameters (single/dual homed) matches the cluster
5070 Any errors are signaled by raising errors.OpPrereqError.
5074 hostname = self.hostname
5075 node = hostname.name
5076 primary_ip = self.op.primary_ip = hostname.ip
5077 if self.op.secondary_ip is None:
5078 if self.primary_ip_family == netutils.IP6Address.family:
5079 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5080 " IPv4 address must be given as secondary",
5082 self.op.secondary_ip = primary_ip
5084 secondary_ip = self.op.secondary_ip
5085 if not netutils.IP4Address.IsValid(secondary_ip):
5086 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5087 " address" % secondary_ip, errors.ECODE_INVAL)
5089 node_list = cfg.GetNodeList()
5090 if not self.op.readd and node in node_list:
5091 raise errors.OpPrereqError("Node %s is already in the configuration" %
5092 node, errors.ECODE_EXISTS)
5093 elif self.op.readd and node not in node_list:
5094 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5097 self.changed_primary_ip = False
5099 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5100 if self.op.readd and node == existing_node_name:
5101 if existing_node.secondary_ip != secondary_ip:
5102 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5103 " address configuration as before",
5105 if existing_node.primary_ip != primary_ip:
5106 self.changed_primary_ip = True
5110 if (existing_node.primary_ip == primary_ip or
5111 existing_node.secondary_ip == primary_ip or
5112 existing_node.primary_ip == secondary_ip or
5113 existing_node.secondary_ip == secondary_ip):
5114 raise errors.OpPrereqError("New node ip address(es) conflict with"
5115 " existing node %s" % existing_node.name,
5116 errors.ECODE_NOTUNIQUE)
5118 # After this 'if' block, None is no longer a valid value for the
5119 # _capable op attributes
5120 if self.op.readd:
5121 old_node = self.cfg.GetNodeInfo(node)
5122 assert old_node is not None, "Can't retrieve locked node %s" % node
5123 for attr in self._NFLAGS:
5124 if getattr(self.op, attr) is None:
5125 setattr(self.op, attr, getattr(old_node, attr))
5126 else:
5127 for attr in self._NFLAGS:
5128 if getattr(self.op, attr) is None:
5129 setattr(self.op, attr, True)
5131 if self.op.readd and not self.op.vm_capable:
5132 pri, sec = cfg.GetNodeInstances(node)
5134 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5135 " flag set to false, but it already holds"
5136 " instances" % node,
5139 # check that the type of the node (single versus dual homed) is the
5140 # same as for the master
5141 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5142 master_singlehomed = myself.secondary_ip == myself.primary_ip
5143 newbie_singlehomed = secondary_ip == primary_ip
5144 if master_singlehomed != newbie_singlehomed:
5145 if master_singlehomed:
5146 raise errors.OpPrereqError("The master has no secondary ip but the"
5147 " new node has one",
5150 raise errors.OpPrereqError("The master has a secondary ip but the"
5151 " new node doesn't have one",
5154 # checks reachability
5155 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5156 raise errors.OpPrereqError("Node not reachable by ping",
5157 errors.ECODE_ENVIRON)
5159 if not newbie_singlehomed:
5160 # check reachability from my secondary ip to newbie's secondary ip
5161 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5162 source=myself.secondary_ip):
5163 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5164 " based ping to node daemon port",
5165 errors.ECODE_ENVIRON)
5172 if self.op.master_capable:
5173 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5175 self.master_candidate = False
5178 self.new_node = old_node
5180 node_group = cfg.LookupNodeGroup(self.op.group)
5181 self.new_node = objects.Node(name=node,
5182 primary_ip=primary_ip,
5183 secondary_ip=secondary_ip,
5184 master_candidate=self.master_candidate,
5185 offline=False, drained=False,
5188 if self.op.ndparams:
5189 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5191 def Exec(self, feedback_fn):
5192 """Adds the new node to the cluster.
5195 new_node = self.new_node
5196 node = new_node.name
5198 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5201 # We adding a new node so we assume it's powered
5202 new_node.powered = True
5204 # for re-adds, reset the offline/drained/master-candidate flags;
5205 # we need to reset here, otherwise offline would prevent RPC calls
5206 # later in the procedure; this also means that if the re-add
5207 # fails, we are left with a non-offlined, broken node
5209 new_node.drained = new_node.offline = False # pylint: disable=W0201
5210 self.LogInfo("Readding a node, the offline/drained flags were reset")
5211 # if we demote the node, we do cleanup later in the procedure
5212 new_node.master_candidate = self.master_candidate
5213 if self.changed_primary_ip:
5214 new_node.primary_ip = self.op.primary_ip
5216 # copy the master/vm_capable flags
5217 for attr in self._NFLAGS:
5218 setattr(new_node, attr, getattr(self.op, attr))
5220 # notify the user about any possible mc promotion
5221 if new_node.master_candidate:
5222 self.LogInfo("Node will be a master candidate")
5224 if self.op.ndparams:
5225 new_node.ndparams = self.op.ndparams
5227 new_node.ndparams = {}
5229 # check connectivity
5230 result = self.rpc.call_version([node])[node]
5231 result.Raise("Can't get version information from node %s" % node)
5232 if constants.PROTOCOL_VERSION == result.payload:
5233 logging.info("Communication to node %s fine, sw version %s match",
5234 node, result.payload)
5236 raise errors.OpExecError("Version mismatch master version %s,"
5237 " node version %s" %
5238 (constants.PROTOCOL_VERSION, result.payload))
5240 # Add node to our /etc/hosts, and add key to known_hosts
5241 if self.cfg.GetClusterInfo().modify_etc_hosts:
5242 master_node = self.cfg.GetMasterNode()
5243 result = self.rpc.call_etc_hosts_modify(master_node,
5244 constants.ETC_HOSTS_ADD,
5247 result.Raise("Can't update hosts file with new host data")
5249 if new_node.secondary_ip != new_node.primary_ip:
5250 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5253 node_verify_list = [self.cfg.GetMasterNode()]
5254 node_verify_param = {
5255 constants.NV_NODELIST: ([node], {}),
5256 # TODO: do a node-net-test as well?
5259 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5260 self.cfg.GetClusterName())
5261 for verifier in node_verify_list:
5262 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5263 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5265 for failed in nl_payload:
5266 feedback_fn("ssh/hostname verification failed"
5267 " (checking from %s): %s" %
5268 (verifier, nl_payload[failed]))
5269 raise errors.OpExecError("ssh/hostname verification failed")
5272 _RedistributeAncillaryFiles(self)
5273 self.context.ReaddNode(new_node)
5274 # make sure we redistribute the config
5275 self.cfg.Update(new_node, feedback_fn)
5276 # and make sure the new node will not have old files around
5277 if not new_node.master_candidate:
5278 result = self.rpc.call_node_demote_from_mc(new_node.name)
5279 msg = result.fail_msg
5281 self.LogWarning("Node failed to demote itself from master"
5282 " candidate status: %s" % msg)
5284 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5285 additional_vm=self.op.vm_capable)
5286 self.context.AddNode(new_node, self.proc.GetECId())
5289 class LUNodeSetParams(LogicalUnit):
5290 """Modifies the parameters of a node.
5292 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5293 to the node role (as _ROLE_*)
5294 @cvar _R2F: a dictionary from node role to tuples of flags
5295 @cvar _FLAGS: a list of attribute names corresponding to the flags
5298 HPATH = "node-modify"
5299 HTYPE = constants.HTYPE_NODE
5301 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5302 _F2R = {
5303 (True, False, False): _ROLE_CANDIDATE,
5304 (False, True, False): _ROLE_DRAINED,
5305 (False, False, True): _ROLE_OFFLINE,
5306 (False, False, False): _ROLE_REGULAR,
5307 }
5308 _R2F = dict((v, k) for k, v in _F2R.items())
5309 _FLAGS = ["master_candidate", "drained", "offline"]
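# Quick reference for the role tables above: the flag order is
# (master_candidate, drained, offline), so for example
#
#   _F2R[(True, False, False)]  == _ROLE_CANDIDATE
#   _R2F[_ROLE_OFFLINE]         == (False, False, True)
#
# and _FLAGS lists the node attributes in that same order.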
5311 def CheckArguments(self):
5312 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5313 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5314 self.op.master_capable, self.op.vm_capable,
5315 self.op.secondary_ip, self.op.ndparams]
5316 if all_mods.count(None) == len(all_mods):
5317 raise errors.OpPrereqError("Please pass at least one modification",
5319 if all_mods.count(True) > 1:
5320 raise errors.OpPrereqError("Can't set the node into more than one"
5321 " state at the same time",
5324 # Boolean value that tells us whether we might be demoting from MC
5325 self.might_demote = (self.op.master_candidate == False or
5326 self.op.offline == True or
5327 self.op.drained == True or
5328 self.op.master_capable == False)
5330 if self.op.secondary_ip:
5331 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5332 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5333 " address" % self.op.secondary_ip,
5336 self.lock_all = self.op.auto_promote and self.might_demote
5337 self.lock_instances = self.op.secondary_ip is not None
5339 def _InstanceFilter(self, instance):
5340 """Filter for getting affected instances.
5343 return (instance.disk_template in constants.DTS_INT_MIRROR and
5344 self.op.node_name in instance.all_nodes)
5346 def ExpandNames(self):
5347 if self.lock_all:
5348 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5349 else:
5350 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5352 # Since modifying a node can have severe effects on currently running
5353 # operations the resource lock is at least acquired in shared mode
5354 self.needed_locks[locking.LEVEL_NODE_RES] = \
5355 self.needed_locks[locking.LEVEL_NODE]
5357 # Get node resource and instance locks in shared mode; they are not used
5358 # for anything but read-only access
5359 self.share_locks[locking.LEVEL_NODE_RES] = 1
5360 self.share_locks[locking.LEVEL_INSTANCE] = 1
5362 if self.lock_instances:
5363 self.needed_locks[locking.LEVEL_INSTANCE] = \
5364 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5366 def BuildHooksEnv(self):
5369 This runs on the master node.
5373 "OP_TARGET": self.op.node_name,
5374 "MASTER_CANDIDATE": str(self.op.master_candidate),
5375 "OFFLINE": str(self.op.offline),
5376 "DRAINED": str(self.op.drained),
5377 "MASTER_CAPABLE": str(self.op.master_capable),
5378 "VM_CAPABLE": str(self.op.vm_capable),
5381 def BuildHooksNodes(self):
5382 """Build hooks nodes.
5385 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5386 return (nl, nl)
5388 def CheckPrereq(self):
5389 """Check prerequisites.
5391 This only checks the instance list against the existing names.
5394 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5396 if self.lock_instances:
5397 affected_instances = \
5398 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5400 # Verify instance locks
5401 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5402 wanted_instances = frozenset(affected_instances.keys())
5403 if wanted_instances - owned_instances:
5404 raise errors.OpPrereqError("Instances affected by changing node %s's"
5405 " secondary IP address have changed since"
5406 " locks were acquired, wanted '%s', have"
5407 " '%s'; retry the operation" %
5409 utils.CommaJoin(wanted_instances),
5410 utils.CommaJoin(owned_instances)),
5413 affected_instances = None
5415 if (self.op.master_candidate is not None or
5416 self.op.drained is not None or
5417 self.op.offline is not None):
5418 # we can't change the master's node flags
5419 if self.op.node_name == self.cfg.GetMasterNode():
5420 raise errors.OpPrereqError("The master role can be changed"
5421 " only via master-failover",
5424 if self.op.master_candidate and not node.master_capable:
5425 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5426 " it a master candidate" % node.name,
5429 if self.op.vm_capable == False:
5430 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5432 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5433 " the vm_capable flag" % node.name,
5436 if node.master_candidate and self.might_demote and not self.lock_all:
5437 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5438 # check if after removing the current node, we're missing master
5440 (mc_remaining, mc_should, _) = \
5441 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5442 if mc_remaining < mc_should:
5443 raise errors.OpPrereqError("Not enough master candidates, please"
5444 " pass auto promote option to allow"
5445 " promotion", errors.ECODE_STATE)
5447 self.old_flags = old_flags = (node.master_candidate,
5448 node.drained, node.offline)
5449 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5450 self.old_role = old_role = self._F2R[old_flags]
5452 # Check for ineffective changes
5453 for attr in self._FLAGS:
5454 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5455 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5456 setattr(self.op, attr, None)
5458 # Past this point, any flag change to False means a transition
5459 # away from the respective state, as only real changes are kept
5461 # TODO: We might query the real power state if it supports OOB
5462 if _SupportsOob(self.cfg, node):
5463 if self.op.offline is False and not (node.powered or
5464 self.op.powered == True):
5465 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5466 " offline status can be reset") %
5468 elif self.op.powered is not None:
5469 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5470 " as it does not support out-of-band"
5471 " handling") % self.op.node_name)
5473 # If we're being deofflined/drained, we'll MC ourself if needed
5474 if (self.op.drained == False or self.op.offline == False or
5475 (self.op.master_capable and not node.master_capable)):
5476 if _DecideSelfPromotion(self):
5477 self.op.master_candidate = True
5478 self.LogInfo("Auto-promoting node to master candidate")
5480 # If we're no longer master capable, we'll demote ourselves from MC
5481 if self.op.master_capable == False and node.master_candidate:
5482 self.LogInfo("Demoting from master candidate")
5483 self.op.master_candidate = False
5486 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5487 if self.op.master_candidate:
5488 new_role = self._ROLE_CANDIDATE
5489 elif self.op.drained:
5490 new_role = self._ROLE_DRAINED
5491 elif self.op.offline:
5492 new_role = self._ROLE_OFFLINE
5493 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5494 # False is still in new flags, which means we're un-setting (the
5496 new_role = self._ROLE_REGULAR
5497 else: # no new flags, nothing, keep old role
5500 self.new_role = new_role
5502 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5503 # Trying to transition out of offline status
5504 # TODO: Use standard RPC runner, but make sure it works when the node is
5505 # still marked offline
5506 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5508 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5509 " to report its version: %s" %
5510 (node.name, result.fail_msg),
5513 self.LogWarning("Transitioning node from offline to online state"
5514 " without using re-add. Please make sure the node"
5517 if self.op.secondary_ip:
5518 # Ok even without locking, because this can't be changed by any LU
5519 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5520 master_singlehomed = master.secondary_ip == master.primary_ip
5521 if master_singlehomed and self.op.secondary_ip:
5522 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5523 " homed cluster", errors.ECODE_INVAL)
5525 assert not (frozenset(affected_instances) -
5526 self.owned_locks(locking.LEVEL_INSTANCE))
5529 if affected_instances:
5530 raise errors.OpPrereqError("Cannot change secondary IP address:"
5531 " offline node has instances (%s)"
5532 " configured to use it" %
5533 utils.CommaJoin(affected_instances.keys()))
5535 # On online nodes, check that no instances are running, and that
5536 # the node has the new ip and we can reach it.
5537 for instance in affected_instances.values():
5538 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5539 msg="cannot change secondary ip")
5541 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5542 if master.name != node.name:
5543 # check reachability from master secondary ip to new secondary ip
5544 if not netutils.TcpPing(self.op.secondary_ip,
5545 constants.DEFAULT_NODED_PORT,
5546 source=master.secondary_ip):
5547 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5548 " based ping to node daemon port",
5549 errors.ECODE_ENVIRON)
5551 if self.op.ndparams:
5552 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5553 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5554 self.new_ndparams = new_ndparams
5556 def Exec(self, feedback_fn):
5561 old_role = self.old_role
5562 new_role = self.new_role
5566 if self.op.ndparams:
5567 node.ndparams = self.new_ndparams
5569 if self.op.powered is not None:
5570 node.powered = self.op.powered
5572 for attr in ["master_capable", "vm_capable"]:
5573 val = getattr(self.op, attr)
5575 setattr(node, attr, val)
5576 result.append((attr, str(val)))
5578 if new_role != old_role:
5579 # Tell the node to demote itself, if no longer MC and not offline
5580 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5581 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5583 self.LogWarning("Node failed to demote itself: %s", msg)
5585 new_flags = self._R2F[new_role]
5586 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5588 result.append((desc, str(nf)))
5589 (node.master_candidate, node.drained, node.offline) = new_flags
5591 # we locked all nodes, we adjust the CP before updating this node
5593 _AdjustCandidatePool(self, [node.name])
5595 if self.op.secondary_ip:
5596 node.secondary_ip = self.op.secondary_ip
5597 result.append(("secondary_ip", self.op.secondary_ip))
5599 # this will trigger configuration file update, if needed
5600 self.cfg.Update(node, feedback_fn)
5602 # this will trigger job queue propagation or cleanup if the mc
5603 # flag changed
5604 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5605 self.context.ReaddNode(node)
5607 return result
5610 class LUNodePowercycle(NoHooksLU):
5611 """Powercycles a node.
5616 def CheckArguments(self):
5617 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5618 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5619 raise errors.OpPrereqError("The node is the master and the force"
5620 " parameter was not set",
5623 def ExpandNames(self):
5624 """Locking for PowercycleNode.
5626 This is a last-resort option and shouldn't block on other
5627 jobs. Therefore, we grab no locks.
5630 self.needed_locks = {}
5632 def Exec(self, feedback_fn):
5636 result = self.rpc.call_node_powercycle(self.op.node_name,
5637 self.cfg.GetHypervisorType())
5638 result.Raise("Failed to schedule the reboot")
5639 return result.payload
5642 class LUClusterQuery(NoHooksLU):
5643 """Query cluster configuration.
5648 def ExpandNames(self):
5649 self.needed_locks = {}
5651 def Exec(self, feedback_fn):
5652 """Return cluster config.
5655 cluster = self.cfg.GetClusterInfo()
5656 os_hvp = {}
5658 # Filter just for enabled hypervisors
5659 for os_name, hv_dict in cluster.os_hvp.items():
5660 os_hvp[os_name] = {}
5661 for hv_name, hv_params in hv_dict.items():
5662 if hv_name in cluster.enabled_hypervisors:
5663 os_hvp[os_name][hv_name] = hv_params
5665 # Convert ip_family to ip_version
5666 primary_ip_version = constants.IP4_VERSION
5667 if cluster.primary_ip_family == netutils.IP6Address.family:
5668 primary_ip_version = constants.IP6_VERSION
5670 result = {
5671 "software_version": constants.RELEASE_VERSION,
5672 "protocol_version": constants.PROTOCOL_VERSION,
5673 "config_version": constants.CONFIG_VERSION,
5674 "os_api_version": max(constants.OS_API_VERSIONS),
5675 "export_version": constants.EXPORT_VERSION,
5676 "architecture": (platform.architecture()[0], platform.machine()),
5677 "name": cluster.cluster_name,
5678 "master": cluster.master_node,
5679 "default_hypervisor": cluster.enabled_hypervisors[0],
5680 "enabled_hypervisors": cluster.enabled_hypervisors,
5681 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5682 for hypervisor_name in cluster.enabled_hypervisors]),
5684 "beparams": cluster.beparams,
5685 "osparams": cluster.osparams,
5686 "nicparams": cluster.nicparams,
5687 "ndparams": cluster.ndparams,
5688 "candidate_pool_size": cluster.candidate_pool_size,
5689 "master_netdev": cluster.master_netdev,
5690 "master_netmask": cluster.master_netmask,
5691 "use_external_mip_script": cluster.use_external_mip_script,
5692 "volume_group_name": cluster.volume_group_name,
5693 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5694 "file_storage_dir": cluster.file_storage_dir,
5695 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5696 "maintain_node_health": cluster.maintain_node_health,
5697 "ctime": cluster.ctime,
5698 "mtime": cluster.mtime,
5699 "uuid": cluster.uuid,
5700 "tags": list(cluster.GetTags()),
5701 "uid_pool": cluster.uid_pool,
5702 "default_iallocator": cluster.default_iallocator,
5703 "reserved_lvs": cluster.reserved_lvs,
5704 "primary_ip_version": primary_ip_version,
5705 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5706 "hidden_os": cluster.hidden_os,
5707 "blacklisted_os": cluster.blacklisted_os,
5713 class LUClusterConfigQuery(NoHooksLU):
5714 """Return configuration values.
5718 _FIELDS_DYNAMIC = utils.FieldSet()
5719 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5720 "watcher_pause", "volume_group_name")
5722 def CheckArguments(self):
5723 _CheckOutputFields(static=self._FIELDS_STATIC,
5724 dynamic=self._FIELDS_DYNAMIC,
5725 selected=self.op.output_fields)
5727 def ExpandNames(self):
5728 self.needed_locks = {}
5730 def Exec(self, feedback_fn):
5731 """Dump a representation of the cluster config to the standard output.
5735 for field in self.op.output_fields:
5736 if field == "cluster_name":
5737 entry = self.cfg.GetClusterName()
5738 elif field == "master_node":
5739 entry = self.cfg.GetMasterNode()
5740 elif field == "drain_flag":
5741 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5742 elif field == "watcher_pause":
5743 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5744 elif field == "volume_group_name":
5745 entry = self.cfg.GetVGName()
5747 raise errors.ParameterError(field)
5748 values.append(entry)
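# Illustrative example (assumed invocation): with
#   output_fields = ["cluster_name", "volume_group_name"]
# the loop above yields a flat list in the same order, e.g.
#   ["cluster.example.com", "xenvg"]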
5752 class LUInstanceActivateDisks(NoHooksLU):
5753 """Bring up an instance's disks.
5758 def ExpandNames(self):
5759 self._ExpandAndLockInstance()
5760 self.needed_locks[locking.LEVEL_NODE] = []
5761 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5763 def DeclareLocks(self, level):
5764 if level == locking.LEVEL_NODE:
5765 self._LockInstancesNodes()
5767 def CheckPrereq(self):
5768 """Check prerequisites.
5770 This checks that the instance is in the cluster.
5773 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5774 assert self.instance is not None, \
5775 "Cannot retrieve locked instance %s" % self.op.instance_name
5776 _CheckNodeOnline(self, self.instance.primary_node)
5778 def Exec(self, feedback_fn):
5779 """Activate the disks.
5782 disks_ok, disks_info = \
5783 _AssembleInstanceDisks(self, self.instance,
5784 ignore_size=self.op.ignore_size)
5786 raise errors.OpExecError("Cannot activate block devices")
5791 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5793 """Prepare the block devices for an instance.
5795 This sets up the block devices on all nodes.
5797 @type lu: L{LogicalUnit}
5798 @param lu: the logical unit on whose behalf we execute
5799 @type instance: L{objects.Instance}
5800 @param instance: the instance whose disks we assemble
5801 @type disks: list of L{objects.Disk} or None
5802 @param disks: which disks to assemble (or all, if None)
5803 @type ignore_secondaries: boolean
5804 @param ignore_secondaries: if true, errors on secondary nodes
5805 won't result in an error return from the function
5806 @type ignore_size: boolean
5807 @param ignore_size: if true, the current known size of the disk
5808 will not be used during the disk activation, useful for cases
5809 when the size is wrong
5810 @return: a tuple (disks_ok, device_info), where disks_ok is False if
5811 the operation failed, and device_info is a list of
5812 (host, instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
5817 iname = instance.name
5818 disks = _ExpandCheckDisks(instance, disks)
5820 # With the two-pass mechanism we try to reduce the window of
5821 # opportunity for the race condition of switching DRBD to primary
5822 # before handshaking has occurred, but we do not eliminate it
5824 # The proper fix would be to wait (with some limits) until the
5825 # connection has been made and drbd transitions from WFConnection
5826 # into any other network-connected state (Connected, SyncTarget,
5829 # 1st pass, assemble on all nodes in secondary mode
5830 for idx, inst_disk in enumerate(disks):
5831 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5833 node_disk = node_disk.Copy()
5834 node_disk.UnsetSize()
5835 lu.cfg.SetDiskID(node_disk, node)
5836 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5837 msg = result.fail_msg
5839 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5840 " (is_primary=False, pass=1): %s",
5841 inst_disk.iv_name, node, msg)
5842 if not ignore_secondaries:
5845 # FIXME: race condition on drbd migration to primary
5847 # 2nd pass, do only the primary node
5848 for idx, inst_disk in enumerate(disks):
5851 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5852 if node != instance.primary_node:
5855 node_disk = node_disk.Copy()
5856 node_disk.UnsetSize()
5857 lu.cfg.SetDiskID(node_disk, node)
5858 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5859 msg = result.fail_msg
5861 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5862 " (is_primary=True, pass=2): %s",
5863 inst_disk.iv_name, node, msg)
5866 dev_path = result.payload
5868 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5870 # leave the disks configured for the primary node
5871 # this is a workaround that would be fixed better by
5872 # improving the logical/physical id handling
5874 lu.cfg.SetDiskID(disk, instance.primary_node)
5876 return disks_ok, device_info
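# Illustrative return value (node name and device path assumed): for a
# healthy single-disk DRBD instance the function may return
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# note that device_info only contains the primary node's devices, appended
# during the second pass above.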
5879 def _StartInstanceDisks(lu, instance, force):
5880 """Start the disks of an instance.
5883 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5884 ignore_secondaries=force)
5886 _ShutdownInstanceDisks(lu, instance)
5887 if force is not None and not force:
5888 lu.proc.LogWarning("", hint="If the message above refers to a"
5890 " secondary node, you can retry the operation using '--force'.")
5891 raise errors.OpExecError("Disk consistency error")
5894 class LUInstanceDeactivateDisks(NoHooksLU):
5895 """Shutdown an instance's disks.
5900 def ExpandNames(self):
5901 self._ExpandAndLockInstance()
5902 self.needed_locks[locking.LEVEL_NODE] = []
5903 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5905 def DeclareLocks(self, level):
5906 if level == locking.LEVEL_NODE:
5907 self._LockInstancesNodes()
5909 def CheckPrereq(self):
5910 """Check prerequisites.
5912 This checks that the instance is in the cluster.
5915 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5916 assert self.instance is not None, \
5917 "Cannot retrieve locked instance %s" % self.op.instance_name
5919 def Exec(self, feedback_fn):
5920 """Deactivate the disks
5923 instance = self.instance
5925 _ShutdownInstanceDisks(self, instance)
5927 _SafeShutdownInstanceDisks(self, instance)
5930 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5931 """Shutdown block devices of an instance.
5933 This function checks that the instance is not running before calling
5934 _ShutdownInstanceDisks.
5937 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5938 _ShutdownInstanceDisks(lu, instance, disks=disks)
5941 def _ExpandCheckDisks(instance, disks):
5942 """Return the instance disks selected by the disks list
5944 @type disks: list of L{objects.Disk} or None
5945 @param disks: selected disks
5946 @rtype: list of L{objects.Disk}
5947 @return: selected instance disks to act on
5951 return instance.disks
5953 if not set(disks).issubset(instance.disks):
5954 raise errors.ProgrammerError("Can only act on disks belonging to the"
5959 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5960 """Shutdown block devices of an instance.
5962 This does the shutdown on all nodes of the instance.
5964 If ignore_primary is false, errors on the primary node are
5969 disks = _ExpandCheckDisks(instance, disks)
5972 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5973 lu.cfg.SetDiskID(top_disk, node)
5974 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5975 msg = result.fail_msg
5977 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5978 disk.iv_name, node, msg)
5979 if ((node == instance.primary_node and not ignore_primary) or
5980 (node != instance.primary_node and not result.offline)):
5985 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5986 """Checks if a node has enough free memory.
5988 This function checks if a given node has the needed amount of free
5989 memory. In case the node has less memory or we cannot get the
5990 information from the node, this function raises an OpPrereqError
5993 @type lu: C{LogicalUnit}
5994 @param lu: a logical unit from which we get configuration data
5996 @param node: the node to check
5997 @type reason: C{str}
5998 @param reason: string to use in the error message
5999 @type requested: C{int}
6000 @param requested: the amount of memory in MiB to check for
6001 @type hypervisor_name: C{str}
6002 @param hypervisor_name: the hypervisor to ask for memory stats
6003 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6004 we cannot check the node
6007 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6008 nodeinfo[node].Raise("Can't get data from node %s" % node,
6009 prereq=True, ecode=errors.ECODE_ENVIRON)
6010 free_mem = nodeinfo[node].payload.get("memory_free", None)
6011 if not isinstance(free_mem, int):
6012 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6013 " was '%s'" % (node, free_mem),
6014 errors.ECODE_ENVIRON)
6015 if requested > free_mem:
6016 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6017 " needed %s MiB, available %s MiB" %
6018 (node, reason, requested, free_mem),
6022 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6023 """Checks if nodes have enough free disk space in the all VGs.
6025 This function check if all given nodes have the needed amount of
6026 free disk. In case any node has less disk or we cannot get the
6027 information from the node, this function raise an OpPrereqError
6030 @type lu: C{LogicalUnit}
6031 @param lu: a logical unit from which we get configuration data
6032 @type nodenames: C{list}
6033 @param nodenames: the list of node names to check
6034 @type req_sizes: C{dict}
6035 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
6037 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6038 or we cannot check the node
6041 for vg, req_size in req_sizes.items():
6042 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
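# Illustrative example (node and VG names assumed): req_sizes maps each
# volume group to the free space required in MiB, e.g.
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 2048})
# checks that both nodes have at least 2 GiB free in volume group "xenvg".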
6045 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6046 """Checks if nodes have enough free disk space in the specified VG.
6048 This function checks if all given nodes have the needed amount of
6049 free disk. In case any node has less disk or we cannot get the
6050 information from the node, this function raises an OpPrereqError
6053 @type lu: C{LogicalUnit}
6054 @param lu: a logical unit from which we get configuration data
6055 @type nodenames: C{list}
6056 @param nodenames: the list of node names to check
6058 @param vg: the volume group to check
6059 @type requested: C{int}
6060 @param requested: the amount of disk in MiB to check for
6061 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6062 or we cannot check the node
6065 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6066 for node in nodenames:
6067 info = nodeinfo[node]
6068 info.Raise("Cannot get current information from node %s" % node,
6069 prereq=True, ecode=errors.ECODE_ENVIRON)
6070 vg_free = info.payload.get("vg_free", None)
6071 if not isinstance(vg_free, int):
6072 raise errors.OpPrereqError("Can't compute free disk space on node"
6073 " %s for vg %s, result was '%s'" %
6074 (node, vg, vg_free), errors.ECODE_ENVIRON)
6075 if requested > vg_free:
6076 raise errors.OpPrereqError("Not enough disk space on target node %s"
6077 " vg %s: required %d MiB, available %d MiB" %
6078 (node, vg, requested, vg_free),
6082 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6083 """Checks if nodes have enough physical CPUs
6085 This function checks if all given nodes have the needed number of
6086 physical CPUs. In case any node has less CPUs or we cannot get the
6087 information from the node, this function raises an OpPrereqError
6090 @type lu: C{LogicalUnit}
6091 @param lu: a logical unit from which we get configuration data
6092 @type nodenames: C{list}
6093 @param nodenames: the list of node names to check
6094 @type requested: C{int}
6095 @param requested: the minimum acceptable number of physical CPUs
6096 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6097 or we cannot check the node
6100 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6101 for node in nodenames:
6102 info = nodeinfo[node]
6103 info.Raise("Cannot get current information from node %s" % node,
6104 prereq=True, ecode=errors.ECODE_ENVIRON)
6105 num_cpus = info.payload.get("cpu_total", None)
6106 if not isinstance(num_cpus, int):
6107 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6108 " on node %s, result was '%s'" %
6109 (node, num_cpus), errors.ECODE_ENVIRON)
6110 if requested > num_cpus:
6111 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6112 "required" % (node, num_cpus, requested),
6116 class LUInstanceStartup(LogicalUnit):
6117 """Starts an instance.
6120 HPATH = "instance-start"
6121 HTYPE = constants.HTYPE_INSTANCE
6124 def CheckArguments(self):
6126 if self.op.beparams:
6127 # fill the beparams dict
6128 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6130 def ExpandNames(self):
6131 self._ExpandAndLockInstance()
6133 def BuildHooksEnv(self):
6136 This runs on master, primary and secondary nodes of the instance.
6140 "FORCE": self.op.force,
6143 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6147 def BuildHooksNodes(self):
6148 """Build hooks nodes.
6151 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6154 def CheckPrereq(self):
6155 """Check prerequisites.
6157 This checks that the instance is in the cluster.
6160 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6161 assert self.instance is not None, \
6162 "Cannot retrieve locked instance %s" % self.op.instance_name
6165 if self.op.hvparams:
6166 # check hypervisor parameter syntax (locally)
6167 cluster = self.cfg.GetClusterInfo()
6168 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6169 filled_hvp = cluster.FillHV(instance)
6170 filled_hvp.update(self.op.hvparams)
6171 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6172 hv_type.CheckParameterSyntax(filled_hvp)
6173 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6175 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6177 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6179 if self.primary_offline and self.op.ignore_offline_nodes:
6180 self.proc.LogWarning("Ignoring offline primary node")
6182 if self.op.hvparams or self.op.beparams:
6183 self.proc.LogWarning("Overridden parameters are ignored")
6185 _CheckNodeOnline(self, instance.primary_node)
6187 bep = self.cfg.GetClusterInfo().FillBE(instance)
6189 # check bridges existence
6190 _CheckInstanceBridgesExist(self, instance)
6192 remote_info = self.rpc.call_instance_info(instance.primary_node,
6194 instance.hypervisor)
6195 remote_info.Raise("Error checking node %s" % instance.primary_node,
6196 prereq=True, ecode=errors.ECODE_ENVIRON)
6197 if not remote_info.payload: # not running already
6198 _CheckNodeFreeMemory(self, instance.primary_node,
6199 "starting instance %s" % instance.name,
6200 bep[constants.BE_MEMORY], instance.hypervisor)
6202 def Exec(self, feedback_fn):
6203 """Start the instance.
6206 instance = self.instance
6207 force = self.op.force
6209 if not self.op.no_remember:
6210 self.cfg.MarkInstanceUp(instance.name)
6212 if self.primary_offline:
6213 assert self.op.ignore_offline_nodes
6214 self.proc.LogInfo("Primary node offline, marked instance as started")
6216 node_current = instance.primary_node
6218 _StartInstanceDisks(self, instance, force)
6221 self.rpc.call_instance_start(node_current,
6222 (instance, self.op.hvparams,
6224 self.op.startup_paused)
6225 msg = result.fail_msg
6227 _ShutdownInstanceDisks(self, instance)
6228 raise errors.OpExecError("Could not start instance: %s" % msg)
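# Illustrative example (assumed client-side usage, names as in ganeti's
# opcodes module): a one-off start with temporary overrides could look like
#   opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                             beparams={constants.BE_MEMORY: 2048},
#                             force=False)
# the overrides are checked in CheckPrereq above and handed to the node
# daemon via call_instance_start; Exec does not store them in the instance
# configuration.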
6231 class LUInstanceReboot(LogicalUnit):
6232 """Reboot an instance.
6235 HPATH = "instance-reboot"
6236 HTYPE = constants.HTYPE_INSTANCE
6239 def ExpandNames(self):
6240 self._ExpandAndLockInstance()
6242 def BuildHooksEnv(self):
6245 This runs on master, primary and secondary nodes of the instance.
6249 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6250 "REBOOT_TYPE": self.op.reboot_type,
6251 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6254 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6258 def BuildHooksNodes(self):
6259 """Build hooks nodes.
6262 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6265 def CheckPrereq(self):
6266 """Check prerequisites.
6268 This checks that the instance is in the cluster.
6271 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6272 assert self.instance is not None, \
6273 "Cannot retrieve locked instance %s" % self.op.instance_name
6274 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6275 _CheckNodeOnline(self, instance.primary_node)
6277 # check bridges existence
6278 _CheckInstanceBridgesExist(self, instance)
6280 def Exec(self, feedback_fn):
6281 """Reboot the instance.
6284 instance = self.instance
6285 ignore_secondaries = self.op.ignore_secondaries
6286 reboot_type = self.op.reboot_type
6288 remote_info = self.rpc.call_instance_info(instance.primary_node,
6290 instance.hypervisor)
6291 remote_info.Raise("Error checking node %s" % instance.primary_node)
6292 instance_running = bool(remote_info.payload)
6294 node_current = instance.primary_node
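# Soft and hard reboots of a running instance are delegated to the node
# daemon via call_instance_reboot below; a full reboot (or a reboot of a
# stopped instance) is implemented as an explicit shutdown followed by a
# fresh start.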
6296 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6297 constants.INSTANCE_REBOOT_HARD]:
6298 for disk in instance.disks:
6299 self.cfg.SetDiskID(disk, node_current)
6300 result = self.rpc.call_instance_reboot(node_current, instance,
6302 self.op.shutdown_timeout)
6303 result.Raise("Could not reboot instance")
6305 if instance_running:
6306 result = self.rpc.call_instance_shutdown(node_current, instance,
6307 self.op.shutdown_timeout)
6308 result.Raise("Could not shutdown instance for full reboot")
6309 _ShutdownInstanceDisks(self, instance)
6311 self.LogInfo("Instance %s was already stopped, starting now",
6313 _StartInstanceDisks(self, instance, ignore_secondaries)
6314 result = self.rpc.call_instance_start(node_current,
6315 (instance, None, None), False)
6316 msg = result.fail_msg
6318 _ShutdownInstanceDisks(self, instance)
6319 raise errors.OpExecError("Could not start instance for"
6320 " full reboot: %s" % msg)
6322 self.cfg.MarkInstanceUp(instance.name)
6325 class LUInstanceShutdown(LogicalUnit):
6326 """Shutdown an instance.
6329 HPATH = "instance-stop"
6330 HTYPE = constants.HTYPE_INSTANCE
6333 def ExpandNames(self):
6334 self._ExpandAndLockInstance()
6336 def BuildHooksEnv(self):
6339 This runs on master, primary and secondary nodes of the instance.
6342 env = _BuildInstanceHookEnvByObject(self, self.instance)
6343 env["TIMEOUT"] = self.op.timeout
6346 def BuildHooksNodes(self):
6347 """Build hooks nodes.
6350 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6353 def CheckPrereq(self):
6354 """Check prerequisites.
6356 This checks that the instance is in the cluster.
6359 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6360 assert self.instance is not None, \
6361 "Cannot retrieve locked instance %s" % self.op.instance_name
6363 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6365 self.primary_offline = \
6366 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6368 if self.primary_offline and self.op.ignore_offline_nodes:
6369 self.proc.LogWarning("Ignoring offline primary node")
6371 _CheckNodeOnline(self, self.instance.primary_node)
6373 def Exec(self, feedback_fn):
6374 """Shutdown the instance.
6377 instance = self.instance
6378 node_current = instance.primary_node
6379 timeout = self.op.timeout
6381 if not self.op.no_remember:
6382 self.cfg.MarkInstanceDown(instance.name)
6384 if self.primary_offline:
6385 assert self.op.ignore_offline_nodes
6386 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6388 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6389 msg = result.fail_msg
6391 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6393 _ShutdownInstanceDisks(self, instance)
6396 class LUInstanceReinstall(LogicalUnit):
6397 """Reinstall an instance.
6400 HPATH = "instance-reinstall"
6401 HTYPE = constants.HTYPE_INSTANCE
6404 def ExpandNames(self):
6405 self._ExpandAndLockInstance()
6407 def BuildHooksEnv(self):
6410 This runs on master, primary and secondary nodes of the instance.
6413 return _BuildInstanceHookEnvByObject(self, self.instance)
6415 def BuildHooksNodes(self):
6416 """Build hooks nodes.
6419 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6422 def CheckPrereq(self):
6423 """Check prerequisites.
6425 This checks that the instance is in the cluster and is not running.
6428 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6429 assert instance is not None, \
6430 "Cannot retrieve locked instance %s" % self.op.instance_name
6431 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6432 " offline, cannot reinstall")
6433 for node in instance.secondary_nodes:
6434 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6435 " cannot reinstall")
6437 if instance.disk_template == constants.DT_DISKLESS:
6438 raise errors.OpPrereqError("Instance '%s' has no disks" %
6439 self.op.instance_name,
6441 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6443 if self.op.os_type is not None:
6445 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6446 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6447 instance_os = self.op.os_type
6449 instance_os = instance.os
6451 nodelist = list(instance.all_nodes)
6453 if self.op.osparams:
6454 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6455 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6456 self.os_inst = i_osdict # the new dict (without defaults)
6460 self.instance = instance
6462 def Exec(self, feedback_fn):
6463 """Reinstall the instance.
6466 inst = self.instance
6468 if self.op.os_type is not None:
6469 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6470 inst.os = self.op.os_type
6471 # Write to configuration
6472 self.cfg.Update(inst, feedback_fn)
6474 _StartInstanceDisks(self, inst, None)
6476 feedback_fn("Running the instance OS create scripts...")
6477 # FIXME: pass debug option from opcode to backend
6478 result = self.rpc.call_instance_os_add(inst.primary_node,
6479 (inst, self.os_inst), True,
6480 self.op.debug_level)
6481 result.Raise("Could not install OS for instance %s on node %s" %
6482 (inst.name, inst.primary_node))
6484 _ShutdownInstanceDisks(self, inst)
6487 class LUInstanceRecreateDisks(LogicalUnit):
6488 """Recreate an instance's missing disks.
6491 HPATH = "instance-recreate-disks"
6492 HTYPE = constants.HTYPE_INSTANCE
6495 def CheckArguments(self):
6496 # normalise the disk list
6497 self.op.disks = sorted(frozenset(self.op.disks))
6499 def ExpandNames(self):
6500 self._ExpandAndLockInstance()
6501 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6503 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6504 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6506 self.needed_locks[locking.LEVEL_NODE] = []
6508 def DeclareLocks(self, level):
6509 if level == locking.LEVEL_NODE:
6510 # if we replace the nodes, we only need to lock the old primary,
6511 # otherwise we need to lock all nodes for disk re-creation
6512 primary_only = bool(self.op.nodes)
6513 self._LockInstancesNodes(primary_only=primary_only)
6514 elif level == locking.LEVEL_NODE_RES:
6516 self.needed_locks[locking.LEVEL_NODE_RES] = \
6517 self.needed_locks[locking.LEVEL_NODE][:]
6519 def BuildHooksEnv(self):
6522 This runs on master, primary and secondary nodes of the instance.
6525 return _BuildInstanceHookEnvByObject(self, self.instance)
6527 def BuildHooksNodes(self):
6528 """Build hooks nodes.
6531 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6534 def CheckPrereq(self):
6535 """Check prerequisites.
6537 This checks that the instance is in the cluster and is not running.
6540 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6541 assert instance is not None, \
6542 "Cannot retrieve locked instance %s" % self.op.instance_name
6544 if len(self.op.nodes) != len(instance.all_nodes):
6545 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6546 " %d replacement nodes were specified" %
6547 (instance.name, len(instance.all_nodes),
6548 len(self.op.nodes)),
6550 assert instance.disk_template != constants.DT_DRBD8 or \
6551 len(self.op.nodes) == 2
6552 assert instance.disk_template != constants.DT_PLAIN or \
6553 len(self.op.nodes) == 1
6554 primary_node = self.op.nodes[0]
6556 primary_node = instance.primary_node
6557 _CheckNodeOnline(self, primary_node)
6559 if instance.disk_template == constants.DT_DISKLESS:
6560 raise errors.OpPrereqError("Instance '%s' has no disks" %
6561 self.op.instance_name, errors.ECODE_INVAL)
6562 # if we replace nodes *and* the old primary is offline, we don't
6564 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6565 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6566 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6567 if not (self.op.nodes and old_pnode.offline):
6568 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6569 msg="cannot recreate disks")
6571 if not self.op.disks:
6572 self.op.disks = range(len(instance.disks))
6574 for idx in self.op.disks:
6575 if idx >= len(instance.disks):
6576 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6578 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6579 raise errors.OpPrereqError("Can't recreate disks partially and"
6580 " change the nodes at the same time",
6582 self.instance = instance
6584 def Exec(self, feedback_fn):
6585 """Recreate the disks.
6588 instance = self.instance
6590 assert (self.owned_locks(locking.LEVEL_NODE) ==
6591 self.owned_locks(locking.LEVEL_NODE_RES))
6594 mods = [] # keeps track of needed logical_id changes
6596 for idx, disk in enumerate(instance.disks):
6597 if idx not in self.op.disks: # disk idx has not been passed in
6600 # update secondaries for disks, if needed
6602 if disk.dev_type == constants.LD_DRBD8:
6603 # need to update the nodes and minors
6604 assert len(self.op.nodes) == 2
6605 assert len(disk.logical_id) == 6 # otherwise disk internals
6607 (_, _, old_port, _, _, old_secret) = disk.logical_id
6608 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6609 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6610 new_minors[0], new_minors[1], old_secret)
6611 assert len(disk.logical_id) == len(new_id)
6612 mods.append((idx, new_id))
6614 # now that we have passed all asserts above, we can apply the mods
6615 # in a single run (to avoid partial changes)
6616 for idx, new_id in mods:
6617 instance.disks[idx].logical_id = new_id
6619 # change primary node, if needed
6621 instance.primary_node = self.op.nodes[0]
6622 self.LogWarning("Changing the instance's nodes; you will have to"
6623 " remove any disks left on the old nodes manually")
6626 self.cfg.Update(instance, feedback_fn)
6628 _CreateDisks(self, instance, to_skip=to_skip)
6631 class LUInstanceRename(LogicalUnit):
6632 """Rename an instance.
6635 HPATH = "instance-rename"
6636 HTYPE = constants.HTYPE_INSTANCE
6638 def CheckArguments(self):
6642 if self.op.ip_check and not self.op.name_check:
6643 # TODO: make the ip check more flexible and not depend on the name check
6644 raise errors.OpPrereqError("IP address check requires a name check",
6647 def BuildHooksEnv(self):
6650 This runs on master, primary and secondary nodes of the instance.
6653 env = _BuildInstanceHookEnvByObject(self, self.instance)
6654 env["INSTANCE_NEW_NAME"] = self.op.new_name
6657 def BuildHooksNodes(self):
6658 """Build hooks nodes.
6661 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6664 def CheckPrereq(self):
6665 """Check prerequisites.
6667 This checks that the instance is in the cluster and is not running.
6670 self.op.instance_name = _ExpandInstanceName(self.cfg,
6671 self.op.instance_name)
6672 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6673 assert instance is not None
6674 _CheckNodeOnline(self, instance.primary_node)
6675 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6676 msg="cannot rename")
6677 self.instance = instance
6679 new_name = self.op.new_name
6680 if self.op.name_check:
6681 hostname = netutils.GetHostname(name=new_name)
6682 if hostname.name != new_name:
6683 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6685 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6686 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6687 " same as given hostname '%s'") %
6688 (hostname.name, self.op.new_name),
6690 new_name = self.op.new_name = hostname.name
6691 if (self.op.ip_check and
6692 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6693 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6694 (hostname.ip, new_name),
6695 errors.ECODE_NOTUNIQUE)
6697 instance_list = self.cfg.GetInstanceList()
6698 if new_name in instance_list and new_name != instance.name:
6699 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6700 new_name, errors.ECODE_EXISTS)
6702 def Exec(self, feedback_fn):
6703 """Rename the instance.
6706 inst = self.instance
6707 old_name = inst.name
6709 rename_file_storage = False
6710 if (inst.disk_template in constants.DTS_FILEBASED and
6711 self.op.new_name != inst.name):
6712 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6713 rename_file_storage = True
6715 self.cfg.RenameInstance(inst.name, self.op.new_name)
6716 # Change the instance lock. This is definitely safe while we hold the BGL.
6717 # Otherwise the new lock would have to be added in acquired mode.
6719 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6720 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6722 # re-read the instance from the configuration after rename
6723 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6725 if rename_file_storage:
6726 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6727 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6728 old_file_storage_dir,
6729 new_file_storage_dir)
6730 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6731 " (but the instance has been renamed in Ganeti)" %
6732 (inst.primary_node, old_file_storage_dir,
6733 new_file_storage_dir))
6735 _StartInstanceDisks(self, inst, None)
6737 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6738 old_name, self.op.debug_level)
6739 msg = result.fail_msg
6741 msg = ("Could not run OS rename script for instance %s on node %s"
6742 " (but the instance has been renamed in Ganeti): %s" %
6743 (inst.name, inst.primary_node, msg))
6744 self.proc.LogWarning(msg)
6746 _ShutdownInstanceDisks(self, inst)
6751 class LUInstanceRemove(LogicalUnit):
6752 """Remove an instance.
6755 HPATH = "instance-remove"
6756 HTYPE = constants.HTYPE_INSTANCE
6759 def ExpandNames(self):
6760 self._ExpandAndLockInstance()
6761 self.needed_locks[locking.LEVEL_NODE] = []
6762 self.needed_locks[locking.LEVEL_NODE_RES] = []
6763 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6765 def DeclareLocks(self, level):
6766 if level == locking.LEVEL_NODE:
6767 self._LockInstancesNodes()
6768 elif level == locking.LEVEL_NODE_RES:
6770 self.needed_locks[locking.LEVEL_NODE_RES] = \
6771 self.needed_locks[locking.LEVEL_NODE][:]
6773 def BuildHooksEnv(self):
6776 This runs on master, primary and secondary nodes of the instance.
6779 env = _BuildInstanceHookEnvByObject(self, self.instance)
6780 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6783 def BuildHooksNodes(self):
6784 """Build hooks nodes.
6787 nl = [self.cfg.GetMasterNode()]
6788 nl_post = list(self.instance.all_nodes) + nl
6789 return (nl, nl_post)
6791 def CheckPrereq(self):
6792 """Check prerequisites.
6794 This checks that the instance is in the cluster.
6797 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6798 assert self.instance is not None, \
6799 "Cannot retrieve locked instance %s" % self.op.instance_name
6801 def Exec(self, feedback_fn):
6802 """Remove the instance.
6805 instance = self.instance
6806 logging.info("Shutting down instance %s on node %s",
6807 instance.name, instance.primary_node)
6809 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6810 self.op.shutdown_timeout)
6811 msg = result.fail_msg
6813 if self.op.ignore_failures:
6814 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6816 raise errors.OpExecError("Could not shutdown instance %s on"
6818 (instance.name, instance.primary_node, msg))
6820 assert (self.owned_locks(locking.LEVEL_NODE) ==
6821 self.owned_locks(locking.LEVEL_NODE_RES))
6822 assert not (set(instance.all_nodes) -
6823 self.owned_locks(locking.LEVEL_NODE)), \
6824 "Not owning correct locks"
6826 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6829 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6830 """Utility function to remove an instance.
6833 logging.info("Removing block devices for instance %s", instance.name)
6835 if not _RemoveDisks(lu, instance):
6836 if not ignore_failures:
6837 raise errors.OpExecError("Can't remove instance's disks")
6838 feedback_fn("Warning: can't remove instance's disks")
6840 logging.info("Removing instance %s out of cluster config", instance.name)
6842 lu.cfg.RemoveInstance(instance.name)
6844 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6845 "Instance lock removal conflict"
6847 # Remove lock for the instance
6848 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6851 class LUInstanceQuery(NoHooksLU):
6852 """Logical unit for querying instances.
6855 # pylint: disable=W0142
6858 def CheckArguments(self):
6859 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6860 self.op.output_fields, self.op.use_locking)
6862 def ExpandNames(self):
6863 self.iq.ExpandNames(self)
6865 def DeclareLocks(self, level):
6866 self.iq.DeclareLocks(self, level)
6868 def Exec(self, feedback_fn):
6869 return self.iq.OldStyleQuery(self)
6872 class LUInstanceFailover(LogicalUnit):
6873 """Failover an instance.
6876 HPATH = "instance-failover"
6877 HTYPE = constants.HTYPE_INSTANCE
6880 def CheckArguments(self):
6881 """Check the arguments.
6884 self.iallocator = getattr(self.op, "iallocator", None)
6885 self.target_node = getattr(self.op, "target_node", None)
6887 def ExpandNames(self):
6888 self._ExpandAndLockInstance()
6890 if self.op.target_node is not None:
6891 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6893 self.needed_locks[locking.LEVEL_NODE] = []
6894 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6896 ignore_consistency = self.op.ignore_consistency
6897 shutdown_timeout = self.op.shutdown_timeout
6898 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6901 ignore_consistency=ignore_consistency,
6902 shutdown_timeout=shutdown_timeout)
6903 self.tasklets = [self._migrater]
6905 def DeclareLocks(self, level):
6906 if level == locking.LEVEL_NODE:
6907 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6908 if instance.disk_template in constants.DTS_EXT_MIRROR:
6909 if self.op.target_node is None:
6910 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6912 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6913 self.op.target_node]
6914 del self.recalculate_locks[locking.LEVEL_NODE]
6916 self._LockInstancesNodes()
6918 def BuildHooksEnv(self):
6921 This runs on master, primary and secondary nodes of the instance.
6924 instance = self._migrater.instance
6925 source_node = instance.primary_node
6926 target_node = self.op.target_node
6928 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6929 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6930 "OLD_PRIMARY": source_node,
6931 "NEW_PRIMARY": target_node,
6934 if instance.disk_template in constants.DTS_INT_MIRROR:
6935 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6936 env["NEW_SECONDARY"] = source_node
6938 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6940 env.update(_BuildInstanceHookEnvByObject(self, instance))
6944 def BuildHooksNodes(self):
6945 """Build hooks nodes.
6948 instance = self._migrater.instance
6949 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6950 return (nl, nl + [instance.primary_node])
6953 class LUInstanceMigrate(LogicalUnit):
6954 """Migrate an instance.
6956 This is migration without shutting down the instance, as opposed to
6957 failover, which is done with a shutdown.
6960 HPATH = "instance-migrate"
6961 HTYPE = constants.HTYPE_INSTANCE
6964 def ExpandNames(self):
6965 self._ExpandAndLockInstance()
6967 if self.op.target_node is not None:
6968 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6970 self.needed_locks[locking.LEVEL_NODE] = []
6971 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6973 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6974 cleanup=self.op.cleanup,
6976 fallback=self.op.allow_failover)
6977 self.tasklets = [self._migrater]
6979 def DeclareLocks(self, level):
6980 if level == locking.LEVEL_NODE:
6981 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6982 if instance.disk_template in constants.DTS_EXT_MIRROR:
6983 if self.op.target_node is None:
6984 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6986 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6987 self.op.target_node]
6988 del self.recalculate_locks[locking.LEVEL_NODE]
6990 self._LockInstancesNodes()
6992 def BuildHooksEnv(self):
6995 This runs on master, primary and secondary nodes of the instance.
6998 instance = self._migrater.instance
6999 source_node = instance.primary_node
7000 target_node = self.op.target_node
7001 env = _BuildInstanceHookEnvByObject(self, instance)
7003 "MIGRATE_LIVE": self._migrater.live,
7004 "MIGRATE_CLEANUP": self.op.cleanup,
7005 "OLD_PRIMARY": source_node,
7006 "NEW_PRIMARY": target_node,
7009 if instance.disk_template in constants.DTS_INT_MIRROR:
7010 env["OLD_SECONDARY"] = target_node
7011 env["NEW_SECONDARY"] = source_node
7013 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7017 def BuildHooksNodes(self):
7018 """Build hooks nodes.
7021 instance = self._migrater.instance
7022 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7023 return (nl, nl + [instance.primary_node])
7026 class LUInstanceMove(LogicalUnit):
7027 """Move an instance by data-copying.
7030 HPATH = "instance-move"
7031 HTYPE = constants.HTYPE_INSTANCE
7034 def ExpandNames(self):
7035 self._ExpandAndLockInstance()
7036 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7037 self.op.target_node = target_node
7038 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7039 self.needed_locks[locking.LEVEL_NODE_RES] = []
7040 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7042 def DeclareLocks(self, level):
7043 if level == locking.LEVEL_NODE:
7044 self._LockInstancesNodes(primary_only=True)
7045 elif level == locking.LEVEL_NODE_RES:
7047 self.needed_locks[locking.LEVEL_NODE_RES] = \
7048 self.needed_locks[locking.LEVEL_NODE][:]
7050 def BuildHooksEnv(self):
7053 This runs on master, primary and secondary nodes of the instance.
7057 "TARGET_NODE": self.op.target_node,
7058 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7060 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7063 def BuildHooksNodes(self):
7064 """Build hooks nodes.
7068 self.cfg.GetMasterNode(),
7069 self.instance.primary_node,
7070 self.op.target_node,
7074 def CheckPrereq(self):
7075 """Check prerequisites.
7077 This checks that the instance is in the cluster.
7080 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7081 assert self.instance is not None, \
7082 "Cannot retrieve locked instance %s" % self.op.instance_name
7084 node = self.cfg.GetNodeInfo(self.op.target_node)
7085 assert node is not None, \
7086 "Cannot retrieve locked node %s" % self.op.target_node
7088 self.target_node = target_node = node.name
7090 if target_node == instance.primary_node:
7091 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7092 (instance.name, target_node),
7095 bep = self.cfg.GetClusterInfo().FillBE(instance)
7097 for idx, dsk in enumerate(instance.disks):
7098 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7099 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7100 " cannot copy" % idx, errors.ECODE_STATE)
7102 _CheckNodeOnline(self, target_node)
7103 _CheckNodeNotDrained(self, target_node)
7104 _CheckNodeVmCapable(self, target_node)
7106 if instance.admin_state == constants.ADMINST_UP:
7107 # check memory requirements on the target node
7108 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7109 instance.name, bep[constants.BE_MEMORY],
7110 instance.hypervisor)
7112 self.LogInfo("Not checking memory on the secondary node as"
7113 " instance will not be started")
7115 # check bridge existence
7116 _CheckInstanceBridgesExist(self, instance, node=target_node)
7118 def Exec(self, feedback_fn):
7119 """Move an instance.
7121 The move is done by shutting it down on its present node, copying
7122 the data over (slow) and starting it on the new node.
7125 instance = self.instance
7127 source_node = instance.primary_node
7128 target_node = self.target_node
7130 self.LogInfo("Shutting down instance %s on source node %s",
7131 instance.name, source_node)
7133 assert (self.owned_locks(locking.LEVEL_NODE) ==
7134 self.owned_locks(locking.LEVEL_NODE_RES))
7136 result = self.rpc.call_instance_shutdown(source_node, instance,
7137 self.op.shutdown_timeout)
7138 msg = result.fail_msg
7140 if self.op.ignore_consistency:
7141 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7142 " Proceeding anyway. Please make sure node"
7143 " %s is down. Error details: %s",
7144 instance.name, source_node, source_node, msg)
7146 raise errors.OpExecError("Could not shutdown instance %s on"
7148 (instance.name, source_node, msg))
7150 # create the target disks
7152 _CreateDisks(self, instance, target_node=target_node)
7153 except errors.OpExecError:
7154 self.LogWarning("Device creation failed, reverting...")
7156 _RemoveDisks(self, instance, target_node=target_node)
7158 self.cfg.ReleaseDRBDMinors(instance.name)
7161 cluster_name = self.cfg.GetClusterInfo().cluster_name
7164 # activate, get path, copy the data over
7165 for idx, disk in enumerate(instance.disks):
7166 self.LogInfo("Copying data for disk %d", idx)
7167 result = self.rpc.call_blockdev_assemble(target_node, disk,
7168 instance.name, True, idx)
7170 self.LogWarning("Can't assemble newly created disk %d: %s",
7171 idx, result.fail_msg)
7172 errs.append(result.fail_msg)
7174 dev_path = result.payload
7175 result = self.rpc.call_blockdev_export(source_node, disk,
7176 target_node, dev_path,
7179 self.LogWarning("Can't copy data over for disk %d: %s",
7180 idx, result.fail_msg)
7181 errs.append(result.fail_msg)
7185 self.LogWarning("Some disks failed to copy, aborting")
7187 _RemoveDisks(self, instance, target_node=target_node)
7189 self.cfg.ReleaseDRBDMinors(instance.name)
7190 raise errors.OpExecError("Errors during disk copy: %s" %
7193 instance.primary_node = target_node
7194 self.cfg.Update(instance, feedback_fn)
7196 self.LogInfo("Removing the disks on the original node")
7197 _RemoveDisks(self, instance, target_node=source_node)
7199 # Only start the instance if it's marked as up
7200 if instance.admin_state == constants.ADMINST_UP:
7201 self.LogInfo("Starting instance %s on node %s",
7202 instance.name, target_node)
7204 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7205 ignore_secondaries=True)
7207 _ShutdownInstanceDisks(self, instance)
7208 raise errors.OpExecError("Can't activate the instance's disks")
7210 result = self.rpc.call_instance_start(target_node,
7211 (instance, None, None), False)
7212 msg = result.fail_msg
7214 _ShutdownInstanceDisks(self, instance)
7215 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7216 (instance.name, target_node, msg))
7219 class LUNodeMigrate(LogicalUnit):
7220 """Migrate all instances from a node.
7223 HPATH = "node-migrate"
7224 HTYPE = constants.HTYPE_NODE
7227 def CheckArguments(self):
7230 def ExpandNames(self):
7231 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7233 self.share_locks = _ShareAll()
7234 self.needed_locks = {
7235 locking.LEVEL_NODE: [self.op.node_name],
7238 def BuildHooksEnv(self):
7241 This runs on the master, the primary and all the secondaries.
7245 "NODE_NAME": self.op.node_name,
7248 def BuildHooksNodes(self):
7249 """Build hooks nodes.
7252 nl = [self.cfg.GetMasterNode()]
7255 def CheckPrereq(self):
7258 def Exec(self, feedback_fn):
7259 # Prepare jobs for migration instances
7261 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7264 iallocator=self.op.iallocator,
7265 target_node=self.op.target_node)]
7266 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7269 # TODO: Run iallocator in this opcode and pass correct placement options to
7270 # OpInstanceMigrate. Since other jobs can modify the cluster between
7271 # running the iallocator and the actual migration, a good consistency model
7272 # will have to be found.
7274 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7275 frozenset([self.op.node_name]))
7277 return ResultWithJobs(jobs)
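# Illustrative shape of the jobs list built above (instance names assumed):
# one single-opcode job per primary instance of the evacuated node, e.g.
#   [[OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#    [OpInstanceMigrate(instance_name="inst2.example.com", ...)]]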
7280 class TLMigrateInstance(Tasklet):
7281 """Tasklet class for instance migration.
7284 @ivar live: whether the migration will be done live or non-live;
7285 this variable is initialized only after CheckPrereq has run
7286 @type cleanup: boolean
7287 @ivar cleanup: Whether we clean up from a failed migration
7288 @type iallocator: string
7289 @ivar iallocator: The iallocator used to determine target_node
7290 @type target_node: string
7291 @ivar target_node: If given, the target_node to reallocate the instance to
7292 @type failover: boolean
7293 @ivar failover: Whether operation results in failover or migration
7294 @type fallback: boolean
7295 @ivar fallback: Whether fallback to failover is allowed if migration not
7297 @type ignore_consistency: boolean
7298 @ivar ignore_consistency: Whether we should ignore consistency between source
7300 @type shutdown_timeout: int
7301 @ivar shutdown_timeout: in case of failover, the timeout of the shutdown
7306 _MIGRATION_POLL_INTERVAL = 1 # seconds
7307 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7309 def __init__(self, lu, instance_name, cleanup=False,
7310 failover=False, fallback=False,
7311 ignore_consistency=False,
7312 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7313 """Initializes this class.
7316 Tasklet.__init__(self, lu)
7319 self.instance_name = instance_name
7320 self.cleanup = cleanup
7321 self.live = False # will be overridden later
7322 self.failover = failover
7323 self.fallback = fallback
7324 self.ignore_consistency = ignore_consistency
7325 self.shutdown_timeout = shutdown_timeout
7327 def CheckPrereq(self):
7328 """Check prerequisites.
7330 This checks that the instance is in the cluster.
7333 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7334 instance = self.cfg.GetInstanceInfo(instance_name)
7335 assert instance is not None
7336 self.instance = instance
7338 if (not self.cleanup and
7339 not instance.admin_state == constants.ADMINST_UP and
7340 not self.failover and self.fallback):
7341 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7342 " switching to failover")
7343 self.failover = True
7345 if instance.disk_template not in constants.DTS_MIRRORED:
7350 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7351 " %s" % (instance.disk_template, text),
7354 if instance.disk_template in constants.DTS_EXT_MIRROR:
7355 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7357 if self.lu.op.iallocator:
7358 self._RunAllocator()
7360 # We set self.target_node as it is required by
7362 self.target_node = self.lu.op.target_node
7364 # self.target_node is already populated, either directly or by the iallocator run
7366 target_node = self.target_node
7367 if self.target_node == instance.primary_node:
7368 raise errors.OpPrereqError("Cannot migrate instance %s"
7369 " to its primary (%s)" %
7370 (instance.name, instance.primary_node))
7372 if len(self.lu.tasklets) == 1:
7373 # It is safe to release locks only when we're the only tasklet
7375 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7376 keep=[instance.primary_node, self.target_node])
7379 secondary_nodes = instance.secondary_nodes
7380 if not secondary_nodes:
7381 raise errors.ConfigurationError("No secondary node but using"
7382 " %s disk template" %
7383 instance.disk_template)
7384 target_node = secondary_nodes[0]
7385 if self.lu.op.iallocator or (self.lu.op.target_node and
7386 self.lu.op.target_node != target_node):
7388 text = "failed over"
7391 raise errors.OpPrereqError("Instances with disk template %s cannot"
7392 " be %s to arbitrary nodes"
7393 " (neither an iallocator nor a target"
7394 " node can be passed)" %
7395 (instance.disk_template, text),
7398 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7400 # check memory requirements on the secondary node
7401 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7402 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7403 instance.name, i_be[constants.BE_MEMORY],
7404 instance.hypervisor)
7406 self.lu.LogInfo("Not checking memory on the secondary node as"
7407 " instance will not be started")
7409 # check bridge existence
7410 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7412 if not self.cleanup:
7413 _CheckNodeNotDrained(self.lu, target_node)
7414 if not self.failover:
7415 result = self.rpc.call_instance_migratable(instance.primary_node,
7417 if result.fail_msg and self.fallback:
7418 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7420 self.failover = True
7422 result.Raise("Can't migrate, please use failover",
7423 prereq=True, ecode=errors.ECODE_STATE)
7425 assert not (self.failover and self.cleanup)
7427 if not self.failover:
7428 if self.lu.op.live is not None and self.lu.op.mode is not None:
7429 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7430 " parameters are accepted",
7432 if self.lu.op.live is not None:
7434 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7436 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7437 # reset the 'live' parameter to None so that repeated
7438 # invocations of CheckPrereq do not raise an exception
7439 self.lu.op.live = None
7440 elif self.lu.op.mode is None:
7441 # read the default value from the hypervisor
7442 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7444 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7446 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7448 # Failover is never live
7451 def _RunAllocator(self):
7452 """Run the allocator based on input opcode.
7455 ial = IAllocator(self.cfg, self.rpc,
7456 mode=constants.IALLOCATOR_MODE_RELOC,
7457 name=self.instance_name,
7458 # TODO See why hail breaks with a single node below
7459 relocate_from=[self.instance.primary_node,
7460 self.instance.primary_node],
7463 ial.Run(self.lu.op.iallocator)
7466 raise errors.OpPrereqError("Can't compute nodes using"
7467 " iallocator '%s': %s" %
7468 (self.lu.op.iallocator, ial.info),
7470 if len(ial.result) != ial.required_nodes:
7471 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7472 " of nodes (%s), required %s" %
7473 (self.lu.op.iallocator, len(ial.result),
7474 ial.required_nodes), errors.ECODE_FAULT)
7475 self.target_node = ial.result[0]
7476 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7477 self.instance_name, self.lu.op.iallocator,
7478 utils.CommaJoin(ial.result))
7480 def _WaitUntilSync(self):
7481 """Poll with custom rpc for disk sync.
7483 This uses our own step-based rpc call.
7486 self.feedback_fn("* wait until resync is done")
7490 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7492 self.instance.disks)
7494 for node, nres in result.items():
7495 nres.Raise("Cannot resync disks on node %s" % node)
7496 node_done, node_percent = nres.payload
7497 all_done = all_done and node_done
7498 if node_percent is not None:
7499 min_percent = min(min_percent, node_percent)
7501 if min_percent < 100:
7502 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7505 def _EnsureSecondary(self, node):
7506 """Demote a node to secondary.
7509 self.feedback_fn("* switching node %s to secondary mode" % node)
7511 for dev in self.instance.disks:
7512 self.cfg.SetDiskID(dev, node)
7514 result = self.rpc.call_blockdev_close(node, self.instance.name,
7515 self.instance.disks)
7516 result.Raise("Cannot change disk to secondary on node %s" % node)
7518 def _GoStandalone(self):
7519 """Disconnect from the network.
7522 self.feedback_fn("* changing into standalone mode")
7523 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7524 self.instance.disks)
7525 for node, nres in result.items():
7526 nres.Raise("Cannot disconnect disks on node %s" % node)
7528 def _GoReconnect(self, multimaster):
7529 """Reconnect to the network.
7535 msg = "single-master"
7536 self.feedback_fn("* changing disks into %s mode" % msg)
7537 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7538 self.instance.disks,
7539 self.instance.name, multimaster)
7540 for node, nres in result.items():
7541 nres.Raise("Cannot change disks config on node %s" % node)
7543 def _ExecCleanup(self):
7544 """Try to cleanup after a failed migration.
7546 The cleanup is done by:
7547 - check that the instance is running only on one node
7548 (and update the config if needed)
7549 - change disks on its secondary node to secondary
7550 - wait until disks are fully synchronized
7551 - disconnect from the network
7552 - change disks into single-master mode
7553 - wait again until disks are fully synchronized
7556 instance = self.instance
7557 target_node = self.target_node
7558 source_node = self.source_node
7560 # check running on only one node
7561 self.feedback_fn("* checking where the instance actually runs"
7562 " (if this hangs, the hypervisor might be in"
7564 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7565 for node, result in ins_l.items():
7566 result.Raise("Can't contact node %s" % node)
7568 runningon_source = instance.name in ins_l[source_node].payload
7569 runningon_target = instance.name in ins_l[target_node].payload
7571 if runningon_source and runningon_target:
7572 raise errors.OpExecError("Instance seems to be running on two nodes,"
7573 " or the hypervisor is confused; you will have"
7574 " to ensure manually that it runs only on one"
7575 " and restart this operation")
7577 if not (runningon_source or runningon_target):
7578 raise errors.OpExecError("Instance does not seem to be running at all;"
7579 " in this case it's safer to repair by"
7580 " running 'gnt-instance stop' to ensure disk"
7581 " shutdown, and then restarting it")
7583 if runningon_target:
7584 # the migration has actually succeeded, we need to update the config
7585 self.feedback_fn("* instance running on secondary node (%s),"
7586 " updating config" % target_node)
7587 instance.primary_node = target_node
7588 self.cfg.Update(instance, self.feedback_fn)
7589 demoted_node = source_node
7591 self.feedback_fn("* instance confirmed to be running on its"
7592 " primary node (%s)" % source_node)
7593 demoted_node = target_node
7595 if instance.disk_template in constants.DTS_INT_MIRROR:
7596 self._EnsureSecondary(demoted_node)
7598 self._WaitUntilSync()
7599 except errors.OpExecError:
7600 # we ignore errors here, since if the device is standalone, it
7601 # won't be able to sync
7603 self._GoStandalone()
7604 self._GoReconnect(False)
7605 self._WaitUntilSync()
7607 self.feedback_fn("* done")
7609 def _RevertDiskStatus(self):
7610 """Try to revert the disk status after a failed migration.
7613 target_node = self.target_node
7614 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7618 self._EnsureSecondary(target_node)
7619 self._GoStandalone()
7620 self._GoReconnect(False)
7621 self._WaitUntilSync()
7622 except errors.OpExecError, err:
7623 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7624 " please try to recover the instance manually;"
7625 " error '%s'" % str(err))
7627 def _AbortMigration(self):
7628 """Call the hypervisor code to abort a started migration.
7631 instance = self.instance
7632 target_node = self.target_node
7633 source_node = self.source_node
7634 migration_info = self.migration_info
7636 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7640 abort_msg = abort_result.fail_msg
7642 logging.error("Aborting migration failed on target node %s: %s",
7643 target_node, abort_msg)
7644 # Don't raise an exception here, as we still have to try to revert the
7645 # disk status, even if this step failed.
7647 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7648 instance, False, self.live)
7649 abort_msg = abort_result.fail_msg
7651 logging.error("Aborting migration failed on source node %s: %s",
7652 source_node, abort_msg)
7654 def _ExecMigration(self):
7655 """Migrate an instance.
7657 The migrate is done by:
7658 - change the disks into dual-master mode
7659 - wait until disks are fully synchronized again
7660 - migrate the instance
7661 - change disks on the new secondary node (the old primary) to secondary
7662 - wait until disks are fully synchronized
7663 - change disks into single-master mode
7666 instance = self.instance
7667 target_node = self.target_node
7668 source_node = self.source_node
7670 # Check for hypervisor version mismatch and warn the user.
7671 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7672 None, self.instance.hypervisor)
7673 src_info = nodeinfo[source_node]
7674 dst_info = nodeinfo[target_node]
7676 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7677 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7678 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7679 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7680 if src_version != dst_version:
7681 self.feedback_fn("* warning: hypervisor version mismatch between"
7682 " source (%s) and target (%s) node" %
7683 (src_version, dst_version))
7685 self.feedback_fn("* checking disk consistency between source and target")
7686 for dev in instance.disks:
7687 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7688 raise errors.OpExecError("Disk %s is degraded or not fully"
7689 " synchronized on target node,"
7690 " aborting migration" % dev.iv_name)
7692 # First get the migration information from the remote node
7693 result = self.rpc.call_migration_info(source_node, instance)
7694 msg = result.fail_msg
7696 log_err = ("Failed fetching source migration information from %s: %s" %
7698 logging.error(log_err)
7699 raise errors.OpExecError(log_err)
7701 self.migration_info = migration_info = result.payload
7703 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7704 # Then switch the disks to master/master mode
7705 self._EnsureSecondary(target_node)
7706 self._GoStandalone()
7707 self._GoReconnect(True)
7708 self._WaitUntilSync()
7710 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7711 result = self.rpc.call_accept_instance(target_node,
7714 self.nodes_ip[target_node])
7716 msg = result.fail_msg
7718 logging.error("Instance pre-migration failed, trying to revert"
7719 " disk status: %s", msg)
7720 self.feedback_fn("Pre-migration failed, aborting")
7721 self._AbortMigration()
7722 self._RevertDiskStatus()
7723 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7724 (instance.name, msg))
7726 self.feedback_fn("* migrating instance to %s" % target_node)
7727 result = self.rpc.call_instance_migrate(source_node, instance,
7728 self.nodes_ip[target_node],
7730 msg = result.fail_msg
7732 logging.error("Instance migration failed, trying to revert"
7733 " disk status: %s", msg)
7734 self.feedback_fn("Migration failed, aborting")
7735 self._AbortMigration()
7736 self._RevertDiskStatus()
7737 raise errors.OpExecError("Could not migrate instance %s: %s" %
7738 (instance.name, msg))
7740 self.feedback_fn("* starting memory transfer")
7741 last_feedback = time.time()
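# Poll the source node for the hypervisor's migration status until it stops
# reporting HV_MIGRATION_ACTIVE, emitting a RAM-transfer progress message at
# most once per feedback interval; a failed status aborts the migration and
# reverts the disk configuration.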
7743 result = self.rpc.call_instance_get_migration_status(source_node,
7745 msg = result.fail_msg
7746 ms = result.payload # MigrationStatus instance
7747 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7748 logging.error("Instance migration failed, trying to revert"
7749 " disk status: %s", msg)
7750 self.feedback_fn("Migration failed, aborting")
7751 self._AbortMigration()
7752 self._RevertDiskStatus()
7753 raise errors.OpExecError("Could not migrate instance %s: %s" %
7754 (instance.name, msg))
7756 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7757 self.feedback_fn("* memory transfer complete")
7760 if (utils.TimeoutExpired(last_feedback,
7761 self._MIGRATION_FEEDBACK_INTERVAL) and
7762 ms.transferred_ram is not None):
7763 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7764 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7765 last_feedback = time.time()
7767 time.sleep(self._MIGRATION_POLL_INTERVAL)
7769 result = self.rpc.call_instance_finalize_migration_src(source_node,
7773 msg = result.fail_msg
7775 logging.error("Instance migration succeeded, but finalization failed"
7776 " on the source node: %s", msg)
7777 raise errors.OpExecError("Could not finalize instance migration: %s" %
7780 instance.primary_node = target_node
7782 # distribute new instance config to the other nodes
7783 self.cfg.Update(instance, self.feedback_fn)
7785 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7789 msg = result.fail_msg
7791 logging.error("Instance migration succeeded, but finalization failed"
7792 " on the target node: %s", msg)
7793 raise errors.OpExecError("Could not finalize instance migration: %s" %
7796 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7797 self._EnsureSecondary(source_node)
7798 self._WaitUntilSync()
7799 self._GoStandalone()
7800 self._GoReconnect(False)
7801 self._WaitUntilSync()
7803 self.feedback_fn("* done")
7805 def _ExecFailover(self):
7806 """Failover an instance.
7808 The failover is done by shutting it down on its present node and
7809 starting it on the secondary.
7812 instance = self.instance
7813 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7815 source_node = instance.primary_node
7816 target_node = self.target_node
7818 if instance.admin_state == constants.ADMINST_UP:
7819 self.feedback_fn("* checking disk consistency between source and target")
7820 for dev in instance.disks:
7821 # for drbd, these are drbd over lvm
7822 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7823 if primary_node.offline:
7824 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7826 (primary_node.name, dev.iv_name, target_node))
7827 elif not self.ignore_consistency:
7828 raise errors.OpExecError("Disk %s is degraded on target node,"
7829 " aborting failover" % dev.iv_name)
7831 self.feedback_fn("* not checking disk consistency as instance is not"
7834 self.feedback_fn("* shutting down instance on source node")
7835 logging.info("Shutting down instance %s on node %s",
7836 instance.name, source_node)
7838 result = self.rpc.call_instance_shutdown(source_node, instance,
7839 self.shutdown_timeout)
7840 msg = result.fail_msg
7842 if self.ignore_consistency or primary_node.offline:
7843 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7844 " proceeding anyway; please make sure node"
7845 " %s is down; error details: %s",
7846 instance.name, source_node, source_node, msg)
7848 raise errors.OpExecError("Could not shutdown instance %s on"
7850 (instance.name, source_node, msg))
7852 self.feedback_fn("* deactivating the instance's disks on source node")
7853 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7854 raise errors.OpExecError("Can't shut down the instance's disks")
7856 instance.primary_node = target_node
7857 # distribute new instance config to the other nodes
7858 self.cfg.Update(instance, self.feedback_fn)
7860 # Only start the instance if it's marked as up
7861 if instance.admin_state == constants.ADMINST_UP:
7862 self.feedback_fn("* activating the instance's disks on target node %s" %
7864 logging.info("Starting instance %s on node %s",
7865 instance.name, target_node)
7867 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7868 ignore_secondaries=True)
7870 _ShutdownInstanceDisks(self.lu, instance)
7871 raise errors.OpExecError("Can't activate the instance's disks")
7873 self.feedback_fn("* starting the instance on the target node %s" %
7875 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7877 msg = result.fail_msg
7879 _ShutdownInstanceDisks(self.lu, instance)
7880 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7881 (instance.name, target_node, msg))
7883 def Exec(self, feedback_fn):
7884 """Perform the migration.
7887 self.feedback_fn = feedback_fn
7888 self.source_node = self.instance.primary_node
7890 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7891 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7892 self.target_node = self.instance.secondary_nodes[0]
7893 # Otherwise self.target_node has been populated either
7894 # directly, or through an iallocator.
7896 self.all_nodes = [self.source_node, self.target_node]
7897 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7898 in self.cfg.GetMultiNodeInfo(self.all_nodes))
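# The secondary (replication network) IPs are used both for the DRBD
# disconnect/attach calls and as the address on which the target node
# accepts the incoming migration.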
7901 feedback_fn("Failover instance %s" % self.instance.name)
7902 self._ExecFailover()
7904 feedback_fn("Migrating instance %s" % self.instance.name)
7907 return self._ExecCleanup()
7909 return self._ExecMigration()
7912 def _CreateBlockDev(lu, node, instance, device, force_create,
7914 """Create a tree of block devices on a given node.
7916 If this device type has to be created on secondaries, create it and all its children.
7919 If not, just recurse to children keeping the same 'force' value.
7921 @param lu: the lu on whose behalf we execute
7922 @param node: the node on which to create the device
7923 @type instance: L{objects.Instance}
7924 @param instance: the instance which owns the device
7925 @type device: L{objects.Disk}
7926 @param device: the device to create
7927 @type force_create: boolean
7928 @param force_create: whether to force creation of this device; this
7929 will be changed to True whenever we find a device which has the
7930 CreateOnSecondary() attribute
7931 @param info: the extra 'metadata' we should attach to the device
7932 (this will be represented as a LVM tag)
7933 @type force_open: boolean
7934 @param force_open: this parameter will be passed to the
7935 L{backend.BlockdevCreate} function where it specifies
7936 whether we run on primary or not, and it affects both
7937 the child assembly and the device's own Open() execution
7940 if device.CreateOnSecondary():
7944 for child in device.children:
7945 _CreateBlockDev(lu, node, instance, child, force_create,
7948 if not force_create:
7951 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7954 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7955 """Create a single block device on a given node.
7957 This will not recurse over children of the device, so they must be created in advance.
7960 @param lu: the lu on whose behalf we execute
7961 @param node: the node on which to create the device
7962 @type instance: L{objects.Instance}
7963 @param instance: the instance which owns the device
7964 @type device: L{objects.Disk}
7965 @param device: the device to create
7966 @param info: the extra 'metadata' we should attach to the device
7967 (this will be represented as a LVM tag)
7968 @type force_open: boolean
7969 @param force_open: this parameter will be passed to the
7970 L{backend.BlockdevCreate} function where it specifies
7971 whether we run on primary or not, and it affects both
7972 the child assembly and the device's own Open() execution
7975 lu.cfg.SetDiskID(device, node)
7976 result = lu.rpc.call_blockdev_create(node, device, device.size,
7977 instance.name, force_open, info)
7978 result.Raise("Can't create block device %s on"
7979 " node %s for instance %s" % (device, node, instance.name))
7980 if device.physical_id is None:
7981 device.physical_id = result.payload
7984 def _GenerateUniqueNames(lu, exts):
7985 """Generate a suitable LV name.
7987 This will generate a logical volume name for the given instance.
7992 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7993 results.append("%s%s" % (new_id, val))
7997 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7998 iv_name, p_minor, s_minor):
7999 """Generate a drbd8 device complete with its children.
8002 assert len(vgnames) == len(names) == 2
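# A DRBD8 disk is assembled from two backing LVs (a data volume and a small
# metadata volume), plus a cluster-allocated TCP port and a shared secret
# for the replication link between the two nodes.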
8003 port = lu.cfg.AllocatePort()
8004 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8005 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8006 logical_id=(vgnames[0], names[0]))
8007 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8008 logical_id=(vgnames[1], names[1]))
8009 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8010 logical_id=(primary, secondary, port,
8013 children=[dev_data, dev_meta],
8018 def _GenerateDiskTemplate(lu, template_name,
8019 instance_name, primary_node,
8020 secondary_nodes, disk_info,
8021 file_storage_dir, file_driver,
8022 base_index, feedback_fn):
8023 """Generate the entire disk layout for a given template type.
8026 #TODO: compute space requirements
8028 vgname = lu.cfg.GetVGName()
8029 disk_count = len(disk_info)
8031 if template_name == constants.DT_DISKLESS:
8033 elif template_name == constants.DT_PLAIN:
8034 if len(secondary_nodes) != 0:
8035 raise errors.ProgrammerError("Wrong template configuration")
8037 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8038 for i in range(disk_count)])
8039 for idx, disk in enumerate(disk_info):
8040 disk_index = idx + base_index
8041 vg = disk.get(constants.IDISK_VG, vgname)
8042 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8043 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8044 size=disk[constants.IDISK_SIZE],
8045 logical_id=(vg, names[idx]),
8046 iv_name="disk/%d" % disk_index,
8047 mode=disk[constants.IDISK_MODE])
8048 disks.append(disk_dev)
8049 elif template_name == constants.DT_DRBD8:
8050 if len(secondary_nodes) != 1:
8051 raise errors.ProgrammerError("Wrong template configuration")
8052 remote_node = secondary_nodes[0]
8053 minors = lu.cfg.AllocateDRBDMinor(
8054 [primary_node, remote_node] * len(disk_info), instance_name)
8057 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8058 for i in range(disk_count)]):
8059 names.append(lv_prefix + "_data")
8060 names.append(lv_prefix + "_meta")
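# Each DRBD disk therefore consumes two LV names (_data/_meta) and two DRBD
# minors (one per node), which is why the loop below indexes names and
# minors with idx * 2.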
8061 for idx, disk in enumerate(disk_info):
8062 disk_index = idx + base_index
8063 data_vg = disk.get(constants.IDISK_VG, vgname)
8064 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8065 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8066 disk[constants.IDISK_SIZE],
8068 names[idx * 2:idx * 2 + 2],
8069 "disk/%d" % disk_index,
8070 minors[idx * 2], minors[idx * 2 + 1])
8071 disk_dev.mode = disk[constants.IDISK_MODE]
8072 disks.append(disk_dev)
8073 elif template_name == constants.DT_FILE:
8074 if len(secondary_nodes) != 0:
8075 raise errors.ProgrammerError("Wrong template configuration")
8077 opcodes.RequireFileStorage()
8079 for idx, disk in enumerate(disk_info):
8080 disk_index = idx + base_index
8081 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8082 size=disk[constants.IDISK_SIZE],
8083 iv_name="disk/%d" % disk_index,
8084 logical_id=(file_driver,
8085 "%s/disk%d" % (file_storage_dir,
8087 mode=disk[constants.IDISK_MODE])
8088 disks.append(disk_dev)
8089 elif template_name == constants.DT_SHARED_FILE:
8090 if len(secondary_nodes) != 0:
8091 raise errors.ProgrammerError("Wrong template configuration")
8093 opcodes.RequireSharedFileStorage()
8095 for idx, disk in enumerate(disk_info):
8096 disk_index = idx + base_index
8097 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8098 size=disk[constants.IDISK_SIZE],
8099 iv_name="disk/%d" % disk_index,
8100 logical_id=(file_driver,
8101 "%s/disk%d" % (file_storage_dir,
8103 mode=disk[constants.IDISK_MODE])
8104 disks.append(disk_dev)
8105 elif template_name == constants.DT_BLOCK:
8106 if len(secondary_nodes) != 0:
8107 raise errors.ProgrammerError("Wrong template configuration")
8109 for idx, disk in enumerate(disk_info):
8110 disk_index = idx + base_index
8111 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8112 size=disk[constants.IDISK_SIZE],
8113 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8114 disk[constants.IDISK_ADOPT]),
8115 iv_name="disk/%d" % disk_index,
8116 mode=disk[constants.IDISK_MODE])
8117 disks.append(disk_dev)
8120 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8124 def _GetInstanceInfoText(instance):
8125 """Compute that text that should be added to the disk's metadata.
8128 return "originstname+%s" % instance.name
8131 def _CalcEta(time_taken, written, total_size):
8132 """Calculates the ETA based on size written and total size.
8134 @param time_taken: The time taken so far
8135 @param written: amount written so far
8136 @param total_size: The total size of data to be written
8137 @return: The remaining time in seconds
8140 avg_time = time_taken / float(written)
8141 return (total_size - written) * avg_time
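# Example: 1024 MiB written in 60 seconds out of 4096 MiB total gives an
# average of ~0.0586 s/MiB, so the remaining 3072 MiB are estimated at
# roughly 180 seconds.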
8144 def _WipeDisks(lu, instance):
8145 """Wipes instance disks.
8147 @type lu: L{LogicalUnit}
8148 @param lu: the logical unit on whose behalf we execute
8149 @type instance: L{objects.Instance}
8150 @param instance: the instance whose disks we should create
8151 @return: the success of the wipe
8154 node = instance.primary_node
8156 for device in instance.disks:
8157 lu.cfg.SetDiskID(device, node)
8159 logging.info("Pause sync of instance %s disks", instance.name)
8160 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8162 for idx, success in enumerate(result.payload):
8164 logging.warn("pause-sync of instance %s for disk %d failed",
8168 for idx, device in enumerate(instance.disks):
8169 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8170 # MAX_WIPE_CHUNK at max
8171 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8172 constants.MIN_WIPE_CHUNK_PERCENT)
8173 # we _must_ make this an int, otherwise rounding errors will occur
8175 wipe_chunk_size = int(wipe_chunk_size)
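# Example (assuming the default MIN_WIPE_CHUNK_PERCENT of 10 and
# MAX_WIPE_CHUNK of 1024 MiB): a 4096 MiB disk is wiped in 409 MiB chunks,
# while disks of 10 GiB or more are capped at 1024 MiB per RPC call.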
8177 lu.LogInfo("* Wiping disk %d", idx)
8178 logging.info("Wiping disk %d for instance %s, node %s using"
8179 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8184 start_time = time.time()
8186 while offset < size:
8187 wipe_size = min(wipe_chunk_size, size - offset)
8188 logging.debug("Wiping disk %d, offset %s, chunk %s",
8189 idx, offset, wipe_size)
8190 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8191 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8192 (idx, offset, wipe_size))
8195 if now - last_output >= 60:
8196 eta = _CalcEta(now - start_time, offset, size)
8197 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8198 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8201 logging.info("Resume sync of instance %s disks", instance.name)
8203 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8205 for idx, success in enumerate(result.payload):
8207 lu.LogWarning("Resume sync of disk %d failed, please have a"
8208 " look at the status and troubleshoot the issue", idx)
8209 logging.warn("resume-sync of instance %s for disk %d failed",
8213 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8214 """Create all disks for an instance.
8216 This abstracts away some work from AddInstance.
8218 @type lu: L{LogicalUnit}
8219 @param lu: the logical unit on whose behalf we execute
8220 @type instance: L{objects.Instance}
8221 @param instance: the instance whose disks we should create
8223 @param to_skip: list of indices to skip
8224 @type target_node: string
8225 @param target_node: if passed, overrides the target node for creation
8227 @return: the success of the creation
8230 info = _GetInstanceInfoText(instance)
8231 if target_node is None:
8232 pnode = instance.primary_node
8233 all_nodes = instance.all_nodes
8238 if instance.disk_template in constants.DTS_FILEBASED:
8239 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8240 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8242 result.Raise("Failed to create directory '%s' on"
8243 " node %s" % (file_storage_dir, pnode))
8245 # Note: this needs to be kept in sync with adding of disks in
8246 # LUInstanceSetParams
8247 for idx, device in enumerate(instance.disks):
8248 if to_skip and idx in to_skip:
8250 logging.info("Creating volume %s for instance %s",
8251 device.iv_name, instance.name)
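# Create the device on every node of the instance; creation (and opening)
# is only forced on the primary node, while secondaries get force_create
# set further down the recursion only where the disk type requires it.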
8253 for node in all_nodes:
8254 f_create = node == pnode
8255 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8258 def _RemoveDisks(lu, instance, target_node=None):
8259 """Remove all disks for an instance.
8261 This abstracts away some work from `AddInstance()` and
8262 `RemoveInstance()`. Note that in case some of the devices couldn't
8263 be removed, the removal will continue with the other ones (compare
8264 with `_CreateDisks()`).
8266 @type lu: L{LogicalUnit}
8267 @param lu: the logical unit on whose behalf we execute
8268 @type instance: L{objects.Instance}
8269 @param instance: the instance whose disks we should remove
8270 @type target_node: string
8271 @param target_node: used to override the node on which to remove the disks
8273 @return: the success of the removal
8276 logging.info("Removing block devices for instance %s", instance.name)
8279 for device in instance.disks:
8281 edata = [(target_node, device)]
8283 edata = device.ComputeNodeTree(instance.primary_node)
8284 for node, disk in edata:
8285 lu.cfg.SetDiskID(disk, node)
8286 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8288 lu.LogWarning("Could not remove block device %s on node %s,"
8289 " continuing anyway: %s", device.iv_name, node, msg)
8292 # if this is a DRBD disk, return its port to the pool
8293 if device.dev_type in constants.LDS_DRBD:
8294 tcp_port = device.logical_id[2]
8295 lu.cfg.AddTcpUdpPort(tcp_port)
8297 if instance.disk_template == constants.DT_FILE:
8298 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8302 tgt = instance.primary_node
8303 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8305 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8306 file_storage_dir, instance.primary_node, result.fail_msg)
8312 def _ComputeDiskSizePerVG(disk_template, disks):
8313 """Compute disk size requirements in the volume group
8316 def _compute(disks, payload):
8317 """Universal algorithm.
8322 vgs[disk[constants.IDISK_VG]] = \
8323 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
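# _compute returns a dict mapping each volume group to the sum of the
# requested disk sizes plus a fixed per-disk overhead ("payload"); e.g. two
# 1024 MiB disks in VG "xenvg" with a 128 MiB payload yield {"xenvg": 2304}.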
8327 # Required free disk space as a function of disk and swap space
8329 constants.DT_DISKLESS: {},
8330 constants.DT_PLAIN: _compute(disks, 0),
8331 # 128 MB are added for drbd metadata for each disk
8332 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8333 constants.DT_FILE: {},
8334 constants.DT_SHARED_FILE: {},
8337 if disk_template not in req_size_dict:
8338 raise errors.ProgrammerError("Disk template '%s' size requirement"
8339 " is unknown" % disk_template)
8341 return req_size_dict[disk_template]
8344 def _ComputeDiskSize(disk_template, disks):
8345 """Compute disk size requirements in the volume group
8348 # Required free disk space as a function of disk and swap space
8350 constants.DT_DISKLESS: None,
8351 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8352 # 128 MB are added for drbd metadata for each disk
8354 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8355 constants.DT_FILE: None,
8356 constants.DT_SHARED_FILE: 0,
8357 constants.DT_BLOCK: 0,
8360 if disk_template not in req_size_dict:
8361 raise errors.ProgrammerError("Disk template '%s' size requirement"
8362 " is unknown" % disk_template)
8364 return req_size_dict[disk_template]
8367 def _FilterVmNodes(lu, nodenames):
8368 """Filters out non-vm_capable nodes from a list.
8370 @type lu: L{LogicalUnit}
8371 @param lu: the logical unit for which we check
8372 @type nodenames: list
8373 @param nodenames: the list of nodes on which we should check
8375 @return: the list of vm-capable nodes
8378 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8379 return [name for name in nodenames if name not in non_vm_nodes]
8382 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8383 """Hypervisor parameter validation.
8385 This function abstracts the hypervisor parameter validation to be
8386 used in both instance create and instance modify.
8388 @type lu: L{LogicalUnit}
8389 @param lu: the logical unit for which we check
8390 @type nodenames: list
8391 @param nodenames: the list of nodes on which we should check
8392 @type hvname: string
8393 @param hvname: the name of the hypervisor we should use
8394 @type hvparams: dict
8395 @param hvparams: the parameters which we need to check
8396 @raise errors.OpPrereqError: if the parameters are not valid
8399 nodenames = _FilterVmNodes(lu, nodenames)
8401 cluster = lu.cfg.GetClusterInfo()
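# Merge the cluster-level defaults for this hypervisor with the supplied
# overrides, then ask every checked node to validate the combined
# parameter dict.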
8402 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8404 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8405 for node in nodenames:
8409 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8412 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8413 """OS parameters validation.
8415 @type lu: L{LogicalUnit}
8416 @param lu: the logical unit for which we check
8417 @type required: boolean
8418 @param required: whether the validation should fail if the OS is not found
8420 @type nodenames: list
8421 @param nodenames: the list of nodes on which we should check
8422 @type osname: string
8423 @param osname: the name of the OS we should use
8424 @type osparams: dict
8425 @param osparams: the parameters which we need to check
8426 @raise errors.OpPrereqError: if the parameters are not valid
8429 nodenames = _FilterVmNodes(lu, nodenames)
8430 result = lu.rpc.call_os_validate(nodenames, required, osname,
8431 [constants.OS_VALIDATE_PARAMETERS],
8433 for node, nres in result.items():
8434 # we don't check for offline cases since this should be run only
8435 # against the master node and/or an instance's nodes
8436 nres.Raise("OS Parameters validation failed on node %s" % node)
8437 if not nres.payload:
8438 lu.LogInfo("OS %s not found on node %s, validation skipped",
8442 class LUInstanceCreate(LogicalUnit):
8443 """Create an instance.
8446 HPATH = "instance-add"
8447 HTYPE = constants.HTYPE_INSTANCE
8450 def CheckArguments(self):
8454 # do not require name_check to ease forward/backward compatibility
8456 if self.op.no_install and self.op.start:
8457 self.LogInfo("No-installation mode selected, disabling startup")
8458 self.op.start = False
8459 # validate/normalize the instance name
8460 self.op.instance_name = \
8461 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8463 if self.op.ip_check and not self.op.name_check:
8464 # TODO: make the ip check more flexible and not depend on the name check
8465 raise errors.OpPrereqError("Cannot do IP address check without a name"
8466 " check", errors.ECODE_INVAL)
8468 # check nics' parameter names
8469 for nic in self.op.nics:
8470 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8472 # check disks. parameter names and consistent adopt/no-adopt strategy
8473 has_adopt = has_no_adopt = False
8474 for disk in self.op.disks:
8475 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8476 if constants.IDISK_ADOPT in disk:
8480 if has_adopt and has_no_adopt:
8481 raise errors.OpPrereqError("Either all disks are adopted or none is",
8484 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8485 raise errors.OpPrereqError("Disk adoption is not supported for the"
8486 " '%s' disk template" %
8487 self.op.disk_template,
8489 if self.op.iallocator is not None:
8490 raise errors.OpPrereqError("Disk adoption not allowed with an"
8491 " iallocator script", errors.ECODE_INVAL)
8492 if self.op.mode == constants.INSTANCE_IMPORT:
8493 raise errors.OpPrereqError("Disk adoption not allowed for"
8494 " instance import", errors.ECODE_INVAL)
8496 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8497 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8498 " but no 'adopt' parameter given" %
8499 self.op.disk_template,
8502 self.adopt_disks = has_adopt
8504 # instance name verification
8505 if self.op.name_check:
8506 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8507 self.op.instance_name = self.hostname1.name
8508 # used in CheckPrereq for ip ping check
8509 self.check_ip = self.hostname1.ip
8511 self.check_ip = None
8513 # file storage checks
8514 if (self.op.file_driver and
8515 not self.op.file_driver in constants.FILE_DRIVER):
8516 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8517 self.op.file_driver, errors.ECODE_INVAL)
8519 if self.op.disk_template == constants.DT_FILE:
8520 opcodes.RequireFileStorage()
8521 elif self.op.disk_template == constants.DT_SHARED_FILE:
8522 opcodes.RequireSharedFileStorage()
8524 ### Node/iallocator related checks
8525 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8527 if self.op.pnode is not None:
8528 if self.op.disk_template in constants.DTS_INT_MIRROR:
8529 if self.op.snode is None:
8530 raise errors.OpPrereqError("The networked disk templates need"
8531 " a mirror node", errors.ECODE_INVAL)
8533 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8535 self.op.snode = None
8537 self._cds = _GetClusterDomainSecret()
8539 if self.op.mode == constants.INSTANCE_IMPORT:
8540 # On import force_variant must be True, because if we forced it at
8541 # initial install, our only chance when importing it back is that it works again
8543 self.op.force_variant = True
8545 if self.op.no_install:
8546 self.LogInfo("No-installation mode has no effect during import")
8548 elif self.op.mode == constants.INSTANCE_CREATE:
8549 if self.op.os_type is None:
8550 raise errors.OpPrereqError("No guest OS specified",
8552 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8553 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8554 " installation" % self.op.os_type,
8556 if self.op.disk_template is None:
8557 raise errors.OpPrereqError("No disk template specified",
8560 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8561 # Check handshake to ensure both clusters have the same domain secret
8562 src_handshake = self.op.source_handshake
8563 if not src_handshake:
8564 raise errors.OpPrereqError("Missing source handshake",
8567 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8570 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8573 # Load and check source CA
8574 self.source_x509_ca_pem = self.op.source_x509_ca
8575 if not self.source_x509_ca_pem:
8576 raise errors.OpPrereqError("Missing source X509 CA",
8580 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8582 except OpenSSL.crypto.Error, err:
8583 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8584 (err, ), errors.ECODE_INVAL)
8586 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8587 if errcode is not None:
8588 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8591 self.source_x509_ca = cert
8593 src_instance_name = self.op.source_instance_name
8594 if not src_instance_name:
8595 raise errors.OpPrereqError("Missing source instance name",
8598 self.source_instance_name = \
8599 netutils.GetHostname(name=src_instance_name).name
8602 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8603 self.op.mode, errors.ECODE_INVAL)
8605 def ExpandNames(self):
8606 """ExpandNames for CreateInstance.
8608 Figure out the right locks for instance creation.
8611 self.needed_locks = {}
8613 instance_name = self.op.instance_name
8614 # this is just a preventive check, but someone might still add this
8615 # instance in the meantime, and creation will fail at lock-add time
8616 if instance_name in self.cfg.GetInstanceList():
8617 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8618 instance_name, errors.ECODE_EXISTS)
8620 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8622 if self.op.iallocator:
8623 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8624 # specifying a group on instance creation and then selecting nodes from
8626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8627 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8629 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8630 nodelist = [self.op.pnode]
8631 if self.op.snode is not None:
8632 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8633 nodelist.append(self.op.snode)
8634 self.needed_locks[locking.LEVEL_NODE] = nodelist
8635 # Lock resources of instance's primary and secondary nodes (copy to
8636 # prevent accidental modification)
8637 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8639 # in case of import lock the source node too
8640 if self.op.mode == constants.INSTANCE_IMPORT:
8641 src_node = self.op.src_node
8642 src_path = self.op.src_path
8644 if src_path is None:
8645 self.op.src_path = src_path = self.op.instance_name
8647 if src_node is None:
8648 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8649 self.op.src_node = None
8650 if os.path.isabs(src_path):
8651 raise errors.OpPrereqError("Importing an instance from a path"
8652 " requires a source node option",
8655 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8656 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8657 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8658 if not os.path.isabs(src_path):
8659 self.op.src_path = src_path = \
8660 utils.PathJoin(constants.EXPORT_DIR, src_path)
8662 def _RunAllocator(self):
8663 """Run the allocator based on input opcode.
8666 nics = [n.ToDict() for n in self.nics]
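# Build an allocation request from the already-computed instance spec
# (disks, nics, beparams) and let the iallocator choose the primary (and,
# for mirrored templates, the secondary) node, which is written back into
# the opcode below.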
8667 ial = IAllocator(self.cfg, self.rpc,
8668 mode=constants.IALLOCATOR_MODE_ALLOC,
8669 name=self.op.instance_name,
8670 disk_template=self.op.disk_template,
8673 vcpus=self.be_full[constants.BE_VCPUS],
8674 memory=self.be_full[constants.BE_MEMORY],
8677 hypervisor=self.op.hypervisor,
8680 ial.Run(self.op.iallocator)
8683 raise errors.OpPrereqError("Can't compute nodes using"
8684 " iallocator '%s': %s" %
8685 (self.op.iallocator, ial.info),
8687 if len(ial.result) != ial.required_nodes:
8688 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8689 " of nodes (%s), required %s" %
8690 (self.op.iallocator, len(ial.result),
8691 ial.required_nodes), errors.ECODE_FAULT)
8692 self.op.pnode = ial.result[0]
8693 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8694 self.op.instance_name, self.op.iallocator,
8695 utils.CommaJoin(ial.result))
8696 if ial.required_nodes == 2:
8697 self.op.snode = ial.result[1]
8699 def BuildHooksEnv(self):
8702 This runs on master, primary and secondary nodes of the instance.
8706 "ADD_MODE": self.op.mode,
8708 if self.op.mode == constants.INSTANCE_IMPORT:
8709 env["SRC_NODE"] = self.op.src_node
8710 env["SRC_PATH"] = self.op.src_path
8711 env["SRC_IMAGES"] = self.src_images
8713 env.update(_BuildInstanceHookEnv(
8714 name=self.op.instance_name,
8715 primary_node=self.op.pnode,
8716 secondary_nodes=self.secondaries,
8717 status=self.op.start,
8718 os_type=self.op.os_type,
8719 memory=self.be_full[constants.BE_MEMORY],
8720 vcpus=self.be_full[constants.BE_VCPUS],
8721 nics=_NICListToTuple(self, self.nics),
8722 disk_template=self.op.disk_template,
8723 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8724 for d in self.disks],
8727 hypervisor_name=self.op.hypervisor,
8733 def BuildHooksNodes(self):
8734 """Build hooks nodes.
8737 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8740 def _ReadExportInfo(self):
8741 """Reads the export information from disk.
8743 It will override the opcode source node and path with the actual
8744 information, if these two were not specified before.
8746 @return: the export information
8749 assert self.op.mode == constants.INSTANCE_IMPORT
8751 src_node = self.op.src_node
8752 src_path = self.op.src_path
8754 if src_node is None:
8755 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8756 exp_list = self.rpc.call_export_list(locked_nodes)
8758 for node in exp_list:
8759 if exp_list[node].fail_msg:
8761 if src_path in exp_list[node].payload:
8763 self.op.src_node = src_node = node
8764 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8768 raise errors.OpPrereqError("No export found for relative path %s" %
8769 src_path, errors.ECODE_INVAL)
8771 _CheckNodeOnline(self, src_node)
8772 result = self.rpc.call_export_info(src_node, src_path)
8773 result.Raise("No export or invalid export found in dir %s" % src_path)
8775 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8776 if not export_info.has_section(constants.INISECT_EXP):
8777 raise errors.ProgrammerError("Corrupted export config",
8778 errors.ECODE_ENVIRON)
8780 ei_version = export_info.get(constants.INISECT_EXP, "version")
8781 if (int(ei_version) != constants.EXPORT_VERSION):
8782 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8783 (ei_version, constants.EXPORT_VERSION),
8784 errors.ECODE_ENVIRON)
8787 def _ReadExportParams(self, einfo):
8788 """Use export parameters as defaults.
8790 In case the opcode doesn't specify (as in override) some instance
8791 parameters, then try to use them from the export information, if
8795 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8797 if self.op.disk_template is None:
8798 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8799 self.op.disk_template = einfo.get(constants.INISECT_INS,
8801 if self.op.disk_template not in constants.DISK_TEMPLATES:
8802 raise errors.OpPrereqError("Disk template specified in configuration"
8803 " file is not one of the allowed values:"
8804 " %s" % " ".join(constants.DISK_TEMPLATES))
8806 raise errors.OpPrereqError("No disk template specified and the export"
8807 " is missing the disk_template information",
8810 if not self.op.disks:
8812 # TODO: import the disk iv_name too
8813 for idx in range(constants.MAX_DISKS):
8814 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8815 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8816 disks.append({constants.IDISK_SIZE: disk_sz})
8817 self.op.disks = disks
8818 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8819 raise errors.OpPrereqError("No disk info specified and the export"
8820 " is missing the disk information",
8823 if not self.op.nics:
8825 for idx in range(constants.MAX_NICS):
8826 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8828 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8829 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8836 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8837 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8839 if (self.op.hypervisor is None and
8840 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8841 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8843 if einfo.has_section(constants.INISECT_HYP):
8844 # use the export parameters but do not override the ones
8845 # specified by the user
8846 for name, value in einfo.items(constants.INISECT_HYP):
8847 if name not in self.op.hvparams:
8848 self.op.hvparams[name] = value
8850 if einfo.has_section(constants.INISECT_BEP):
8851 # use the parameters, without overriding
8852 for name, value in einfo.items(constants.INISECT_BEP):
8853 if name not in self.op.beparams:
8854 self.op.beparams[name] = value
8856 # try to read the parameters old style, from the main section
8857 for name in constants.BES_PARAMETERS:
8858 if (name not in self.op.beparams and
8859 einfo.has_option(constants.INISECT_INS, name)):
8860 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8862 if einfo.has_section(constants.INISECT_OSP):
8863 # use the parameters, without overriding
8864 for name, value in einfo.items(constants.INISECT_OSP):
8865 if name not in self.op.osparams:
8866 self.op.osparams[name] = value
8868 def _RevertToDefaults(self, cluster):
8869 """Revert the instance parameters to the default values.
8873 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8874 for name in self.op.hvparams.keys():
8875 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8876 del self.op.hvparams[name]
8878 be_defs = cluster.SimpleFillBE({})
8879 for name in self.op.beparams.keys():
8880 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8881 del self.op.beparams[name]
8883 nic_defs = cluster.SimpleFillNIC({})
8884 for nic in self.op.nics:
8885 for name in constants.NICS_PARAMETERS:
8886 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8889 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8890 for name in self.op.osparams.keys():
8891 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8892 del self.op.osparams[name]
8894 def _CalculateFileStorageDir(self):
8895 """Calculate final instance file storage dir.
8898 # file storage dir calculation/check
8899 self.instance_file_storage_dir = None
8900 if self.op.disk_template in constants.DTS_FILEBASED:
8901 # build the full file storage dir path
8904 if self.op.disk_template == constants.DT_SHARED_FILE:
8905 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8907 get_fsd_fn = self.cfg.GetFileStorageDir
8909 cfg_storagedir = get_fsd_fn()
8910 if not cfg_storagedir:
8911 raise errors.OpPrereqError("Cluster file storage dir not defined")
8912 joinargs.append(cfg_storagedir)
8914 if self.op.file_storage_dir is not None:
8915 joinargs.append(self.op.file_storage_dir)
8917 joinargs.append(self.op.instance_name)
8919 # pylint: disable=W0142
8920 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8922 def CheckPrereq(self):
8923 """Check prerequisites.
8926 self._CalculateFileStorageDir()
8928 if self.op.mode == constants.INSTANCE_IMPORT:
8929 export_info = self._ReadExportInfo()
8930 self._ReadExportParams(export_info)
8932 if (not self.cfg.GetVGName() and
8933 self.op.disk_template not in constants.DTS_NOT_LVM):
8934 raise errors.OpPrereqError("Cluster does not support lvm-based"
8935 " instances", errors.ECODE_STATE)
8937 if (self.op.hypervisor is None or
8938 self.op.hypervisor == constants.VALUE_AUTO):
8939 self.op.hypervisor = self.cfg.GetHypervisorType()
8941 cluster = self.cfg.GetClusterInfo()
8942 enabled_hvs = cluster.enabled_hypervisors
8943 if self.op.hypervisor not in enabled_hvs:
8944 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8945 " cluster (%s)" % (self.op.hypervisor,
8946 ",".join(enabled_hvs)),
8949 # Check tag validity
8950 for tag in self.op.tags:
8951 objects.TaggableObject.ValidateTag(tag)
8953 # check hypervisor parameter syntax (locally)
8954 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8955 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8957 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8958 hv_type.CheckParameterSyntax(filled_hvp)
8959 self.hv_full = filled_hvp
8960 # check that we don't specify global parameters on an instance
8961 _CheckGlobalHvParams(self.op.hvparams)
8963 # fill and remember the beparams dict
8964 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8965 for param, value in self.op.beparams.iteritems():
8966 if value == constants.VALUE_AUTO:
8967 self.op.beparams[param] = default_beparams[param]
8968 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8969 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8971 # build os parameters
8972 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8974 # now that hvp/bep are in final format, let's reset to defaults, if requested
8976 if self.op.identify_defaults:
8977 self._RevertToDefaults(cluster)
8981 for idx, nic in enumerate(self.op.nics):
8982 nic_mode_req = nic.get(constants.INIC_MODE, None)
8983 nic_mode = nic_mode_req
8984 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8985 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8987 # in routed mode, for the first nic, the default ip is 'auto'
8988 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8989 default_ip_mode = constants.VALUE_AUTO
8991 default_ip_mode = constants.VALUE_NONE
8993 # ip validity checks
8994 ip = nic.get(constants.INIC_IP, default_ip_mode)
8995 if ip is None or ip.lower() == constants.VALUE_NONE:
8997 elif ip.lower() == constants.VALUE_AUTO:
8998 if not self.op.name_check:
8999 raise errors.OpPrereqError("IP address set to auto but name checks"
9000 " have been skipped",
9002 nic_ip = self.hostname1.ip
9004 if not netutils.IPAddress.IsValid(ip):
9005 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9009 # TODO: check the ip address for uniqueness
9010 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9011 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9014 # MAC address verification
9015 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9016 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9017 mac = utils.NormalizeAndValidateMac(mac)
9020 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9021 except errors.ReservationError:
9022 raise errors.OpPrereqError("MAC address %s already in use"
9023 " in cluster" % mac,
9024 errors.ECODE_NOTUNIQUE)
9026 # Build nic parameters
9027 link = nic.get(constants.INIC_LINK, None)
9028 if link == constants.VALUE_AUTO:
9029 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9032 nicparams[constants.NIC_MODE] = nic_mode
9034 nicparams[constants.NIC_LINK] = link
9036 check_params = cluster.SimpleFillNIC(nicparams)
9037 objects.NIC.CheckParameterSyntax(check_params)
9038 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9040 # disk checks/pre-build
9041 default_vg = self.cfg.GetVGName()
9043 for disk in self.op.disks:
9044 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9045 if mode not in constants.DISK_ACCESS_SET:
9046 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9047 mode, errors.ECODE_INVAL)
9048 size = disk.get(constants.IDISK_SIZE, None)
9050 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9053 except (TypeError, ValueError):
9054 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9057 data_vg = disk.get(constants.IDISK_VG, default_vg)
9059 constants.IDISK_SIZE: size,
9060 constants.IDISK_MODE: mode,
9061 constants.IDISK_VG: data_vg,
9062 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9064 if constants.IDISK_ADOPT in disk:
9065 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9066 self.disks.append(new_disk)
9068 if self.op.mode == constants.INSTANCE_IMPORT:
9070 for idx in range(len(self.disks)):
9071 option = "disk%d_dump" % idx
9072 if export_info.has_option(constants.INISECT_INS, option):
9073 # FIXME: are the old os-es, disk sizes, etc. useful?
9074 export_name = export_info.get(constants.INISECT_INS, option)
9075 image = utils.PathJoin(self.op.src_path, export_name)
9076 disk_images.append(image)
9078 disk_images.append(False)
9080 self.src_images = disk_images
9082 old_name = export_info.get(constants.INISECT_INS, "name")
9083 if self.op.instance_name == old_name:
9084 for idx, nic in enumerate(self.nics):
9085 if nic.mac == constants.VALUE_AUTO:
9086 nic_mac_ini = "nic%d_mac" % idx
9087 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9089 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9091 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9092 if self.op.ip_check:
9093 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9094 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9095 (self.check_ip, self.op.instance_name),
9096 errors.ECODE_NOTUNIQUE)
9098 #### mac address generation
9099 # By generating here the mac address both the allocator and the hooks get
9100 # the real final mac address rather than the 'auto' or 'generate' value.
9101 # There is a race condition between the generation and the instance object
9102 # creation, which means that we know the mac is valid now, but we're not
9103 # sure it will be when we actually add the instance. If things go bad
9104 # adding the instance will abort because of a duplicate mac, and the
9105 # creation job will fail.
9106 for nic in self.nics:
9107 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9108 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9112 if self.op.iallocator is not None:
9113 self._RunAllocator()
9115 # Release all unneeded node locks
9116 _ReleaseLocks(self, locking.LEVEL_NODE,
9117 keep=filter(None, [self.op.pnode, self.op.snode,
9120 #### node related checks
9122 # check primary node
9123 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9124 assert self.pnode is not None, \
9125 "Cannot retrieve locked node %s" % self.op.pnode
9127 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9128 pnode.name, errors.ECODE_STATE)
9130 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9131 pnode.name, errors.ECODE_STATE)
9132 if not pnode.vm_capable:
9133 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9134 " '%s'" % pnode.name, errors.ECODE_STATE)
9136 self.secondaries = []
9138 # mirror node verification
9139 if self.op.disk_template in constants.DTS_INT_MIRROR:
9140 if self.op.snode == pnode.name:
9141 raise errors.OpPrereqError("The secondary node cannot be the"
9142 " primary node", errors.ECODE_INVAL)
9143 _CheckNodeOnline(self, self.op.snode)
9144 _CheckNodeNotDrained(self, self.op.snode)
9145 _CheckNodeVmCapable(self, self.op.snode)
9146 self.secondaries.append(self.op.snode)
9148 nodenames = [pnode.name] + self.secondaries
9150 if not self.adopt_disks:
9151 # Check lv size requirements, if not adopting
9152 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9153 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9155 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9156 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9157 disk[constants.IDISK_ADOPT])
9158 for disk in self.disks])
9159 if len(all_lvs) != len(self.disks):
9160 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9162 for lv_name in all_lvs:
9164 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9165 # to ReserveLV use the same syntax
9166 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9167 except errors.ReservationError:
9168 raise errors.OpPrereqError("LV named %s used by another instance" %
9169 lv_name, errors.ECODE_NOTUNIQUE)
9171 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9172 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9174 node_lvs = self.rpc.call_lv_list([pnode.name],
9175 vg_names.payload.keys())[pnode.name]
9176 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9177 node_lvs = node_lvs.payload
9179 delta = all_lvs.difference(node_lvs.keys())
9181 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9182 utils.CommaJoin(delta),
9184 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9186 raise errors.OpPrereqError("Online logical volumes found, cannot"
9187 " adopt: %s" % utils.CommaJoin(online_lvs),
9189 # update the size of disk based on what is found
9190 for dsk in self.disks:
9191 dsk[constants.IDISK_SIZE] = \
9192 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9193 dsk[constants.IDISK_ADOPT])][0]))
9195 elif self.op.disk_template == constants.DT_BLOCK:
9196 # Normalize and de-duplicate device paths
9197 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9198 for disk in self.disks])
9199 if len(all_disks) != len(self.disks):
9200 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9202 baddisks = [d for d in all_disks
9203 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9205 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9206 " cannot be adopted" %
9207 (", ".join(baddisks),
9208 constants.ADOPTABLE_BLOCKDEV_ROOT),
9211 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9212 list(all_disks))[pnode.name]
9213 node_disks.Raise("Cannot get block device information from node %s" %
9215 node_disks = node_disks.payload
9216 delta = all_disks.difference(node_disks.keys())
9218 raise errors.OpPrereqError("Missing block device(s): %s" %
9219 utils.CommaJoin(delta),
9221 for dsk in self.disks:
9222 dsk[constants.IDISK_SIZE] = \
9223 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9225 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9227 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9228 # check OS parameters (remotely)
9229 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9231 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9233 # memory check on primary node
9235 _CheckNodeFreeMemory(self, self.pnode.name,
9236 "creating instance %s" % self.op.instance_name,
9237 self.be_full[constants.BE_MEMORY],
9240 self.dry_run_result = list(nodenames)
9242 def Exec(self, feedback_fn):
9243 """Create and add the instance to the cluster.
9246 instance = self.op.instance_name
9247 pnode_name = self.pnode.name
9249 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9250 self.owned_locks(locking.LEVEL_NODE)), \
9251 "Node locks differ from node resource locks"
9253 ht_kind = self.op.hypervisor
9254 if ht_kind in constants.HTS_REQ_PORT:
9255 network_port = self.cfg.AllocatePort()
9259 disks = _GenerateDiskTemplate(self,
9260 self.op.disk_template,
9261 instance, pnode_name,
9264 self.instance_file_storage_dir,
9265 self.op.file_driver,
9269 iobj = objects.Instance(name=instance, os=self.op.os_type,
9270 primary_node=pnode_name,
9271 nics=self.nics, disks=disks,
9272 disk_template=self.op.disk_template,
9273 admin_state=constants.ADMINST_DOWN,
9274 network_port=network_port,
9275 beparams=self.op.beparams,
9276 hvparams=self.op.hvparams,
9277 hypervisor=self.op.hypervisor,
9278 osparams=self.op.osparams,
9282 for tag in self.op.tags:
9285 if self.adopt_disks:
9286 if self.op.disk_template == constants.DT_PLAIN:
9287 # rename LVs to the newly-generated names; we need to construct
9288 # 'fake' LV disks with the old data, plus the new unique_id
9289 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9291 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9292 rename_to.append(t_dsk.logical_id)
9293 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9294 self.cfg.SetDiskID(t_dsk, pnode_name)
9295 result = self.rpc.call_blockdev_rename(pnode_name,
9296 zip(tmp_disks, rename_to))
9297       result.Raise("Failed to rename adopted LVs")
9299 feedback_fn("* creating instance disks...")
9301 _CreateDisks(self, iobj)
9302 except errors.OpExecError:
9303 self.LogWarning("Device creation failed, reverting...")
9305 _RemoveDisks(self, iobj)
9307 self.cfg.ReleaseDRBDMinors(instance)
9310 feedback_fn("adding instance %s to cluster config" % instance)
9312 self.cfg.AddInstance(iobj, self.proc.GetECId())
9314 # Declare that we don't want to remove the instance lock anymore, as we've
9315 # added the instance to the config
9316 del self.remove_locks[locking.LEVEL_INSTANCE]
9318 if self.op.mode == constants.INSTANCE_IMPORT:
9319 # Release unused nodes
9320 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9323 _ReleaseLocks(self, locking.LEVEL_NODE)
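    # Disks taken over via adoption already contain data and are never wiped;
    # for newly created disks the cluster-wide prealloc_wipe_disks flag decides.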
9326 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9327 feedback_fn("* wiping instance disks...")
9329 _WipeDisks(self, iobj)
9330 except errors.OpExecError, err:
9331 logging.exception("Wiping disks failed")
9332 self.LogWarning("Wiping instance disks failed (%s)", err)
9336 # Something is already wrong with the disks, don't do anything else
9338 elif self.op.wait_for_sync:
9339 disk_abort = not _WaitForSync(self, iobj)
9340 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9341 # make sure the disks are not degraded (still sync-ing is ok)
9342 feedback_fn("* checking mirrors status")
9343 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9348 _RemoveDisks(self, iobj)
9349 self.cfg.RemoveInstance(iobj.name)
9350 # Make sure the instance lock gets removed
9351 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9352 raise errors.OpExecError("There are some degraded disks for"
9355 # Release all node resource locks
9356 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9358 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9359 if self.op.mode == constants.INSTANCE_CREATE:
9360 if not self.op.no_install:
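          # For internally mirrored templates the initial resync would compete
          # with the OS installation I/O, so it is paused during the install;
          # if wait_for_sync was requested the resync already finished above.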
9361 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9362 not self.op.wait_for_sync)
9364 feedback_fn("* pausing disk sync to install instance OS")
9365 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9367 for idx, success in enumerate(result.payload):
9369 logging.warn("pause-sync of instance %s for disk %d failed",
9372 feedback_fn("* running the instance OS create scripts...")
9373 # FIXME: pass debug option from opcode to backend
9375 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9376 self.op.debug_level)
9378 feedback_fn("* resuming disk sync")
9379 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9381 for idx, success in enumerate(result.payload):
9383 logging.warn("resume-sync of instance %s for disk %d failed",
9386 os_add_result.Raise("Could not add os for instance %s"
9387 " on node %s" % (instance, pnode_name))
9389 elif self.op.mode == constants.INSTANCE_IMPORT:
9390 feedback_fn("* running the instance OS import scripts...")
9394 for idx, image in enumerate(self.src_images):
9398 # FIXME: pass debug option from opcode to backend
9399 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9400 constants.IEIO_FILE, (image, ),
9401 constants.IEIO_SCRIPT,
9402 (iobj.disks[idx], idx),
9404 transfers.append(dt)
9407 masterd.instance.TransferInstanceData(self, feedback_fn,
9408 self.op.src_node, pnode_name,
9409 self.pnode.secondary_ip,
9411 if not compat.all(import_result):
9412 self.LogWarning("Some disks for instance %s on node %s were not"
9413 " imported successfully" % (instance, pnode_name))
9415 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9416 feedback_fn("* preparing remote import...")
9417 # The source cluster will stop the instance before attempting to make a
9418 # connection. In some cases stopping an instance can take a long time,
9419 # hence the shutdown timeout is added to the connection timeout.
9420 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9421 self.op.source_shutdown_timeout)
9422 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9424 assert iobj.primary_node == self.pnode.name
9426 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9427 self.source_x509_ca,
9428 self._cds, timeouts)
9429 if not compat.all(disk_results):
9430 # TODO: Should the instance still be started, even if some disks
9431 # failed to import (valid for local imports, too)?
9432 self.LogWarning("Some disks for instance %s on node %s were not"
9433 " imported successfully" % (instance, pnode_name))
9435 # Run rename script on newly imported instance
9436 assert iobj.name == instance
9437 feedback_fn("Running rename script for %s" % instance)
9438 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9439 self.source_instance_name,
9440 self.op.debug_level)
9442 self.LogWarning("Failed to run rename script for %s on node"
9443 " %s: %s" % (instance, pnode_name, result.fail_msg))
9446 # also checked in the prereq part
9447 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9450 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9453 iobj.admin_state = constants.ADMINST_UP
9454 self.cfg.Update(iobj, feedback_fn)
9455 logging.info("Starting instance %s on node %s", instance, pnode_name)
9456 feedback_fn("* starting instance...")
9457 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9459 result.Raise("Could not start instance")
9461 return list(iobj.all_nodes)
9464 class LUInstanceConsole(NoHooksLU):
9465 """Connect to an instance's console.
9467 This is somewhat special in that it returns the command line that
9468     you need to run on the master node in order to connect to the console.
9474 def ExpandNames(self):
9475 self.share_locks = _ShareAll()
9476 self._ExpandAndLockInstance()
9478 def CheckPrereq(self):
9479 """Check prerequisites.
9481 This checks that the instance is in the cluster.
9484 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9485 assert self.instance is not None, \
9486 "Cannot retrieve locked instance %s" % self.op.instance_name
9487 _CheckNodeOnline(self, self.instance.primary_node)
9489 def Exec(self, feedback_fn):
9490 """Connect to the console of an instance
9493 instance = self.instance
9494 node = instance.primary_node
9496 node_insts = self.rpc.call_instance_list([node],
9497 [instance.hypervisor])[node]
9498 node_insts.Raise("Can't get node information from %s" % node)
9500 if instance.name not in node_insts.payload:
9501 if instance.admin_state == constants.ADMINST_UP:
9502 state = constants.INSTST_ERRORDOWN
9503 elif instance.admin_state == constants.ADMINST_DOWN:
9504 state = constants.INSTST_ADMINDOWN
9506 state = constants.INSTST_ADMINOFFLINE
9507 raise errors.OpExecError("Instance %s is not running (state %s)" %
9508 (instance.name, state))
9510 logging.debug("Connecting to console of %s on %s", instance.name, node)
9512 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9515 def _GetInstanceConsole(cluster, instance):
9516 """Returns console information for an instance.
9518 @type cluster: L{objects.Cluster}
9519 @type instance: L{objects.Instance}
9523 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9524 # beparams and hvparams are passed separately, to avoid editing the
9525 # instance and then saving the defaults in the instance itself.
9526 hvparams = cluster.FillHV(instance)
9527 beparams = cluster.FillBE(instance)
9528 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9530 assert console.instance == instance.name
9531 assert console.Validate()
9533 return console.ToDict()
9536 class LUInstanceReplaceDisks(LogicalUnit):
9537 """Replace the disks of an instance.
9540 HPATH = "mirrors-replace"
9541 HTYPE = constants.HTYPE_INSTANCE
9544 def CheckArguments(self):
9545 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9548 def ExpandNames(self):
9549 self._ExpandAndLockInstance()
9551 assert locking.LEVEL_NODE not in self.needed_locks
9552 assert locking.LEVEL_NODE_RES not in self.needed_locks
9553 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9555 assert self.op.iallocator is None or self.op.remote_node is None, \
9556 "Conflicting options"
9558 if self.op.remote_node is not None:
9559 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9561 # Warning: do not remove the locking of the new secondary here
9562 # unless DRBD8.AddChildren is changed to work in parallel;
9563 # currently it doesn't since parallel invocations of
9564 # FindUnusedMinor will conflict
9565 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9566 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9568 self.needed_locks[locking.LEVEL_NODE] = []
9569 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9571 if self.op.iallocator is not None:
9572 # iallocator will select a new node in the same group
9573 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9575 self.needed_locks[locking.LEVEL_NODE_RES] = []
9577 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9578 self.op.iallocator, self.op.remote_node,
9579 self.op.disks, False, self.op.early_release)
9581 self.tasklets = [self.replacer]
9583 def DeclareLocks(self, level):
9584 if level == locking.LEVEL_NODEGROUP:
9585 assert self.op.remote_node is None
9586 assert self.op.iallocator is not None
9587 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9589 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9590 # Lock all groups used by instance optimistically; this requires going
9591 # via the node before it's locked, requiring verification later on
9592 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9593 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9595 elif level == locking.LEVEL_NODE:
9596 if self.op.iallocator is not None:
9597 assert self.op.remote_node is None
9598 assert not self.needed_locks[locking.LEVEL_NODE]
9600 # Lock member nodes of all locked groups
9601 self.needed_locks[locking.LEVEL_NODE] = [node_name
9602 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9603 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9605 self._LockInstancesNodes()
9606 elif level == locking.LEVEL_NODE_RES:
9608 self.needed_locks[locking.LEVEL_NODE_RES] = \
9609 self.needed_locks[locking.LEVEL_NODE]
9611 def BuildHooksEnv(self):
9614 This runs on the master, the primary and all the secondaries.
9617 instance = self.replacer.instance
9619 "MODE": self.op.mode,
9620 "NEW_SECONDARY": self.op.remote_node,
9621 "OLD_SECONDARY": instance.secondary_nodes[0],
9623 env.update(_BuildInstanceHookEnvByObject(self, instance))
9626 def BuildHooksNodes(self):
9627 """Build hooks nodes.
9630 instance = self.replacer.instance
9632 self.cfg.GetMasterNode(),
9633 instance.primary_node,
9635 if self.op.remote_node is not None:
9636 nl.append(self.op.remote_node)
9639 def CheckPrereq(self):
9640 """Check prerequisites.
9643 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9644 self.op.iallocator is None)
9646 # Verify if node group locks are still correct
9647 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9649 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9651 return LogicalUnit.CheckPrereq(self)
9654 class TLReplaceDisks(Tasklet):
9655 """Replaces disks for an instance.
9657 Note: Locking is not within the scope of this class.
9660 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9661 disks, delay_iallocator, early_release):
9662 """Initializes this class.
9665 Tasklet.__init__(self, lu)
9668 self.instance_name = instance_name
9670 self.iallocator_name = iallocator_name
9671 self.remote_node = remote_node
9673 self.delay_iallocator = delay_iallocator
9674 self.early_release = early_release
9677 self.instance = None
9678 self.new_node = None
9679 self.target_node = None
9680 self.other_node = None
9681 self.remote_node_info = None
9682 self.node_secondary_ip = None
9685 def CheckArguments(mode, remote_node, iallocator):
9686 """Helper function for users of this class.
9689 # check for valid parameter combination
9690 if mode == constants.REPLACE_DISK_CHG:
9691 if remote_node is None and iallocator is None:
9692 raise errors.OpPrereqError("When changing the secondary either an"
9693 " iallocator script must be used or the"
9694 " new node given", errors.ECODE_INVAL)
9696 if remote_node is not None and iallocator is not None:
9697 raise errors.OpPrereqError("Give either the iallocator or the new"
9698 " secondary, not both", errors.ECODE_INVAL)
9700 elif remote_node is not None or iallocator is not None:
9701 # Not replacing the secondary
9702 raise errors.OpPrereqError("The iallocator and new node options can"
9703 " only be used when changing the"
9704 " secondary node", errors.ECODE_INVAL)
9707 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9708 """Compute a new secondary node using an IAllocator.
9711 ial = IAllocator(lu.cfg, lu.rpc,
9712 mode=constants.IALLOCATOR_MODE_RELOC,
9714 relocate_from=list(relocate_from))
9716 ial.Run(iallocator_name)
9719 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9720 " %s" % (iallocator_name, ial.info),
9723 if len(ial.result) != ial.required_nodes:
9724 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9725 " of nodes (%s), required %s" %
9727 len(ial.result), ial.required_nodes),
9730 remote_node_name = ial.result[0]
9732 lu.LogInfo("Selected new secondary for instance '%s': %s",
9733 instance_name, remote_node_name)
9735 return remote_node_name
9737 def _FindFaultyDisks(self, node_name):
9738 """Wrapper for L{_FindFaultyInstanceDisks}.
9741 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9744 def _CheckDisksActivated(self, instance):
9745 """Checks if the instance disks are activated.
9747 @param instance: The instance to check disks
9748 @return: True if they are activated, False otherwise
9751 nodes = instance.all_nodes
9753 for idx, dev in enumerate(instance.disks):
9755 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9756 self.cfg.SetDiskID(dev, node)
9758 result = self.rpc.call_blockdev_find(node, dev)
9762 elif result.fail_msg or not result.payload:
9767 def CheckPrereq(self):
9768 """Check prerequisites.
9770 This checks that the instance is in the cluster.
9773 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9774 assert instance is not None, \
9775 "Cannot retrieve locked instance %s" % self.instance_name
9777 if instance.disk_template != constants.DT_DRBD8:
9778 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9779 " instances", errors.ECODE_INVAL)
9781 if len(instance.secondary_nodes) != 1:
9782 raise errors.OpPrereqError("The instance has a strange layout,"
9783 " expected one secondary but found %d" %
9784 len(instance.secondary_nodes),
9787 if not self.delay_iallocator:
9788 self._CheckPrereq2()
9790 def _CheckPrereq2(self):
9791 """Check prerequisites, second part.
9793 This function should always be part of CheckPrereq. It was separated and is
9794 now called from Exec because during node evacuation iallocator was only
9795     called with an unmodified cluster model, not taking planned changes into account.
9799 instance = self.instance
9800 secondary_node = instance.secondary_nodes[0]
9802 if self.iallocator_name is None:
9803 remote_node = self.remote_node
9805 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9806 instance.name, instance.secondary_nodes)
9808 if remote_node is None:
9809 self.remote_node_info = None
9811 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9812 "Remote node '%s' is not locked" % remote_node
9814 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9815 assert self.remote_node_info is not None, \
9816 "Cannot retrieve locked node %s" % remote_node
9818 if remote_node == self.instance.primary_node:
9819 raise errors.OpPrereqError("The specified node is the primary node of"
9820 " the instance", errors.ECODE_INVAL)
9822 if remote_node == secondary_node:
9823 raise errors.OpPrereqError("The specified node is already the"
9824 " secondary node of the instance",
9827 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9828 constants.REPLACE_DISK_CHG):
9829 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9832 if self.mode == constants.REPLACE_DISK_AUTO:
9833 if not self._CheckDisksActivated(instance):
9834 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9835 " first" % self.instance_name,
9837 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9838 faulty_secondary = self._FindFaultyDisks(secondary_node)
9840 if faulty_primary and faulty_secondary:
9841 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9842 " one node and can not be repaired"
9843 " automatically" % self.instance_name,
9847 self.disks = faulty_primary
9848 self.target_node = instance.primary_node
9849 self.other_node = secondary_node
9850 check_nodes = [self.target_node, self.other_node]
9851 elif faulty_secondary:
9852 self.disks = faulty_secondary
9853 self.target_node = secondary_node
9854 self.other_node = instance.primary_node
9855 check_nodes = [self.target_node, self.other_node]
9861 # Non-automatic modes
9862 if self.mode == constants.REPLACE_DISK_PRI:
9863 self.target_node = instance.primary_node
9864 self.other_node = secondary_node
9865 check_nodes = [self.target_node, self.other_node]
9867 elif self.mode == constants.REPLACE_DISK_SEC:
9868 self.target_node = secondary_node
9869 self.other_node = instance.primary_node
9870 check_nodes = [self.target_node, self.other_node]
9872 elif self.mode == constants.REPLACE_DISK_CHG:
9873 self.new_node = remote_node
9874 self.other_node = instance.primary_node
9875 self.target_node = secondary_node
9876 check_nodes = [self.new_node, self.other_node]
9878 _CheckNodeNotDrained(self.lu, remote_node)
9879 _CheckNodeVmCapable(self.lu, remote_node)
9881 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9882 assert old_node_info is not None
9883 if old_node_info.offline and not self.early_release:
9884 # doesn't make sense to delay the release
9885 self.early_release = True
9886 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9887 " early-release mode", secondary_node)
9890 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9893 # If not specified all disks should be replaced
9895 self.disks = range(len(self.instance.disks))
9897 for node in check_nodes:
9898 _CheckNodeOnline(self.lu, node)
9900 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9903 if node_name is not None)
9905 # Release unneeded node and node resource locks
9906 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9907 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9909 # Release any owned node group
9910 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9911 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9913 # Check whether disks are valid
9914 for disk_idx in self.disks:
9915 instance.FindDisk(disk_idx)
9917 # Get secondary node IP addresses
9918 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9919 in self.cfg.GetMultiNodeInfo(touched_nodes))
9921 def Exec(self, feedback_fn):
9922 """Execute disk replacement.
9924 This dispatches the disk replacement to the appropriate handler.
9927 if self.delay_iallocator:
9928 self._CheckPrereq2()
9931 # Verify owned locks before starting operation
9932 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9933 assert set(owned_nodes) == set(self.node_secondary_ip), \
9934 ("Incorrect node locks, owning %s, expected %s" %
9935 (owned_nodes, self.node_secondary_ip.keys()))
9936 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9937 self.lu.owned_locks(locking.LEVEL_NODE_RES))
9939 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9940 assert list(owned_instances) == [self.instance_name], \
9941 "Instance '%s' not locked" % self.instance_name
9943 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9944 "Should not own any node group lock at this point"
9947 feedback_fn("No disks need replacement")
9950 feedback_fn("Replacing disk(s) %s for %s" %
9951 (utils.CommaJoin(self.disks), self.instance.name))
9953 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9955 # Activate the instance disks if we're replacing them on a down instance
9957 _StartInstanceDisks(self.lu, self.instance, True)
9960 # Should we replace the secondary node?
9961 if self.new_node is not None:
9962 fn = self._ExecDrbd8Secondary
9964 fn = self._ExecDrbd8DiskOnly
9966 result = fn(feedback_fn)
9968       # Deactivate the instance disks if we're replacing them on a down instance
9971 _SafeShutdownInstanceDisks(self.lu, self.instance)
9973 assert not self.lu.owned_locks(locking.LEVEL_NODE)
9976 # Verify owned locks
9977 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9978 nodes = frozenset(self.node_secondary_ip)
9979 assert ((self.early_release and not owned_nodes) or
9980 (not self.early_release and not (set(owned_nodes) - nodes))), \
9981 ("Not owning the correct locks, early_release=%s, owned=%r,"
9982 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9986 def _CheckVolumeGroup(self, nodes):
9987 self.lu.LogInfo("Checking volume groups")
9989 vgname = self.cfg.GetVGName()
9991 # Make sure volume group exists on all involved nodes
9992 results = self.rpc.call_vg_list(nodes)
9994 raise errors.OpExecError("Can't list volume groups on the nodes")
9998 res.Raise("Error checking node %s" % node)
9999 if vgname not in res.payload:
10000 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10003 def _CheckDisksExistence(self, nodes):
10004 # Check disk existence
10005 for idx, dev in enumerate(self.instance.disks):
10006 if idx not in self.disks:
10010 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10011 self.cfg.SetDiskID(dev, node)
10013 result = self.rpc.call_blockdev_find(node, dev)
10015 msg = result.fail_msg
10016 if msg or not result.payload:
10018 msg = "disk not found"
10019 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10022 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10023 for idx, dev in enumerate(self.instance.disks):
10024 if idx not in self.disks:
10027 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10030 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10032 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10033 " replace disks for instance %s" %
10034 (node_name, self.instance.name))
10036 def _CreateNewStorage(self, node_name):
10037 """Create new storage on the primary or secondary node.
10039 This is only used for same-node replaces, not for changing the
10040 secondary node, hence we don't want to modify the existing disk.
10045 for idx, dev in enumerate(self.instance.disks):
10046 if idx not in self.disks:
10049 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10051 self.cfg.SetDiskID(dev, node_name)
10053 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10054 names = _GenerateUniqueNames(self.lu, lv_names)
10056 vg_data = dev.children[0].logical_id[0]
10057 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10058 logical_id=(vg_data, names[0]))
10059 vg_meta = dev.children[1].logical_id[0]
10060 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10061 logical_id=(vg_meta, names[1]))
10063 new_lvs = [lv_data, lv_meta]
10064 old_lvs = [child.Copy() for child in dev.children]
10065 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10067 # we pass force_create=True to force the LVM creation
10068 for new_lv in new_lvs:
10069 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10070 _GetInstanceInfoText(self.instance), False)
10074 def _CheckDevices(self, node_name, iv_names):
10075 for name, (dev, _, _) in iv_names.iteritems():
10076 self.cfg.SetDiskID(dev, node_name)
10078 result = self.rpc.call_blockdev_find(node_name, dev)
10080 msg = result.fail_msg
10081 if msg or not result.payload:
10083 msg = "disk not found"
10084 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10087 if result.payload.is_degraded:
10088 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10090 def _RemoveOldStorage(self, node_name, iv_names):
10091 for name, (_, old_lvs, _) in iv_names.iteritems():
10092 self.lu.LogInfo("Remove logical volumes for %s" % name)
10095 self.cfg.SetDiskID(lv, node_name)
10097 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10099 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10100 hint="remove unused LVs manually")
10102 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10103 """Replace a disk on the primary or secondary for DRBD 8.
10105 The algorithm for replace is quite complicated:
10107 1. for each disk to be replaced:
10109 1. create new LVs on the target node with unique names
10110 1. detach old LVs from the drbd device
10111 1. rename old LVs to name_replaced.<time_t>
10112 1. rename new LVs to old LVs
10113 1. attach the new LVs (with the old names now) to the drbd device
10115 1. wait for sync across all devices
10117 1. for each modified disk:
10119     1. remove old LVs (which have the name name_replaced.<time_t>)
10121 Failures are not very well handled.
10126 # Step: check device activation
10127 self.lu.LogStep(1, steps_total, "Check device existence")
10128 self._CheckDisksExistence([self.other_node, self.target_node])
10129 self._CheckVolumeGroup([self.target_node, self.other_node])
10131 # Step: check other node consistency
10132 self.lu.LogStep(2, steps_total, "Check peer consistency")
10133 self._CheckDisksConsistency(self.other_node,
10134 self.other_node == self.instance.primary_node,
10137 # Step: create new storage
10138 self.lu.LogStep(3, steps_total, "Allocate new storage")
10139 iv_names = self._CreateNewStorage(self.target_node)
10141 # Step: for each lv, detach+rename*2+attach
10142 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10143 for dev, old_lvs, new_lvs in iv_names.itervalues():
10144 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10146 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10148 result.Raise("Can't detach drbd from local storage on node"
10149 " %s for device %s" % (self.target_node, dev.iv_name))
10151 #cfg.Update(instance)
10153 # ok, we created the new LVs, so now we know we have the needed
10154 # storage; as such, we proceed on the target node to rename
10155 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10156 # using the assumption that logical_id == physical_id (which in
10157 # turn is the unique_id on that node)
10159 # FIXME(iustin): use a better name for the replaced LVs
10160 temp_suffix = int(time.time())
10161 ren_fn = lambda d, suff: (d.physical_id[0],
10162 d.physical_id[1] + "_replaced-%s" % suff)
10164 # Build the rename list based on what LVs exist on the node
10165 rename_old_to_new = []
10166 for to_ren in old_lvs:
10167 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10168 if not result.fail_msg and result.payload:
10170 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10172 self.lu.LogInfo("Renaming the old LVs on the target node")
10173 result = self.rpc.call_blockdev_rename(self.target_node,
10175 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10177 # Now we rename the new LVs to the old LVs
10178 self.lu.LogInfo("Renaming the new LVs on the target node")
10179 rename_new_to_old = [(new, old.physical_id)
10180 for old, new in zip(old_lvs, new_lvs)]
10181 result = self.rpc.call_blockdev_rename(self.target_node,
10183 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10185 # Intermediate steps of in memory modifications
10186 for old, new in zip(old_lvs, new_lvs):
10187 new.logical_id = old.logical_id
10188 self.cfg.SetDiskID(new, self.target_node)
10190 # We need to modify old_lvs so that removal later removes the
10191       # right LVs, not the newly added ones; note that old_lvs is a copy here
10193 for disk in old_lvs:
10194 disk.logical_id = ren_fn(disk, temp_suffix)
10195 self.cfg.SetDiskID(disk, self.target_node)
10197 # Now that the new lvs have the old name, we can add them to the device
10198 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10199 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10201 msg = result.fail_msg
10203 for new_lv in new_lvs:
10204 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10207 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10208 hint=("cleanup manually the unused logical"
10210 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
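    # Steps 1-4 above are fixed; the numbering of the remaining steps depends
    # on whether the old storage is removed before or after the resync, so
    # the step numbers are taken from a running counter.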
10212 cstep = itertools.count(5)
10214 if self.early_release:
10215 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10216 self._RemoveOldStorage(self.target_node, iv_names)
10217 # TODO: Check if releasing locks early still makes sense
10218 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10220 # Release all resource locks except those used by the instance
10221 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10222 keep=self.node_secondary_ip.keys())
10224 # Release all node locks while waiting for sync
10225 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10227 # TODO: Can the instance lock be downgraded here? Take the optional disk
10228 # shutdown in the caller into consideration.
10231 # This can fail as the old devices are degraded and _WaitForSync
10232 # does a combined result over all disks, so we don't check its return value
10233 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10234 _WaitForSync(self.lu, self.instance)
10236 # Check all devices manually
10237 self._CheckDevices(self.instance.primary_node, iv_names)
10239 # Step: remove old storage
10240 if not self.early_release:
10241 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10242 self._RemoveOldStorage(self.target_node, iv_names)
10244 def _ExecDrbd8Secondary(self, feedback_fn):
10245 """Replace the secondary node for DRBD 8.
10247 The algorithm for replace is quite complicated:
10248 - for all disks of the instance:
10249 - create new LVs on the new node with same names
10250 - shutdown the drbd device on the old secondary
10251 - disconnect the drbd network on the primary
10252 - create the drbd device on the new secondary
10253 - network attach the drbd on the primary, using an artifice:
10254 the drbd code for Attach() will connect to the network if it
10255 finds a device which is connected to the good local disks but
10256 not network enabled
10257 - wait for sync across all devices
10258 - remove all disks from the old secondary
10260 Failures are not very well handled.
10265 pnode = self.instance.primary_node
10267 # Step: check device activation
10268 self.lu.LogStep(1, steps_total, "Check device existence")
10269 self._CheckDisksExistence([self.instance.primary_node])
10270 self._CheckVolumeGroup([self.instance.primary_node])
10272 # Step: check other node consistency
10273 self.lu.LogStep(2, steps_total, "Check peer consistency")
10274 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10276 # Step: create new storage
10277 self.lu.LogStep(3, steps_total, "Allocate new storage")
10278 for idx, dev in enumerate(self.instance.disks):
10279 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10280 (self.new_node, idx))
10281 # we pass force_create=True to force LVM creation
10282 for new_lv in dev.children:
10283 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10284 _GetInstanceInfoText(self.instance), False)
10286     # Step 4: drbd minors and drbd setup changes
10287 # after this, we must manually remove the drbd minors on both the
10288 # error and the success paths
10289 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10290 minors = self.cfg.AllocateDRBDMinor([self.new_node
10291 for dev in self.instance.disks],
10292 self.instance.name)
10293 logging.debug("Allocated minors %r", minors)
10296 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10297 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10298 (self.new_node, idx))
10299 # create new devices on new_node; note that we create two IDs:
10300 # one without port, so the drbd will be activated without
10301 # networking information on the new node at this stage, and one
10302 # with network, for the latter activation in step 4
10303 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10304 if self.instance.primary_node == o_node1:
10307 assert self.instance.primary_node == o_node2, "Three-node instance?"
10310 new_alone_id = (self.instance.primary_node, self.new_node, None,
10311 p_minor, new_minor, o_secret)
10312 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10313 p_minor, new_minor, o_secret)
10315 iv_names[idx] = (dev, dev.children, new_net_id)
10316 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10318 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10319 logical_id=new_alone_id,
10320 children=dev.children,
10323 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10324 _GetInstanceInfoText(self.instance), False)
10325 except errors.GenericError:
10326 self.cfg.ReleaseDRBDMinors(self.instance.name)
10329 # We have new devices, shutdown the drbd on the old secondary
10330 for idx, dev in enumerate(self.instance.disks):
10331 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10332 self.cfg.SetDiskID(dev, self.target_node)
10333 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10335 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10336                            " node: %s" % (idx, msg),
10337 hint=("Please cleanup this device manually as"
10338 " soon as possible"))
10340 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10341 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10342 self.instance.disks)[pnode]
10344 msg = result.fail_msg
10346 # detaches didn't succeed (unlikely)
10347 self.cfg.ReleaseDRBDMinors(self.instance.name)
10348 raise errors.OpExecError("Can't detach the disks from the network on"
10349 " old node: %s" % (msg,))
10351 # if we managed to detach at least one, we update all the disks of
10352 # the instance to point to the new secondary
10353 self.lu.LogInfo("Updating instance configuration")
10354 for dev, _, new_logical_id in iv_names.itervalues():
10355 dev.logical_id = new_logical_id
10356 self.cfg.SetDiskID(dev, self.instance.primary_node)
10358 self.cfg.Update(self.instance, feedback_fn)
10360 # Release all node locks (the configuration has been updated)
10361 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10363 # and now perform the drbd attach
10364 self.lu.LogInfo("Attaching primary drbds to new secondary"
10365 " (standalone => connected)")
10366 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10368 self.node_secondary_ip,
10369 self.instance.disks,
10370 self.instance.name,
10372 for to_node, to_result in result.items():
10373 msg = to_result.fail_msg
10375 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10377 hint=("please do a gnt-instance info to see the"
10378 " status of disks"))
10380 cstep = itertools.count(5)
10382 if self.early_release:
10383 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10384 self._RemoveOldStorage(self.target_node, iv_names)
10385 # TODO: Check if releasing locks early still makes sense
10386 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10388 # Release all resource locks except those used by the instance
10389 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10390 keep=self.node_secondary_ip.keys())
10392 # TODO: Can the instance lock be downgraded here? Take the optional disk
10393 # shutdown in the caller into consideration.
10396 # This can fail as the old devices are degraded and _WaitForSync
10397 # does a combined result over all disks, so we don't check its return value
10398 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10399 _WaitForSync(self.lu, self.instance)
10401 # Check all devices manually
10402 self._CheckDevices(self.instance.primary_node, iv_names)
10404 # Step: remove old storage
10405 if not self.early_release:
10406 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10407 self._RemoveOldStorage(self.target_node, iv_names)
10410 class LURepairNodeStorage(NoHooksLU):
10411 """Repairs the volume group on a node.
10416 def CheckArguments(self):
10417 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10419 storage_type = self.op.storage_type
10421 if (constants.SO_FIX_CONSISTENCY not in
10422 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10423 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10424 " repaired" % storage_type,
10425 errors.ECODE_INVAL)
10427 def ExpandNames(self):
10428 self.needed_locks = {
10429 locking.LEVEL_NODE: [self.op.node_name],
10432 def _CheckFaultyDisks(self, instance, node_name):
10433 """Ensure faulty disks abort the opcode or at least warn."""
10435 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10437 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10438 " node '%s'" % (instance.name, node_name),
10439 errors.ECODE_STATE)
10440 except errors.OpPrereqError, err:
10441 if self.op.ignore_consistency:
10442 self.proc.LogWarning(str(err.args[0]))
10446 def CheckPrereq(self):
10447 """Check prerequisites.
10450 # Check whether any instance on this node has faulty disks
10451 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10452 if inst.admin_state != constants.ADMINST_UP:
10454 check_nodes = set(inst.all_nodes)
10455 check_nodes.discard(self.op.node_name)
10456 for inst_node_name in check_nodes:
10457 self._CheckFaultyDisks(inst, inst_node_name)
10459 def Exec(self, feedback_fn):
10460 feedback_fn("Repairing storage unit '%s' on %s ..." %
10461 (self.op.name, self.op.node_name))
10463 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10464 result = self.rpc.call_storage_execute(self.op.node_name,
10465 self.op.storage_type, st_args,
10467 constants.SO_FIX_CONSISTENCY)
10468 result.Raise("Failed to repair storage unit '%s' on %s" %
10469 (self.op.name, self.op.node_name))
10472 class LUNodeEvacuate(NoHooksLU):
10473 """Evacuates instances off a list of nodes.
10478 def CheckArguments(self):
10479 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10481 def ExpandNames(self):
10482 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10484 if self.op.remote_node is not None:
10485 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10486 assert self.op.remote_node
10488 if self.op.remote_node == self.op.node_name:
10489 raise errors.OpPrereqError("Can not use evacuated node as a new"
10490 " secondary node", errors.ECODE_INVAL)
10492 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10493 raise errors.OpPrereqError("Without the use of an iallocator only"
10494 " secondary instances can be evacuated",
10495 errors.ECODE_INVAL)
10498 self.share_locks = _ShareAll()
10499 self.needed_locks = {
10500 locking.LEVEL_INSTANCE: [],
10501 locking.LEVEL_NODEGROUP: [],
10502 locking.LEVEL_NODE: [],
10505 if self.op.remote_node is None:
10506 # Iallocator will choose any node(s) in the same group
10507 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10509 group_nodes = frozenset([self.op.remote_node])
10511 # Determine nodes to be locked
10512 self.lock_nodes = set([self.op.node_name]) | group_nodes
10514 def _DetermineInstances(self):
10515 """Builds list of instances to operate on.
10518 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10520 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10521 # Primary instances only
10522 inst_fn = _GetNodePrimaryInstances
10523 assert self.op.remote_node is None, \
10524 "Evacuating primary instances requires iallocator"
10525 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10526 # Secondary instances only
10527 inst_fn = _GetNodeSecondaryInstances
10530 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10531 inst_fn = _GetNodeInstances
10533 return inst_fn(self.cfg, self.op.node_name)
10535 def DeclareLocks(self, level):
10536 if level == locking.LEVEL_INSTANCE:
10537 # Lock instances optimistically, needs verification once node and group
10538 # locks have been acquired
10539 self.needed_locks[locking.LEVEL_INSTANCE] = \
10540 set(i.name for i in self._DetermineInstances())
10542 elif level == locking.LEVEL_NODEGROUP:
10543       # Lock node groups optimistically, needs verification once nodes have been acquired
10545 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10546 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10548 elif level == locking.LEVEL_NODE:
10549 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10551 def CheckPrereq(self):
10553 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10554 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10555 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10557 assert owned_nodes == self.lock_nodes
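    # Node groups were only locked optimistically in DeclareLocks, so verify
    # that the group membership has not changed now that the node locks are
    # actually held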
10559 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10560 if owned_groups != wanted_groups:
10561 raise errors.OpExecError("Node groups changed since locks were acquired,"
10562 " current groups are '%s', used to be '%s'" %
10563 (utils.CommaJoin(wanted_groups),
10564 utils.CommaJoin(owned_groups)))
10566 # Determine affected instances
10567 self.instances = self._DetermineInstances()
10568 self.instance_names = [i.name for i in self.instances]
10570 if set(self.instance_names) != owned_instances:
10571 raise errors.OpExecError("Instances on node '%s' changed since locks"
10572 " were acquired, current instances are '%s',"
10573 " used to be '%s'" %
10574 (self.op.node_name,
10575 utils.CommaJoin(self.instance_names),
10576 utils.CommaJoin(owned_instances)))
10578 if self.instance_names:
10579 self.LogInfo("Evacuating instances from node '%s': %s",
10581 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10583 self.LogInfo("No instances to evacuate from node '%s'",
10586 if self.op.remote_node is not None:
10587 for i in self.instances:
10588 if i.primary_node == self.op.remote_node:
10589 raise errors.OpPrereqError("Node %s is the primary node of"
10590 " instance %s, cannot use it as"
10592 (self.op.remote_node, i.name),
10593 errors.ECODE_INVAL)
10595 def Exec(self, feedback_fn):
10596 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10598 if not self.instance_names:
10599 # No instances to evacuate
10602 elif self.op.iallocator is not None:
10603 # TODO: Implement relocation to other group
10604 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10605 evac_mode=self.op.mode,
10606 instances=list(self.instance_names))
10608 ial.Run(self.op.iallocator)
10610 if not ial.success:
10611 raise errors.OpPrereqError("Can't compute node evacuation using"
10612 " iallocator '%s': %s" %
10613 (self.op.iallocator, ial.info),
10614 errors.ECODE_NORES)
10616 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10618 elif self.op.remote_node is not None:
10619 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
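      # Without an iallocator only secondary instances can be evacuated (this
      # was checked in ExpandNames); each one maps to a replace-disks job with
      # the given remote node as its new secondary.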
10621 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10622 remote_node=self.op.remote_node,
10624 mode=constants.REPLACE_DISK_CHG,
10625 early_release=self.op.early_release)]
10626 for instance_name in self.instance_names
10630 raise errors.ProgrammerError("No iallocator or remote node")
10632 return ResultWithJobs(jobs)
10635 def _SetOpEarlyRelease(early_release, op):
10636 """Sets C{early_release} flag on opcodes if available.
10640 op.early_release = early_release
10641 except AttributeError:
10642 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10647 def _NodeEvacDest(use_nodes, group, nodes):
10648 """Returns group or nodes depending on caller's choice.
10652 return utils.CommaJoin(nodes)
10657 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10658 """Unpacks the result of change-group and node-evacuate iallocator requests.
10660 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10661 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10663 @type lu: L{LogicalUnit}
10664 @param lu: Logical unit instance
10665 @type alloc_result: tuple/list
10666 @param alloc_result: Result from iallocator
10667 @type early_release: bool
10668 @param early_release: Whether to release locks early if possible
10669 @type use_nodes: bool
10670 @param use_nodes: Whether to display node names instead of groups
10673 (moved, failed, jobs) = alloc_result
10676 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10677 for (name, reason) in failed)
10678 lu.LogWarning("Unable to evacuate instances %s", failreason)
10679 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10682 lu.LogInfo("Instances to be moved: %s",
10683 utils.CommaJoin("%s (to %s)" %
10684 (name, _NodeEvacDest(use_nodes, group, nodes))
10685 for (name, group, nodes) in moved))
10687 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10688 map(opcodes.OpCode.LoadOpCode, ops))
10692 class LUInstanceGrowDisk(LogicalUnit):
10693 """Grow a disk of an instance.
10696 HPATH = "disk-grow"
10697 HTYPE = constants.HTYPE_INSTANCE
10700 def ExpandNames(self):
10701 self._ExpandAndLockInstance()
10702 self.needed_locks[locking.LEVEL_NODE] = []
10703 self.needed_locks[locking.LEVEL_NODE_RES] = []
10704 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10706 def DeclareLocks(self, level):
10707 if level == locking.LEVEL_NODE:
10708 self._LockInstancesNodes()
10709 elif level == locking.LEVEL_NODE_RES:
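      # Copy node locks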
10711 self.needed_locks[locking.LEVEL_NODE_RES] = \
10712 self.needed_locks[locking.LEVEL_NODE][:]
10714 def BuildHooksEnv(self):
10715 """Build hooks env.
10717 This runs on the master, the primary and all the secondaries.
10721 "DISK": self.op.disk,
10722 "AMOUNT": self.op.amount,
10724 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10727 def BuildHooksNodes(self):
10728 """Build hooks nodes.
10731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10734 def CheckPrereq(self):
10735 """Check prerequisites.
10737 This checks that the instance is in the cluster.
10740 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10741 assert instance is not None, \
10742 "Cannot retrieve locked instance %s" % self.op.instance_name
10743 nodenames = list(instance.all_nodes)
10744 for node in nodenames:
10745 _CheckNodeOnline(self, node)
10747 self.instance = instance
10749 if instance.disk_template not in constants.DTS_GROWABLE:
10750 raise errors.OpPrereqError("Instance's disk layout does not support"
10751 " growing", errors.ECODE_INVAL)
10753 self.disk = instance.FindDisk(self.op.disk)
10755 if instance.disk_template not in (constants.DT_FILE,
10756 constants.DT_SHARED_FILE):
10757       # TODO: check the free disk space for file, when that feature will be supported
10759 _CheckNodesFreeDiskPerVG(self, nodenames,
10760 self.disk.ComputeGrowth(self.op.amount))
10762 def Exec(self, feedback_fn):
10763 """Execute disk grow.
10766 instance = self.instance
10769 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10770 assert (self.owned_locks(locking.LEVEL_NODE) ==
10771 self.owned_locks(locking.LEVEL_NODE_RES))
10773 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10775 raise errors.OpExecError("Cannot activate block device to grow")
10777 feedback_fn("Growing disk %s of instance '%s' by %s" %
10778 (self.op.disk, instance.name,
10779 utils.FormatUnit(self.op.amount, "h")))
10781 # First run all grow ops in dry-run mode
10782 for node in instance.all_nodes:
10783 self.cfg.SetDiskID(disk, node)
10784 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10785       result.Raise("Grow request to node %s failed" % node)
10787 # We know that (as far as we can test) operations across different
10788 # nodes will succeed, time to run it for real
10789 for node in instance.all_nodes:
10790 self.cfg.SetDiskID(disk, node)
10791 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10792       result.Raise("Grow request to node %s failed" % node)
10794 # TODO: Rewrite code to work properly
10795 # DRBD goes into sync mode for a short amount of time after executing the
10796 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10797 # calling "resize" in sync mode fails. Sleeping for a short amount of
10798 # time is a work-around.
10801 disk.RecordGrow(self.op.amount)
10802 self.cfg.Update(instance, feedback_fn)
10804 # Changes have been recorded, release node lock
10805 _ReleaseLocks(self, locking.LEVEL_NODE)
10807 # Downgrade lock while waiting for sync
10808 self.glm.downgrade(locking.LEVEL_INSTANCE)
10810 if self.op.wait_for_sync:
10811 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10813 self.proc.LogWarning("Disk sync-ing has not returned a good"
10814 " status; please check the instance")
10815 if instance.admin_state != constants.ADMINST_UP:
10816 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10817 elif instance.admin_state != constants.ADMINST_UP:
10818       self.proc.LogWarning("Not shutting down the disk even though the instance"
10819                            " is not supposed to be running, because wait-for-sync"
10820                            " mode was not requested")
10822 assert self.owned_locks(locking.LEVEL_NODE_RES)
10823 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10826 class LUInstanceQueryData(NoHooksLU):
10827 """Query runtime instance data.
10832 def ExpandNames(self):
10833 self.needed_locks = {}
10835 # Use locking if requested or when non-static information is wanted
10836 if not (self.op.static or self.op.use_locking):
10837 self.LogWarning("Non-static data requested, locks need to be acquired")
10838 self.op.use_locking = True
10840 if self.op.instances or not self.op.use_locking:
10841 # Expand instance names right here
10842 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10844 # Will use acquired locks
10845 self.wanted_names = None
10847 if self.op.use_locking:
10848 self.share_locks = _ShareAll()
10850 if self.wanted_names is None:
10851 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10853 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10855 self.needed_locks[locking.LEVEL_NODE] = []
10856 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10858 def DeclareLocks(self, level):
10859 if self.op.use_locking and level == locking.LEVEL_NODE:
10860 self._LockInstancesNodes()
10862 def CheckPrereq(self):
10863 """Check prerequisites.
10865 This only checks the optional instance list against the existing names.
10868 if self.wanted_names is None:
10869 assert self.op.use_locking, "Locking was not used"
10870 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10872 self.wanted_instances = \
10873 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10875 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10876 """Returns the status of a block device
10879 if self.op.static or not node:
10882 self.cfg.SetDiskID(dev, node)
10884 result = self.rpc.call_blockdev_find(node, dev)
10888 result.Raise("Can't compute disk status for %s" % instance_name)
10890 status = result.payload
10894 return (status.dev_path, status.major, status.minor,
10895 status.sync_percent, status.estimated_time,
10896 status.is_degraded, status.ldisk_status)
10898 def _ComputeDiskStatus(self, instance, snode, dev):
10899 """Compute block device status.
10902 if dev.dev_type in constants.LDS_DRBD:
10903 # we change the snode then (otherwise we use the one passed in)
10904 if dev.logical_id[0] == instance.primary_node:
10905 snode = dev.logical_id[1]
10907 snode = dev.logical_id[0]
10909 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10910 instance.name, dev)
10911 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10914 dev_children = map(compat.partial(self._ComputeDiskStatus,
10921 "iv_name": dev.iv_name,
10922 "dev_type": dev.dev_type,
10923 "logical_id": dev.logical_id,
10924 "physical_id": dev.physical_id,
10925 "pstatus": dev_pstatus,
10926 "sstatus": dev_sstatus,
10927 "children": dev_children,
10932 def Exec(self, feedback_fn):
10933 """Gather and return data"""
10936 cluster = self.cfg.GetClusterInfo()
10938 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10939 for i in self.wanted_instances)
10940 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10941 if self.op.static or pnode.offline:
10942 remote_state = None
10944 self.LogWarning("Primary node %s is marked offline, returning static"
10945 " information only for instance %s" %
10946 (pnode.name, instance.name))
10948 remote_info = self.rpc.call_instance_info(instance.primary_node,
10950 instance.hypervisor)
10951 remote_info.Raise("Error checking node %s" % instance.primary_node)
10952 remote_info = remote_info.payload
10953 if remote_info and "state" in remote_info:
10954 remote_state = "up"
10956 if instance.admin_state == constants.ADMINST_UP:
10957 remote_state = "down"
10959 remote_state = instance.admin_state
10961 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10964 result[instance.name] = {
10965 "name": instance.name,
10966 "config_state": instance.admin_state,
10967 "run_state": remote_state,
10968 "pnode": instance.primary_node,
10969 "snodes": instance.secondary_nodes,
10971 # this happens to be the same format used for hooks
10972 "nics": _NICListToTuple(self, instance.nics),
10973 "disk_template": instance.disk_template,
10975 "hypervisor": instance.hypervisor,
10976 "network_port": instance.network_port,
10977 "hv_instance": instance.hvparams,
10978 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10979 "be_instance": instance.beparams,
10980 "be_actual": cluster.FillBE(instance),
10981 "os_instance": instance.osparams,
10982 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10983 "serial_no": instance.serial_no,
10984 "mtime": instance.mtime,
10985 "ctime": instance.ctime,
10986 "uuid": instance.uuid,
10992 class LUInstanceSetParams(LogicalUnit):
10993   """Modifies an instance's parameters.
10996 HPATH = "instance-modify"
10997 HTYPE = constants.HTYPE_INSTANCE
11000 def CheckArguments(self):
11001 if not (self.op.nics or self.op.disks or self.op.disk_template or
11002 self.op.hvparams or self.op.beparams or self.op.os_name or
11003 self.op.online_inst or self.op.offline_inst):
11004 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11006 if self.op.hvparams:
11007 _CheckGlobalHvParams(self.op.hvparams)
11011 for disk_op, disk_dict in self.op.disks:
11012 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11013 if disk_op == constants.DDM_REMOVE:
11014 disk_addremove += 1
11016 elif disk_op == constants.DDM_ADD:
11017 disk_addremove += 1
11019 if not isinstance(disk_op, int):
11020 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11021 if not isinstance(disk_dict, dict):
11022 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11023 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11025 if disk_op == constants.DDM_ADD:
11026 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11027 if mode not in constants.DISK_ACCESS_SET:
11028 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11029 errors.ECODE_INVAL)
11030 size = disk_dict.get(constants.IDISK_SIZE, None)
11032 raise errors.OpPrereqError("Required disk parameter size missing",
11033 errors.ECODE_INVAL)
11036 except (TypeError, ValueError), err:
11037 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11038 str(err), errors.ECODE_INVAL)
11039 disk_dict[constants.IDISK_SIZE] = size
11041 # modification of disk
11042 if constants.IDISK_SIZE in disk_dict:
11043 raise errors.OpPrereqError("Disk size change not possible, use"
11044 " grow-disk", errors.ECODE_INVAL)
11046 if disk_addremove > 1:
11047 raise errors.OpPrereqError("Only one disk add or remove operation"
11048 " supported at a time", errors.ECODE_INVAL)
11050 if self.op.disks and self.op.disk_template is not None:
11051 raise errors.OpPrereqError("Disk template conversion and other disk"
11052 " changes not supported at the same time",
11053 errors.ECODE_INVAL)
11055 if (self.op.disk_template and
11056 self.op.disk_template in constants.DTS_INT_MIRROR and
11057 self.op.remote_node is None):
11058 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11059 " one requires specifying a secondary node",
11060 errors.ECODE_INVAL)
11064 for nic_op, nic_dict in self.op.nics:
11065 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11066 if nic_op == constants.DDM_REMOVE:
11069 elif nic_op == constants.DDM_ADD:
11072 if not isinstance(nic_op, int):
11073 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11074 if not isinstance(nic_dict, dict):
11075 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11076 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11078 # nic_dict should be a dict
11079 nic_ip = nic_dict.get(constants.INIC_IP, None)
11080 if nic_ip is not None:
11081 if nic_ip.lower() == constants.VALUE_NONE:
11082 nic_dict[constants.INIC_IP] = None
11084 if not netutils.IPAddress.IsValid(nic_ip):
11085 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11086 errors.ECODE_INVAL)
11088 nic_bridge = nic_dict.get("bridge", None)
11089 nic_link = nic_dict.get(constants.INIC_LINK, None)
11090 if nic_bridge and nic_link:
11091 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11092 " at the same time", errors.ECODE_INVAL)
11093 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11094 nic_dict["bridge"] = None
11095 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11096 nic_dict[constants.INIC_LINK] = None
11098 if nic_op == constants.DDM_ADD:
11099 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11100 if nic_mac is None:
11101 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11103 if constants.INIC_MAC in nic_dict:
11104 nic_mac = nic_dict[constants.INIC_MAC]
11105 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11106 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11108 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11109 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11110 " modifying an existing nic",
11111 errors.ECODE_INVAL)
11113 if nic_addremove > 1:
11114 raise errors.OpPrereqError("Only one NIC add or remove operation"
11115 " supported at a time", errors.ECODE_INVAL)
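    # Similarly, self.op.nics entries are (operation, parameters) pairs; a
    # minimal sketch with made-up values:
    #   [(constants.DDM_ADD, {constants.INIC_MAC: constants.VALUE_AUTO,
    #                         constants.INIC_IP: "198.51.100.10"})]
    # adds a NIC with an auto-generated MAC, while
    #   [(0, {constants.INIC_LINK: "xen-br1"})]
    # only changes the link of the first NIC.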
11117 def ExpandNames(self):
11118 self._ExpandAndLockInstance()
11119 # Can't even acquire node locks in shared mode as upcoming changes in
11120 # Ganeti 2.6 will start to modify the node object on disk conversion
11121 self.needed_locks[locking.LEVEL_NODE] = []
11122 self.needed_locks[locking.LEVEL_NODE_RES] = []
11123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11125 def DeclareLocks(self, level):
11126 if level == locking.LEVEL_NODE:
11127 self._LockInstancesNodes()
11128 if self.op.disk_template and self.op.remote_node:
11129 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11130 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11131 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11133 self.needed_locks[locking.LEVEL_NODE_RES] = \
11134 self.needed_locks[locking.LEVEL_NODE][:]
11136 def BuildHooksEnv(self):
11137 """Build hooks env.
11139 This runs on the master, primary and secondaries.
11143 if constants.BE_MEMORY in self.be_new:
11144 args["memory"] = self.be_new[constants.BE_MEMORY]
11145 if constants.BE_VCPUS in self.be_new:
11146 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11147 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11148 # information at all.
11151 nic_override = dict(self.op.nics)
11152 for idx, nic in enumerate(self.instance.nics):
11153 if idx in nic_override:
11154 this_nic_override = nic_override[idx]
11156 this_nic_override = {}
11157 if constants.INIC_IP in this_nic_override:
11158 ip = this_nic_override[constants.INIC_IP]
11161 if constants.INIC_MAC in this_nic_override:
11162 mac = this_nic_override[constants.INIC_MAC]
11165 if idx in self.nic_pnew:
11166 nicparams = self.nic_pnew[idx]
11168 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11169 mode = nicparams[constants.NIC_MODE]
11170 link = nicparams[constants.NIC_LINK]
11171 args["nics"].append((ip, mac, mode, link))
11172 if constants.DDM_ADD in nic_override:
11173 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11174 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11175 nicparams = self.nic_pnew[constants.DDM_ADD]
11176 mode = nicparams[constants.NIC_MODE]
11177 link = nicparams[constants.NIC_LINK]
11178 args["nics"].append((ip, mac, mode, link))
11179 elif constants.DDM_REMOVE in nic_override:
11180 del args["nics"][-1]
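    # Each element of args["nics"] is an (ip, mac, mode, link) tuple, e.g.
    # ("192.0.2.10", "aa:00:00:fa:3a:3f", constants.NIC_MODE_BRIDGED,
    # "xen-br0") -- illustrative values only.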
11182 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11183 if self.op.disk_template:
11184 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11188 def BuildHooksNodes(self):
11189 """Build hooks nodes.
11192 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11195 def CheckPrereq(self):
11196 """Check prerequisites.
11198     This checks the requested parameter changes against the instance's
    current configuration and the cluster state.
11201 # checking the new params on the primary/secondary nodes
11203 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11204 cluster = self.cluster = self.cfg.GetClusterInfo()
11205 assert self.instance is not None, \
11206 "Cannot retrieve locked instance %s" % self.op.instance_name
11207 pnode = instance.primary_node
11208 nodelist = list(instance.all_nodes)
11211 if self.op.os_name and not self.op.force:
11212 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11213 self.op.force_variant)
11214 instance_os = self.op.os_name
11216 instance_os = instance.os
11218 if self.op.disk_template:
11219 if instance.disk_template == self.op.disk_template:
11220 raise errors.OpPrereqError("Instance already has disk template %s" %
11221 instance.disk_template, errors.ECODE_INVAL)
11223 if (instance.disk_template,
11224 self.op.disk_template) not in self._DISK_CONVERSIONS:
11225 raise errors.OpPrereqError("Unsupported disk template conversion from"
11226 " %s to %s" % (instance.disk_template,
11227 self.op.disk_template),
11228 errors.ECODE_INVAL)
11229 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11230 msg="cannot change disk template")
11231 if self.op.disk_template in constants.DTS_INT_MIRROR:
11232 if self.op.remote_node == pnode:
11233 raise errors.OpPrereqError("Given new secondary node %s is the same"
11234 " as the primary node of the instance" %
11235 self.op.remote_node, errors.ECODE_STATE)
11236 _CheckNodeOnline(self, self.op.remote_node)
11237 _CheckNodeNotDrained(self, self.op.remote_node)
11238 # FIXME: here we assume that the old instance type is DT_PLAIN
11239 assert instance.disk_template == constants.DT_PLAIN
11240 disks = [{constants.IDISK_SIZE: d.size,
11241 constants.IDISK_VG: d.logical_id[0]}
11242 for d in instance.disks]
11243 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11244 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11246 # hvparams processing
11247 if self.op.hvparams:
11248 hv_type = instance.hypervisor
11249 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11250 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11251 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11254 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11255 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11256 self.hv_proposed = self.hv_new = hv_new # the new actual values
11257 self.hv_inst = i_hvdict # the new dict (without defaults)
11259 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11261 self.hv_new = self.hv_inst = {}
11263 # beparams processing
11264 if self.op.beparams:
11265 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11267 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11268 be_new = cluster.SimpleFillBE(i_bedict)
11269 self.be_proposed = self.be_new = be_new # the new actual values
11270 self.be_inst = i_bedict # the new dict (without defaults)
11272 self.be_new = self.be_inst = {}
11273 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11274 be_old = cluster.FillBE(instance)
11276     # CPU param validation -- checking every time a parameter is changed,
11277     # to cover all cases where either the CPU mask or vcpus have changed
11279 if (constants.BE_VCPUS in self.be_proposed and
11280 constants.HV_CPU_MASK in self.hv_proposed):
11282 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11283 # Verify mask is consistent with number of vCPUs. Can skip this
11284 # test if only 1 entry in the CPU mask, which means same mask
11285 # is applied to all vCPUs.
11286 if (len(cpu_list) > 1 and
11287 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11288 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11290 (self.be_proposed[constants.BE_VCPUS],
11291 self.hv_proposed[constants.HV_CPU_MASK]),
11292 errors.ECODE_INVAL)
11294 # Only perform this test if a new CPU mask is given
11295 if constants.HV_CPU_MASK in self.hv_new:
11296 # Calculate the largest CPU number requested
11297 max_requested_cpu = max(map(max, cpu_list))
11298 # Check that all of the instance's nodes have enough physical CPUs to
11299 # satisfy the requested CPU mask
11300 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11301 max_requested_cpu + 1, instance.hypervisor)
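    # Illustration, assuming the usual colon-separated syntax accepted by
    # utils.ParseMultiCpuMask: a mask of "0-1:3:4" yields three entries
    # ([0, 1], [3], [4]), so it only matches BE_VCPUS == 3, and its highest
    # CPU number (4) means every node must have at least 5 physical CPUs.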
11303 # osparams processing
11304 if self.op.osparams:
11305 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11306 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11307 self.os_inst = i_osdict # the new dict (without defaults)
11313 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11314 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11315 mem_check_list = [pnode]
11316 if be_new[constants.BE_AUTO_BALANCE]:
11317 # either we changed auto_balance to yes or it was from before
11318 mem_check_list.extend(instance.secondary_nodes)
11319 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11320 instance.hypervisor)
11321 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11322 instance.hypervisor)
11323 pninfo = nodeinfo[pnode]
11324 msg = pninfo.fail_msg
11326 # Assume the primary node is unreachable and go ahead
11327 self.warn.append("Can't get info from primary node %s: %s" %
11329 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11330 self.warn.append("Node data from primary node %s doesn't contain"
11331 " free memory information" % pnode)
11332 elif instance_info.fail_msg:
11333 self.warn.append("Can't get instance runtime information: %s" %
11334 instance_info.fail_msg)
11336 if instance_info.payload:
11337 current_mem = int(instance_info.payload["memory"])
11339 # Assume instance not running
11340 # (there is a slight race condition here, but it's not very probable,
11341 # and we have no other way to check)
11343 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11344 pninfo.payload["memory_free"])
11346 raise errors.OpPrereqError("This change will prevent the instance"
11347 " from starting, due to %d MB of memory"
11348 " missing on its primary node" % miss_mem,
11349 errors.ECODE_NORES)
11351 if be_new[constants.BE_AUTO_BALANCE]:
11352 for node, nres in nodeinfo.items():
11353 if node not in instance.secondary_nodes:
11355 nres.Raise("Can't get info from secondary node %s" % node,
11356 prereq=True, ecode=errors.ECODE_STATE)
11357 if not isinstance(nres.payload.get("memory_free", None), int):
11358 raise errors.OpPrereqError("Secondary node %s didn't return free"
11359 " memory information" % node,
11360 errors.ECODE_STATE)
11361 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11362 raise errors.OpPrereqError("This change will prevent the instance"
11363 " from failover to its secondary node"
11364 " %s, due to not enough memory" % node,
11365 errors.ECODE_STATE)
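    # Worked example with made-up numbers: raising BE_MEMORY to 2048 MB while
    # the instance currently uses 1024 MB and the primary node reports 512 MB
    # free gives miss_mem = 2048 - 1024 - 512 = 512 MB > 0, so the change is
    # rejected with ECODE_NORES.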
11369 self.nic_pinst = {}
11370 for nic_op, nic_dict in self.op.nics:
11371 if nic_op == constants.DDM_REMOVE:
11372 if not instance.nics:
11373 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11374 errors.ECODE_INVAL)
11376 if nic_op != constants.DDM_ADD:
11378 if not instance.nics:
11379 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11380 " no NICs" % nic_op,
11381 errors.ECODE_INVAL)
11382 if nic_op < 0 or nic_op >= len(instance.nics):
11383 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11385 (nic_op, len(instance.nics) - 1),
11386 errors.ECODE_INVAL)
11387 old_nic_params = instance.nics[nic_op].nicparams
11388 old_nic_ip = instance.nics[nic_op].ip
11390 old_nic_params = {}
11393 update_params_dict = dict([(key, nic_dict[key])
11394 for key in constants.NICS_PARAMETERS
11395 if key in nic_dict])
11397 if "bridge" in nic_dict:
11398 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11400 new_nic_params = _GetUpdatedParams(old_nic_params,
11401 update_params_dict)
11402 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11403 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11404 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11405 self.nic_pinst[nic_op] = new_nic_params
11406 self.nic_pnew[nic_op] = new_filled_nic_params
11407 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11409 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11410 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11411 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11413 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11415 self.warn.append(msg)
11417 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11418 if new_nic_mode == constants.NIC_MODE_ROUTED:
11419 if constants.INIC_IP in nic_dict:
11420 nic_ip = nic_dict[constants.INIC_IP]
11422 nic_ip = old_nic_ip
11424 raise errors.OpPrereqError("Cannot set the nic ip to None"
11425 " on a routed nic", errors.ECODE_INVAL)
11426 if constants.INIC_MAC in nic_dict:
11427 nic_mac = nic_dict[constants.INIC_MAC]
11428 if nic_mac is None:
11429 raise errors.OpPrereqError("Cannot set the nic mac to None",
11430 errors.ECODE_INVAL)
11431 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11432 # otherwise generate the mac
11433 nic_dict[constants.INIC_MAC] = \
11434 self.cfg.GenerateMAC(self.proc.GetECId())
11436 # or validate/reserve the current one
11438 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11439 except errors.ReservationError:
11440 raise errors.OpPrereqError("MAC address %s already in use"
11441 " in cluster" % nic_mac,
11442 errors.ECODE_NOTUNIQUE)
11445 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11446 raise errors.OpPrereqError("Disk operations not supported for"
11447 " diskless instances",
11448 errors.ECODE_INVAL)
11449 for disk_op, _ in self.op.disks:
11450 if disk_op == constants.DDM_REMOVE:
11451 if len(instance.disks) == 1:
11452 raise errors.OpPrereqError("Cannot remove the last disk of"
11453 " an instance", errors.ECODE_INVAL)
11454 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11455 msg="cannot remove disks")
11457 if (disk_op == constants.DDM_ADD and
11458 len(instance.disks) >= constants.MAX_DISKS):
11459 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11460 " add more" % constants.MAX_DISKS,
11461 errors.ECODE_STATE)
11462 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11464 if disk_op < 0 or disk_op >= len(instance.disks):
11465 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11467 (disk_op, len(instance.disks)),
11468 errors.ECODE_INVAL)
11470 # disabling the instance
11471 if self.op.offline_inst:
11472 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11473 msg="cannot change instance state to offline")
11475 # enabling the instance
11476 if self.op.online_inst:
11477 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11478 msg="cannot make instance go online")
11480 def _ConvertPlainToDrbd(self, feedback_fn):
11481 """Converts an instance from plain to drbd.
11484 feedback_fn("Converting template to drbd")
11485 instance = self.instance
11486 pnode = instance.primary_node
11487 snode = self.op.remote_node
11489 assert instance.disk_template == constants.DT_PLAIN
11491 # create a fake disk info for _GenerateDiskTemplate
11492 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11493 constants.IDISK_VG: d.logical_id[0]}
11494 for d in instance.disks]
11495 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11496 instance.name, pnode, [snode],
11497 disk_info, None, None, 0, feedback_fn)
11498 info = _GetInstanceInfoText(instance)
11499     feedback_fn("Creating additional volumes...")
11500 # first, create the missing data and meta devices
11501 for disk in new_disks:
11502 # unfortunately this is... not too nice
11503 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11505 for child in disk.children:
11506 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11507 # at this stage, all new LVs have been created, we can rename the
11509 feedback_fn("Renaming original volumes...")
11510 rename_list = [(o, n.children[0].logical_id)
11511 for (o, n) in zip(instance.disks, new_disks)]
11512 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11513 result.Raise("Failed to rename original LVs")
11515 feedback_fn("Initializing DRBD devices...")
11516 # all child devices are in place, we can now create the DRBD devices
11517 for disk in new_disks:
11518 for node in [pnode, snode]:
11519 f_create = node == pnode
11520 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11522 # at this point, the instance has been modified
11523 instance.disk_template = constants.DT_DRBD8
11524 instance.disks = new_disks
11525 self.cfg.Update(instance, feedback_fn)
11527 # Release node locks while waiting for sync
11528 _ReleaseLocks(self, locking.LEVEL_NODE)
11530 # disks are created, waiting for sync
11531 disk_abort = not _WaitForSync(self, instance,
11532 oneshot=not self.op.wait_for_sync)
11534 raise errors.OpExecError("There are some degraded disks for"
11535 " this instance, please cleanup manually")
11537 # Node resource locks will be released by caller
11539 def _ConvertDrbdToPlain(self, feedback_fn):
11540 """Converts an instance from drbd to plain.
11543 instance = self.instance
11545 assert len(instance.secondary_nodes) == 1
11546 assert instance.disk_template == constants.DT_DRBD8
11548 pnode = instance.primary_node
11549 snode = instance.secondary_nodes[0]
11550 feedback_fn("Converting template to plain")
11552 old_disks = instance.disks
11553 new_disks = [d.children[0] for d in old_disks]
11555 # copy over size and mode
11556 for parent, child in zip(old_disks, new_disks):
11557 child.size = parent.size
11558 child.mode = parent.mode
11560 # update instance structure
11561 instance.disks = new_disks
11562 instance.disk_template = constants.DT_PLAIN
11563 self.cfg.Update(instance, feedback_fn)
11565 # Release locks in case removing disks takes a while
11566 _ReleaseLocks(self, locking.LEVEL_NODE)
11568 feedback_fn("Removing volumes on the secondary node...")
11569 for disk in old_disks:
11570 self.cfg.SetDiskID(disk, snode)
11571 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11573 self.LogWarning("Could not remove block device %s on node %s,"
11574 " continuing anyway: %s", disk.iv_name, snode, msg)
11576 feedback_fn("Removing unneeded volumes on the primary node...")
11577 for idx, disk in enumerate(old_disks):
11578 meta = disk.children[1]
11579 self.cfg.SetDiskID(meta, pnode)
11580 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11582 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11583 " continuing anyway: %s", idx, pnode, msg)
11585 # this is a DRBD disk, return its port to the pool
11586 for disk in old_disks:
11587 tcp_port = disk.logical_id[2]
11588 self.cfg.AddTcpUdpPort(tcp_port)
11590 # Node resource locks will be released by caller
11592 def Exec(self, feedback_fn):
11593 """Modifies an instance.
11595 All parameters take effect only at the next restart of the instance.
11598 # Process here the warnings from CheckPrereq, as we don't have a
11599 # feedback_fn there.
11600 for warn in self.warn:
11601 feedback_fn("WARNING: %s" % warn)
11603 assert ((self.op.disk_template is None) ^
11604 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11605 "Not owning any node resource locks"
11608 instance = self.instance
11610 for disk_op, disk_dict in self.op.disks:
11611 if disk_op == constants.DDM_REMOVE:
11612 # remove the last disk
11613 device = instance.disks.pop()
11614 device_idx = len(instance.disks)
11615 for node, disk in device.ComputeNodeTree(instance.primary_node):
11616 self.cfg.SetDiskID(disk, node)
11617 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11619 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11620 " continuing anyway", device_idx, node, msg)
11621 result.append(("disk/%d" % device_idx, "remove"))
11623 # if this is a DRBD disk, return its port to the pool
11624 if device.dev_type in constants.LDS_DRBD:
11625 tcp_port = device.logical_id[2]
11626 self.cfg.AddTcpUdpPort(tcp_port)
11627 elif disk_op == constants.DDM_ADD:
11629 if instance.disk_template in (constants.DT_FILE,
11630 constants.DT_SHARED_FILE):
11631 file_driver, file_path = instance.disks[0].logical_id
11632 file_path = os.path.dirname(file_path)
11634 file_driver = file_path = None
11635 disk_idx_base = len(instance.disks)
11636 new_disk = _GenerateDiskTemplate(self,
11637 instance.disk_template,
11638 instance.name, instance.primary_node,
11639 instance.secondary_nodes,
11643 disk_idx_base, feedback_fn)[0]
11644 instance.disks.append(new_disk)
11645 info = _GetInstanceInfoText(instance)
11647 logging.info("Creating volume %s for instance %s",
11648 new_disk.iv_name, instance.name)
11649 # Note: this needs to be kept in sync with _CreateDisks
11651 for node in instance.all_nodes:
11652 f_create = node == instance.primary_node
11654 _CreateBlockDev(self, node, instance, new_disk,
11655 f_create, info, f_create)
11656 except errors.OpExecError, err:
11657 self.LogWarning("Failed to create volume %s (%s) on"
11659 new_disk.iv_name, new_disk, node, err)
11660 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11661 (new_disk.size, new_disk.mode)))
11663 # change a given disk
11664 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11665 result.append(("disk.mode/%d" % disk_op,
11666 disk_dict[constants.IDISK_MODE]))
11668 if self.op.disk_template:
11670 check_nodes = set(instance.all_nodes)
11671 if self.op.remote_node:
11672 check_nodes.add(self.op.remote_node)
11673 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11674 owned = self.owned_locks(level)
11675 assert not (check_nodes - owned), \
11676 ("Not owning the correct locks, owning %r, expected at least %r" %
11677 (owned, check_nodes))
11679 r_shut = _ShutdownInstanceDisks(self, instance)
11681 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11682 " proceed with disk template conversion")
11683 mode = (instance.disk_template, self.op.disk_template)
11685 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11687 self.cfg.ReleaseDRBDMinors(instance.name)
11689 result.append(("disk_template", self.op.disk_template))
11691 assert instance.disk_template == self.op.disk_template, \
11692 ("Expected disk template '%s', found '%s'" %
11693 (self.op.disk_template, instance.disk_template))
11695 # Release node and resource locks if there are any (they might already have
11696 # been released during disk conversion)
11697 _ReleaseLocks(self, locking.LEVEL_NODE)
11698 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11701 for nic_op, nic_dict in self.op.nics:
11702 if nic_op == constants.DDM_REMOVE:
11703 # remove the last nic
11704 del instance.nics[-1]
11705 result.append(("nic.%d" % len(instance.nics), "remove"))
11706 elif nic_op == constants.DDM_ADD:
11707         # mac and bridge should be set by now
11708 mac = nic_dict[constants.INIC_MAC]
11709 ip = nic_dict.get(constants.INIC_IP, None)
11710 nicparams = self.nic_pinst[constants.DDM_ADD]
11711 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11712 instance.nics.append(new_nic)
11713 result.append(("nic.%d" % (len(instance.nics) - 1),
11714 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11715 (new_nic.mac, new_nic.ip,
11716 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11717 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11720 for key in (constants.INIC_MAC, constants.INIC_IP):
11721 if key in nic_dict:
11722 setattr(instance.nics[nic_op], key, nic_dict[key])
11723 if nic_op in self.nic_pinst:
11724 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11725 for key, val in nic_dict.iteritems():
11726 result.append(("nic.%s/%d" % (key, nic_op), val))
11729 if self.op.hvparams:
11730 instance.hvparams = self.hv_inst
11731 for key, val in self.op.hvparams.iteritems():
11732 result.append(("hv/%s" % key, val))
11735 if self.op.beparams:
11736 instance.beparams = self.be_inst
11737 for key, val in self.op.beparams.iteritems():
11738 result.append(("be/%s" % key, val))
11741 if self.op.os_name:
11742 instance.os = self.op.os_name
11745 if self.op.osparams:
11746 instance.osparams = self.os_inst
11747 for key, val in self.op.osparams.iteritems():
11748 result.append(("os/%s" % key, val))
11750 # online/offline instance
11751 if self.op.online_inst:
11752 self.cfg.MarkInstanceDown(instance.name)
11753 result.append(("admin_state", constants.ADMINST_DOWN))
11754 if self.op.offline_inst:
11755 self.cfg.MarkInstanceOffline(instance.name)
11756 result.append(("admin_state", constants.ADMINST_OFFLINE))
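    # By now "result" holds the list of (parameter, new value) pairs collected
    # above, for example (made-up values):
    #   [("disk.mode/0", "rw"), ("be/memory", 512),
    #    ("admin_state", constants.ADMINST_DOWN)]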
11758 self.cfg.Update(instance, feedback_fn)
11760 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11761 self.owned_locks(locking.LEVEL_NODE)), \
11762 "All node locks should have been released by now"
11766 _DISK_CONVERSIONS = {
11767 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11768 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11772 class LUInstanceChangeGroup(LogicalUnit):
11773 HPATH = "instance-change-group"
11774 HTYPE = constants.HTYPE_INSTANCE
11777 def ExpandNames(self):
11778 self.share_locks = _ShareAll()
11779 self.needed_locks = {
11780 locking.LEVEL_NODEGROUP: [],
11781 locking.LEVEL_NODE: [],
11784 self._ExpandAndLockInstance()
11786 if self.op.target_groups:
11787 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11788 self.op.target_groups)
11790 self.req_target_uuids = None
11792 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11794 def DeclareLocks(self, level):
11795 if level == locking.LEVEL_NODEGROUP:
11796 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11798 if self.req_target_uuids:
11799 lock_groups = set(self.req_target_uuids)
11801 # Lock all groups used by instance optimistically; this requires going
11802 # via the node before it's locked, requiring verification later on
11803 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11804 lock_groups.update(instance_groups)
11806 # No target groups, need to lock all of them
11807 lock_groups = locking.ALL_SET
11809 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11811 elif level == locking.LEVEL_NODE:
11812 if self.req_target_uuids:
11813 # Lock all nodes used by instances
11814 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11815 self._LockInstancesNodes()
11817 # Lock all nodes in all potential target groups
11818 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11819 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11820 member_nodes = [node_name
11821 for group in lock_groups
11822 for node_name in self.cfg.GetNodeGroup(group).members]
11823 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11825 # Lock all nodes as all groups are potential targets
11826 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11828 def CheckPrereq(self):
11829 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11830 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11831 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11833 assert (self.req_target_uuids is None or
11834 owned_groups.issuperset(self.req_target_uuids))
11835 assert owned_instances == set([self.op.instance_name])
11837 # Get instance information
11838 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11840 # Check if node groups for locked instance are still correct
11841 assert owned_nodes.issuperset(self.instance.all_nodes), \
11842 ("Instance %s's nodes changed while we kept the lock" %
11843 self.op.instance_name)
11845 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11848 if self.req_target_uuids:
11849 # User requested specific target groups
11850 self.target_uuids = self.req_target_uuids
11852 # All groups except those used by the instance are potential targets
11853 self.target_uuids = owned_groups - inst_groups
11855 conflicting_groups = self.target_uuids & inst_groups
11856 if conflicting_groups:
11857 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11858 " used by the instance '%s'" %
11859 (utils.CommaJoin(conflicting_groups),
11860 self.op.instance_name),
11861 errors.ECODE_INVAL)
11863 if not self.target_uuids:
11864 raise errors.OpPrereqError("There are no possible target groups",
11865 errors.ECODE_INVAL)
11867 def BuildHooksEnv(self):
11868 """Build hooks env.
11871 assert self.target_uuids
11874 "TARGET_GROUPS": " ".join(self.target_uuids),
11877 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11881 def BuildHooksNodes(self):
11882 """Build hooks nodes.
11885 mn = self.cfg.GetMasterNode()
11886 return ([mn], [mn])
11888 def Exec(self, feedback_fn):
11889 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11891 assert instances == [self.op.instance_name], "Instance not locked"
11893 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11894 instances=instances, target_groups=list(self.target_uuids))
11896 ial.Run(self.op.iallocator)
11898 if not ial.success:
11899 raise errors.OpPrereqError("Can't compute solution for changing group of"
11900 " instance '%s' using iallocator '%s': %s" %
11901 (self.op.instance_name, self.op.iallocator,
11903 errors.ECODE_NORES)
11905 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11907 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11908 " instance '%s'", len(jobs), self.op.instance_name)
11910 return ResultWithJobs(jobs)
11913 class LUBackupQuery(NoHooksLU):
11914 """Query the exports list
11919 def ExpandNames(self):
11920 self.needed_locks = {}
11921 self.share_locks[locking.LEVEL_NODE] = 1
11922 if not self.op.nodes:
11923 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11925 self.needed_locks[locking.LEVEL_NODE] = \
11926 _GetWantedNodes(self, self.op.nodes)
11928 def Exec(self, feedback_fn):
11929 """Compute the list of all the exported system images.
11932 @return: a dictionary with the structure node->(export-list)
11933         where export-list is a list of the instances exported on that node
11937 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11938 rpcresult = self.rpc.call_export_list(self.nodes)
11940 for node in rpcresult:
11941 if rpcresult[node].fail_msg:
11942 result[node] = False
11944 result[node] = rpcresult[node].payload
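    # The resulting mapping has one entry per queried node, e.g. (made-up):
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": False}
    # with False marking nodes whose export_list RPC failed.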
11949 class LUBackupPrepare(NoHooksLU):
11950 """Prepares an instance for an export and returns useful information.
11955 def ExpandNames(self):
11956 self._ExpandAndLockInstance()
11958 def CheckPrereq(self):
11959 """Check prerequisites.
11962 instance_name = self.op.instance_name
11964 self.instance = self.cfg.GetInstanceInfo(instance_name)
11965 assert self.instance is not None, \
11966 "Cannot retrieve locked instance %s" % self.op.instance_name
11967 _CheckNodeOnline(self, self.instance.primary_node)
11969 self._cds = _GetClusterDomainSecret()
11971 def Exec(self, feedback_fn):
11972 """Prepares an instance for an export.
11975 instance = self.instance
11977 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11978 salt = utils.GenerateSecret(8)
11980 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11981 result = self.rpc.call_x509_cert_create(instance.primary_node,
11982 constants.RIE_CERT_VALIDITY)
11983 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11985 (name, cert_pem) = result.payload
11987 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11991 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11992 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11994 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12000 class LUBackupExport(LogicalUnit):
12001 """Export an instance to an image in the cluster.
12004 HPATH = "instance-export"
12005 HTYPE = constants.HTYPE_INSTANCE
12008 def CheckArguments(self):
12009 """Check the arguments.
12012 self.x509_key_name = self.op.x509_key_name
12013 self.dest_x509_ca_pem = self.op.destination_x509_ca
12015 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12016 if not self.x509_key_name:
12017 raise errors.OpPrereqError("Missing X509 key name for encryption",
12018 errors.ECODE_INVAL)
12020 if not self.dest_x509_ca_pem:
12021 raise errors.OpPrereqError("Missing destination X509 CA",
12022 errors.ECODE_INVAL)
12024 def ExpandNames(self):
12025 self._ExpandAndLockInstance()
12027 # Lock all nodes for local exports
12028 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12029 # FIXME: lock only instance primary and destination node
12031     # Sad but true, for now we have to lock all nodes, as we don't know where
12032 # the previous export might be, and in this LU we search for it and
12033 # remove it from its current node. In the future we could fix this by:
12034 # - making a tasklet to search (share-lock all), then create the
12035 # new one, then one to remove, after
12036 # - removing the removal operation altogether
12037 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12039 def DeclareLocks(self, level):
12040 """Last minute lock declaration."""
12041 # All nodes are locked anyway, so nothing to do here.
12043 def BuildHooksEnv(self):
12044 """Build hooks env.
12046 This will run on the master, primary node and target node.
12050 "EXPORT_MODE": self.op.mode,
12051 "EXPORT_NODE": self.op.target_node,
12052 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12053 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12054 # TODO: Generic function for boolean env variables
12055 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12058 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12062 def BuildHooksNodes(self):
12063 """Build hooks nodes.
12066 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12068 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12069 nl.append(self.op.target_node)
12073 def CheckPrereq(self):
12074 """Check prerequisites.
12076 This checks that the instance and node names are valid.
12079 instance_name = self.op.instance_name
12081 self.instance = self.cfg.GetInstanceInfo(instance_name)
12082 assert self.instance is not None, \
12083 "Cannot retrieve locked instance %s" % self.op.instance_name
12084 _CheckNodeOnline(self, self.instance.primary_node)
12086 if (self.op.remove_instance and
12087 self.instance.admin_state == constants.ADMINST_UP and
12088 not self.op.shutdown):
12089 raise errors.OpPrereqError("Can not remove instance without shutting it"
12092 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12093 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12094 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12095 assert self.dst_node is not None
12097 _CheckNodeOnline(self, self.dst_node.name)
12098 _CheckNodeNotDrained(self, self.dst_node.name)
12101 self.dest_disk_info = None
12102 self.dest_x509_ca = None
12104 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12105 self.dst_node = None
12107 if len(self.op.target_node) != len(self.instance.disks):
12108 raise errors.OpPrereqError(("Received destination information for %s"
12109 " disks, but instance %s has %s disks") %
12110 (len(self.op.target_node), instance_name,
12111 len(self.instance.disks)),
12112 errors.ECODE_INVAL)
12114 cds = _GetClusterDomainSecret()
12116 # Check X509 key name
12118 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12119 except (TypeError, ValueError), err:
12120 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12122 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12123 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12124 errors.ECODE_INVAL)
12126 # Load and verify CA
12128 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12129 except OpenSSL.crypto.Error, err:
12130 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12131 (err, ), errors.ECODE_INVAL)
12133 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12134 if errcode is not None:
12135 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12136 (msg, ), errors.ECODE_INVAL)
12138 self.dest_x509_ca = cert
12140 # Verify target information
12142 for idx, disk_data in enumerate(self.op.target_node):
12144 (host, port, magic) = \
12145 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12146 except errors.GenericError, err:
12147 raise errors.OpPrereqError("Target info for disk %s: %s" %
12148 (idx, err), errors.ECODE_INVAL)
12150 disk_info.append((host, port, magic))
12152 assert len(disk_info) == len(self.op.target_node)
12153 self.dest_disk_info = disk_info
12156 raise errors.ProgrammerError("Unhandled export mode %r" %
12159 # instance disk type verification
12160 # TODO: Implement export support for file-based disks
12161 for disk in self.instance.disks:
12162 if disk.dev_type == constants.LD_FILE:
12163 raise errors.OpPrereqError("Export not supported for instances with"
12164 " file-based disks", errors.ECODE_INVAL)
12166 def _CleanupExports(self, feedback_fn):
12167 """Removes exports of current instance from all other nodes.
12169 If an instance in a cluster with nodes A..D was exported to node C, its
12170 exports will be removed from the nodes A, B and D.
12173 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12175 nodelist = self.cfg.GetNodeList()
12176 nodelist.remove(self.dst_node.name)
12178 # on one-node clusters nodelist will be empty after the removal
12179 # if we proceed the backup would be removed because OpBackupQuery
12180 # substitutes an empty list with the full cluster node list.
12181 iname = self.instance.name
12183 feedback_fn("Removing old exports for instance %s" % iname)
12184 exportlist = self.rpc.call_export_list(nodelist)
12185 for node in exportlist:
12186 if exportlist[node].fail_msg:
12188 if iname in exportlist[node].payload:
12189 msg = self.rpc.call_export_remove(node, iname).fail_msg
12191 self.LogWarning("Could not remove older export for instance %s"
12192 " on node %s: %s", iname, node, msg)
12194 def Exec(self, feedback_fn):
12195 """Export an instance to an image in the cluster.
12198 assert self.op.mode in constants.EXPORT_MODES
12200 instance = self.instance
12201 src_node = instance.primary_node
12203 if self.op.shutdown:
12204 # shutdown the instance, but not the disks
12205 feedback_fn("Shutting down instance %s" % instance.name)
12206 result = self.rpc.call_instance_shutdown(src_node, instance,
12207 self.op.shutdown_timeout)
12208 # TODO: Maybe ignore failures if ignore_remove_failures is set
12209 result.Raise("Could not shutdown instance %s on"
12210 " node %s" % (instance.name, src_node))
12212 # set the disks ID correctly since call_instance_start needs the
12213 # correct drbd minor to create the symlinks
12214 for disk in instance.disks:
12215 self.cfg.SetDiskID(disk, src_node)
12217 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12220       # Activate the instance disks if we're exporting a stopped instance
12221 feedback_fn("Activating disks for %s" % instance.name)
12222 _StartInstanceDisks(self, instance, None)
12225 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12228 helper.CreateSnapshots()
12230 if (self.op.shutdown and
12231 instance.admin_state == constants.ADMINST_UP and
12232 not self.op.remove_instance):
12233 assert not activate_disks
12234 feedback_fn("Starting instance %s" % instance.name)
12235 result = self.rpc.call_instance_start(src_node,
12236 (instance, None, None), False)
12237 msg = result.fail_msg
12239 feedback_fn("Failed to start instance: %s" % msg)
12240 _ShutdownInstanceDisks(self, instance)
12241 raise errors.OpExecError("Could not start instance: %s" % msg)
12243 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12244 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12245 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12246 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12247 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12249 (key_name, _, _) = self.x509_key_name
12252 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12255 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12256 key_name, dest_ca_pem,
12261 # Check for backwards compatibility
12262 assert len(dresults) == len(instance.disks)
12263 assert compat.all(isinstance(i, bool) for i in dresults), \
12264 "Not all results are boolean: %r" % dresults
12268 feedback_fn("Deactivating disks for %s" % instance.name)
12269 _ShutdownInstanceDisks(self, instance)
12271 if not (compat.all(dresults) and fin_resu):
12274 failures.append("export finalization")
12275 if not compat.all(dresults):
12276 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12278 failures.append("disk export: disk(s) %s" % fdsk)
12280 raise errors.OpExecError("Export failed, errors in %s" %
12281 utils.CommaJoin(failures))
12283 # At this point, the export was successful, we can cleanup/finish
12285 # Remove instance if requested
12286 if self.op.remove_instance:
12287 feedback_fn("Removing instance %s" % instance.name)
12288 _RemoveInstance(self, feedback_fn, instance,
12289 self.op.ignore_remove_failures)
12291 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12292 self._CleanupExports(feedback_fn)
12294 return fin_resu, dresults
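    # The value returned above is the finalization status plus one boolean per
    # disk, e.g. (True, [True, True]) for a two-disk instance (illustrative
    # values); failures are raised as OpExecError before this point.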
12297 class LUBackupRemove(NoHooksLU):
12298 """Remove exports related to the named instance.
12303 def ExpandNames(self):
12304 self.needed_locks = {}
12305 # We need all nodes to be locked in order for RemoveExport to work, but we
12306 # don't need to lock the instance itself, as nothing will happen to it (and
12307 # we can remove exports also for a removed instance)
12308 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12310 def Exec(self, feedback_fn):
12311 """Remove any export.
12314 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12315 # If the instance was not found we'll try with the name that was passed in.
12316 # This will only work if it was an FQDN, though.
12318 if not instance_name:
12320 instance_name = self.op.instance_name
12322 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12323 exportlist = self.rpc.call_export_list(locked_nodes)
12325 for node in exportlist:
12326 msg = exportlist[node].fail_msg
12328 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12330 if instance_name in exportlist[node].payload:
12332 result = self.rpc.call_export_remove(node, instance_name)
12333 msg = result.fail_msg
12335 logging.error("Could not remove export for instance %s"
12336 " on node %s: %s", instance_name, node, msg)
12338 if fqdn_warn and not found:
12339 feedback_fn("Export not found. If trying to remove an export belonging"
12340 " to a deleted instance please use its Fully Qualified"
12344 class LUGroupAdd(LogicalUnit):
12345 """Logical unit for creating node groups.
12348 HPATH = "group-add"
12349 HTYPE = constants.HTYPE_GROUP
12352 def ExpandNames(self):
12353 # We need the new group's UUID here so that we can create and acquire the
12354 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12355 # that it should not check whether the UUID exists in the configuration.
12356 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12357 self.needed_locks = {}
12358 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12360 def CheckPrereq(self):
12361 """Check prerequisites.
12363     This checks that the given group name is not an existing node group already.
12368 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12369 except errors.OpPrereqError:
12372 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12373 " node group (UUID: %s)" %
12374 (self.op.group_name, existing_uuid),
12375 errors.ECODE_EXISTS)
12377 if self.op.ndparams:
12378 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12380 def BuildHooksEnv(self):
12381 """Build hooks env.
12385 "GROUP_NAME": self.op.group_name,
12388 def BuildHooksNodes(self):
12389 """Build hooks nodes.
12392 mn = self.cfg.GetMasterNode()
12393 return ([mn], [mn])
12395 def Exec(self, feedback_fn):
12396 """Add the node group to the cluster.
12399 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12400 uuid=self.group_uuid,
12401 alloc_policy=self.op.alloc_policy,
12402 ndparams=self.op.ndparams)
12404 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12405 del self.remove_locks[locking.LEVEL_NODEGROUP]
12408 class LUGroupAssignNodes(NoHooksLU):
12409 """Logical unit for assigning nodes to groups.
12414 def ExpandNames(self):
12415 # These raise errors.OpPrereqError on their own:
12416 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12417 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12419 # We want to lock all the affected nodes and groups. We have readily
12420 # available the list of nodes, and the *destination* group. To gather the
12421 # list of "source" groups, we need to fetch node information later on.
12422 self.needed_locks = {
12423 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12424 locking.LEVEL_NODE: self.op.nodes,
12427 def DeclareLocks(self, level):
12428 if level == locking.LEVEL_NODEGROUP:
12429 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12431 # Try to get all affected nodes' groups without having the group or node
12432 # lock yet. Needs verification later in the code flow.
12433 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12435 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12437 def CheckPrereq(self):
12438 """Check prerequisites.
12441 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12442 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12443 frozenset(self.op.nodes))
12445 expected_locks = (set([self.group_uuid]) |
12446 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12447 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12448 if actual_locks != expected_locks:
12449 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12450 " current groups are '%s', used to be '%s'" %
12451 (utils.CommaJoin(expected_locks),
12452 utils.CommaJoin(actual_locks)))
12454 self.node_data = self.cfg.GetAllNodesInfo()
12455 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12456 instance_data = self.cfg.GetAllInstancesInfo()
12458 if self.group is None:
12459 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12460 (self.op.group_name, self.group_uuid))
12462 (new_splits, previous_splits) = \
12463 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12464 for node in self.op.nodes],
12465 self.node_data, instance_data)
12468 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12470 if not self.op.force:
12471 raise errors.OpExecError("The following instances get split by this"
12472 " change and --force was not given: %s" %
12475 self.LogWarning("This operation will split the following instances: %s",
12478 if previous_splits:
12479 self.LogWarning("In addition, these already-split instances continue"
12480 " to be split across groups: %s",
12481 utils.CommaJoin(utils.NiceSort(previous_splits)))
12483 def Exec(self, feedback_fn):
12484 """Assign nodes to a new group.
12487 for node in self.op.nodes:
12488 self.node_data[node].group = self.group_uuid
12490 # FIXME: Depends on side-effects of modifying the result of
12491 # C{cfg.GetAllNodesInfo}
12493 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12496 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12497 """Check for split instances after a node assignment.
12499 This method considers a series of node assignments as an atomic operation,
12500 and returns information about split instances after applying the set of
12503 In particular, it returns information about newly split instances, and
12504 instances that were already split, and remain so after the change.
12506 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12509 @type changes: list of (node_name, new_group_uuid) pairs.
12510 @param changes: list of node assignments to consider.
12511 @param node_data: a dict with data for all nodes
12512 @param instance_data: a dict with all instances to consider
12513 @rtype: a two-tuple
12514   @return: a list of instances that were previously okay and end up split as a
12515     consequence of this change, and a list of instances that were previously
12516     split and that this change does not fix.
12519 changed_nodes = dict((node, group) for node, group in changes
12520 if node_data[node].group != group)
12522 all_split_instances = set()
12523 previously_split_instances = set()
12525 def InstanceNodes(instance):
12526 return [instance.primary_node] + list(instance.secondary_nodes)
12528 for inst in instance_data.values():
12529 if inst.disk_template not in constants.DTS_INT_MIRROR:
12532 instance_nodes = InstanceNodes(inst)
12534 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12535 previously_split_instances.add(inst.name)
12537 if len(set(changed_nodes.get(node, node_data[node].group)
12538 for node in instance_nodes)) > 1:
12539 all_split_instances.add(inst.name)
12541 return (list(all_split_instances - previously_split_instances),
12542 list(previously_split_instances & all_split_instances))
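# Illustrative walk-through of CheckAssignmentForSplitInstances (hypothetical
# names): with a DRBD instance on primary "node1" and secondary "node2", both
# currently in group "G1", the assignment changes = [("node2", "uuid-of-G2")]
# makes the instance appear in the first returned list (newly split), while an
# instance that already spans "G1" and "G2" and is not touched by the change
# shows up in the second list (still split).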
12545 class _GroupQuery(_QueryBase):
12546 FIELDS = query.GROUP_FIELDS
12548 def ExpandNames(self, lu):
12549 lu.needed_locks = {}
12551 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12552 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12555 self.wanted = [name_to_uuid[name]
12556 for name in utils.NiceSort(name_to_uuid.keys())]
12558 # Accept names to be either names or UUIDs.
12561 all_uuid = frozenset(self._all_groups.keys())
12563 for name in self.names:
12564 if name in all_uuid:
12565 self.wanted.append(name)
12566 elif name in name_to_uuid:
12567 self.wanted.append(name_to_uuid[name])
12569 missing.append(name)
12572 raise errors.OpPrereqError("Some groups do not exist: %s" %
12573 utils.CommaJoin(missing),
12574 errors.ECODE_NOENT)
12576 def DeclareLocks(self, lu, level):
12579 def _GetQueryData(self, lu):
12580 """Computes the list of node groups and their attributes.
12583 do_nodes = query.GQ_NODE in self.requested_data
12584 do_instances = query.GQ_INST in self.requested_data
12586 group_to_nodes = None
12587 group_to_instances = None
12589 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12590 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12591 # latter GetAllInstancesInfo() is not enough, for we have to go through
12592 # instance->node. Hence, we will need to process nodes even if we only need
12593 # instance information.
12594 if do_nodes or do_instances:
12595 all_nodes = lu.cfg.GetAllNodesInfo()
12596 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12599 for node in all_nodes.values():
12600 if node.group in group_to_nodes:
12601 group_to_nodes[node.group].append(node.name)
12602 node_to_group[node.name] = node.group
12605 all_instances = lu.cfg.GetAllInstancesInfo()
12606 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12608 for instance in all_instances.values():
12609 node = instance.primary_node
12610 if node in node_to_group:
12611 group_to_instances[node_to_group[node]].append(instance.name)
12614 # Do not pass on node information if it was not requested.
12615 group_to_nodes = None
12617 return query.GroupQueryData([self._all_groups[uuid]
12618 for uuid in self.wanted],
12619 group_to_nodes, group_to_instances)
12622 class LUGroupQuery(NoHooksLU):
12623 """Logical unit for querying node groups.
12628 def CheckArguments(self):
12629 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12630 self.op.output_fields, False)
12632 def ExpandNames(self):
12633 self.gq.ExpandNames(self)
12635 def DeclareLocks(self, level):
12636 self.gq.DeclareLocks(self, level)
12638 def Exec(self, feedback_fn):
12639 return self.gq.OldStyleQuery(self)
12642 class LUGroupSetParams(LogicalUnit):
12643 """Modifies the parameters of a node group.
12646 HPATH = "group-modify"
12647 HTYPE = constants.HTYPE_GROUP
12650 def CheckArguments(self):
12653 self.op.alloc_policy,
12656 if all_changes.count(None) == len(all_changes):
12657 raise errors.OpPrereqError("Please pass at least one modification",
12658 errors.ECODE_INVAL)
12660 def ExpandNames(self):
12661 # This raises errors.OpPrereqError on its own:
12662 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12664 self.needed_locks = {
12665 locking.LEVEL_NODEGROUP: [self.group_uuid],
12668 def CheckPrereq(self):
12669 """Check prerequisites.
12672 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12674 if self.group is None:
12675 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12676 (self.op.group_name, self.group_uuid))
12678 if self.op.ndparams:
12679 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12680 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12681 self.new_ndparams = new_ndparams
12683 def BuildHooksEnv(self):
12684 """Build hooks env.
12688 "GROUP_NAME": self.op.group_name,
12689 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12692 def BuildHooksNodes(self):
12693 """Build hooks nodes.
12696 mn = self.cfg.GetMasterNode()
12697 return ([mn], [mn])
12699 def Exec(self, feedback_fn):
12700 """Modifies the node group.
12705 if self.op.ndparams:
12706 self.group.ndparams = self.new_ndparams
12707 result.append(("ndparams", str(self.group.ndparams)))
12709 if self.op.alloc_policy:
12710 self.group.alloc_policy = self.op.alloc_policy
12712 self.cfg.Update(self.group, feedback_fn)
12716 class LUGroupRemove(LogicalUnit):
12717 HPATH = "group-remove"
12718 HTYPE = constants.HTYPE_GROUP
12721 def ExpandNames(self):
12722     # This will raise errors.OpPrereqError on its own:
12723 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12724 self.needed_locks = {
12725 locking.LEVEL_NODEGROUP: [self.group_uuid],
12728 def CheckPrereq(self):
12729 """Check prerequisites.
12731     This checks that the given group name exists as a node group, that it is
12732     empty (i.e., contains no nodes), and that it is not the last group in the cluster.
12736 # Verify that the group is empty.
12737 group_nodes = [node.name
12738 for node in self.cfg.GetAllNodesInfo().values()
12739 if node.group == self.group_uuid]
12742 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12744 (self.op.group_name,
12745 utils.CommaJoin(utils.NiceSort(group_nodes))),
12746 errors.ECODE_STATE)
12748 # Verify the cluster would not be left group-less.
12749 if len(self.cfg.GetNodeGroupList()) == 1:
12750 raise errors.OpPrereqError("Group '%s' is the only group,"
12751 " cannot be removed" %
12752 self.op.group_name,
12753 errors.ECODE_STATE)
12755 def BuildHooksEnv(self):
12756 """Build hooks env.
12760 "GROUP_NAME": self.op.group_name,
12763 def BuildHooksNodes(self):
12764 """Build hooks nodes.
12767 mn = self.cfg.GetMasterNode()
12768 return ([mn], [mn])
12770 def Exec(self, feedback_fn):
12771 """Remove the node group.
12775 self.cfg.RemoveNodeGroup(self.group_uuid)
12776 except errors.ConfigurationError:
12777 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12778 (self.op.group_name, self.group_uuid))
12780 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12783 class LUGroupRename(LogicalUnit):
12784 HPATH = "group-rename"
12785 HTYPE = constants.HTYPE_GROUP
12788 def ExpandNames(self):
12789 # This raises errors.OpPrereqError on its own:
12790 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12792 self.needed_locks = {
12793 locking.LEVEL_NODEGROUP: [self.group_uuid],
12796 def CheckPrereq(self):
12797 """Check prerequisites.
12799 Ensures requested new name is not yet used.
12802 try:
12803 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12804 except errors.OpPrereqError:
12805 pass
12806 else:
12807 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12808 " node group (UUID: %s)" %
12809 (self.op.new_name, new_name_uuid),
12810 errors.ECODE_EXISTS)
12812 def BuildHooksEnv(self):
12813 """Build hooks env.
12817 "OLD_NAME": self.op.group_name,
12818 "NEW_NAME": self.op.new_name,
12821 def BuildHooksNodes(self):
12822 """Build hooks nodes.
12825 mn = self.cfg.GetMasterNode()
12827 all_nodes = self.cfg.GetAllNodesInfo()
12828 all_nodes.pop(mn, None)
12830 run_nodes = [mn]
12831 run_nodes.extend(node.name for node in all_nodes.values()
12832 if node.group == self.group_uuid)
12834 return (run_nodes, run_nodes)
12836 def Exec(self, feedback_fn):
12837 """Rename the node group.
12840 group = self.cfg.GetNodeGroup(self.group_uuid)
12842 if group is None:
12843 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12844 (self.op.group_name, self.group_uuid))
12846 group.name = self.op.new_name
12847 self.cfg.Update(group, feedback_fn)
12849 return self.op.new_name
12852 class LUGroupEvacuate(LogicalUnit):
12853 HPATH = "group-evacuate"
12854 HTYPE = constants.HTYPE_GROUP
12857 def ExpandNames(self):
12858 # This raises errors.OpPrereqError on its own:
12859 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12861 if self.op.target_groups:
12862 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12863 self.op.target_groups)
12865 self.req_target_uuids = []
12867 if self.group_uuid in self.req_target_uuids:
12868 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12869 " as a target group (targets are %s)" %
12871 utils.CommaJoin(self.req_target_uuids)),
12872 errors.ECODE_INVAL)
12874 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12876 self.share_locks = _ShareAll()
12877 self.needed_locks = {
12878 locking.LEVEL_INSTANCE: [],
12879 locking.LEVEL_NODEGROUP: [],
12880 locking.LEVEL_NODE: [],
12883 def DeclareLocks(self, level):
12884 if level == locking.LEVEL_INSTANCE:
12885 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12887 # Lock instances optimistically, needs verification once node and group
12888 # locks have been acquired
12889 self.needed_locks[locking.LEVEL_INSTANCE] = \
12890 self.cfg.GetNodeGroupInstances(self.group_uuid)
12892 elif level == locking.LEVEL_NODEGROUP:
12893 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12895 if self.req_target_uuids:
12896 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12898 # Lock all groups used by instances optimistically; this requires going
12899 # via the node before it's locked, requiring verification later on
12900 lock_groups.update(group_uuid
12901 for instance_name in
12902 self.owned_locks(locking.LEVEL_INSTANCE)
12903 for group_uuid in
12904 self.cfg.GetInstanceNodeGroups(instance_name))
12905 else:
12906 # No target groups, need to lock all of them
12907 lock_groups = locking.ALL_SET
12909 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12911 elif level == locking.LEVEL_NODE:
12912 # This will only lock the nodes in the group to be evacuated which
12913 # contain actual instances
12914 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12915 self._LockInstancesNodes()
12917 # Lock all nodes in group to be evacuated and target groups
12918 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12919 assert self.group_uuid in owned_groups
12920 member_nodes = [node_name
12921 for group in owned_groups
12922 for node_name in self.cfg.GetNodeGroup(group).members]
12923 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
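# Note on ordering: locking levels are always acquired from the outside in
# (instances, then node groups, then nodes), which is why the instance and
# group locks taken optimistically above have to be re-verified in CheckPrereq
# once every level is actually held.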
12925 def CheckPrereq(self):
12926 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12927 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12928 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12930 assert owned_groups.issuperset(self.req_target_uuids)
12931 assert self.group_uuid in owned_groups
12933 # Check if locked instances are still correct
12934 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12936 # Get instance information
12937 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12939 # Check if node groups for locked instances are still correct
12940 for instance_name in owned_instances:
12941 inst = self.instances[instance_name]
12942 assert owned_nodes.issuperset(inst.all_nodes), \
12943 "Instance %s's nodes changed while we kept the lock" % instance_name
12945 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12946 owned_groups)
12948 assert self.group_uuid in inst_groups, \
12949 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12951 if self.req_target_uuids:
12952 # User requested specific target groups
12953 self.target_uuids = self.req_target_uuids
12954 else:
12955 # All groups except the one to be evacuated are potential targets
12956 self.target_uuids = [group_uuid for group_uuid in owned_groups
12957 if group_uuid != self.group_uuid]
12959 if not self.target_uuids:
12960 raise errors.OpPrereqError("There are no possible target groups",
12961 errors.ECODE_INVAL)
12963 def BuildHooksEnv(self):
12964 """Build hooks env.
12968 "GROUP_NAME": self.op.group_name,
12969 "TARGET_GROUPS": " ".join(self.target_uuids),
12972 def BuildHooksNodes(self):
12973 """Build hooks nodes.
12976 mn = self.cfg.GetMasterNode()
12978 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12980 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12982 return (run_nodes, run_nodes)
12984 def Exec(self, feedback_fn):
12985 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12987 assert self.group_uuid not in self.target_uuids
12989 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12990 instances=instances, target_groups=self.target_uuids)
12992 ial.Run(self.op.iallocator)
12994 if not ial.success:
12995 raise errors.OpPrereqError("Can't compute group evacuation using"
12996 " iallocator '%s': %s" %
12997 (self.op.iallocator, ial.info),
12998 errors.ECODE_NORES)
13000 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13002 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13003 len(jobs), self.op.group_name)
13005 return ResultWithJobs(jobs)
13008 class TagsLU(NoHooksLU): # pylint: disable=W0223
13009 """Generic tags LU.
13011 This is an abstract class which is the parent of all the other tags LUs.
13014 def ExpandNames(self):
13015 self.group_uuid = None
13016 self.needed_locks = {}
13017 if self.op.kind == constants.TAG_NODE:
13018 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13019 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13020 elif self.op.kind == constants.TAG_INSTANCE:
13021 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13022 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13023 elif self.op.kind == constants.TAG_NODEGROUP:
13024 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13026 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13027 # not possible to acquire the BGL based on opcode parameters)
13029 def CheckPrereq(self):
13030 """Check prerequisites.
13033 if self.op.kind == constants.TAG_CLUSTER:
13034 self.target = self.cfg.GetClusterInfo()
13035 elif self.op.kind == constants.TAG_NODE:
13036 self.target = self.cfg.GetNodeInfo(self.op.name)
13037 elif self.op.kind == constants.TAG_INSTANCE:
13038 self.target = self.cfg.GetInstanceInfo(self.op.name)
13039 elif self.op.kind == constants.TAG_NODEGROUP:
13040 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13041 else:
13042 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13043 str(self.op.kind), errors.ECODE_INVAL)
13046 class LUTagsGet(TagsLU):
13047 """Returns the tags of a given object.
13052 def ExpandNames(self):
13053 TagsLU.ExpandNames(self)
13055 # Share locks as this is only a read operation
13056 self.share_locks = _ShareAll()
13058 def Exec(self, feedback_fn):
13059 """Returns the tag list.
13062 return list(self.target.GetTags())
13065 class LUTagsSearch(NoHooksLU):
13066 """Searches the tags for a given pattern.
13071 def ExpandNames(self):
13072 self.needed_locks = {}
13074 def CheckPrereq(self):
13075 """Check prerequisites.
13077 This checks the pattern passed for validity by compiling it.
13081 self.re = re.compile(self.op.pattern)
13082 except re.error, err:
13083 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13084 (self.op.pattern, err), errors.ECODE_INVAL)
13086 def Exec(self, feedback_fn):
13087 """Returns the tag list.
13090 cfg = self.cfg
13091 tgts = [("/cluster", cfg.GetClusterInfo())]
13092 ilist = cfg.GetAllInstancesInfo().values()
13093 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13094 nlist = cfg.GetAllNodesInfo().values()
13095 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13096 tgts.extend(("/nodegroup/%s" % n.name, n)
13097 for n in cfg.GetAllNodeGroupsInfo().values())
13098 results = []
13099 for path, target in tgts:
13100 for tag in target.GetTags():
13101 if self.re.search(tag):
13102 results.append((path, tag))
13104 return results
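# Illustrative output sketch (hypothetical names and tags): searching for the
# pattern "^web" could return
#   [("/instances/web1.example.com", "webserver"),
#    ("/nodes/node2.example.com", "web-capable")]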
13106 class LUTagsSet(TagsLU):
13107 """Sets a tag on a given object.
13112 def CheckPrereq(self):
13113 """Check prerequisites.
13115 This checks the type and length of the tag name and value.
13118 TagsLU.CheckPrereq(self)
13119 for tag in self.op.tags:
13120 objects.TaggableObject.ValidateTag(tag)
13122 def Exec(self, feedback_fn):
13126 try:
13127 for tag in self.op.tags:
13128 self.target.AddTag(tag)
13129 except errors.TagError, err:
13130 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13131 self.cfg.Update(self.target, feedback_fn)
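# Rough usage sketch (hypothetical names): a command line such as
# "gnt-instance add-tags web1.example.com role:frontend" reaches this LU as an
# OpTagsSet opcode with kind=TAG_INSTANCE, name="web1.example.com" and
# tags=["role:frontend"]; CheckPrereq validates every tag before AddTag runs.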
13134 class LUTagsDel(TagsLU):
13135 """Delete a list of tags from a given object.
13140 def CheckPrereq(self):
13141 """Check prerequisites.
13143 This checks that we have the given tag.
13146 TagsLU.CheckPrereq(self)
13147 for tag in self.op.tags:
13148 objects.TaggableObject.ValidateTag(tag)
13149 del_tags = frozenset(self.op.tags)
13150 cur_tags = self.target.GetTags()
13152 diff_tags = del_tags - cur_tags
13153 if diff_tags:
13154 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13155 raise errors.OpPrereqError("Tag(s) %s not found" %
13156 (utils.CommaJoin(diff_names), ),
13157 errors.ECODE_NOENT)
13159 def Exec(self, feedback_fn):
13160 """Remove the tag from the object.
13163 for tag in self.op.tags:
13164 self.target.RemoveTag(tag)
13165 self.cfg.Update(self.target, feedback_fn)
13168 class LUTestDelay(NoHooksLU):
13169 """Sleep for a specified amount of time.
13171 This LU sleeps on the master and/or nodes for a specified amount of time.
13177 def ExpandNames(self):
13178 """Expand names and set required locks.
13180 This expands the node list, if any.
13183 self.needed_locks = {}
13184 if self.op.on_nodes:
13185 # _GetWantedNodes can be used here, but is not always appropriate to use
13186 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13187 # more information.
13188 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13189 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13191 def _TestDelay(self):
13192 """Do the actual sleep.
13195 if self.op.on_master:
13196 if not utils.TestDelay(self.op.duration):
13197 raise errors.OpExecError("Error during master delay test")
13198 if self.op.on_nodes:
13199 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13200 for node, node_result in result.items():
13201 node_result.Raise("Failure during rpc call to node %s" % node)
13203 def Exec(self, feedback_fn):
13204 """Execute the test delay opcode, with the wanted repetitions.
13207 if self.op.repeat == 0:
13208 self._TestDelay()
13209 else:
13210 top_value = self.op.repeat - 1
13211 for i in range(self.op.repeat):
13212 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13213 self._TestDelay()
13216 class LUTestJqueue(NoHooksLU):
13217 """Utility LU to test some aspects of the job queue.
13222 # Must be lower than default timeout for WaitForJobChange to see whether it
13223 # notices changed jobs
13224 _CLIENT_CONNECT_TIMEOUT = 20.0
13225 _CLIENT_CONFIRM_TIMEOUT = 60.0
13228 def _NotifyUsingSocket(cls, cb, errcls):
13229 """Opens a Unix socket and waits for another program to connect.
13232 @param cb: Callback to send socket name to client
13233 @type errcls: class
13234 @param errcls: Exception class to use for errors
13237 # Using a temporary directory as there's no easy way to create temporary
13238 # sockets without writing a custom loop around tempfile.mktemp and
13239 # socket.bind
13240 tmpdir = tempfile.mkdtemp()
13242 tmpsock = utils.PathJoin(tmpdir, "sock")
13244 logging.debug("Creating temporary socket at %s", tmpsock)
13245 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13250 # Send details to client
13251 cb(tmpsock)
13253 # Wait for client to connect before continuing
13254 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13256 (conn, _) = sock.accept()
13257 except socket.error, err:
13258 raise errcls("Client didn't connect in time (%s)" % err)
13262 # Remove as soon as client is connected
13263 shutil.rmtree(tmpdir)
13265 # Wait for client to close
13268 # pylint: disable=E1101
13269 # Instance of '_socketobject' has no ... member
13270 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13272 except socket.error, err:
13273 raise errcls("Client failed to confirm notification (%s)" % err)
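# Handshake summary for the code above: a throwaway AF_UNIX socket is created
# in a temporary directory, its path is handed to the client via the callback,
# the client must connect within _CLIENT_CONNECT_TIMEOUT, the directory is
# removed as soon as the connection is accepted, and the client then confirms
# by closing the connection within _CLIENT_CONFIRM_TIMEOUT.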
13277 def _SendNotification(self, test, arg, sockname):
13278 """Sends a notification to the client.
13281 @param test: Test name
13282 @param arg: Test argument (depends on test)
13283 @type sockname: string
13284 @param sockname: Socket path
13287 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13289 def _Notify(self, prereq, test, arg):
13290 """Notifies the client of a test.
13293 @param prereq: Whether this is a prereq-phase test
13295 @param test: Test name
13296 @param arg: Test argument (depends on test)
13299 if prereq:
13300 errcls = errors.OpPrereqError
13301 else:
13302 errcls = errors.OpExecError
13304 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13305 test, arg),
13306 errcls)
13308 def CheckArguments(self):
13309 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13310 self.expandnames_calls = 0
13312 def ExpandNames(self):
13313 checkargs_calls = getattr(self, "checkargs_calls", 0)
13314 if checkargs_calls < 1:
13315 raise errors.ProgrammerError("CheckArguments was not called")
13317 self.expandnames_calls += 1
13319 if self.op.notify_waitlock:
13320 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13322 self.LogInfo("Expanding names")
13324 # Get lock on master node (just to get a lock, not for a particular reason)
13325 self.needed_locks = {
13326 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13329 def Exec(self, feedback_fn):
13330 if self.expandnames_calls < 1:
13331 raise errors.ProgrammerError("ExpandNames was not called")
13333 if self.op.notify_exec:
13334 self._Notify(False, constants.JQT_EXEC, None)
13336 self.LogInfo("Executing")
13338 if self.op.log_messages:
13339 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13340 for idx, msg in enumerate(self.op.log_messages):
13341 self.LogInfo("Sending log message %s", idx + 1)
13342 feedback_fn(constants.JQT_MSGPREFIX + msg)
13343 # Report how many test messages have been sent
13344 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13346 if self.op.fail:
13347 raise errors.OpExecError("Opcode failure was requested")
13352 class IAllocator(object):
13353 """IAllocator framework.
13355 An IAllocator instance has four sets of attributes:
13356 - cfg that is needed to query the cluster
13357 - input data (all members of the _KEYS class attribute are required)
13358 - four buffer attributes (in|out_data|text), that represent the
13359 input (to the external script) in text and data structure format,
13360 and the output from it, again in two formats
13361 - the result variables from the script (success, info, nodes) for
13365 # pylint: disable=R0902
13366 # lots of instance attributes
13368 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13369 self.cfg = cfg
13370 self.rpc = rpc_runner
13371 # init buffer variables
13372 self.in_text = self.out_text = self.in_data = self.out_data = None
13373 # init all input fields so that pylint is happy
13374 self.mode = mode
13375 self.memory = self.disks = self.disk_template = None
13376 self.os = self.tags = self.nics = self.vcpus = None
13377 self.hypervisor = None
13378 self.relocate_from = None
13380 self.instances = None
13381 self.evac_mode = None
13382 self.target_groups = []
13384 self.required_nodes = None
13385 # init result fields
13386 self.success = self.info = self.result = None
13388 try:
13389 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13390 except KeyError:
13391 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13392 " IAllocator" % self.mode)
13394 keyset = [n for (n, _) in keydata]
13396 for key in kwargs:
13397 if key not in keyset:
13398 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13399 " IAllocator" % key)
13400 setattr(self, key, kwargs[key])
13402 for key in keyset:
13403 if key not in kwargs:
13404 raise errors.ProgrammerError("Missing input parameter '%s' to"
13405 " IAllocator" % key)
13406 self._BuildInputData(compat.partial(fn, self), keydata)
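# Illustrative construction sketch (hypothetical instance, node and allocator
# names): a relocation request would be built roughly as
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
# The keyword arguments must match the keydata declared for the chosen mode in
# _MODE_DATA below.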
13408 def _ComputeClusterData(self):
13409 """Compute the generic allocator input data.
13411 This is the data that is independent of the actual operation.
13415 cluster_info = cfg.GetClusterInfo()
13418 "version": constants.IALLOCATOR_VERSION,
13419 "cluster_name": cfg.GetClusterName(),
13420 "cluster_tags": list(cluster_info.GetTags()),
13421 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13422 # we don't have job IDs
13424 ninfo = cfg.GetAllNodesInfo()
13425 iinfo = cfg.GetAllInstancesInfo().values()
13426 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13429 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13431 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13432 hypervisor_name = self.hypervisor
13433 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13434 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13435 else:
13436 hypervisor_name = cluster_info.enabled_hypervisors[0]
13438 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13441 self.rpc.call_all_instances_info(node_list,
13442 cluster_info.enabled_hypervisors)
13444 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13446 config_ndata = self._ComputeBasicNodeData(ninfo)
13447 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13448 i_list, config_ndata)
13449 assert len(data["nodes"]) == len(ninfo), \
13450 "Incomplete node data computed"
13452 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13454 self.in_data = data
13457 def _ComputeNodeGroupData(cfg):
13458 """Compute node groups data.
13461 ng = dict((guuid, {
13462 "name": gdata.name,
13463 "alloc_policy": gdata.alloc_policy,
13465 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13467 return ng
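# Illustrative return value (hypothetical UUID and names):
#   {"uuid-of-default-group": {"name": "default", "alloc_policy": "preferred"}}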
13470 def _ComputeBasicNodeData(node_cfg):
13471 """Compute static (config-based) node data.
13474 @returns: a dict mapping node name to a dict of static node attributes
13477 # fill in static (config-based) values
13478 node_results = dict((ninfo.name, {
13479 "tags": list(ninfo.GetTags()),
13480 "primary_ip": ninfo.primary_ip,
13481 "secondary_ip": ninfo.secondary_ip,
13482 "offline": ninfo.offline,
13483 "drained": ninfo.drained,
13484 "master_candidate": ninfo.master_candidate,
13485 "group": ninfo.group,
13486 "master_capable": ninfo.master_capable,
13487 "vm_capable": ninfo.vm_capable,
13489 for ninfo in node_cfg.values())
13491 return node_results
13494 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13496 """Compute dynamic node data, merging RPC results into the static data.
13498 @param node_results: the basic node structures as filled from the config
13501 # make a copy of the current dict
13502 node_results = dict(node_results)
13503 for nname, nresult in node_data.items():
13504 assert nname in node_results, "Missing basic data for node %s" % nname
13505 ninfo = node_cfg[nname]
13507 if not (ninfo.offline or ninfo.drained):
13508 nresult.Raise("Can't get data for node %s" % nname)
13509 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13511 remote_info = nresult.payload
13513 for attr in ["memory_total", "memory_free", "memory_dom0",
13514 "vg_size", "vg_free", "cpu_total"]:
13515 if attr not in remote_info:
13516 raise errors.OpExecError("Node '%s' didn't return attribute"
13517 " '%s'" % (nname, attr))
13518 if not isinstance(remote_info[attr], int):
13519 raise errors.OpExecError("Node '%s' returned invalid value"
13521 (nname, attr, remote_info[attr]))
13522 # compute memory used by primary instances
13523 i_p_mem = i_p_up_mem = 0
13524 for iinfo, beinfo in i_list:
13525 if iinfo.primary_node == nname:
13526 i_p_mem += beinfo[constants.BE_MEMORY]
13527 if iinfo.name not in node_iinfo[nname].payload:
13528 i_used_mem = 0
13529 else:
13530 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13531 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13532 remote_info["memory_free"] -= max(0, i_mem_diff)
13534 if iinfo.admin_state == constants.ADMINST_UP:
13535 i_p_up_mem += beinfo[constants.BE_MEMORY]
13537 # compute memory used by instances
13539 "total_memory": remote_info["memory_total"],
13540 "reserved_memory": remote_info["memory_dom0"],
13541 "free_memory": remote_info["memory_free"],
13542 "total_disk": remote_info["vg_size"],
13543 "free_disk": remote_info["vg_free"],
13544 "total_cpus": remote_info["cpu_total"],
13545 "i_pri_memory": i_p_mem,
13546 "i_pri_up_memory": i_p_up_mem,
13548 pnr_dyn.update(node_results[nname])
13549 node_results[nname] = pnr_dyn
13551 return node_results
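# Worked example of the free-memory adjustment above (hypothetical numbers): a
# node reports memory_free=2048 MiB and hosts a primary instance with
# BE_MEMORY=512 that currently uses 384 MiB; i_mem_diff = 512 - 384 = 128, so
# the node's free_memory is reported as 2048 - 128 = 1920, reserving the
# memory the running instance may still grow into.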
13554 def _ComputeInstanceData(cluster_info, i_list):
13555 """Compute global instance data.
13559 for iinfo, beinfo in i_list:
13561 for nic in iinfo.nics:
13562 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13566 "mode": filled_params[constants.NIC_MODE],
13567 "link": filled_params[constants.NIC_LINK],
13569 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13570 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13571 nic_data.append(nic_dict)
13573 "tags": list(iinfo.GetTags()),
13574 "admin_state": iinfo.admin_state,
13575 "vcpus": beinfo[constants.BE_VCPUS],
13576 "memory": beinfo[constants.BE_MEMORY],
13578 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13580 "disks": [{constants.IDISK_SIZE: dsk.size,
13581 constants.IDISK_MODE: dsk.mode}
13582 for dsk in iinfo.disks],
13583 "disk_template": iinfo.disk_template,
13584 "hypervisor": iinfo.hypervisor,
13586 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13588 instance_data[iinfo.name] = pir
13590 return instance_data
13592 def _AddNewInstance(self):
13593 """Add new instance data to allocator structure.
13595 This, in combination with _ComputeClusterData, will create the
13596 correct structure needed as input for the allocator.
13598 The checks for the completeness of the opcode must have already been
13602 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13604 if self.disk_template in constants.DTS_INT_MIRROR:
13605 self.required_nodes = 2
13606 else:
13607 self.required_nodes = 1
13611 "disk_template": self.disk_template,
13614 "vcpus": self.vcpus,
13615 "memory": self.memory,
13616 "disks": self.disks,
13617 "disk_space_total": disk_space,
13619 "required_nodes": self.required_nodes,
13620 "hypervisor": self.hypervisor,
13625 def _AddRelocateInstance(self):
13626 """Add relocate instance data to allocator structure.
13628 This, in combination with _ComputeClusterData, will create the
13629 correct structure needed as input for the allocator.
13631 The checks for the completeness of the opcode must have already been
13635 instance = self.cfg.GetInstanceInfo(self.name)
13636 if instance is None:
13637 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13638 " IAllocator" % self.name)
13640 if instance.disk_template not in constants.DTS_MIRRORED:
13641 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13642 errors.ECODE_INVAL)
13644 if instance.disk_template in constants.DTS_INT_MIRROR and \
13645 len(instance.secondary_nodes) != 1:
13646 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13647 errors.ECODE_STATE)
13649 self.required_nodes = 1
13650 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13651 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13655 "disk_space_total": disk_space,
13656 "required_nodes": self.required_nodes,
13657 "relocate_from": self.relocate_from,
13661 def _AddNodeEvacuate(self):
13662 """Get data for node-evacuate requests.
13666 "instances": self.instances,
13667 "evac_mode": self.evac_mode,
13670 def _AddChangeGroup(self):
13671 """Get data for change-group requests.
13675 "instances": self.instances,
13676 "target_groups": self.target_groups,
13679 def _BuildInputData(self, fn, keydata):
13680 """Build input data structures.
13683 self._ComputeClusterData()
13685 request = fn()
13686 request["type"] = self.mode
13687 for keyname, keytype in keydata:
13688 if keyname not in request:
13689 raise errors.ProgrammerError("Request parameter %s is missing" %
13691 val = request[keyname]
13692 if not keytype(val):
13693 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13694 " validation, value %s, expected"
13695 " type %s" % (keyname, val, keytype))
13696 self.in_data["request"] = request
13698 self.in_text = serializer.Dump(self.in_data)
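# Illustrative request sketch (hypothetical values): for an allocation the
# resulting self.in_data["request"] looks roughly like
#   {"type": "allocate", "name": "inst1.example.com", "memory": 512,
#    "vcpus": 1, "disks": [{"size": 1024, "mode": "rw"}],
#    "required_nodes": 2}
# plus the remaining keys added by _AddNewInstance; serializer.Dump() then
# renders the whole in_data structure as the text handed to the external
# iallocator script.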
13700 _STRING_LIST = ht.TListOf(ht.TString)
13701 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13702 # pylint: disable=E1101
13703 # Class '...' has no 'OP_ID' member
13704 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13705 opcodes.OpInstanceMigrate.OP_ID,
13706 opcodes.OpInstanceReplaceDisks.OP_ID])
13709 _NEVAC_MOVED = \
13710 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13711 ht.TItems([ht.TNonEmptyString,
13712 ht.TNonEmptyString,
13713 ht.TListOf(ht.TNonEmptyString),
13715 _NEVAC_FAILED = \
13716 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13717 ht.TItems([ht.TNonEmptyString,
13720 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13721 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
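# Illustrative value accepted by _NEVAC_RESULT (hypothetical names): a triple
# of moved instances, failed instances and job sets, e.g.
#   ([("inst1.example.com", "target-group", ["node3.example.com"])],
#    [("inst2.example.com", "not enough memory on target group")],
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE", "instance_name": "inst1.example.com"}]])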
13723 _MODE_DATA = {
13724 constants.IALLOCATOR_MODE_ALLOC:
13727 ("name", ht.TString),
13728 ("memory", ht.TInt),
13729 ("disks", ht.TListOf(ht.TDict)),
13730 ("disk_template", ht.TString),
13731 ("os", ht.TString),
13732 ("tags", _STRING_LIST),
13733 ("nics", ht.TListOf(ht.TDict)),
13734 ("vcpus", ht.TInt),
13735 ("hypervisor", ht.TString),
13737 constants.IALLOCATOR_MODE_RELOC:
13738 (_AddRelocateInstance,
13739 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13741 constants.IALLOCATOR_MODE_NODE_EVAC:
13742 (_AddNodeEvacuate, [
13743 ("instances", _STRING_LIST),
13744 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13746 constants.IALLOCATOR_MODE_CHG_GROUP:
13747 (_AddChangeGroup, [
13748 ("instances", _STRING_LIST),
13749 ("target_groups", _STRING_LIST),
13753 def Run(self, name, validate=True, call_fn=None):
13754 """Run an instance allocator and return the results.
13757 if call_fn is None:
13758 call_fn = self.rpc.call_iallocator_runner
13760 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13761 result.Raise("Failure while running the iallocator script")
13763 self.out_text = result.payload
13765 self._ValidateResult()
13767 def _ValidateResult(self):
13768 """Process the allocator results.
13770 This will process and if successful save the result in
13771 self.out_data and the other parameters.
13775 rdict = serializer.Load(self.out_text)
13776 except Exception, err:
13777 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13779 if not isinstance(rdict, dict):
13780 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13782 # TODO: remove backwards compatibility in later versions
13783 if "nodes" in rdict and "result" not in rdict:
13784 rdict["result"] = rdict["nodes"]
13787 for key in "success", "info", "result":
13788 if key not in rdict:
13789 raise errors.OpExecError("Can't parse iallocator results:"
13790 " missing key '%s'" % key)
13791 setattr(self, key, rdict[key])
13793 if not self._result_check(self.result):
13794 raise errors.OpExecError("Iallocator returned invalid result,"
13795 " expected %s, got %s" %
13796 (self._result_check, self.result),
13797 errors.ECODE_INVAL)
13799 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13800 assert self.relocate_from is not None
13801 assert self.required_nodes == 1
13803 node2group = dict((name, ndata["group"])
13804 for (name, ndata) in self.in_data["nodes"].items())
13806 fn = compat.partial(self._NodesToGroups, node2group,
13807 self.in_data["nodegroups"])
13809 instance = self.cfg.GetInstanceInfo(self.name)
13810 request_groups = fn(self.relocate_from + [instance.primary_node])
13811 result_groups = fn(rdict["result"] + [instance.primary_node])
13813 if self.success and not set(result_groups).issubset(request_groups):
13814 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13815 " differ from original groups (%s)" %
13816 (utils.CommaJoin(result_groups),
13817 utils.CommaJoin(request_groups)))
13819 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13820 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13822 self.out_data = rdict
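# Illustrative successful reply (hypothetical node names) as stored in
# self.out_data for an allocation request:
#   {"success": True, "info": "allocation successful",
#    "result": ["node2.example.com", "node5.example.com"]}
# For IALLOCATOR_MODE_RELOC the group comparison above additionally ensures
# that the chosen nodes stay within the instance's original node group(s).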
13825 def _NodesToGroups(node2group, groups, nodes):
13826 """Returns a list of unique group names for a list of nodes.
13828 @type node2group: dict
13829 @param node2group: Map from node name to group UUID
13831 @param groups: Group information
13833 @param nodes: Node names
13836 result = set()
13838 for node in nodes:
13839 try:
13840 group_uuid = node2group[node]
13841 except KeyError:
13842 # Ignore unknown node
13843 continue
13845 try:
13846 group = groups[group_uuid]
13847 except KeyError:
13848 # Can't find group, let's use UUID
13849 group_name = group_uuid
13850 else:
13851 group_name = group["name"]
13853 result.add(group_name)
13855 return sorted(result)
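# Worked example (hypothetical data):
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
#   _NodesToGroups(node2group, groups, ["node1", "node2", "node1"])
#   => ["default", "uuid-b"]
# Unknown nodes are skipped, unknown group UUIDs are kept verbatim, duplicates
# collapse and the result is sorted.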
13858 class LUTestAllocator(NoHooksLU):
13859 """Run allocator tests.
13861 This LU runs the allocator tests
13864 def CheckPrereq(self):
13865 """Check prerequisites.
13867 This checks the opcode parameters depending on the requested direction and mode of the test.
13870 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13871 for attr in ["memory", "disks", "disk_template",
13872 "os", "tags", "nics", "vcpus"]:
13873 if not hasattr(self.op, attr):
13874 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13875 attr, errors.ECODE_INVAL)
13876 iname = self.cfg.ExpandInstanceName(self.op.name)
13877 if iname is not None:
13878 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13879 iname, errors.ECODE_EXISTS)
13880 if not isinstance(self.op.nics, list):
13881 raise errors.OpPrereqError("Invalid parameter 'nics'",
13882 errors.ECODE_INVAL)
13883 if not isinstance(self.op.disks, list):
13884 raise errors.OpPrereqError("Invalid parameter 'disks'",
13885 errors.ECODE_INVAL)
13886 for row in self.op.disks:
13887 if (not isinstance(row, dict) or
13888 constants.IDISK_SIZE not in row or
13889 not isinstance(row[constants.IDISK_SIZE], int) or
13890 constants.IDISK_MODE not in row or
13891 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13892 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13893 " parameter", errors.ECODE_INVAL)
13894 if self.op.hypervisor is None:
13895 self.op.hypervisor = self.cfg.GetHypervisorType()
13896 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13897 fname = _ExpandInstanceName(self.cfg, self.op.name)
13898 self.op.name = fname
13899 self.relocate_from = \
13900 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13901 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13902 constants.IALLOCATOR_MODE_NODE_EVAC):
13903 if not self.op.instances:
13904 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13905 self.op.instances = _GetWantedInstances(self, self.op.instances)
13906 else:
13907 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13908 self.op.mode, errors.ECODE_INVAL)
13910 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13911 if self.op.allocator is None:
13912 raise errors.OpPrereqError("Missing allocator name",
13913 errors.ECODE_INVAL)
13914 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13915 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13916 self.op.direction, errors.ECODE_INVAL)
13918 def Exec(self, feedback_fn):
13919 """Run the allocator test.
13922 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13923 ial = IAllocator(self.cfg, self.rpc,
13926 memory=self.op.memory,
13927 disks=self.op.disks,
13928 disk_template=self.op.disk_template,
13932 vcpus=self.op.vcpus,
13933 hypervisor=self.op.hypervisor,
13935 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13936 ial = IAllocator(self.cfg, self.rpc,
13939 relocate_from=list(self.relocate_from),
13941 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13942 ial = IAllocator(self.cfg, self.rpc,
13944 instances=self.op.instances,
13945 target_groups=self.op.target_groups)
13946 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13947 ial = IAllocator(self.cfg, self.rpc,
13949 instances=self.op.instances,
13950 evac_mode=self.op.evac_mode)
13952 raise errors.ProgrammerError("Uncaught mode %s in"
13953 " LUTestAllocator.Exec", self.op.mode)
13955 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13956 result = ial.in_text
13957 else:
13958 ial.Run(self.op.allocator, validate=False)
13959 result = ial.out_text
13961 return result
13963 #: Query type implementations
13964 _QUERY_IMPL = {
13965 constants.QR_INSTANCE: _InstanceQuery,
13966 constants.QR_NODE: _NodeQuery,
13967 constants.QR_GROUP: _GroupQuery,
13968 constants.QR_OS: _OsQuery,
13971 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13974 def _GetQueryImplementation(name):
13975 """Returns the implementation for a query type.
13977 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13981 return _QUERY_IMPL[name]
13983 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13984 errors.ECODE_INVAL)
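# Illustrative use (hypothetical caller): _GetQueryImplementation(constants.QR_NODE)
# returns the _NodeQuery class registered above; the generic query logical
# units instantiate it to answer query requests for that resource type.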