4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
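    Example (an illustrative sketch only; the extra keyword argument name is
    made up)::

      # One job consisting of a single opcode, plus a custom entry in the
      # LU result
      return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                            custom_field="value")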
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
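  A minimal, illustrative skeleton following these rules (the class name and
  hook path below are made up; this is a sketch, not an LU from this module)::

    class LUExampleNoop(LogicalUnit):
      HPATH = "example-noop"
      HTYPE = constants.HTYPE_CLUSTER
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Doing nothing")
        return True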
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods need not worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
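      # Acquire all node locks, but in shared mode (an illustrative addition
      # to the examples above; by default locks are acquired exclusively)
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
      }
      self.share_locks[locking.LEVEL_NODE] = 1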
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
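    A typical implementation, mirroring the pattern documented for
    L{_LockInstancesNodes} below (illustrative sketch only)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()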
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If no nodes are to be returned, an
309 empty list must be used (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
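# _ShareAll is typically assigned wholesale, as done for example in
# LUClusterVerifyGroup.ExpandNames below:
#   self.share_locks = _ShareAll()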
576 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
577 """Checks if the owned node groups are still correct for an instance.
579 @type cfg: L{config.ConfigWriter}
580 @param cfg: The cluster configuration
581 @type instance_name: string
582 @param instance_name: Instance name
583 @type owned_groups: set or frozenset
584 @param owned_groups: List of currently owned node groups
587 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
589 if not owned_groups.issuperset(inst_groups):
590 raise errors.OpPrereqError("Instance %s's node groups changed since"
591 " locks were acquired, current groups are"
592 " are '%s', owning groups '%s'; retry the"
595 utils.CommaJoin(inst_groups),
596 utils.CommaJoin(owned_groups)),
602 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
603 """Checks if the instances in a node group are still correct.
605 @type cfg: L{config.ConfigWriter}
606 @param cfg: The cluster configuration
607 @type group_uuid: string
608 @param group_uuid: Node group UUID
609 @type owned_instances: set or frozenset
610 @param owned_instances: List of currently owned instances
613 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
614 if owned_instances != wanted_instances:
615 raise errors.OpPrereqError("Instances in node group '%s' changed since"
616 " locks were acquired, wanted '%s', have '%s';"
617 " retry the operation" %
619 utils.CommaJoin(wanted_instances),
620 utils.CommaJoin(owned_instances)),
623 return wanted_instances
626 def _SupportsOob(cfg, node):
627 """Tells if node supports OOB.
629 @type cfg: L{config.ConfigWriter}
630 @param cfg: The cluster configuration
631 @type node: L{objects.Node}
632 @param node: The node
633 @return: The OOB script if supported or an empty string otherwise
636 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
639 def _GetWantedNodes(lu, nodes):
640 """Returns list of checked and expanded node names.
642 @type lu: L{LogicalUnit}
643 @param lu: the logical unit on whose behalf we execute
645 @param nodes: list of node names or None for all nodes
647 @return: the list of nodes, sorted
648 @raise errors.ProgrammerError: if the nodes parameter is wrong type
652 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
654 return utils.NiceSort(lu.cfg.GetNodeList())
657 def _GetWantedInstances(lu, instances):
658 """Returns list of checked and expanded instance names.
660 @type lu: L{LogicalUnit}
661 @param lu: the logical unit on whose behalf we execute
662 @type instances: list
663 @param instances: list of instance names or None for all instances
665 @return: the list of instances, sorted
666 @raise errors.OpPrereqError: if the instances parameter is wrong type
667 @raise errors.OpPrereqError: if any of the passed instances is not found
671 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
673 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
677 def _GetUpdatedParams(old_params, update_dict,
678 use_default=True, use_none=False):
679 """Return the new version of a parameter dictionary.
681 @type old_params: dict
682 @param old_params: old parameters
683 @type update_dict: dict
684 @param update_dict: dict containing new parameter values, or
685 constants.VALUE_DEFAULT to reset the parameter to its default
687 @type use_default: boolean
688 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
689 values as 'to be deleted' values
690 @type use_none: boolean
691 @param use_none: whether to recognise C{None} values as 'to be
694 @return: the new parameter dictionary
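  Example (illustrative only; with C{use_default=True} a value of
  L{constants.VALUE_DEFAULT} removes the key instead of storing it)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"a": constants.VALUE_DEFAULT, "c": 3})
    => {"b": 2, "c": 3}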
697 params_copy = copy.deepcopy(old_params)
698 for key, val in update_dict.iteritems():
699 if ((use_default and val == constants.VALUE_DEFAULT) or
700 (use_none and val is None)):
706 params_copy[key] = val
710 def _ReleaseLocks(lu, level, names=None, keep=None):
711 """Releases locks owned by an LU.
713 @type lu: L{LogicalUnit}
714 @param level: Lock level
715 @type names: list or None
716 @param names: Names of locks to release
717 @type keep: list or None
718 @param keep: Names of locks to retain
721 assert not (keep is not None and names is not None), \
722 "Only one of the 'names' and the 'keep' parameters can be given"
724 if names is not None:
725 should_release = names.__contains__
727 should_release = lambda name: name not in keep
729 should_release = None
731 owned = lu.owned_locks(level)
733 # Not owning any lock at this level, do nothing
740 # Determine which locks to release
742 if should_release(name):
747 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
749 # Release just some locks
750 lu.glm.release(level, names=release)
752 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
755 lu.glm.release(level)
757 assert not lu.glm.is_owned(level), "No locks should be owned"
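# Typical call pattern (an illustrative sketch; "kept_nodes" is hypothetical):
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=kept_nodes)
# releases every node lock held by the LU except those listed in "kept_nodes".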
760 def _MapInstanceDisksToNodes(instances):
761 """Creates a map from (node, volume) to instance name.
763 @type instances: list of L{objects.Instance}
764 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
767 return dict(((node, vol), inst.name)
768 for inst in instances
769 for (node, vols) in inst.MapLVsByNode().items()
773 def _RunPostHook(lu, node_name):
774 """Runs the post-hook for an opcode on a single node.
777 hm = lu.proc.BuildHooksManager(lu)
779 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
781 # pylint: disable=W0702
782 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
785 def _CheckOutputFields(static, dynamic, selected):
786 """Checks whether all selected fields are valid.
788 @type static: L{utils.FieldSet}
789 @param static: static fields set
790 @type dynamic: L{utils.FieldSet}
791 @param dynamic: dynamic fields set
798 delta = f.NonMatching(selected)
800 raise errors.OpPrereqError("Unknown output fields selected: %s"
801 % ",".join(delta), errors.ECODE_INVAL)
804 def _CheckGlobalHvParams(params):
805 """Validates that given hypervisor params are not global ones.
807 This will ensure that instances don't get customised versions of
811 used_globals = constants.HVC_GLOBALS.intersection(params)
813 msg = ("The following hypervisor parameters are global and cannot"
814 " be customized at instance level, please modify them at"
815 " cluster level: %s" % utils.CommaJoin(used_globals))
816 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
819 def _CheckNodeOnline(lu, node, msg=None):
820 """Ensure that a given node is online.
822 @param lu: the LU on behalf of which we make the check
823 @param node: the node to check
824 @param msg: if passed, should be a message to replace the default one
825 @raise errors.OpPrereqError: if the node is offline
829 msg = "Can't use offline node"
830 if lu.cfg.GetNodeInfo(node).offline:
831 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
834 def _CheckNodeNotDrained(lu, node):
835 """Ensure that a given node is not drained.
837 @param lu: the LU on behalf of which we make the check
838 @param node: the node to check
839 @raise errors.OpPrereqError: if the node is drained
842 if lu.cfg.GetNodeInfo(node).drained:
843 raise errors.OpPrereqError("Can't use drained node %s" % node,
847 def _CheckNodeVmCapable(lu, node):
848 """Ensure that a given node is vm capable.
850 @param lu: the LU on behalf of which we make the check
851 @param node: the node to check
852 @raise errors.OpPrereqError: if the node is not vm capable
855 if not lu.cfg.GetNodeInfo(node).vm_capable:
856 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
860 def _CheckNodeHasOS(lu, node, os_name, force_variant):
861 """Ensure that a node supports a given OS.
863 @param lu: the LU on behalf of which we make the check
864 @param node: the node to check
865 @param os_name: the OS to query about
866 @param force_variant: whether to ignore variant errors
867 @raise errors.OpPrereqError: if the node is not supporting the OS
870 result = lu.rpc.call_os_get(node, os_name)
871 result.Raise("OS '%s' not in supported OS list for node %s" %
873 prereq=True, ecode=errors.ECODE_INVAL)
874 if not force_variant:
875 _CheckOSVariant(result.payload, os_name)
878 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
879 """Ensure that a node has the given secondary ip.
881 @type lu: L{LogicalUnit}
882 @param lu: the LU on behalf of which we make the check
884 @param node: the node to check
885 @type secondary_ip: string
886 @param secondary_ip: the ip to check
887 @type prereq: boolean
888 @param prereq: whether to throw a prerequisite or an execute error
889 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
890 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
893 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
894 result.Raise("Failure checking secondary ip on node %s" % node,
895 prereq=prereq, ecode=errors.ECODE_ENVIRON)
896 if not result.payload:
897 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
898 " please fix and re-run this command" % secondary_ip)
900 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
902 raise errors.OpExecError(msg)
905 def _GetClusterDomainSecret():
906 """Reads the cluster domain secret.
909 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
913 def _CheckInstanceState(lu, instance, req_states, msg=None):
914 """Ensure that an instance is in one of the required states.
916 @param lu: the LU on behalf of which we make the check
917 @param instance: the instance to check
918 @param msg: if passed, should be a message to replace the default one
919 @raise errors.OpPrereqError: if the instance is not in the required state
923 msg = "can't use instance from outside %s states" % ", ".join(req_states)
924 if instance.admin_state not in req_states:
925 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
926 (instance, instance.admin_state, msg),
929 if constants.ADMINST_UP not in req_states:
930 pnode = instance.primary_node
931 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
932 ins_l.Raise("Can't contact node %s for instance information" % pnode,
933 prereq=True, ecode=errors.ECODE_ENVIRON)
935 if instance.name in ins_l.payload:
936 raise errors.OpPrereqError("Instance %s is running, %s" %
937 (instance.name, msg), errors.ECODE_STATE)
940 def _ExpandItemName(fn, name, kind):
941 """Expand an item name.
943 @param fn: the function to use for expansion
944 @param name: requested item name
945 @param kind: text description ('Node' or 'Instance')
946 @return: the resolved (full) name
947 @raise errors.OpPrereqError: if the item is not found
951 if full_name is None:
952 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
957 def _ExpandNodeName(cfg, name):
958 """Wrapper over L{_ExpandItemName} for nodes."""
959 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
962 def _ExpandInstanceName(cfg, name):
963 """Wrapper over L{_ExpandItemName} for instance."""
964 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
967 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
968 minmem, maxmem, vcpus, nics, disk_template, disks,
969 bep, hvp, hypervisor_name, tags):
970 """Builds instance related env variables for hooks
972 This builds the hook environment from individual variables.
975 @param name: the name of the instance
976 @type primary_node: string
977 @param primary_node: the name of the instance's primary node
978 @type secondary_nodes: list
979 @param secondary_nodes: list of secondary nodes as strings
980 @type os_type: string
981 @param os_type: the name of the instance's OS
983 @param status: the desired status of the instance
985 @param minmem: the minimum memory size of the instance
987 @param maxmem: the maximum memory size of the instance
989 @param vcpus: the count of VCPUs the instance has
991 @param nics: list of tuples (ip, mac, mode, link) representing
992 the NICs the instance has
993 @type disk_template: string
994 @param disk_template: the disk template of the instance
996 @param disks: the list of (size, mode) pairs
998 @param bep: the backend parameters for the instance
1000 @param hvp: the hypervisor parameters for the instance
1001 @type hypervisor_name: string
1002 @param hypervisor_name: the hypervisor for the instance
1004 @param tags: list of instance tags as strings
1006 @return: the hook environment for this instance
1011 "INSTANCE_NAME": name,
1012 "INSTANCE_PRIMARY": primary_node,
1013 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1014 "INSTANCE_OS_TYPE": os_type,
1015 "INSTANCE_STATUS": status,
1016 "INSTANCE_MINMEM": minmem,
1017 "INSTANCE_MAXMEM": maxmem,
1018 # TODO(2.7) remove deprecated "memory" value
1019 "INSTANCE_MEMORY": maxmem,
1020 "INSTANCE_VCPUS": vcpus,
1021 "INSTANCE_DISK_TEMPLATE": disk_template,
1022 "INSTANCE_HYPERVISOR": hypervisor_name,
1025 nic_count = len(nics)
1026 for idx, (ip, mac, mode, link) in enumerate(nics):
1029 env["INSTANCE_NIC%d_IP" % idx] = ip
1030 env["INSTANCE_NIC%d_MAC" % idx] = mac
1031 env["INSTANCE_NIC%d_MODE" % idx] = mode
1032 env["INSTANCE_NIC%d_LINK" % idx] = link
1033 if mode == constants.NIC_MODE_BRIDGED:
1034 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1038 env["INSTANCE_NIC_COUNT"] = nic_count
1041 disk_count = len(disks)
1042 for idx, (size, mode) in enumerate(disks):
1043 env["INSTANCE_DISK%d_SIZE" % idx] = size
1044 env["INSTANCE_DISK%d_MODE" % idx] = mode
1048 env["INSTANCE_DISK_COUNT"] = disk_count
1053 env["INSTANCE_TAGS"] = " ".join(tags)
1055 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1056 for key, value in source.items():
1057 env["INSTANCE_%s_%s" % (kind, key)] = value
1062 def _NICListToTuple(lu, nics):
1063 """Build a list of nic information tuples.
1065 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1066 value in LUInstanceQueryData.
1068 @type lu: L{LogicalUnit}
1069 @param lu: the logical unit on whose behalf we execute
1070 @type nics: list of L{objects.NIC}
1071 @param nics: list of nics to convert to hooks tuples
1075 cluster = lu.cfg.GetClusterInfo()
1079 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1080 mode = filled_params[constants.NIC_MODE]
1081 link = filled_params[constants.NIC_LINK]
1082 hooks_nics.append((ip, mac, mode, link))
1086 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1087 """Builds instance related env variables for hooks from an object.
1089 @type lu: L{LogicalUnit}
1090 @param lu: the logical unit on whose behalf we execute
1091 @type instance: L{objects.Instance}
1092 @param instance: the instance for which we should build the
1094 @type override: dict
1095 @param override: dictionary with key/values that will override
1098 @return: the hook environment dictionary
1101 cluster = lu.cfg.GetClusterInfo()
1102 bep = cluster.FillBE(instance)
1103 hvp = cluster.FillHV(instance)
1105 "name": instance.name,
1106 "primary_node": instance.primary_node,
1107 "secondary_nodes": instance.secondary_nodes,
1108 "os_type": instance.os,
1109 "status": instance.admin_state,
1110 "maxmem": bep[constants.BE_MAXMEM],
1111 "minmem": bep[constants.BE_MINMEM],
1112 "vcpus": bep[constants.BE_VCPUS],
1113 "nics": _NICListToTuple(lu, instance.nics),
1114 "disk_template": instance.disk_template,
1115 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1118 "hypervisor_name": instance.hypervisor,
1119 "tags": instance.tags,
1122 args.update(override)
1123 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1126 def _AdjustCandidatePool(lu, exceptions):
1127 """Adjust the candidate pool after node operations.
1130 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1132 lu.LogInfo("Promoted nodes to master candidate role: %s",
1133 utils.CommaJoin(node.name for node in mod_list))
1134 for name in mod_list:
1135 lu.context.ReaddNode(name)
1136 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1138 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1142 def _DecideSelfPromotion(lu, exceptions=None):
1143 """Decide whether I should promote myself as a master candidate.
1146 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1147 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1148 # the new node will increase mc_max by one, so:
1149 mc_should = min(mc_should + 1, cp_size)
1150 return mc_now < mc_should
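# Worked example (made-up numbers): with candidate_pool_size=10 and
# GetMasterCandidateStats reporting mc_now=3 and a second value of 3, the
# adjusted target is min(3 + 1, 10) == 4, so 3 < 4 and the node promotes
# itself.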
1153 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1154 """Check that the brigdes needed by a list of nics exist.
1157 cluster = lu.cfg.GetClusterInfo()
1158 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1159 brlist = [params[constants.NIC_LINK] for params in paramslist
1160 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1162 result = lu.rpc.call_bridges_exist(target_node, brlist)
1163 result.Raise("Error checking bridges on destination node '%s'" %
1164 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1167 def _CheckInstanceBridgesExist(lu, instance, node=None):
1168 """Check that the brigdes needed by an instance exist.
1172 node = instance.primary_node
1173 _CheckNicsBridgesExist(lu, instance.nics, node)
1176 def _CheckOSVariant(os_obj, name):
1177 """Check whether an OS name conforms to the os variants specification.
1179 @type os_obj: L{objects.OS}
1180 @param os_obj: OS object to check
1182 @param name: OS name passed by the user, to check for validity
1185 variant = objects.OS.GetVariant(name)
1186 if not os_obj.supported_variants:
1188 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1189 " passed)" % (os_obj.name, variant),
1193 raise errors.OpPrereqError("OS name must include a variant",
1196 if variant not in os_obj.supported_variants:
1197 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1200 def _GetNodeInstancesInner(cfg, fn):
1201 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1204 def _GetNodeInstances(cfg, node_name):
1205 """Returns a list of all primary and secondary instances on a node.
1209 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1212 def _GetNodePrimaryInstances(cfg, node_name):
1213 """Returns primary instances on a node.
1216 return _GetNodeInstancesInner(cfg,
1217 lambda inst: node_name == inst.primary_node)
1220 def _GetNodeSecondaryInstances(cfg, node_name):
1221 """Returns secondary instances on a node.
1224 return _GetNodeInstancesInner(cfg,
1225 lambda inst: node_name in inst.secondary_nodes)
1228 def _GetStorageTypeArgs(cfg, storage_type):
1229 """Returns the arguments for a storage type.
1232 # Special case for file storage
1233 if storage_type == constants.ST_FILE:
1234 # storage.FileStorage wants a list of storage directories
1235 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1240 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1243 for dev in instance.disks:
1244 cfg.SetDiskID(dev, node_name)
1246 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1247 result.Raise("Failed to get disk status from node %s" % node_name,
1248 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1250 for idx, bdev_status in enumerate(result.payload):
1251 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1257 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1258 """Check the sanity of iallocator and node arguments and use the
1259 cluster-wide iallocator if appropriate.
1261 Check that at most one of (iallocator, node) is specified. If none is
1262 specified, then the LU's opcode's iallocator slot is filled with the
1263 cluster-wide default iallocator.
1265 @type iallocator_slot: string
1266 @param iallocator_slot: the name of the opcode iallocator slot
1267 @type node_slot: string
1268 @param node_slot: the name of the opcode target node slot
1271 node = getattr(lu.op, node_slot, None)
1272 iallocator = getattr(lu.op, iallocator_slot, None)
1274 if node is not None and iallocator is not None:
1275 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1277 elif node is None and iallocator is None:
1278 default_iallocator = lu.cfg.GetDefaultIAllocator()
1279 if default_iallocator:
1280 setattr(lu.op, iallocator_slot, default_iallocator)
1282 raise errors.OpPrereqError("No iallocator or node given and no"
1283 " cluster-wide default iallocator found;"
1284 " please specify either an iallocator or a"
1285 " node, or set a cluster-wide default"
1289 def _GetDefaultIAllocator(cfg, iallocator):
1290 """Decides on which iallocator to use.
1292 @type cfg: L{config.ConfigWriter}
1293 @param cfg: Cluster configuration object
1294 @type iallocator: string or None
1295 @param iallocator: Iallocator specified in opcode
1297 @return: Iallocator name
1301 # Use default iallocator
1302 iallocator = cfg.GetDefaultIAllocator()
1305 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1306 " opcode nor as a cluster-wide default",
1312 class LUClusterPostInit(LogicalUnit):
1313 """Logical unit for running hooks after cluster initialization.
1316 HPATH = "cluster-init"
1317 HTYPE = constants.HTYPE_CLUSTER
1319 def BuildHooksEnv(self):
1324 "OP_TARGET": self.cfg.GetClusterName(),
1327 def BuildHooksNodes(self):
1328 """Build hooks nodes.
1331 return ([], [self.cfg.GetMasterNode()])
1333 def Exec(self, feedback_fn):
1340 class LUClusterDestroy(LogicalUnit):
1341 """Logical unit for destroying the cluster.
1344 HPATH = "cluster-destroy"
1345 HTYPE = constants.HTYPE_CLUSTER
1347 def BuildHooksEnv(self):
1352 "OP_TARGET": self.cfg.GetClusterName(),
1355 def BuildHooksNodes(self):
1356 """Build hooks nodes.
1361 def CheckPrereq(self):
1362 """Check prerequisites.
1364 This checks whether the cluster is empty.
1366 Any errors are signaled by raising errors.OpPrereqError.
1369 master = self.cfg.GetMasterNode()
1371 nodelist = self.cfg.GetNodeList()
1372 if len(nodelist) != 1 or nodelist[0] != master:
1373 raise errors.OpPrereqError("There are still %d node(s) in"
1374 " this cluster." % (len(nodelist) - 1),
1376 instancelist = self.cfg.GetInstanceList()
1378 raise errors.OpPrereqError("There are still %d instance(s) in"
1379 " this cluster." % len(instancelist),
1382 def Exec(self, feedback_fn):
1383 """Destroys the cluster.
1386 master_params = self.cfg.GetMasterNetworkParameters()
1388 # Run post hooks on master node before it's removed
1389 _RunPostHook(self, master_params.name)
1391 ems = self.cfg.GetUseExternalMipScript()
1392 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1394 result.Raise("Could not disable the master role")
1396 return master_params.name
1399 def _VerifyCertificate(filename):
1400 """Verifies a certificate for L{LUClusterVerifyConfig}.
1402 @type filename: string
1403 @param filename: Path to PEM file
1407 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1408 utils.ReadFile(filename))
1409 except Exception, err: # pylint: disable=W0703
1410 return (LUClusterVerifyConfig.ETYPE_ERROR,
1411 "Failed to load X509 certificate %s: %s" % (filename, err))
1414 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1415 constants.SSL_CERT_EXPIRATION_ERROR)
1418 fnamemsg = "While verifying %s: %s" % (filename, msg)
1423 return (None, fnamemsg)
1424 elif errcode == utils.CERT_WARNING:
1425 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1426 elif errcode == utils.CERT_ERROR:
1427 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1429 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1432 def _GetAllHypervisorParameters(cluster, instances):
1433 """Compute the set of all hypervisor parameters.
1435 @type cluster: L{objects.Cluster}
1436 @param cluster: the cluster object
1437 @type instances: list of L{objects.Instance}
1438 @param instances: additional instances from which to obtain parameters
1439 @rtype: list of (origin, hypervisor, parameters)
1440 @return: a list with all parameters found, indicating the hypervisor they
1441 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1446 for hv_name in cluster.enabled_hypervisors:
1447 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1449 for os_name, os_hvp in cluster.os_hvp.items():
1450 for hv_name, hv_params in os_hvp.items():
1452 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1453 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1455 # TODO: collapse identical parameter values in a single one
1456 for instance in instances:
1457 if instance.hvparams:
1458 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1459 cluster.FillHV(instance)))
1464 class _VerifyErrors(object):
1465 """Mix-in for cluster/group verify LUs.
1467 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1468 self.op and self._feedback_fn to be available.)
1472 ETYPE_FIELD = "code"
1473 ETYPE_ERROR = "ERROR"
1474 ETYPE_WARNING = "WARNING"
1476 def _Error(self, ecode, item, msg, *args, **kwargs):
1477 """Format an error message.
1479 Based on the opcode's error_codes parameter, either format a
1480 parseable error code, or a simpler error string.
1482 This must be called only from Exec and functions called from Exec.
1485 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1486 itype, etxt, _ = ecode
1487 # first complete the msg
1490 # then format the whole message
1491 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1492 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1498 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1499 # and finally report it via the feedback_fn
1500 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
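  # For illustration (made-up values): with error_codes enabled the reported
  # line looks roughly like
  #   " - ERROR:ECODE_TEXT:node:node1.example.com:details"
  # and otherwise like
  #   " - ERROR: node node1.example.com: details"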
1502 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1503 """Log an error message if the passed condition is True.
1507 or self.op.debug_simulate_errors) # pylint: disable=E1101
1509 # If the error code is in the list of ignored errors, demote the error to a
1511 (_, etxt, _) = ecode
1512 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1513 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1516 self._Error(ecode, *args, **kwargs)
1518 # do not mark the operation as failed for WARN cases only
1519 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1520 self.bad = self.bad or cond
1523 class LUClusterVerify(NoHooksLU):
1524 """Submits all jobs necessary to verify the cluster.
1529 def ExpandNames(self):
1530 self.needed_locks = {}
1532 def Exec(self, feedback_fn):
1535 if self.op.group_name:
1536 groups = [self.op.group_name]
1537 depends_fn = lambda: None
1539 groups = self.cfg.GetNodeGroupList()
1541 # Verify global configuration
1543 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1546 # Always depend on global verification
1547 depends_fn = lambda: [(-len(jobs), [])]
1549 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1550 ignore_errors=self.op.ignore_errors,
1551 depends=depends_fn())]
1552 for group in groups)
1554 # Fix up all parameters
1555 for op in itertools.chain(*jobs): # pylint: disable=W0142
1556 op.debug_simulate_errors = self.op.debug_simulate_errors
1557 op.verbose = self.op.verbose
1558 op.error_codes = self.op.error_codes
1560 op.skip_checks = self.op.skip_checks
1561 except AttributeError:
1562 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1564 return ResultWithJobs(jobs)
1567 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1568 """Verifies the cluster config.
1573 def _VerifyHVP(self, hvp_data):
1574 """Verifies locally the syntax of the hypervisor parameters.
1577 for item, hv_name, hv_params in hvp_data:
1578 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1581 hv_class = hypervisor.GetHypervisor(hv_name)
1582 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1583 hv_class.CheckParameterSyntax(hv_params)
1584 except errors.GenericError, err:
1585 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1587 def ExpandNames(self):
1588 # Information can be safely retrieved as the BGL is acquired in exclusive
1590 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1591 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1592 self.all_node_info = self.cfg.GetAllNodesInfo()
1593 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1594 self.needed_locks = {}
1596 def Exec(self, feedback_fn):
1597 """Verify integrity of cluster, performing various test on nodes.
1601 self._feedback_fn = feedback_fn
1603 feedback_fn("* Verifying cluster config")
1605 for msg in self.cfg.VerifyConfig():
1606 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1608 feedback_fn("* Verifying cluster certificate files")
1610 for cert_filename in constants.ALL_CERT_FILES:
1611 (errcode, msg) = _VerifyCertificate(cert_filename)
1612 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1614 feedback_fn("* Verifying hypervisor parameters")
1616 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1617 self.all_inst_info.values()))
1619 feedback_fn("* Verifying all nodes belong to an existing group")
1621 # We do this verification here because, should this bogus circumstance
1622 # occur, it would never be caught by VerifyGroup, which only acts on
1623 # nodes/instances reachable from existing node groups.
1625 dangling_nodes = set(node.name for node in self.all_node_info.values()
1626 if node.group not in self.all_group_info)
1628 dangling_instances = {}
1629 no_node_instances = []
1631 for inst in self.all_inst_info.values():
1632 if inst.primary_node in dangling_nodes:
1633 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1634 elif inst.primary_node not in self.all_node_info:
1635 no_node_instances.append(inst.name)
1640 utils.CommaJoin(dangling_instances.get(node.name,
1642 for node in dangling_nodes]
1644 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1646 "the following nodes (and their instances) belong to a non"
1647 " existing group: %s", utils.CommaJoin(pretty_dangling))
1649 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1651 "the following instances have a non-existing primary-node:"
1652 " %s", utils.CommaJoin(no_node_instances))
1657 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1658 """Verifies the status of a node group.
1661 HPATH = "cluster-verify"
1662 HTYPE = constants.HTYPE_CLUSTER
1665 _HOOKS_INDENT_RE = re.compile("^", re.M)
1667 class NodeImage(object):
1668 """A class representing the logical and physical status of a node.
1671 @ivar name: the node name to which this object refers
1672 @ivar volumes: a structure as returned from
1673 L{ganeti.backend.GetVolumeList} (runtime)
1674 @ivar instances: a list of running instances (runtime)
1675 @ivar pinst: list of configured primary instances (config)
1676 @ivar sinst: list of configured secondary instances (config)
1677 @ivar sbp: dictionary of {primary-node: list of instances} for all
1678 instances for which this node is secondary (config)
1679 @ivar mfree: free memory, as reported by hypervisor (runtime)
1680 @ivar dfree: free disk, as reported by the node (runtime)
1681 @ivar offline: the offline status (config)
1682 @type rpc_fail: boolean
1683 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1684 not whether the individual keys were correct) (runtime)
1685 @type lvm_fail: boolean
1686 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1687 @type hyp_fail: boolean
1688 @ivar hyp_fail: whether the RPC call didn't return the instance list
1689 @type ghost: boolean
1690 @ivar ghost: whether this is a known node or not (config)
1691 @type os_fail: boolean
1692 @ivar os_fail: whether the RPC call didn't return valid OS data
1694 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1695 @type vm_capable: boolean
1696 @ivar vm_capable: whether the node can host instances
1699 def __init__(self, offline=False, name=None, vm_capable=True):
1708 self.offline = offline
1709 self.vm_capable = vm_capable
1710 self.rpc_fail = False
1711 self.lvm_fail = False
1712 self.hyp_fail = False
1714 self.os_fail = False
1717 def ExpandNames(self):
1718 # This raises errors.OpPrereqError on its own:
1719 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1721 # Get instances in node group; this is unsafe and needs verification later
1722 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1724 self.needed_locks = {
1725 locking.LEVEL_INSTANCE: inst_names,
1726 locking.LEVEL_NODEGROUP: [self.group_uuid],
1727 locking.LEVEL_NODE: [],
1730 self.share_locks = _ShareAll()
1732 def DeclareLocks(self, level):
1733 if level == locking.LEVEL_NODE:
1734 # Get members of node group; this is unsafe and needs verification later
1735 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1737 all_inst_info = self.cfg.GetAllInstancesInfo()
1739 # In Exec(), we warn about mirrored instances that have primary and
1740 # secondary living in separate node groups. To fully verify that
1741 # volumes for these instances are healthy, we will need to do an
1742 # extra call to their secondaries. We ensure here those nodes will
1744 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1745 # Important: access only the instances whose lock is owned
1746 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1747 nodes.update(all_inst_info[inst].secondary_nodes)
1749 self.needed_locks[locking.LEVEL_NODE] = nodes
1751 def CheckPrereq(self):
1752 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1753 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1755 group_nodes = set(self.group_info.members)
1756 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1759 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1761 unlocked_instances = \
1762 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1765 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1766 utils.CommaJoin(unlocked_nodes))
1768 if unlocked_instances:
1769 raise errors.OpPrereqError("Missing lock for instances: %s" %
1770 utils.CommaJoin(unlocked_instances))
1772 self.all_node_info = self.cfg.GetAllNodesInfo()
1773 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1775 self.my_node_names = utils.NiceSort(group_nodes)
1776 self.my_inst_names = utils.NiceSort(group_instances)
1778 self.my_node_info = dict((name, self.all_node_info[name])
1779 for name in self.my_node_names)
1781 self.my_inst_info = dict((name, self.all_inst_info[name])
1782 for name in self.my_inst_names)
1784 # We detect here the nodes that will need the extra RPC calls for verifying
1785 # split LV volumes; they should be locked.
1786 extra_lv_nodes = set()
1788 for inst in self.my_inst_info.values():
1789 if inst.disk_template in constants.DTS_INT_MIRROR:
1790 group = self.my_node_info[inst.primary_node].group
1791 for nname in inst.secondary_nodes:
1792 if self.all_node_info[nname].group != group:
1793 extra_lv_nodes.add(nname)
1795 unlocked_lv_nodes = \
1796 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1798 if unlocked_lv_nodes:
1799 raise errors.OpPrereqError("these nodes could be locked: %s" %
1800 utils.CommaJoin(unlocked_lv_nodes))
1801 self.extra_lv_nodes = list(extra_lv_nodes)
1803 def _VerifyNode(self, ninfo, nresult):
1804 """Perform some basic validation on data returned from a node.
1806 - check the result data structure is well formed and has all the
1808 - check ganeti version
1810 @type ninfo: L{objects.Node}
1811 @param ninfo: the node to check
1812 @param nresult: the results from the node
1814 @return: whether overall this call was successful (and we can expect
1815 reasonable values in the response)
1819 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1821 # main result, nresult should be a non-empty dict
1822 test = not nresult or not isinstance(nresult, dict)
1823 _ErrorIf(test, constants.CV_ENODERPC, node,
1824 "unable to verify node: no data returned")
1828 # compares ganeti version
1829 local_version = constants.PROTOCOL_VERSION
1830 remote_version = nresult.get("version", None)
1831 test = not (remote_version and
1832 isinstance(remote_version, (list, tuple)) and
1833 len(remote_version) == 2)
1834 _ErrorIf(test, constants.CV_ENODERPC, node,
1835 "connection to node returned invalid data")
1839 test = local_version != remote_version[0]
1840 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1841 "incompatible protocol versions: master %s,"
1842 " node %s", local_version, remote_version[0])
1846 # node seems compatible, we can actually try to look into its results
1848 # full package version
1849 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1850 constants.CV_ENODEVERSION, node,
1851 "software version mismatch: master %s, node %s",
1852 constants.RELEASE_VERSION, remote_version[1],
1853 code=self.ETYPE_WARNING)
1855 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1856 if ninfo.vm_capable and isinstance(hyp_result, dict):
1857 for hv_name, hv_result in hyp_result.iteritems():
1858 test = hv_result is not None
1859 _ErrorIf(test, constants.CV_ENODEHV, node,
1860 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1862 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1863 if ninfo.vm_capable and isinstance(hvp_result, list):
1864 for item, hv_name, hv_result in hvp_result:
1865 _ErrorIf(True, constants.CV_ENODEHV, node,
1866 "hypervisor %s parameter verify failure (source %s): %s",
1867 hv_name, item, hv_result)
1869 test = nresult.get(constants.NV_NODESETUP,
1870 ["Missing NODESETUP results"])
1871 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1876 def _VerifyNodeTime(self, ninfo, nresult,
1877 nvinfo_starttime, nvinfo_endtime):
1878 """Check the node time.
1880 @type ninfo: L{objects.Node}
1881 @param ninfo: the node to check
1882 @param nresult: the remote results for the node
1883 @param nvinfo_starttime: the start time of the RPC call
1884 @param nvinfo_endtime: the end time of the RPC call
1888 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1890 ntime = nresult.get(constants.NV_TIME, None)
1892 ntime_merged = utils.MergeTime(ntime)
1893 except (ValueError, TypeError):
1894 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1897 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1898 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1899 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1900 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1904 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1905 "Node time diverges by at least %s from master node time",
1908 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1909 """Check the node LVM results.
1911 @type ninfo: L{objects.Node}
1912 @param ninfo: the node to check
1913 @param nresult: the remote results for the node
1914 @param vg_name: the configured VG name
1921 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1923 # checks vg existence and size > 20G
1924 vglist = nresult.get(constants.NV_VGLIST, None)
1926 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1928 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1929 constants.MIN_VG_SIZE)
1930 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1933 pvlist = nresult.get(constants.NV_PVLIST, None)
1934 test = pvlist is None
1935 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1937 # check that ':' is not present in PV names, since it's a
1938 # special character for lvcreate (denotes the range of PEs to
1940 for _, pvname, owner_vg in pvlist:
1941 test = ":" in pvname
1942 _ErrorIf(test, constants.CV_ENODELVM, node,
1943 "Invalid character ':' in PV '%s' of VG '%s'",
1946 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1947 """Check the node bridges.
1949 @type ninfo: L{objects.Node}
1950 @param ninfo: the node to check
1951 @param nresult: the remote results for the node
1952 @param bridges: the expected list of bridges
1959 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1961 missing = nresult.get(constants.NV_BRIDGES, None)
1962 test = not isinstance(missing, list)
1963 _ErrorIf(test, constants.CV_ENODENET, node,
1964 "did not return valid bridge information")
1966 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1967 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1969 def _VerifyNodeUserScripts(self, ninfo, nresult):
1970 """Check the results of user scripts presence and executability on the node
1972 @type ninfo: L{objects.Node}
1973 @param ninfo: the node to check
1974 @param nresult: the remote results for the node
1979 test = constants.NV_USERSCRIPTS not in nresult
1980 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1981 "did not return user scripts information")
1983 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1985 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1986 "user scripts not present or not executable: %s" %
1987 utils.CommaJoin(sorted(broken_scripts)))
1989 def _VerifyNodeNetwork(self, ninfo, nresult):
1990 """Check the node network connectivity results.
1992 @type ninfo: L{objects.Node}
1993 @param ninfo: the node to check
1994 @param nresult: the remote results for the node
1998 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2000 test = constants.NV_NODELIST not in nresult
2001 _ErrorIf(test, constants.CV_ENODESSH, node,
2002 "node hasn't returned node ssh connectivity data")
2004 if nresult[constants.NV_NODELIST]:
2005 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2006 _ErrorIf(True, constants.CV_ENODESSH, node,
2007 "ssh communication with node '%s': %s", a_node, a_msg)
2009 test = constants.NV_NODENETTEST not in nresult
2010 _ErrorIf(test, constants.CV_ENODENET, node,
2011 "node hasn't returned node tcp connectivity data")
2013 if nresult[constants.NV_NODENETTEST]:
2014 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2016 _ErrorIf(True, constants.CV_ENODENET, node,
2017 "tcp communication with node '%s': %s",
2018 anode, nresult[constants.NV_NODENETTEST][anode])
2020 test = constants.NV_MASTERIP not in nresult
2021 _ErrorIf(test, constants.CV_ENODENET, node,
2022 "node hasn't returned node master IP reachability data")
2024 if not nresult[constants.NV_MASTERIP]:
2025 if node == self.master_node:
2026 msg = "the master node cannot reach the master IP (not configured?)"
2028 msg = "cannot reach the master IP"
2029 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2031 def _VerifyInstance(self, instance, instanceconfig, node_image,
2033 """Verify an instance.
2035 This function checks to see if the required block devices are
2036 available on the instance's node.
2039 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2040 node_current = instanceconfig.primary_node
2042 node_vol_should = {}
2043 instanceconfig.MapLVsByNode(node_vol_should)
2045 for node in node_vol_should:
2046 n_img = node_image[node]
2047 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2048 # ignore missing volumes on offline or broken nodes
2050 for volume in node_vol_should[node]:
2051 test = volume not in n_img.volumes
2052 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2053 "volume %s missing on node %s", volume, node)
2055 if instanceconfig.admin_state == constants.ADMINST_UP:
2056 pri_img = node_image[node_current]
2057 test = instance not in pri_img.instances and not pri_img.offline
2058 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2059 "instance not running on its primary node %s",
2062 diskdata = [(nname, success, status, idx)
2063 for (nname, disks) in diskstatus.items()
2064 for idx, (success, status) in enumerate(disks)]
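# diskdata now holds one (node name, success, status, disk index) tuple
# per disk, flattened from the per-node diskstatus mapping.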
2066 for nname, success, bdev_status, idx in diskdata:
2067 # the 'ghost node' construction in Exec() ensures that we have a correct snode
2069 snode = node_image[nname]
2070 bad_snode = snode.ghost or snode.offline
2071 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2072 not success and not bad_snode,
2073 constants.CV_EINSTANCEFAULTYDISK, instance,
2074 "couldn't retrieve status for disk/%s on %s: %s",
2075 idx, nname, bdev_status)
2076 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2077 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2078 constants.CV_EINSTANCEFAULTYDISK, instance,
2079 "disk/%s on %s is faulty", idx, nname)
2081 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2082 """Verify if there are any unknown volumes in the cluster.
2084 The .os, .swap and backup volumes are ignored. All other volumes are
2085 reported as unknown.
2087 @type reserved: L{ganeti.utils.FieldSet}
2088 @param reserved: a FieldSet of reserved volume names
2091 for node, n_img in node_image.items():
2092 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2093 # skip non-healthy nodes
2095 for volume in n_img.volumes:
2096 test = ((node not in node_vol_should or
2097 volume not in node_vol_should[node]) and
2098 not reserved.Matches(volume))
2099 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2100 "volume %s is unknown", volume)
2102 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2103 """Verify N+1 Memory Resilience.
2105 Check that if one single node dies we can still start all the
2106 instances it was primary for.
2109 cluster_info = self.cfg.GetClusterInfo()
2110 for node, n_img in node_image.items():
2111 # This code checks that every node which is now listed as
2112 # secondary has enough memory to host all instances it is
2113 # supposed to should a single other node in the cluster fail.
2114 # FIXME: not ready for failover to an arbitrary node
2115 # FIXME: does not support file-backed instances
2116 # WARNING: we currently take into account down instances as well
2117 # as up ones, considering that even if they're down someone
2118 # might want to start them even in the event of a node failure.
2120 # we're skipping offline nodes from the N+1 warning, since
2121 # most likely we don't have good memory information from them;
2122 # we already list instances living on such nodes, and that's enough warning
2125 #TODO(dynmem): use MINMEM for checking
2126 #TODO(dynmem): also consider ballooning out other instances
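# For each primary node 'prinode' that uses this node as a secondary, sum
# the maximum memory of the auto-balanced instances that would fail over
# here; the node's free memory must cover that amount.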
2127 for prinode, instances in n_img.sbp.items():
2129 for instance in instances:
2130 bep = cluster_info.FillBE(instance_cfg[instance])
2131 if bep[constants.BE_AUTO_BALANCE]:
2132 needed_mem += bep[constants.BE_MAXMEM]
2133 test = n_img.mfree < needed_mem
2134 self._ErrorIf(test, constants.CV_ENODEN1, node,
2135 "not enough memory to accomodate instance failovers"
2136 " should node %s fail (%dMiB needed, %dMiB available)",
2137 prinode, needed_mem, n_img.mfree)
2140 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2141 (files_all, files_opt, files_mc, files_vm)):
2142 """Verifies file checksums collected from all nodes.
2144 @param errorif: Callback for reporting errors
2145 @param nodeinfo: List of L{objects.Node} objects
2146 @param master_node: Name of master node
2147 @param all_nvinfo: RPC results
2150 # Define functions determining which nodes to consider for a file
2153 (files_mc, lambda node: (node.master_candidate or
2154 node.name == master_node)),
2155 (files_vm, lambda node: node.vm_capable),
2158 # Build mapping from filename to list of nodes which should have the file
2160 for (files, fn) in files2nodefn:
2162 filenodes = nodeinfo
2164 filenodes = filter(fn, nodeinfo)
2165 nodefiles.update((filename,
2166 frozenset(map(operator.attrgetter("name"), filenodes)))
2167 for filename in files)
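# nodefiles maps each filename to the frozenset of node names that are
# expected to have that file, according to the predicates above.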
2169 assert set(nodefiles) == (files_all | files_mc | files_vm)
2171 fileinfo = dict((filename, {}) for filename in nodefiles)
2172 ignore_nodes = set()
2174 for node in nodeinfo:
2176 ignore_nodes.add(node.name)
2179 nresult = all_nvinfo[node.name]
2181 if nresult.fail_msg or not nresult.payload:
2184 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2186 test = not (node_files and isinstance(node_files, dict))
2187 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2188 "Node did not return file checksum data")
2190 ignore_nodes.add(node.name)
2193 # Build per-checksum mapping from filename to nodes having it
2194 for (filename, checksum) in node_files.items():
2195 assert filename in nodefiles
2196 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2198 for (filename, checksums) in fileinfo.items():
2199 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2201 # Nodes having the file
2202 with_file = frozenset(node_name
2203 for nodes in fileinfo[filename].values()
2204 for node_name in nodes) - ignore_nodes
2206 expected_nodes = nodefiles[filename] - ignore_nodes
2208 # Nodes missing file
2209 missing_file = expected_nodes - with_file
2211 if filename in files_opt:
2213 errorif(missing_file and missing_file != expected_nodes,
2214 constants.CV_ECLUSTERFILECHECK, None,
2215 "File %s is optional, but it must exist on all or no"
2216 " nodes (not found on %s)",
2217 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2219 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2220 "File %s is missing from node(s) %s", filename,
2221 utils.CommaJoin(utils.NiceSort(missing_file)))
2223 # Warn if a node has a file it shouldn't
2224 unexpected = with_file - expected_nodes
2226 constants.CV_ECLUSTERFILECHECK, None,
2227 "File %s should not exist on node(s) %s",
2228 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2230 # See if there are multiple versions of the file
2231 test = len(checksums) > 1
2233 variants = ["variant %s on %s" %
2234 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2235 for (idx, (checksum, nodes)) in
2236 enumerate(sorted(checksums.items()))]
2240 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2241 "File %s found with %s different checksums (%s)",
2242 filename, len(checksums), "; ".join(variants))
2244 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2246 """Verifies and the node DRBD status.
2248 @type ninfo: L{objects.Node}
2249 @param ninfo: the node to check
2250 @param nresult: the remote results for the node
2251 @param instanceinfo: the dict of instances
2252 @param drbd_helper: the configured DRBD usermode helper
2253 @param drbd_map: the DRBD map as returned by
2254 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2258 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2261 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2262 test = (helper_result is None)
2263 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2264 "no drbd usermode helper returned")
2266 status, payload = helper_result
2268 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2269 "drbd usermode helper check unsuccessful: %s", payload)
2270 test = status and (payload != drbd_helper)
2271 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2272 "wrong drbd usermode helper: %s", payload)
2274 # compute the DRBD minors
2276 for minor, instance in drbd_map[node].items():
2277 test = instance not in instanceinfo
2278 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2279 "ghost instance '%s' in temporary DRBD map", instance)
2280 # ghost instance should not be running, but otherwise we
2281 # don't give double warnings (both ghost instance and
2282 # unallocated minor in use)
2284 node_drbd[minor] = (instance, False)
2286 instance = instanceinfo[instance]
2287 node_drbd[minor] = (instance.name,
2288 instance.admin_state == constants.ADMINST_UP)
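# node_drbd now maps every expected minor to (instance name, whether the
# instance is up and the minor therefore has to be in use).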
2290 # and now check them
2291 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2292 test = not isinstance(used_minors, (tuple, list))
2293 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2294 "cannot parse drbd status file: %s", str(used_minors))
2296 # we cannot check drbd status
2299 for minor, (iname, must_exist) in node_drbd.items():
2300 test = minor not in used_minors and must_exist
2301 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2302 "drbd minor %d of instance %s is not active", minor, iname)
2303 for minor in used_minors:
2304 test = minor not in node_drbd
2305 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2306 "unallocated drbd minor %d is in use", minor)
2308 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2309 """Builds the node OS structures.
2311 @type ninfo: L{objects.Node}
2312 @param ninfo: the node to check
2313 @param nresult: the remote results for the node
2314 @param nimg: the node image object
2318 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2320 remote_os = nresult.get(constants.NV_OSLIST, None)
2321 test = (not isinstance(remote_os, list) or
2322 not compat.all(isinstance(v, list) and len(v) == 7
2323 for v in remote_os))
2325 _ErrorIf(test, constants.CV_ENODEOS, node,
2326 "node hasn't returned valid OS data")
2335 for (name, os_path, status, diagnose,
2336 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2338 if name not in os_dict:
2341 # parameters is a list of lists instead of list of tuples due to
2342 # JSON lacking a real tuple type, fix it:
2343 parameters = [tuple(v) for v in parameters]
2344 os_dict[name].append((os_path, status, diagnose,
2345 set(variants), set(parameters), set(api_ver)))
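# os_dict maps each OS name to a list of (path, status, diagnose,
# variants, parameters, api_versions) tuples, one per place the OS was
# found on the node; duplicate entries are flagged in _VerifyNodeOS.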
2347 nimg.oslist = os_dict
2349 def _VerifyNodeOS(self, ninfo, nimg, base):
2350 """Verifies the node OS list.
2352 @type ninfo: L{objects.Node}
2353 @param ninfo: the node to check
2354 @param nimg: the node image object
2355 @param base: the 'template' node we match against (e.g. from the master)
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2363 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2364 for os_name, os_data in nimg.oslist.items():
2365 assert os_data, "Empty OS status for OS %s?!" % os_name
2366 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2367 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2368 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2369 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2370 "OS '%s' has multiple entries (first one shadows the rest): %s",
2371 os_name, utils.CommaJoin([v[0] for v in os_data]))
2372 # comparisons with the 'base' image
2373 test = os_name not in base.oslist
2374 _ErrorIf(test, constants.CV_ENODEOS, node,
2375 "Extra OS %s not present on reference node (%s)",
2379 assert base.oslist[os_name], "Base node has empty OS status?"
2380 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2382 # base OS is invalid, skipping
2384 for kind, a, b in [("API version", f_api, b_api),
2385 ("variants list", f_var, b_var),
2386 ("parameters", beautify_params(f_param),
2387 beautify_params(b_param))]:
2388 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2389 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2390 kind, os_name, base.name,
2391 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2393 # check any missing OSes
2394 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2395 _ErrorIf(missing, constants.CV_ENODEOS, node,
2396 "OSes present on reference node %s but missing on this node: %s",
2397 base.name, utils.CommaJoin(missing))
2399 def _VerifyOob(self, ninfo, nresult):
2400 """Verifies out of band functionality of a node.
2402 @type ninfo: L{objects.Node}
2403 @param ninfo: the node to check
2404 @param nresult: the remote results for the node
2408 # We just have to verify the paths on master and/or master candidates
2409 # as the oob helper is invoked on the master
2410 if ((ninfo.master_candidate or ninfo.master_capable) and
2411 constants.NV_OOB_PATHS in nresult):
2412 for path_result in nresult[constants.NV_OOB_PATHS]:
2413 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2415 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2416 """Verifies and updates the node volume data.
2418 This function will update a L{NodeImage}'s internal structures
2419 with data from the remote call.
2421 @type ninfo: L{objects.Node}
2422 @param ninfo: the node to check
2423 @param nresult: the remote results for the node
2424 @param nimg: the node image object
2425 @param vg_name: the configured VG name
2429 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2431 nimg.lvm_fail = True
2432 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2435 elif isinstance(lvdata, basestring):
2436 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2437 utils.SafeEncode(lvdata))
2438 elif not isinstance(lvdata, dict):
2439 _ErrorIf(True, constants.CV_ENODELVM, node,
2440 "rpc call to node failed (lvlist)")
2442 nimg.volumes = lvdata
2443 nimg.lvm_fail = False
2445 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2446 """Verifies and updates the node instance list.
2448 If the listing was successful, then updates this node's instance
2449 list. Otherwise, it marks the RPC call as failed for the instance list.
2452 @type ninfo: L{objects.Node}
2453 @param ninfo: the node to check
2454 @param nresult: the remote results for the node
2455 @param nimg: the node image object
2458 idata = nresult.get(constants.NV_INSTANCELIST, None)
2459 test = not isinstance(idata, list)
2460 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2461 "rpc call to node failed (instancelist): %s",
2462 utils.SafeEncode(str(idata)))
2464 nimg.hyp_fail = True
2466 nimg.instances = idata
2468 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2469 """Verifies and computes a node information map
2471 @type ninfo: L{objects.Node}
2472 @param ninfo: the node to check
2473 @param nresult: the remote results for the node
2474 @param nimg: the node image object
2475 @param vg_name: the configured VG name
2479 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2481 # try to read free memory (from the hypervisor)
2482 hv_info = nresult.get(constants.NV_HVINFO, None)
2483 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2484 _ErrorIf(test, constants.CV_ENODEHV, node,
2485 "rpc call to node failed (hvinfo)")
2488 nimg.mfree = int(hv_info["memory_free"])
2489 except (ValueError, TypeError):
2490 _ErrorIf(True, constants.CV_ENODERPC, node,
2491 "node returned invalid nodeinfo, check hypervisor")
2493 # FIXME: devise a free space model for file based instances as well
2494 if vg_name is not None:
2495 test = (constants.NV_VGLIST not in nresult or
2496 vg_name not in nresult[constants.NV_VGLIST])
2497 _ErrorIf(test, constants.CV_ENODELVM, node,
2498 "node didn't return data for the volume group '%s'"
2499 " - it is either missing or broken", vg_name)
2502 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2503 except (ValueError, TypeError):
2504 _ErrorIf(True, constants.CV_ENODERPC, node,
2505 "node returned invalid LVM info, check LVM status")
2507 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2508 """Gets per-disk status information for all instances.
2510 @type nodelist: list of strings
2511 @param nodelist: Node names
2512 @type node_image: dict of (name, L{objects.Node})
2513 @param node_image: Node objects
2514 @type instanceinfo: dict of (name, L{objects.Instance})
2515 @param instanceinfo: Instance objects
2516 @rtype: {instance: {node: [(success, payload)]}}
2517 @return: a dictionary of per-instance dictionaries with nodes as
2518 keys and disk information as values; the disk information is a
2519 list of tuples (success, payload)
2522 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2525 node_disks_devonly = {}
2526 diskless_instances = set()
2527 diskless = constants.DT_DISKLESS
2529 for nname in nodelist:
2530 node_instances = list(itertools.chain(node_image[nname].pinst,
2531 node_image[nname].sinst))
2532 diskless_instances.update(inst for inst in node_instances
2533 if instanceinfo[inst].disk_template == diskless)
2534 disks = [(inst, disk)
2535 for inst in node_instances
2536 for disk in instanceinfo[inst].disks]
2539 # No need to collect data
2542 node_disks[nname] = disks
2544 # Creating copies as SetDiskID below will modify the objects and that can
2545 # lead to incorrect data returned from nodes
2546 devonly = [dev.Copy() for (_, dev) in disks]
2549 self.cfg.SetDiskID(dev, nname)
2551 node_disks_devonly[nname] = devonly
2553 assert len(node_disks) == len(node_disks_devonly)
2555 # Collect data from all nodes with disks
2556 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2559 assert len(result) == len(node_disks)
2563 for (nname, nres) in result.items():
2564 disks = node_disks[nname]
2567 # No data from this node
2568 data = len(disks) * [(False, "node offline")]
2571 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2572 "while getting disk information: %s", msg)
2574 # No data from this node
2575 data = len(disks) * [(False, msg)]
2578 for idx, i in enumerate(nres.payload):
2579 if isinstance(i, (tuple, list)) and len(i) == 2:
2582 logging.warning("Invalid result from node %s, entry %d: %s",
2584 data.append((False, "Invalid result from the remote node"))
2586 for ((inst, _), status) in zip(disks, data):
2587 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
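# instdisk is shaped as {instance: {node: [(success, payload), ...]}},
# matching the return type documented in the docstring above.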
2589 # Add empty entries for diskless instances.
2590 for inst in diskless_instances:
2591 assert inst not in instdisk
2594 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2595 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2596 compat.all(isinstance(s, (tuple, list)) and
2597 len(s) == 2 for s in statuses)
2598 for inst, nnames in instdisk.items()
2599 for nname, statuses in nnames.items())
2600 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2605 def _SshNodeSelector(group_uuid, all_nodes):
2606 """Create endless iterators for all potential SSH check hosts.
2609 nodes = [node for node in all_nodes
2610 if (node.group != group_uuid and
2612 keyfunc = operator.attrgetter("group")
2614 return map(itertools.cycle,
2615 [sorted(map(operator.attrgetter("name"), names))
2616 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2620 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2621 """Choose which nodes should talk to which other nodes.
2623 We will make nodes contact all nodes in their group, and one node from every other node group.
2626 @warning: This algorithm has a known issue if one node group is much
2627 smaller than others (e.g. just one node). In such a case all other
2628 nodes will talk to the single node.
2631 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2632 sel = cls._SshNodeSelector(group_uuid, all_nodes)
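# Each online node of this group is assigned one node from every other
# group (drawn round-robin from the cycle iterators) as its SSH check targets.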
2634 return (online_nodes,
2635 dict((name, sorted([i.next() for i in sel]))
2636 for name in online_nodes))
2638 def BuildHooksEnv(self):
2641 Cluster-Verify hooks are run only in the post phase; if they fail, their
2642 output is logged in the verify output and the verification fails.
2646 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2649 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2650 for node in self.my_node_info.values())
2654 def BuildHooksNodes(self):
2655 """Build hooks nodes.
2658 return ([], self.my_node_names)
2660 def Exec(self, feedback_fn):
2661 """Verify integrity of the node group, performing various test on nodes.
2664 # This method has too many local variables. pylint: disable=R0914
2665 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2667 if not self.my_node_names:
2669 feedback_fn("* Empty node group, skipping verification")
2673 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2674 verbose = self.op.verbose
2675 self._feedback_fn = feedback_fn
2677 vg_name = self.cfg.GetVGName()
2678 drbd_helper = self.cfg.GetDRBDHelper()
2679 cluster = self.cfg.GetClusterInfo()
2680 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2681 hypervisors = cluster.enabled_hypervisors
2682 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2684 i_non_redundant = [] # Non redundant instances
2685 i_non_a_balanced = [] # Non auto-balanced instances
2686 i_offline = 0 # Count of offline instances
2687 n_offline = 0 # Count of offline nodes
2688 n_drained = 0 # Count of nodes being drained
2689 node_vol_should = {}
2691 # FIXME: verify OS list
2694 filemap = _ComputeAncillaryFiles(cluster, False)
2696 # do local checksums
2697 master_node = self.master_node = self.cfg.GetMasterNode()
2698 master_ip = self.cfg.GetMasterIP()
2700 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2703 if self.cfg.GetUseExternalMipScript():
2704 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2706 node_verify_param = {
2707 constants.NV_FILELIST:
2708 utils.UniqueSequence(filename
2709 for files in filemap
2710 for filename in files),
2711 constants.NV_NODELIST:
2712 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2713 self.all_node_info.values()),
2714 constants.NV_HYPERVISOR: hypervisors,
2715 constants.NV_HVPARAMS:
2716 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2717 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2718 for node in node_data_list
2719 if not node.offline],
2720 constants.NV_INSTANCELIST: hypervisors,
2721 constants.NV_VERSION: None,
2722 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2723 constants.NV_NODESETUP: None,
2724 constants.NV_TIME: None,
2725 constants.NV_MASTERIP: (master_node, master_ip),
2726 constants.NV_OSLIST: None,
2727 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2728 constants.NV_USERSCRIPTS: user_scripts,
2731 if vg_name is not None:
2732 node_verify_param[constants.NV_VGLIST] = None
2733 node_verify_param[constants.NV_LVLIST] = vg_name
2734 node_verify_param[constants.NV_PVLIST] = [vg_name]
2735 node_verify_param[constants.NV_DRBDLIST] = None
2738 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2741 # FIXME: this needs to be changed per node-group, not cluster-wide
2743 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2744 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2745 bridges.add(default_nicpp[constants.NIC_LINK])
2746 for instance in self.my_inst_info.values():
2747 for nic in instance.nics:
2748 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2749 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2750 bridges.add(full_nic[constants.NIC_LINK])
2753 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2755 # Build our expected cluster state
2756 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2758 vm_capable=node.vm_capable))
2759 for node in node_data_list)
2763 for node in self.all_node_info.values():
2764 path = _SupportsOob(self.cfg, node)
2765 if path and path not in oob_paths:
2766 oob_paths.append(path)
2769 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2771 for instance in self.my_inst_names:
2772 inst_config = self.my_inst_info[instance]
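# Make sure every node this instance touches has a node image; nodes not
# known in self.all_node_info are marked as 'ghost' placeholders.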
2774 for nname in inst_config.all_nodes:
2775 if nname not in node_image:
2776 gnode = self.NodeImage(name=nname)
2777 gnode.ghost = (nname not in self.all_node_info)
2778 node_image[nname] = gnode
2780 inst_config.MapLVsByNode(node_vol_should)
2782 pnode = inst_config.primary_node
2783 node_image[pnode].pinst.append(instance)
2785 for snode in inst_config.secondary_nodes:
2786 nimg = node_image[snode]
2787 nimg.sinst.append(instance)
2788 if pnode not in nimg.sbp:
2789 nimg.sbp[pnode] = []
2790 nimg.sbp[pnode].append(instance)
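# nimg.sbp groups, per primary node, the instances for which this node
# acts as a secondary; _VerifyNPlusOneMemory relies on this layout.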
2792 # At this point, we have the in-memory data structures complete,
2793 # except for the runtime information, which we'll gather next
2795 # Due to the way our RPC system works, exact response times cannot be
2796 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2797 # time before and after executing the request, we can at least have a time window.
2799 nvinfo_starttime = time.time()
2800 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2802 self.cfg.GetClusterName())
2803 nvinfo_endtime = time.time()
2805 if self.extra_lv_nodes and vg_name is not None:
2807 self.rpc.call_node_verify(self.extra_lv_nodes,
2808 {constants.NV_LVLIST: vg_name},
2809 self.cfg.GetClusterName())
2811 extra_lv_nvinfo = {}
2813 all_drbd_map = self.cfg.ComputeDRBDMap()
2815 feedback_fn("* Gathering disk information (%s nodes)" %
2816 len(self.my_node_names))
2817 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2820 feedback_fn("* Verifying configuration file consistency")
2822 # If not all nodes are being checked, we need to make sure the master node
2823 # and a non-checked vm_capable node are in the list.
2824 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2826 vf_nvinfo = all_nvinfo.copy()
2827 vf_node_info = list(self.my_node_info.values())
2828 additional_nodes = []
2829 if master_node not in self.my_node_info:
2830 additional_nodes.append(master_node)
2831 vf_node_info.append(self.all_node_info[master_node])
2832 # Add the first vm_capable node we find which is not included
2833 for node in absent_nodes:
2834 nodeinfo = self.all_node_info[node]
2835 if nodeinfo.vm_capable and not nodeinfo.offline:
2836 additional_nodes.append(node)
2837 vf_node_info.append(self.all_node_info[node])
2839 key = constants.NV_FILELIST
2840 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2841 {key: node_verify_param[key]},
2842 self.cfg.GetClusterName()))
2844 vf_nvinfo = all_nvinfo
2845 vf_node_info = self.my_node_info.values()
2847 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2849 feedback_fn("* Verifying node status")
2853 for node_i in node_data_list:
2855 nimg = node_image[node]
2859 feedback_fn("* Skipping offline node %s" % (node,))
2863 if node == master_node:
2865 elif node_i.master_candidate:
2866 ntype = "master candidate"
2867 elif node_i.drained:
2873 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2875 msg = all_nvinfo[node].fail_msg
2876 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2879 nimg.rpc_fail = True
2882 nresult = all_nvinfo[node].payload
2884 nimg.call_ok = self._VerifyNode(node_i, nresult)
2885 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2886 self._VerifyNodeNetwork(node_i, nresult)
2887 self._VerifyNodeUserScripts(node_i, nresult)
2888 self._VerifyOob(node_i, nresult)
2891 self._VerifyNodeLVM(node_i, nresult, vg_name)
2892 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2895 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2896 self._UpdateNodeInstances(node_i, nresult, nimg)
2897 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2898 self._UpdateNodeOS(node_i, nresult, nimg)
2900 if not nimg.os_fail:
2901 if refos_img is None:
2903 self._VerifyNodeOS(node_i, nimg, refos_img)
2904 self._VerifyNodeBridges(node_i, nresult, bridges)
2906 # Check whether all running instances are primary for the node. (This
2907 # can no longer be done from _VerifyInstance below, since some of the
2908 # wrong instances could be from other node groups.)
2909 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2911 for inst in non_primary_inst:
2912 # FIXME: investigate best way to handle offline insts
2913 if inst.admin_state == constants.ADMINST_OFFLINE:
2915 feedback_fn("* Skipping offline instance %s" % inst.name)
2918 test = inst in self.all_inst_info
2919 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2920 "instance should not run on node %s", node_i.name)
2921 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2922 "node is running unknown instance %s", inst)
2924 for node, result in extra_lv_nvinfo.items():
2925 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2926 node_image[node], vg_name)
2928 feedback_fn("* Verifying instance status")
2929 for instance in self.my_inst_names:
2931 feedback_fn("* Verifying instance %s" % instance)
2932 inst_config = self.my_inst_info[instance]
2933 self._VerifyInstance(instance, inst_config, node_image,
2935 inst_nodes_offline = []
2937 pnode = inst_config.primary_node
2938 pnode_img = node_image[pnode]
2939 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2940 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2941 " primary node failed", instance)
2943 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2945 constants.CV_EINSTANCEBADNODE, instance,
2946 "instance is marked as running and lives on offline node %s",
2947 inst_config.primary_node)
2949 # If the instance is non-redundant we cannot survive losing its primary
2950 # node, so we are not N+1 compliant. On the other hand we have no disk
2951 # templates with more than one secondary so that situation is not well
2953 # FIXME: does not support file-backed instances
2954 if not inst_config.secondary_nodes:
2955 i_non_redundant.append(instance)
2957 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2958 constants.CV_EINSTANCELAYOUT,
2959 instance, "instance has multiple secondary nodes: %s",
2960 utils.CommaJoin(inst_config.secondary_nodes),
2961 code=self.ETYPE_WARNING)
2963 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2964 pnode = inst_config.primary_node
2965 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2966 instance_groups = {}
2968 for node in instance_nodes:
2969 instance_groups.setdefault(self.all_node_info[node].group,
2973 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2974 # Sort so that we always list the primary node first.
2975 for group, nodes in sorted(instance_groups.items(),
2976 key=lambda (_, nodes): pnode in nodes,
2979 self._ErrorIf(len(instance_groups) > 1,
2980 constants.CV_EINSTANCESPLITGROUPS,
2981 instance, "instance has primary and secondary nodes in"
2982 " different groups: %s", utils.CommaJoin(pretty_list),
2983 code=self.ETYPE_WARNING)
2985 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2986 i_non_a_balanced.append(instance)
2988 for snode in inst_config.secondary_nodes:
2989 s_img = node_image[snode]
2990 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2991 snode, "instance %s, connection to secondary node failed",
2995 inst_nodes_offline.append(snode)
2997 # warn that the instance lives on offline nodes
2998 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2999 "instance has offline secondary node(s) %s",
3000 utils.CommaJoin(inst_nodes_offline))
3001 # ... or ghost/non-vm_capable nodes
3002 for node in inst_config.all_nodes:
3003 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3004 instance, "instance lives on ghost node %s", node)
3005 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3006 instance, "instance lives on non-vm_capable node %s", node)
3008 feedback_fn("* Verifying orphan volumes")
3009 reserved = utils.FieldSet(*cluster.reserved_lvs)
3011 # We will get spurious "unknown volume" warnings if any node of this group
3012 # is secondary for an instance whose primary is in another group. To avoid
3013 # them, we find these instances and add their volumes to node_vol_should.
3014 for inst in self.all_inst_info.values():
3015 for secondary in inst.secondary_nodes:
3016 if (secondary in self.my_node_info
3017 and inst.name not in self.my_inst_info):
3018 inst.MapLVsByNode(node_vol_should)
3021 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3023 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3024 feedback_fn("* Verifying N+1 Memory redundancy")
3025 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3027 feedback_fn("* Other Notes")
3029 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3030 % len(i_non_redundant))
3032 if i_non_a_balanced:
3033 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3034 % len(i_non_a_balanced))
3037 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3040 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3043 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3047 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3048 """Analyze the post-hooks' result
3050 This method analyses the hook result, handles it, and sends some
3051 nicely-formatted feedback back to the user.
3053 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3054 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3055 @param hooks_results: the results of the multi-node hooks rpc call
3056 @param feedback_fn: function used to send feedback back to the caller
3057 @param lu_result: previous Exec result
3058 @return: the new Exec result, based on the previous result
3062 # We only really run POST phase hooks, only for non-empty groups,
3063 # and are only interested in their results
3064 if not self.my_node_names:
3067 elif phase == constants.HOOKS_PHASE_POST:
3068 # Used to change hooks' output to proper indentation
3069 feedback_fn("* Hooks Results")
3070 assert hooks_results, "invalid result from hooks"
3072 for node_name in hooks_results:
3073 res = hooks_results[node_name]
3075 test = msg and not res.offline
3076 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3077 "Communication failure in hooks execution: %s", msg)
3078 if res.offline or msg:
3079 # No need to investigate payload if node is offline or gave
3082 for script, hkr, output in res.payload:
3083 test = hkr == constants.HKR_FAIL
3084 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3085 "Script %s failed, output:", script)
3087 output = self._HOOKS_INDENT_RE.sub(" ", output)
3088 feedback_fn("%s" % output)
3094 class LUClusterVerifyDisks(NoHooksLU):
3095 """Verifies the cluster disks status.
3100 def ExpandNames(self):
3101 self.share_locks = _ShareAll()
3102 self.needed_locks = {
3103 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3106 def Exec(self, feedback_fn):
3107 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3109 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3110 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3111 for group in group_names])
3114 class LUGroupVerifyDisks(NoHooksLU):
3115 """Verifies the status of all disks in a node group.
3120 def ExpandNames(self):
3121 # Raises errors.OpPrereqError on its own if group can't be found
3122 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3124 self.share_locks = _ShareAll()
3125 self.needed_locks = {
3126 locking.LEVEL_INSTANCE: [],
3127 locking.LEVEL_NODEGROUP: [],
3128 locking.LEVEL_NODE: [],
3131 def DeclareLocks(self, level):
3132 if level == locking.LEVEL_INSTANCE:
3133 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3135 # Lock instances optimistically, needs verification once node and group
3136 # locks have been acquired
3137 self.needed_locks[locking.LEVEL_INSTANCE] = \
3138 self.cfg.GetNodeGroupInstances(self.group_uuid)
3140 elif level == locking.LEVEL_NODEGROUP:
3141 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3143 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3144 set([self.group_uuid] +
3145 # Lock all groups used by instances optimistically; this requires
3146 # going via the node before it's locked, requiring verification
3149 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3150 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3152 elif level == locking.LEVEL_NODE:
3153 # This will only lock the nodes in the group to be verified which contain
3155 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3156 self._LockInstancesNodes()
3158 # Lock all nodes in group to be verified
3159 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3160 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3161 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3163 def CheckPrereq(self):
3164 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3165 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3166 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3168 assert self.group_uuid in owned_groups
3170 # Check if locked instances are still correct
3171 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3173 # Get instance information
3174 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3176 # Check if node groups for locked instances are still correct
3177 for (instance_name, inst) in self.instances.items():
3178 assert owned_nodes.issuperset(inst.all_nodes), \
3179 "Instance %s's nodes changed while we kept the lock" % instance_name
3181 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3184 assert self.group_uuid in inst_groups, \
3185 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3187 def Exec(self, feedback_fn):
3188 """Verify integrity of cluster disks.
3190 @rtype: tuple of three items
3191 @return: a tuple of (dict of node-to-node_error, list of instances
3192 which need activate-disks, dict of instance: (node, volume) for
3197 res_instances = set()
3200 nv_dict = _MapInstanceDisksToNodes([inst
3201 for inst in self.instances.values()
3202 if inst.admin_state == constants.ADMINST_UP])
3205 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3206 set(self.cfg.GetVmCapableNodeList()))
3208 node_lvs = self.rpc.call_lv_list(nodes, [])
3210 for (node, node_res) in node_lvs.items():
3211 if node_res.offline:
3214 msg = node_res.fail_msg
3216 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3217 res_nodes[node] = msg
3220 for lv_name, (_, _, lv_online) in node_res.payload.items():
3221 inst = nv_dict.pop((node, lv_name), None)
3222 if not (lv_online or inst is None):
3223 res_instances.add(inst)
3225 # any leftover items in nv_dict are missing LVs, let's arrange the data
3227 for key, inst in nv_dict.iteritems():
3228 res_missing.setdefault(inst, []).append(list(key))
3230 return (res_nodes, list(res_instances), res_missing)
3233 class LUClusterRepairDiskSizes(NoHooksLU):
3234 """Verifies the cluster disks sizes.
3239 def ExpandNames(self):
3240 if self.op.instances:
3241 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3242 self.needed_locks = {
3243 locking.LEVEL_NODE_RES: [],
3244 locking.LEVEL_INSTANCE: self.wanted_names,
3246 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3248 self.wanted_names = None
3249 self.needed_locks = {
3250 locking.LEVEL_NODE_RES: locking.ALL_SET,
3251 locking.LEVEL_INSTANCE: locking.ALL_SET,
3253 self.share_locks = {
3254 locking.LEVEL_NODE_RES: 1,
3255 locking.LEVEL_INSTANCE: 0,
3258 def DeclareLocks(self, level):
3259 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3260 self._LockInstancesNodes(primary_only=True, level=level)
3262 def CheckPrereq(self):
3263 """Check prerequisites.
3265 This only checks the optional instance list against the existing names.
3268 if self.wanted_names is None:
3269 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3271 self.wanted_instances = \
3272 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3274 def _EnsureChildSizes(self, disk):
3275 """Ensure children of the disk have the needed disk size.
3277 This is valid mainly for DRBD8 and fixes an issue where the
3278 children have smaller disk size.
3280 @param disk: an L{ganeti.objects.Disk} object
3283 if disk.dev_type == constants.LD_DRBD8:
3284 assert disk.children, "Empty children for DRBD8?"
3285 fchild = disk.children[0]
3286 mismatch = fchild.size < disk.size
3288 self.LogInfo("Child disk has size %d, parent %d, fixing",
3289 fchild.size, disk.size)
3290 fchild.size = disk.size
3292 # and we recurse on this child only, not on the metadev
3293 return self._EnsureChildSizes(fchild) or mismatch
3297 def Exec(self, feedback_fn):
3298 """Verify the size of cluster disks.
3301 # TODO: check child disks too
3302 # TODO: check differences in size between primary/secondary nodes
3304 for instance in self.wanted_instances:
3305 pnode = instance.primary_node
3306 if pnode not in per_node_disks:
3307 per_node_disks[pnode] = []
3308 for idx, disk in enumerate(instance.disks):
3309 per_node_disks[pnode].append((instance, idx, disk))
3311 assert not (frozenset(per_node_disks.keys()) -
3312 self.owned_locks(locking.LEVEL_NODE_RES)), \
3313 "Not owning correct locks"
3314 assert not self.owned_locks(locking.LEVEL_NODE)
3317 for node, dskl in per_node_disks.items():
3318 newl = [v[2].Copy() for v in dskl]
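# Operate on copies so that SetDiskID below does not modify the Disk
# objects stored in the configuration.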
3320 self.cfg.SetDiskID(dsk, node)
3321 result = self.rpc.call_blockdev_getsize(node, newl)
3323 self.LogWarning("Failure in blockdev_getsize call to node"
3324 " %s, ignoring", node)
3326 if len(result.payload) != len(dskl):
3327 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3328 " result.payload=%s", node, len(dskl), result.payload)
3329 self.LogWarning("Invalid result from node %s, ignoring node results",
3332 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3334 self.LogWarning("Disk %d of instance %s did not return size"
3335 " information, ignoring", idx, instance.name)
3337 if not isinstance(size, (int, long)):
3338 self.LogWarning("Disk %d of instance %s did not return valid"
3339 " size information, ignoring", idx, instance.name)
3342 if size != disk.size:
3343 self.LogInfo("Disk %d of instance %s has mismatched size,"
3344 " correcting: recorded %d, actual %d", idx,
3345 instance.name, disk.size, size)
3347 self.cfg.Update(instance, feedback_fn)
3348 changed.append((instance.name, idx, size))
3349 if self._EnsureChildSizes(disk):
3350 self.cfg.Update(instance, feedback_fn)
3351 changed.append((instance.name, idx, disk.size))
3355 class LUClusterRename(LogicalUnit):
3356 """Rename the cluster.
3359 HPATH = "cluster-rename"
3360 HTYPE = constants.HTYPE_CLUSTER
3362 def BuildHooksEnv(self):
3367 "OP_TARGET": self.cfg.GetClusterName(),
3368 "NEW_NAME": self.op.name,
3371 def BuildHooksNodes(self):
3372 """Build hooks nodes.
3375 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3377 def CheckPrereq(self):
3378 """Verify that the passed name is a valid one.
3381 hostname = netutils.GetHostname(name=self.op.name,
3382 family=self.cfg.GetPrimaryIPFamily())
3384 new_name = hostname.name
3385 self.ip = new_ip = hostname.ip
3386 old_name = self.cfg.GetClusterName()
3387 old_ip = self.cfg.GetMasterIP()
3388 if new_name == old_name and new_ip == old_ip:
3389 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3390 " cluster has changed",
3392 if new_ip != old_ip:
3393 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3394 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3395 " reachable on the network" %
3396 new_ip, errors.ECODE_NOTUNIQUE)
3398 self.op.name = new_name
3400 def Exec(self, feedback_fn):
3401 """Rename the cluster.
3404 clustername = self.op.name
3407 # shutdown the master IP
3408 master_params = self.cfg.GetMasterNetworkParameters()
3409 ems = self.cfg.GetUseExternalMipScript()
3410 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3412 result.Raise("Could not disable the master role")
3415 cluster = self.cfg.GetClusterInfo()
3416 cluster.cluster_name = clustername
3417 cluster.master_ip = new_ip
3418 self.cfg.Update(cluster, feedback_fn)
3420 # update the known hosts file
3421 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3422 node_list = self.cfg.GetOnlineNodeList()
3424 node_list.remove(master_params.name)
3427 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3429 master_params.ip = new_ip
3430 result = self.rpc.call_node_activate_master_ip(master_params.name,
3432 msg = result.fail_msg
3434 self.LogWarning("Could not re-enable the master role on"
3435 " the master, please restart manually: %s", msg)
3440 def _ValidateNetmask(cfg, netmask):
3441 """Checks if a netmask is valid.
3443 @type cfg: L{config.ConfigWriter}
3444 @param cfg: The cluster configuration
3446 @param netmask: the netmask to be verified
3447 @raise errors.OpPrereqError: if the validation fails
3450 ip_family = cfg.GetPrimaryIPFamily()
3452 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3453 except errors.ProgrammerError:
3454 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3456 if not ipcls.ValidateNetmask(netmask):
3457 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3461 class LUClusterSetParams(LogicalUnit):
3462 """Change the parameters of the cluster.
3465 HPATH = "cluster-modify"
3466 HTYPE = constants.HTYPE_CLUSTER
3469 def CheckArguments(self):
3473 if self.op.uid_pool:
3474 uidpool.CheckUidPool(self.op.uid_pool)
3476 if self.op.add_uids:
3477 uidpool.CheckUidPool(self.op.add_uids)
3479 if self.op.remove_uids:
3480 uidpool.CheckUidPool(self.op.remove_uids)
3482 if self.op.master_netmask is not None:
3483 _ValidateNetmask(self.cfg, self.op.master_netmask)
3485 def ExpandNames(self):
3486 # FIXME: in the future maybe other cluster params won't require checking on
3487 # all nodes to be modified.
3488 self.needed_locks = {
3489 locking.LEVEL_NODE: locking.ALL_SET,
3491 self.share_locks[locking.LEVEL_NODE] = 1
3493 def BuildHooksEnv(self):
3498 "OP_TARGET": self.cfg.GetClusterName(),
3499 "NEW_VG_NAME": self.op.vg_name,
3502 def BuildHooksNodes(self):
3503 """Build hooks nodes.
3506 mn = self.cfg.GetMasterNode()
3509 def CheckPrereq(self):
3510 """Check prerequisites.
3512 This checks that the given parameters do not conflict and that
3513 the given volume group is valid.
3516 if self.op.vg_name is not None and not self.op.vg_name:
3517 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3518 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3519 " instances exist", errors.ECODE_INVAL)
3521 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3522 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3523 raise errors.OpPrereqError("Cannot disable drbd helper while"
3524 " drbd-based instances exist",
3527 node_list = self.owned_locks(locking.LEVEL_NODE)
3529 # if vg_name not None, checks given volume group on all nodes
3531 vglist = self.rpc.call_vg_list(node_list)
3532 for node in node_list:
3533 msg = vglist[node].fail_msg
3535 # ignoring down node
3536 self.LogWarning("Error while gathering data on node %s"
3537 " (ignoring node): %s", node, msg)
3539 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3541 constants.MIN_VG_SIZE)
3543 raise errors.OpPrereqError("Error on node '%s': %s" %
3544 (node, vgstatus), errors.ECODE_ENVIRON)
3546 if self.op.drbd_helper:
3547 # checks given drbd helper on all nodes
3548 helpers = self.rpc.call_drbd_helper(node_list)
3549 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3551 self.LogInfo("Not checking drbd helper on offline node %s", node)
3553 msg = helpers[node].fail_msg
3555 raise errors.OpPrereqError("Error checking drbd helper on node"
3556 " '%s': %s" % (node, msg),
3557 errors.ECODE_ENVIRON)
3558 node_helper = helpers[node].payload
3559 if node_helper != self.op.drbd_helper:
3560 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3561 (node, node_helper), errors.ECODE_ENVIRON)
3563 self.cluster = cluster = self.cfg.GetClusterInfo()
3564 # validate params changes
3565 if self.op.beparams:
3566 objects.UpgradeBeParams(self.op.beparams)
3567 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3568 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3570 if self.op.ndparams:
3571 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3572 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3574 # TODO: we need a more general way to handle resetting
3575 # cluster-level parameters to default values
3576 if self.new_ndparams["oob_program"] == "":
3577 self.new_ndparams["oob_program"] = \
3578 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3580 if self.op.nicparams:
3581 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3582 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3583 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3586 # check all instances for consistency
3587 for instance in self.cfg.GetAllInstancesInfo().values():
3588 for nic_idx, nic in enumerate(instance.nics):
3589 params_copy = copy.deepcopy(nic.nicparams)
3590 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3592 # check parameter syntax
3594 objects.NIC.CheckParameterSyntax(params_filled)
3595 except errors.ConfigurationError, err:
3596 nic_errors.append("Instance %s, nic/%d: %s" %
3597 (instance.name, nic_idx, err))
3599 # if we're moving instances to routed, check that they have an ip
3600 target_mode = params_filled[constants.NIC_MODE]
3601 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3602 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3603 " address" % (instance.name, nic_idx))
3605 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3606 "\n".join(nic_errors))
3608 # hypervisor list/parameters
3609 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3610 if self.op.hvparams:
3611 for hv_name, hv_dict in self.op.hvparams.items():
3612 if hv_name not in self.new_hvparams:
3613 self.new_hvparams[hv_name] = hv_dict
3615 self.new_hvparams[hv_name].update(hv_dict)
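# new_hvparams starts from the current cluster hvparams; per-hypervisor
# overrides from the opcode are merged into the existing dicts instead of
# replacing them wholesale.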
3617 # os hypervisor parameters
3618 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3620 for os_name, hvs in self.op.os_hvp.items():
3621 if os_name not in self.new_os_hvp:
3622 self.new_os_hvp[os_name] = hvs
3624 for hv_name, hv_dict in hvs.items():
3625 if hv_name not in self.new_os_hvp[os_name]:
3626 self.new_os_hvp[os_name][hv_name] = hv_dict
3628 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3631 self.new_osp = objects.FillDict(cluster.osparams, {})
3632 if self.op.osparams:
3633 for os_name, osp in self.op.osparams.items():
3634 if os_name not in self.new_osp:
3635 self.new_osp[os_name] = {}
3637 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3640 if not self.new_osp[os_name]:
3641 # we removed all parameters
3642 del self.new_osp[os_name]
3644 # check the parameter validity (remote check)
3645 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3646 os_name, self.new_osp[os_name])
3648 # changes to the hypervisor list
3649 if self.op.enabled_hypervisors is not None:
3650 self.hv_list = self.op.enabled_hypervisors
3651 for hv in self.hv_list:
3652 # if the hypervisor doesn't already exist in the cluster
3653 # hvparams, we initialize it to empty, and then (in both
3654 # cases) we make sure to fill the defaults, as we might not
3655 # have a complete defaults list if the hypervisor wasn't
3657 if hv not in new_hvp:
3659 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3660 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3662 self.hv_list = cluster.enabled_hypervisors
3664 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3665 # either the enabled list has changed, or the parameters have, validate
3666 for hv_name, hv_params in self.new_hvparams.items():
3667 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3668 (self.op.enabled_hypervisors and
3669 hv_name in self.op.enabled_hypervisors)):
3670 # either this is a new hypervisor, or its parameters have changed
3671 hv_class = hypervisor.GetHypervisor(hv_name)
3672 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3673 hv_class.CheckParameterSyntax(hv_params)
3674 _CheckHVParams(self, node_list, hv_name, hv_params)
3677 # no need to check any newly-enabled hypervisors, since the
3678 # defaults have already been checked in the above code-block
3679 for os_name, os_hvp in self.new_os_hvp.items():
3680 for hv_name, hv_params in os_hvp.items():
3681 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3682 # we need to fill in the new os_hvp on top of the actual hv_p
3683 cluster_defaults = self.new_hvparams.get(hv_name, {})
3684 new_osp = objects.FillDict(cluster_defaults, hv_params)
3685 hv_class = hypervisor.GetHypervisor(hv_name)
3686 hv_class.CheckParameterSyntax(new_osp)
3687 _CheckHVParams(self, node_list, hv_name, new_osp)
3689 if self.op.default_iallocator:
3690 alloc_script = utils.FindFile(self.op.default_iallocator,
3691 constants.IALLOCATOR_SEARCH_PATH,
3693 if alloc_script is None:
3694 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3695 " specified" % self.op.default_iallocator,
3698 def Exec(self, feedback_fn):
3699 """Change the parameters of the cluster.
3702 if self.op.vg_name is not None:
3703 new_volume = self.op.vg_name
3706 if new_volume != self.cfg.GetVGName():
3707 self.cfg.SetVGName(new_volume)
3709 feedback_fn("Cluster LVM configuration already in desired"
3710 " state, not changing")
3711 if self.op.drbd_helper is not None:
3712 new_helper = self.op.drbd_helper
3715 if new_helper != self.cfg.GetDRBDHelper():
3716 self.cfg.SetDRBDHelper(new_helper)
3718 feedback_fn("Cluster DRBD helper already in desired state,"
3720 if self.op.hvparams:
3721 self.cluster.hvparams = self.new_hvparams
3723 self.cluster.os_hvp = self.new_os_hvp
3724 if self.op.enabled_hypervisors is not None:
3725 self.cluster.hvparams = self.new_hvparams
3726 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3727 if self.op.beparams:
3728 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3729 if self.op.nicparams:
3730 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3731 if self.op.osparams:
3732 self.cluster.osparams = self.new_osp
3733 if self.op.ndparams:
3734 self.cluster.ndparams = self.new_ndparams
3736 if self.op.candidate_pool_size is not None:
3737 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3738 # we need to update the pool size here, otherwise the save will fail
3739 _AdjustCandidatePool(self, [])
3741 if self.op.maintain_node_health is not None:
3742 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3743 feedback_fn("Note: CONFD was disabled at build time, node health"
3744 " maintenance is not useful (still enabling it)")
3745 self.cluster.maintain_node_health = self.op.maintain_node_health
3747 if self.op.prealloc_wipe_disks is not None:
3748 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3750 if self.op.add_uids is not None:
3751 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3753 if self.op.remove_uids is not None:
3754 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3756 if self.op.uid_pool is not None:
3757 self.cluster.uid_pool = self.op.uid_pool
3759 if self.op.default_iallocator is not None:
3760 self.cluster.default_iallocator = self.op.default_iallocator
3762 if self.op.reserved_lvs is not None:
3763 self.cluster.reserved_lvs = self.op.reserved_lvs
3765 if self.op.use_external_mip_script is not None:
3766 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3768 def helper_os(aname, mods, desc):
3770 lst = getattr(self.cluster, aname)
3771 for key, val in mods:
3772 if key == constants.DDM_ADD:
3774 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3777 elif key == constants.DDM_REMOVE:
3781 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3783 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3785 if self.op.hidden_os:
3786 helper_os("hidden_os", self.op.hidden_os, "hidden")
3788 if self.op.blacklisted_os:
3789 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3791 if self.op.master_netdev:
3792 master_params = self.cfg.GetMasterNetworkParameters()
3793 ems = self.cfg.GetUseExternalMipScript()
3794 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3795 self.cluster.master_netdev)
3796 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3797 master_params, ems)
3798 result.Raise("Could not disable the master ip")
3799 feedback_fn("Changing master_netdev from %s to %s" %
3800 (master_params.netdev, self.op.master_netdev))
3801 self.cluster.master_netdev = self.op.master_netdev
3803 if self.op.master_netmask:
3804 master_params = self.cfg.GetMasterNetworkParameters()
3805 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3806 result = self.rpc.call_node_change_master_netmask(master_params.name,
3807 master_params.netmask,
3808 self.op.master_netmask,
3810 master_params.netdev)
3812 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3815 self.cluster.master_netmask = self.op.master_netmask
3817 self.cfg.Update(self.cluster, feedback_fn)
3819 if self.op.master_netdev:
3820 master_params = self.cfg.GetMasterNetworkParameters()
3821 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3822 self.op.master_netdev)
3823 ems = self.cfg.GetUseExternalMipScript()
3824 result = self.rpc.call_node_activate_master_ip(master_params.name,
3825 master_params, ems)
3826 if result.fail_msg:
3827 self.LogWarning("Could not re-enable the master ip on"
3828 " the master, please restart manually: %s",
3829 result.fail_msg)
3832 def _UploadHelper(lu, nodes, fname):
3833 """Helper for uploading a file and showing warnings.
3836 if os.path.exists(fname):
3837 result = lu.rpc.call_upload_file(nodes, fname)
3838 for to_node, to_result in result.items():
3839 msg = to_result.fail_msg
3840 if msg:
3841 msg = ("Copy of file %s to node %s failed: %s" %
3842 (fname, to_node, msg))
3843 lu.proc.LogWarning(msg)
3846 def _ComputeAncillaryFiles(cluster, redist):
3847 """Compute files external to Ganeti which need to be consistent.
3849 @type redist: boolean
3850 @param redist: Whether to include files which need to be redistributed
3853 # Compute files for all nodes
3854 files_all = set([
3855 constants.SSH_KNOWN_HOSTS_FILE,
3856 constants.CONFD_HMAC_KEY,
3857 constants.CLUSTER_DOMAIN_SECRET_FILE,
3858 constants.SPICE_CERT_FILE,
3859 constants.SPICE_CACERT_FILE,
3860 constants.RAPI_USERS_FILE,
3861 ])
3863 if not redist:
3864 files_all.update(constants.ALL_CERT_FILES)
3865 files_all.update(ssconf.SimpleStore().GetFileList())
3866 else:
3867 # we need to ship at least the RAPI certificate
3868 files_all.add(constants.RAPI_CERT_FILE)
3870 if cluster.modify_etc_hosts:
3871 files_all.add(constants.ETC_HOSTS)
3873 # Files which are optional, these must:
3874 # - be present in one other category as well
3875 # - either exist or not exist on all nodes of that category (mc, vm all)
3876 files_opt = set([
3877 constants.RAPI_USERS_FILE,
3878 ])
3880 # Files which should only be on master candidates
3881 files_mc = set()
3883 if not redist:
3884 files_mc.add(constants.CLUSTER_CONF_FILE)
3886 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3888 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3890 # Files which should only be on VM-capable nodes
3891 files_vm = set(filename
3892 for hv_name in cluster.enabled_hypervisors
3893 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3895 files_opt |= set(filename
3896 for hv_name in cluster.enabled_hypervisors
3897 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3899 # Filenames in each category must be unique
3900 all_files_set = files_all | files_mc | files_vm
3901 assert (len(all_files_set) ==
3902 sum(map(len, [files_all, files_mc, files_vm]))), \
3903 "Found file listed in more than one file list"
3905 # Optional files must be present in one other category
3906 assert all_files_set.issuperset(files_opt), \
3907 "Optional file not in a different required list"
3909 return (files_all, files_opt, files_mc, files_vm)
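# The four sets computed above are meant to be consumed together; a caller
# typically unpacks them as
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist)
# with files_all replicated to every node, files_mc only to master
# candidates, files_vm only to VM-capable nodes, and files_opt flagging
# entries of the other sets that may legitimately be missing.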
3912 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3913 """Distribute additional files which are part of the cluster configuration.
3915 ConfigWriter takes care of distributing the config and ssconf files, but
3916 there are more files which should be distributed to all nodes. This function
3917 makes sure those are copied.
3919 @param lu: calling logical unit
3920 @param additional_nodes: list of nodes not in the config to distribute to
3921 @type additional_vm: boolean
3922 @param additional_vm: whether the additional nodes are vm-capable or not
3925 # Gather target nodes
3926 cluster = lu.cfg.GetClusterInfo()
3927 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3929 online_nodes = lu.cfg.GetOnlineNodeList()
3930 vm_nodes = lu.cfg.GetVmCapableNodeList()
3932 if additional_nodes is not None:
3933 online_nodes.extend(additional_nodes)
3934 if additional_vm:
3935 vm_nodes.extend(additional_nodes)
3937 # Never distribute to master node
3938 for nodelist in [online_nodes, vm_nodes]:
3939 if master_info.name in nodelist:
3940 nodelist.remove(master_info.name)
3943 (files_all, _, files_mc, files_vm) = \
3944 _ComputeAncillaryFiles(cluster, True)
3946 # Never re-distribute configuration file from here
3947 assert not (constants.CLUSTER_CONF_FILE in files_all or
3948 constants.CLUSTER_CONF_FILE in files_vm)
3949 assert not files_mc, "Master candidates not handled in this function"
3951 filemap = [
3952 (online_nodes, files_all),
3953 (vm_nodes, files_vm),
3954 ]
3957 for (node_list, files) in filemap:
3958 for fname in files:
3959 _UploadHelper(lu, node_list, fname)
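# This helper is shared by several LUs in this module (LUClusterRedistConf
# below, LUNodeRemove and LUNodeAdd among others); the cluster configuration
# and ssconf files themselves are pushed by ConfigWriter, not here.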
3962 class LUClusterRedistConf(NoHooksLU):
3963 """Force the redistribution of cluster configuration.
3965 This is a very simple LU.
3970 def ExpandNames(self):
3971 self.needed_locks = {
3972 locking.LEVEL_NODE: locking.ALL_SET,
3974 self.share_locks[locking.LEVEL_NODE] = 1
3976 def Exec(self, feedback_fn):
3977 """Redistribute the configuration.
3980 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3981 _RedistributeAncillaryFiles(self)
3984 class LUClusterActivateMasterIp(NoHooksLU):
3985 """Activate the master IP on the master node.
3988 def Exec(self, feedback_fn):
3989 """Activate the master IP.
3992 master_params = self.cfg.GetMasterNetworkParameters()
3993 ems = self.cfg.GetUseExternalMipScript()
3994 result = self.rpc.call_node_activate_master_ip(master_params.name,
3996 result.Raise("Could not activate the master IP")
3999 class LUClusterDeactivateMasterIp(NoHooksLU):
4000 """Deactivate the master IP on the master node.
4003 def Exec(self, feedback_fn):
4004 """Deactivate the master IP.
4007 master_params = self.cfg.GetMasterNetworkParameters()
4008 ems = self.cfg.GetUseExternalMipScript()
4009 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4011 result.Raise("Could not deactivate the master IP")
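# Both LUs above only toggle the master IP on the current master node and
# change no configuration; they are typically exercised by cluster tooling,
# e.g. around master failover or the corresponding gnt-cluster commands.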
4014 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4015 """Sleep and poll for an instance's disk to sync.
4018 if not instance.disks or disks is not None and not disks:
4019 return True
4021 disks = _ExpandCheckDisks(instance, disks)
4024 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4026 node = instance.primary_node
4028 for dev in disks:
4029 lu.cfg.SetDiskID(dev, node)
4031 # TODO: Convert to utils.Retry
4034 degr_retries = 10 # in seconds, as we sleep 1 second each time
4038 cumul_degraded = False
4039 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4040 msg = rstats.fail_msg
4041 if msg:
4042 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4045 raise errors.RemoteError("Can't contact node %s for mirror data,"
4046 " aborting." % node)
4049 rstats = rstats.payload
4051 for i, mstat in enumerate(rstats):
4052 if mstat is None:
4053 lu.LogWarning("Can't compute data for node %s/%s",
4054 node, disks[i].iv_name)
4055 continue
4057 cumul_degraded = (cumul_degraded or
4058 (mstat.is_degraded and mstat.sync_percent is None))
4059 if mstat.sync_percent is not None:
4060 done = False
4061 if mstat.estimated_time is not None:
4062 rem_time = ("%s remaining (estimated)" %
4063 utils.FormatSeconds(mstat.estimated_time))
4064 max_time = mstat.estimated_time
4065 else:
4066 rem_time = "no time estimate"
4067 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4068 (disks[i].iv_name, mstat.sync_percent, rem_time))
4070 # if we're done but degraded, let's do a few small retries, to
4071 # make sure we see a stable and not transient situation; therefore
4072 # we force restart of the loop
4073 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4074 logging.info("Degraded disks found, %d retries left", degr_retries)
4082 time.sleep(min(60, max_time))
4084 if done:
4085 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4086 return not cumul_degraded
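# The boolean returned above is True only when no disk remained degraded at
# the end of the wait; callers needing a hard guarantee should check the
# return value rather than rely on the progress messages alone.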
4089 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4090 """Check that mirrors are not degraded.
4092 The ldisk parameter, if True, will change the test from the
4093 is_degraded attribute (which represents overall non-ok status for
4094 the device(s)) to the ldisk (representing the local storage status).
4097 lu.cfg.SetDiskID(dev, node)
4099 result = True
4101 if on_primary or dev.AssembleOnSecondary():
4102 rstats = lu.rpc.call_blockdev_find(node, dev)
4103 msg = rstats.fail_msg
4104 if msg:
4105 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4106 result = False
4107 elif not rstats.payload:
4108 lu.LogWarning("Can't find disk on node %s", node)
4109 result = False
4110 else:
4111 if ldisk:
4112 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4113 else:
4114 result = result and not rstats.payload.is_degraded
4116 if dev.children:
4117 for child in dev.children:
4118 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4120 return result
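# With ldisk=False the function reports overall mirror health (is_degraded);
# with ldisk=True it only looks at the local storage status (LDS_OKAY).
# Children are always re-checked with the default, overall test.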
4123 class LUOobCommand(NoHooksLU):
4124 """Logical unit for OOB handling.
4128 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4130 def ExpandNames(self):
4131 """Gather locks we need.
4134 if self.op.node_names:
4135 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4136 lock_names = self.op.node_names
4137 else:
4138 lock_names = locking.ALL_SET
4140 self.needed_locks = {
4141 locking.LEVEL_NODE: lock_names,
4144 def CheckPrereq(self):
4145 """Check prerequisites.
4148 - the node exists in the configuration
4151 Any errors are signaled by raising errors.OpPrereqError.
4155 self.master_node = self.cfg.GetMasterNode()
4157 assert self.op.power_delay >= 0.0
4159 if self.op.node_names:
4160 if (self.op.command in self._SKIP_MASTER and
4161 self.master_node in self.op.node_names):
4162 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4163 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4165 if master_oob_handler:
4166 additional_text = ("run '%s %s %s' if you want to operate on the"
4167 " master regardless") % (master_oob_handler,
4171 additional_text = "it does not support out-of-band operations"
4173 raise errors.OpPrereqError(("Operating on the master node %s is not"
4174 " allowed for %s; %s") %
4175 (self.master_node, self.op.command,
4176 additional_text), errors.ECODE_INVAL)
4177 else:
4178 self.op.node_names = self.cfg.GetNodeList()
4179 if self.op.command in self._SKIP_MASTER:
4180 self.op.node_names.remove(self.master_node)
4182 if self.op.command in self._SKIP_MASTER:
4183 assert self.master_node not in self.op.node_names
4185 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4187 raise errors.OpPrereqError("Node %s not found" % node_name,
4190 self.nodes.append(node)
4192 if (not self.op.ignore_status and
4193 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4194 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4195 " not marked offline") % node_name,
4198 def Exec(self, feedback_fn):
4199 """Execute OOB and return result if we expect any.
4202 master_node = self.master_node
4205 for idx, node in enumerate(utils.NiceSort(self.nodes,
4206 key=lambda node: node.name)):
4207 node_entry = [(constants.RS_NORMAL, node.name)]
4208 ret.append(node_entry)
4210 oob_program = _SupportsOob(self.cfg, node)
4213 node_entry.append((constants.RS_UNAVAIL, None))
4216 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4217 self.op.command, oob_program, node.name)
4218 result = self.rpc.call_run_oob(master_node, oob_program,
4219 self.op.command, node.name,
4223 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4224 node.name, result.fail_msg)
4225 node_entry.append((constants.RS_NODATA, None))
4228 self._CheckPayload(result)
4229 except errors.OpExecError, err:
4230 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4232 node_entry.append((constants.RS_NODATA, None))
4234 if self.op.command == constants.OOB_HEALTH:
4235 # For health we should log important events
4236 for item, status in result.payload:
4237 if status in [constants.OOB_STATUS_WARNING,
4238 constants.OOB_STATUS_CRITICAL]:
4239 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4240 item, node.name, status)
4242 if self.op.command == constants.OOB_POWER_ON:
4243 node.powered = True
4244 elif self.op.command == constants.OOB_POWER_OFF:
4245 node.powered = False
4246 elif self.op.command == constants.OOB_POWER_STATUS:
4247 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4248 if powered != node.powered:
4249 logging.warning(("Recorded power state (%s) of node '%s' does not"
4250 " match actual power state (%s)"), node.powered,
4253 # For configuration changing commands we should update the node
4254 if self.op.command in (constants.OOB_POWER_ON,
4255 constants.OOB_POWER_OFF):
4256 self.cfg.Update(node, feedback_fn)
4258 node_entry.append((constants.RS_NORMAL, result.payload))
4260 if (self.op.command == constants.OOB_POWER_ON and
4261 idx < len(self.nodes) - 1):
4262 time.sleep(self.op.power_delay)
4264 return ret
4266 def _CheckPayload(self, result):
4267 """Checks if the payload is valid.
4269 @param result: RPC result
4270 @raises errors.OpExecError: If payload is not valid
4274 if self.op.command == constants.OOB_HEALTH:
4275 if not isinstance(result.payload, list):
4276 errs.append("command 'health' is expected to return a list but got %s" %
4277 type(result.payload))
4279 for item, status in result.payload:
4280 if status not in constants.OOB_STATUSES:
4281 errs.append("health item '%s' has invalid status '%s'" %
4284 if self.op.command == constants.OOB_POWER_STATUS:
4285 if not isinstance(result.payload, dict):
4286 errs.append("power-status is expected to return a dict but got %s" %
4287 type(result.payload))
4289 if self.op.command in [
4290 constants.OOB_POWER_ON,
4291 constants.OOB_POWER_OFF,
4292 constants.OOB_POWER_CYCLE,
4294 if result.payload is not None:
4295 errs.append("%s is expected to not return payload but got '%s'" %
4296 (self.op.command, result.payload))
4299 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4300 utils.CommaJoin(errs))
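# Per-command payload contract enforced above:
#   OOB_HEALTH        -> list of (item, status) pairs, status in OOB_STATUSES
#   OOB_POWER_STATUS  -> dict, including OOB_POWER_STATUS_POWERED
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)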
4303 class _OsQuery(_QueryBase):
4304 FIELDS = query.OS_FIELDS
4306 def ExpandNames(self, lu):
4307 # Lock all nodes in shared mode
4308 # Temporary removal of locks, should be reverted later
4309 # TODO: reintroduce locks when they are lighter-weight
4310 lu.needed_locks = {}
4311 #self.share_locks[locking.LEVEL_NODE] = 1
4312 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4314 # The following variables interact with _QueryBase._GetNames
4316 self.wanted = self.names
4318 self.wanted = locking.ALL_SET
4320 self.do_locking = self.use_locking
4322 def DeclareLocks(self, lu, level):
4326 def _DiagnoseByOS(rlist):
4327 """Remaps a per-node return list into an a per-os per-node dictionary
4329 @param rlist: a map with node names as keys and OS objects as values
4332 @return: a dictionary with osnames as keys and as value another
4333 map, with nodes as keys and tuples of (path, status, diagnose,
4334 variants, parameters, api_versions) as values, eg::
4336 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4337 (/srv/..., False, "invalid api")],
4338 "node2": [(/srv/..., True, "", [], [])]}
4343 # we build here the list of nodes that didn't fail the RPC (at RPC
4344 # level), so that nodes with a non-responding node daemon don't
4345 # make all OSes invalid
4346 good_nodes = [node_name for node_name in rlist
4347 if not rlist[node_name].fail_msg]
4348 for node_name, nr in rlist.items():
4349 if nr.fail_msg or not nr.payload:
4350 continue
4351 for (name, path, status, diagnose, variants,
4352 params, api_versions) in nr.payload:
4353 if name not in all_os:
4354 # build a list of nodes for this os containing empty lists
4355 # for each node in node_list
4356 all_os[name] = {}
4357 for nname in good_nodes:
4358 all_os[name][nname] = []
4359 # convert params from [name, help] to (name, help)
4360 params = [tuple(v) for v in params]
4361 all_os[name][node_name].append((path, status, diagnose,
4362 variants, params, api_versions))
4363 return all_os
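# A node that answered the RPC but lacks a given OS keeps the empty list
# created above; _GetQueryData later folds that into info.valid, so such an
# OS is reported as invalid instead of silently disappearing from the list.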
4365 def _GetQueryData(self, lu):
4366 """Computes the list of nodes and their attributes.
4369 # Locking is not used
4370 assert not (compat.any(lu.glm.is_owned(level)
4371 for level in locking.LEVELS
4372 if level != locking.LEVEL_CLUSTER) or
4373 self.do_locking or self.use_locking)
4375 valid_nodes = [node.name
4376 for node in lu.cfg.GetAllNodesInfo().values()
4377 if not node.offline and node.vm_capable]
4378 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4379 cluster = lu.cfg.GetClusterInfo()
4383 for (os_name, os_data) in pol.items():
4384 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4385 hidden=(os_name in cluster.hidden_os),
4386 blacklisted=(os_name in cluster.blacklisted_os))
4390 api_versions = set()
4392 for idx, osl in enumerate(os_data.values()):
4393 info.valid = bool(info.valid and osl and osl[0][1])
4397 (node_variants, node_params, node_api) = osl[0][3:6]
4400 variants.update(node_variants)
4401 parameters.update(node_params)
4402 api_versions.update(node_api)
4404 # Filter out inconsistent values
4405 variants.intersection_update(node_variants)
4406 parameters.intersection_update(node_params)
4407 api_versions.intersection_update(node_api)
4409 info.variants = list(variants)
4410 info.parameters = list(parameters)
4411 info.api_versions = list(api_versions)
4413 data[os_name] = info
4415 # Prepare data in requested order
4416 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4417 if name in data]
4420 class LUOsDiagnose(NoHooksLU):
4421 """Logical unit for OS diagnose/query.
4427 def _BuildFilter(fields, names):
4428 """Builds a filter for querying OSes.
4431 name_filter = qlang.MakeSimpleFilter("name", names)
4433 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4434 # respective field is not requested
4435 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4436 for fname in ["hidden", "blacklisted"]
4437 if fname not in fields]
4438 if "valid" not in fields:
4439 status_filter.append([qlang.OP_TRUE, "valid"])
4441 if status_filter:
4442 status_filter.insert(0, qlang.OP_AND)
4443 else:
4444 status_filter = None
4446 if name_filter and status_filter:
4447 return [qlang.OP_AND, name_filter, status_filter]
4448 elif name_filter:
4449 return name_filter
4450 else:
4451 return status_filter
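# For illustration: with default output fields (none of "hidden",
# "blacklisted" or "valid" requested) and no names, the filter built above is
#   [OP_AND,
#    [OP_NOT, [OP_TRUE, "hidden"]],
#    [OP_NOT, [OP_TRUE, "blacklisted"]],
#    [OP_TRUE, "valid"]]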
4453 def CheckArguments(self):
4454 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4455 self.op.output_fields, False)
4457 def ExpandNames(self):
4458 self.oq.ExpandNames(self)
4460 def Exec(self, feedback_fn):
4461 return self.oq.OldStyleQuery(self)
4464 class LUNodeRemove(LogicalUnit):
4465 """Logical unit for removing a node.
4468 HPATH = "node-remove"
4469 HTYPE = constants.HTYPE_NODE
4471 def BuildHooksEnv(self):
4474 This doesn't run on the target node in the pre phase as a failed
4475 node would then be impossible to remove.
4479 "OP_TARGET": self.op.node_name,
4480 "NODE_NAME": self.op.node_name,
4483 def BuildHooksNodes(self):
4484 """Build hooks nodes.
4487 all_nodes = self.cfg.GetNodeList()
4489 all_nodes.remove(self.op.node_name)
4491 logging.warning("Node '%s', which is about to be removed, was not found"
4492 " in the list of all nodes", self.op.node_name)
4493 return (all_nodes, all_nodes)
4495 def CheckPrereq(self):
4496 """Check prerequisites.
4499 - the node exists in the configuration
4500 - it does not have primary or secondary instances
4501 - it's not the master
4503 Any errors are signaled by raising errors.OpPrereqError.
4506 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4507 node = self.cfg.GetNodeInfo(self.op.node_name)
4508 assert node is not None
4510 masternode = self.cfg.GetMasterNode()
4511 if node.name == masternode:
4512 raise errors.OpPrereqError("Node is the master node, failover to another"
4513 " node is required", errors.ECODE_INVAL)
4515 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4516 if node.name in instance.all_nodes:
4517 raise errors.OpPrereqError("Instance %s is still running on the node,"
4518 " please remove first" % instance_name,
4520 self.op.node_name = node.name
4523 def Exec(self, feedback_fn):
4524 """Removes the node from the cluster.
4528 logging.info("Stopping the node daemon and removing configs from node %s",
4531 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4533 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4536 # Promote nodes to master candidate as needed
4537 _AdjustCandidatePool(self, exceptions=[node.name])
4538 self.context.RemoveNode(node.name)
4540 # Run post hooks on the node before it's removed
4541 _RunPostHook(self, node.name)
4543 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4544 msg = result.fail_msg
4546 self.LogWarning("Errors encountered on the remote node while leaving"
4547 " the cluster: %s", msg)
4549 # Remove node from our /etc/hosts
4550 if self.cfg.GetClusterInfo().modify_etc_hosts:
4551 master_node = self.cfg.GetMasterNode()
4552 result = self.rpc.call_etc_hosts_modify(master_node,
4553 constants.ETC_HOSTS_REMOVE,
4555 result.Raise("Can't update hosts file with new host data")
4556 _RedistributeAncillaryFiles(self)
4559 class _NodeQuery(_QueryBase):
4560 FIELDS = query.NODE_FIELDS
4562 def ExpandNames(self, lu):
4563 lu.needed_locks = {}
4564 lu.share_locks = _ShareAll()
4566 if self.names:
4567 self.wanted = _GetWantedNodes(lu, self.names)
4568 else:
4569 self.wanted = locking.ALL_SET
4571 self.do_locking = (self.use_locking and
4572 query.NQ_LIVE in self.requested_data)
4574 if self.do_locking:
4575 # If any non-static field is requested we need to lock the nodes
4576 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4578 def DeclareLocks(self, lu, level):
4581 def _GetQueryData(self, lu):
4582 """Computes the list of nodes and their attributes.
4585 all_info = lu.cfg.GetAllNodesInfo()
4587 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4589 # Gather data as requested
4590 if query.NQ_LIVE in self.requested_data:
4591 # filter out non-vm_capable nodes
4592 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4594 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4595 lu.cfg.GetHypervisorType())
4596 live_data = dict((name, nresult.payload)
4597 for (name, nresult) in node_data.items()
4598 if not nresult.fail_msg and nresult.payload)
4599 else:
4600 live_data = None
4602 if query.NQ_INST in self.requested_data:
4603 node_to_primary = dict([(name, set()) for name in nodenames])
4604 node_to_secondary = dict([(name, set()) for name in nodenames])
4606 inst_data = lu.cfg.GetAllInstancesInfo()
4608 for inst in inst_data.values():
4609 if inst.primary_node in node_to_primary:
4610 node_to_primary[inst.primary_node].add(inst.name)
4611 for secnode in inst.secondary_nodes:
4612 if secnode in node_to_secondary:
4613 node_to_secondary[secnode].add(inst.name)
4614 else:
4615 node_to_primary = None
4616 node_to_secondary = None
4618 if query.NQ_OOB in self.requested_data:
4619 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4620 for name, node in all_info.iteritems())
4621 else:
4622 oob_support = None
4624 if query.NQ_GROUP in self.requested_data:
4625 groups = lu.cfg.GetAllNodeGroupsInfo()
4626 else:
4627 groups = {}
4629 return query.NodeQueryData([all_info[name] for name in nodenames],
4630 live_data, lu.cfg.GetMasterNode(),
4631 node_to_primary, node_to_secondary, groups,
4632 oob_support, lu.cfg.GetClusterInfo())
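# Each requested NQ_* flag contributes one optional block of data above:
# NQ_LIVE triggers the node_info RPC, NQ_INST builds the primary/secondary
# instance maps, NQ_OOB records out-of-band support and NQ_GROUP attaches the
# node group objects; anything not requested is left as None or empty.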
4635 class LUNodeQuery(NoHooksLU):
4636 """Logical unit for querying nodes.
4639 # pylint: disable=W0142
4642 def CheckArguments(self):
4643 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4644 self.op.output_fields, self.op.use_locking)
4646 def ExpandNames(self):
4647 self.nq.ExpandNames(self)
4649 def DeclareLocks(self, level):
4650 self.nq.DeclareLocks(self, level)
4652 def Exec(self, feedback_fn):
4653 return self.nq.OldStyleQuery(self)
4656 class LUNodeQueryvols(NoHooksLU):
4657 """Logical unit for getting volumes on node(s).
4661 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4662 _FIELDS_STATIC = utils.FieldSet("node")
4664 def CheckArguments(self):
4665 _CheckOutputFields(static=self._FIELDS_STATIC,
4666 dynamic=self._FIELDS_DYNAMIC,
4667 selected=self.op.output_fields)
4669 def ExpandNames(self):
4670 self.share_locks = _ShareAll()
4671 self.needed_locks = {}
4673 if not self.op.nodes:
4674 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4675 else:
4676 self.needed_locks[locking.LEVEL_NODE] = \
4677 _GetWantedNodes(self, self.op.nodes)
4679 def Exec(self, feedback_fn):
4680 """Computes the list of nodes and their attributes.
4683 nodenames = self.owned_locks(locking.LEVEL_NODE)
4684 volumes = self.rpc.call_node_volumes(nodenames)
4686 ilist = self.cfg.GetAllInstancesInfo()
4687 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4690 for node in nodenames:
4691 nresult = volumes[node]
4694 msg = nresult.fail_msg
4696 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4699 node_vols = sorted(nresult.payload,
4700 key=operator.itemgetter("dev"))
4702 for vol in node_vols:
4704 for field in self.op.output_fields:
4707 elif field == "phys":
4711 elif field == "name":
4713 elif field == "size":
4714 val = int(float(vol["size"]))
4715 elif field == "instance":
4716 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4718 raise errors.ParameterError(field)
4719 node_output.append(str(val))
4721 output.append(node_output)
4723 return output
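# The result is a list of rows, one per volume, each row holding the string
# values of self.op.output_fields in the order they were requested.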
4726 class LUNodeQueryStorage(NoHooksLU):
4727 """Logical unit for getting information on storage units on node(s).
4730 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4733 def CheckArguments(self):
4734 _CheckOutputFields(static=self._FIELDS_STATIC,
4735 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4736 selected=self.op.output_fields)
4738 def ExpandNames(self):
4739 self.share_locks = _ShareAll()
4740 self.needed_locks = {}
4743 self.needed_locks[locking.LEVEL_NODE] = \
4744 _GetWantedNodes(self, self.op.nodes)
4746 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4748 def Exec(self, feedback_fn):
4749 """Computes the list of nodes and their attributes.
4752 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4754 # Always get name to sort by
4755 if constants.SF_NAME in self.op.output_fields:
4756 fields = self.op.output_fields[:]
4758 fields = [constants.SF_NAME] + self.op.output_fields
4760 # Never ask for node or type as it's only known to the LU
4761 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4762 while extra in fields:
4763 fields.remove(extra)
4765 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4766 name_idx = field_idx[constants.SF_NAME]
4768 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4769 data = self.rpc.call_storage_list(self.nodes,
4770 self.op.storage_type, st_args,
4771 self.op.name, fields)
4775 for node in utils.NiceSort(self.nodes):
4776 nresult = data[node]
4780 msg = nresult.fail_msg
4782 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4785 rows = dict([(row[name_idx], row) for row in nresult.payload])
4787 for name in utils.NiceSort(rows.keys()):
4792 for field in self.op.output_fields:
4793 if field == constants.SF_NODE:
4795 elif field == constants.SF_TYPE:
4796 val = self.op.storage_type
4797 elif field in field_idx:
4798 val = row[field_idx[field]]
4800 raise errors.ParameterError(field)
4809 class _InstanceQuery(_QueryBase):
4810 FIELDS = query.INSTANCE_FIELDS
4812 def ExpandNames(self, lu):
4813 lu.needed_locks = {}
4814 lu.share_locks = _ShareAll()
4816 if self.names:
4817 self.wanted = _GetWantedInstances(lu, self.names)
4818 else:
4819 self.wanted = locking.ALL_SET
4821 self.do_locking = (self.use_locking and
4822 query.IQ_LIVE in self.requested_data)
4823 if self.do_locking:
4824 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4825 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4826 lu.needed_locks[locking.LEVEL_NODE] = []
4827 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4829 self.do_grouplocks = (self.do_locking and
4830 query.IQ_NODES in self.requested_data)
4832 def DeclareLocks(self, lu, level):
4834 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4835 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4837 # Lock all groups used by instances optimistically; this requires going
4838 # via the node before it's locked, requiring verification later on
4839 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4841 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4842 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4843 elif level == locking.LEVEL_NODE:
4844 lu._LockInstancesNodes() # pylint: disable=W0212
4847 def _CheckGroupLocks(lu):
4848 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4849 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4851 # Check if node groups for locked instances are still correct
4852 for instance_name in owned_instances:
4853 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4855 def _GetQueryData(self, lu):
4856 """Computes the list of instances and their attributes.
4859 if self.do_grouplocks:
4860 self._CheckGroupLocks(lu)
4862 cluster = lu.cfg.GetClusterInfo()
4863 all_info = lu.cfg.GetAllInstancesInfo()
4865 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4867 instance_list = [all_info[name] for name in instance_names]
4868 nodes = frozenset(itertools.chain(*(inst.all_nodes
4869 for inst in instance_list)))
4870 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4873 wrongnode_inst = set()
4875 # Gather data as requested
4876 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4878 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4880 result = node_data[name]
4882 # offline nodes will be in both lists
4883 assert result.fail_msg
4884 offline_nodes.append(name)
4886 bad_nodes.append(name)
4887 elif result.payload:
4888 for inst in result.payload:
4889 if inst in all_info:
4890 if all_info[inst].primary_node == name:
4891 live_data.update(result.payload)
4893 wrongnode_inst.add(inst)
4895 # orphan instance; we don't list it here as we don't
4896 # handle this case yet in the output of instance listing
4897 logging.warning("Orphan instance '%s' found on node %s",
4899 # else no instance is alive
4903 if query.IQ_DISKUSAGE in self.requested_data:
4904 disk_usage = dict((inst.name,
4905 _ComputeDiskSize(inst.disk_template,
4906 [{constants.IDISK_SIZE: disk.size}
4907 for disk in inst.disks]))
4908 for inst in instance_list)
4912 if query.IQ_CONSOLE in self.requested_data:
4914 for inst in instance_list:
4915 if inst.name in live_data:
4916 # Instance is running
4917 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4919 consinfo[inst.name] = None
4920 assert set(consinfo.keys()) == set(instance_names)
4924 if query.IQ_NODES in self.requested_data:
4925 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4927 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4928 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4929 for uuid in set(map(operator.attrgetter("group"),
4935 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4936 disk_usage, offline_nodes, bad_nodes,
4937 live_data, wrongnode_inst, consinfo,
4941 class LUQuery(NoHooksLU):
4942 """Query for resources/items of a certain kind.
4945 # pylint: disable=W0142
4948 def CheckArguments(self):
4949 qcls = _GetQueryImplementation(self.op.what)
4951 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4953 def ExpandNames(self):
4954 self.impl.ExpandNames(self)
4956 def DeclareLocks(self, level):
4957 self.impl.DeclareLocks(self, level)
4959 def Exec(self, feedback_fn):
4960 return self.impl.NewStyleQuery(self)
4963 class LUQueryFields(NoHooksLU):
4964 """Query for resources/items of a certain kind.
4967 # pylint: disable=W0142
4970 def CheckArguments(self):
4971 self.qcls = _GetQueryImplementation(self.op.what)
4973 def ExpandNames(self):
4974 self.needed_locks = {}
4976 def Exec(self, feedback_fn):
4977 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4980 class LUNodeModifyStorage(NoHooksLU):
4981 """Logical unit for modifying a storage volume on a node.
4986 def CheckArguments(self):
4987 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4989 storage_type = self.op.storage_type
4992 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4994 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4995 " modified" % storage_type,
4998 diff = set(self.op.changes.keys()) - modifiable
5000 raise errors.OpPrereqError("The following fields can not be modified for"
5001 " storage units of type '%s': %r" %
5002 (storage_type, list(diff)),
5005 def ExpandNames(self):
5006 self.needed_locks = {
5007 locking.LEVEL_NODE: self.op.node_name,
5010 def Exec(self, feedback_fn):
5011 """Computes the list of nodes and their attributes.
5014 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5015 result = self.rpc.call_storage_modify(self.op.node_name,
5016 self.op.storage_type, st_args,
5017 self.op.name, self.op.changes)
5018 result.Raise("Failed to modify storage unit '%s' on %s" %
5019 (self.op.name, self.op.node_name))
5022 class LUNodeAdd(LogicalUnit):
5023 """Logical unit for adding node to the cluster.
5027 HTYPE = constants.HTYPE_NODE
5028 _NFLAGS = ["master_capable", "vm_capable"]
5030 def CheckArguments(self):
5031 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5032 # validate/normalize the node name
5033 self.hostname = netutils.GetHostname(name=self.op.node_name,
5034 family=self.primary_ip_family)
5035 self.op.node_name = self.hostname.name
5037 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5038 raise errors.OpPrereqError("Cannot readd the master node",
5041 if self.op.readd and self.op.group:
5042 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5043 " being readded", errors.ECODE_INVAL)
5045 def BuildHooksEnv(self):
5048 This will run on all nodes before, and on all nodes + the new node after.
5052 "OP_TARGET": self.op.node_name,
5053 "NODE_NAME": self.op.node_name,
5054 "NODE_PIP": self.op.primary_ip,
5055 "NODE_SIP": self.op.secondary_ip,
5056 "MASTER_CAPABLE": str(self.op.master_capable),
5057 "VM_CAPABLE": str(self.op.vm_capable),
5060 def BuildHooksNodes(self):
5061 """Build hooks nodes.
5064 # Exclude added node
5065 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5066 post_nodes = pre_nodes + [self.op.node_name, ]
5068 return (pre_nodes, post_nodes)
5070 def CheckPrereq(self):
5071 """Check prerequisites.
5074 - the new node is not already in the config
5076 - its parameters (single/dual homed) matches the cluster
5078 Any errors are signaled by raising errors.OpPrereqError.
5082 hostname = self.hostname
5083 node = hostname.name
5084 primary_ip = self.op.primary_ip = hostname.ip
5085 if self.op.secondary_ip is None:
5086 if self.primary_ip_family == netutils.IP6Address.family:
5087 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5088 " IPv4 address must be given as secondary",
5090 self.op.secondary_ip = primary_ip
5092 secondary_ip = self.op.secondary_ip
5093 if not netutils.IP4Address.IsValid(secondary_ip):
5094 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5095 " address" % secondary_ip, errors.ECODE_INVAL)
5097 node_list = cfg.GetNodeList()
5098 if not self.op.readd and node in node_list:
5099 raise errors.OpPrereqError("Node %s is already in the configuration" %
5100 node, errors.ECODE_EXISTS)
5101 elif self.op.readd and node not in node_list:
5102 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5105 self.changed_primary_ip = False
5107 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5108 if self.op.readd and node == existing_node_name:
5109 if existing_node.secondary_ip != secondary_ip:
5110 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5111 " address configuration as before",
5113 if existing_node.primary_ip != primary_ip:
5114 self.changed_primary_ip = True
5118 if (existing_node.primary_ip == primary_ip or
5119 existing_node.secondary_ip == primary_ip or
5120 existing_node.primary_ip == secondary_ip or
5121 existing_node.secondary_ip == secondary_ip):
5122 raise errors.OpPrereqError("New node ip address(es) conflict with"
5123 " existing node %s" % existing_node.name,
5124 errors.ECODE_NOTUNIQUE)
5126 # After this 'if' block, None is no longer a valid value for the
5127 # _capable op attributes
5129 old_node = self.cfg.GetNodeInfo(node)
5130 assert old_node is not None, "Can't retrieve locked node %s" % node
5131 for attr in self._NFLAGS:
5132 if getattr(self.op, attr) is None:
5133 setattr(self.op, attr, getattr(old_node, attr))
5135 for attr in self._NFLAGS:
5136 if getattr(self.op, attr) is None:
5137 setattr(self.op, attr, True)
5139 if self.op.readd and not self.op.vm_capable:
5140 pri, sec = cfg.GetNodeInstances(node)
5142 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5143 " flag set to false, but it already holds"
5144 " instances" % node,
5147 # check that the type of the node (single versus dual homed) is the
5148 # same as for the master
5149 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5150 master_singlehomed = myself.secondary_ip == myself.primary_ip
5151 newbie_singlehomed = secondary_ip == primary_ip
5152 if master_singlehomed != newbie_singlehomed:
5153 if master_singlehomed:
5154 raise errors.OpPrereqError("The master has no secondary ip but the"
5155 " new node has one",
5158 raise errors.OpPrereqError("The master has a secondary ip but the"
5159 " new node doesn't have one",
5162 # checks reachability
5163 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5164 raise errors.OpPrereqError("Node not reachable by ping",
5165 errors.ECODE_ENVIRON)
5167 if not newbie_singlehomed:
5168 # check reachability from my secondary ip to newbie's secondary ip
5169 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5170 source=myself.secondary_ip):
5171 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5172 " based ping to node daemon port",
5173 errors.ECODE_ENVIRON)
5180 if self.op.master_capable:
5181 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5183 self.master_candidate = False
5186 self.new_node = old_node
5188 node_group = cfg.LookupNodeGroup(self.op.group)
5189 self.new_node = objects.Node(name=node,
5190 primary_ip=primary_ip,
5191 secondary_ip=secondary_ip,
5192 master_candidate=self.master_candidate,
5193 offline=False, drained=False,
5196 if self.op.ndparams:
5197 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5199 def Exec(self, feedback_fn):
5200 """Adds the new node to the cluster.
5203 new_node = self.new_node
5204 node = new_node.name
5206 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5209 # We adding a new node so we assume it's powered
5210 new_node.powered = True
5212 # for re-adds, reset the offline/drained/master-candidate flags;
5213 # we need to reset here, otherwise offline would prevent RPC calls
5214 # later in the procedure; this also means that if the re-add
5215 # fails, we are left with a non-offlined, broken node
5217 new_node.drained = new_node.offline = False # pylint: disable=W0201
5218 self.LogInfo("Readding a node, the offline/drained flags were reset")
5219 # if we demote the node, we do cleanup later in the procedure
5220 new_node.master_candidate = self.master_candidate
5221 if self.changed_primary_ip:
5222 new_node.primary_ip = self.op.primary_ip
5224 # copy the master/vm_capable flags
5225 for attr in self._NFLAGS:
5226 setattr(new_node, attr, getattr(self.op, attr))
5228 # notify the user about any possible mc promotion
5229 if new_node.master_candidate:
5230 self.LogInfo("Node will be a master candidate")
5232 if self.op.ndparams:
5233 new_node.ndparams = self.op.ndparams
5235 new_node.ndparams = {}
5237 # check connectivity
5238 result = self.rpc.call_version([node])[node]
5239 result.Raise("Can't get version information from node %s" % node)
5240 if constants.PROTOCOL_VERSION == result.payload:
5241 logging.info("Communication to node %s fine, sw version %s match",
5242 node, result.payload)
5244 raise errors.OpExecError("Version mismatch master version %s,"
5245 " node version %s" %
5246 (constants.PROTOCOL_VERSION, result.payload))
5248 # Add node to our /etc/hosts, and add key to known_hosts
5249 if self.cfg.GetClusterInfo().modify_etc_hosts:
5250 master_node = self.cfg.GetMasterNode()
5251 result = self.rpc.call_etc_hosts_modify(master_node,
5252 constants.ETC_HOSTS_ADD,
5255 result.Raise("Can't update hosts file with new host data")
5257 if new_node.secondary_ip != new_node.primary_ip:
5258 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5261 node_verify_list = [self.cfg.GetMasterNode()]
5262 node_verify_param = {
5263 constants.NV_NODELIST: ([node], {}),
5264 # TODO: do a node-net-test as well?
5267 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5268 self.cfg.GetClusterName())
5269 for verifier in node_verify_list:
5270 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5271 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5273 for failed in nl_payload:
5274 feedback_fn("ssh/hostname verification failed"
5275 " (checking from %s): %s" %
5276 (verifier, nl_payload[failed]))
5277 raise errors.OpExecError("ssh/hostname verification failed")
5279 if self.op.readd:
5280 _RedistributeAncillaryFiles(self)
5281 self.context.ReaddNode(new_node)
5282 # make sure we redistribute the config
5283 self.cfg.Update(new_node, feedback_fn)
5284 # and make sure the new node will not have old files around
5285 if not new_node.master_candidate:
5286 result = self.rpc.call_node_demote_from_mc(new_node.name)
5287 msg = result.fail_msg
5288 if msg:
5289 self.LogWarning("Node failed to demote itself from master"
5290 " candidate status: %s" % msg)
5291 else:
5292 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5293 additional_vm=self.op.vm_capable)
5294 self.context.AddNode(new_node, self.proc.GetECId())
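# Note the asymmetry at the end of Exec: a re-added node goes through
# ReaddNode (it is already known to the configuration), while a brand-new
# node is registered via AddNode and gets the ancillary files pushed to it
# explicitly as an additional node.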
5297 class LUNodeSetParams(LogicalUnit):
5298 """Modifies the parameters of a node.
5300 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5301 to the node role (as _ROLE_*)
5302 @cvar _R2F: a dictionary from node role to tuples of flags
5303 @cvar _FLAGS: a list of attribute names corresponding to the flags
5306 HPATH = "node-modify"
5307 HTYPE = constants.HTYPE_NODE
5309 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5311 (True, False, False): _ROLE_CANDIDATE,
5312 (False, True, False): _ROLE_DRAINED,
5313 (False, False, True): _ROLE_OFFLINE,
5314 (False, False, False): _ROLE_REGULAR,
5316 _R2F = dict((v, k) for k, v in _F2R.items())
5317 _FLAGS = ["master_candidate", "drained", "offline"]
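# For illustration, the tables above map e.g.
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_OFFLINE]        == (False, False, True)
# with _FLAGS naming the node attributes in the same tuple order.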
5319 def CheckArguments(self):
5320 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5321 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5322 self.op.master_capable, self.op.vm_capable,
5323 self.op.secondary_ip, self.op.ndparams]
5324 if all_mods.count(None) == len(all_mods):
5325 raise errors.OpPrereqError("Please pass at least one modification",
5327 if all_mods.count(True) > 1:
5328 raise errors.OpPrereqError("Can't set the node into more than one"
5329 " state at the same time",
5332 # Boolean value that tells us whether we might be demoting from MC
5333 self.might_demote = (self.op.master_candidate == False or
5334 self.op.offline == True or
5335 self.op.drained == True or
5336 self.op.master_capable == False)
5338 if self.op.secondary_ip:
5339 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5340 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5341 " address" % self.op.secondary_ip,
5344 self.lock_all = self.op.auto_promote and self.might_demote
5345 self.lock_instances = self.op.secondary_ip is not None
5347 def _InstanceFilter(self, instance):
5348 """Filter for getting affected instances.
5351 return (instance.disk_template in constants.DTS_INT_MIRROR and
5352 self.op.node_name in instance.all_nodes)
5354 def ExpandNames(self):
5356 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5358 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5360 # Since modifying a node can have severe effects on currently running
5361 # operations the resource lock is at least acquired in shared mode
5362 self.needed_locks[locking.LEVEL_NODE_RES] = \
5363 self.needed_locks[locking.LEVEL_NODE]
5365 # Get node resource and instance locks in shared mode; they are not used
5366 # for anything but read-only access
5367 self.share_locks[locking.LEVEL_NODE_RES] = 1
5368 self.share_locks[locking.LEVEL_INSTANCE] = 1
5370 if self.lock_instances:
5371 self.needed_locks[locking.LEVEL_INSTANCE] = \
5372 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5374 def BuildHooksEnv(self):
5377 This runs on the master node.
5381 "OP_TARGET": self.op.node_name,
5382 "MASTER_CANDIDATE": str(self.op.master_candidate),
5383 "OFFLINE": str(self.op.offline),
5384 "DRAINED": str(self.op.drained),
5385 "MASTER_CAPABLE": str(self.op.master_capable),
5386 "VM_CAPABLE": str(self.op.vm_capable),
5389 def BuildHooksNodes(self):
5390 """Build hooks nodes.
5393 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5396 def CheckPrereq(self):
5397 """Check prerequisites.
5399 This only checks the instance list against the existing names.
5402 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5404 if self.lock_instances:
5405 affected_instances = \
5406 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5408 # Verify instance locks
5409 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5410 wanted_instances = frozenset(affected_instances.keys())
5411 if wanted_instances - owned_instances:
5412 raise errors.OpPrereqError("Instances affected by changing node %s's"
5413 " secondary IP address have changed since"
5414 " locks were acquired, wanted '%s', have"
5415 " '%s'; retry the operation" %
5417 utils.CommaJoin(wanted_instances),
5418 utils.CommaJoin(owned_instances)),
5421 affected_instances = None
5423 if (self.op.master_candidate is not None or
5424 self.op.drained is not None or
5425 self.op.offline is not None):
5426 # we can't change the master's node flags
5427 if self.op.node_name == self.cfg.GetMasterNode():
5428 raise errors.OpPrereqError("The master role can be changed"
5429 " only via master-failover",
5432 if self.op.master_candidate and not node.master_capable:
5433 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5434 " it a master candidate" % node.name,
5437 if self.op.vm_capable == False:
5438 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5440 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5441 " the vm_capable flag" % node.name,
5444 if node.master_candidate and self.might_demote and not self.lock_all:
5445 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5446 # check if after removing the current node, we're missing master
5448 (mc_remaining, mc_should, _) = \
5449 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5450 if mc_remaining < mc_should:
5451 raise errors.OpPrereqError("Not enough master candidates, please"
5452 " pass auto promote option to allow"
5453 " promotion", errors.ECODE_STATE)
5455 self.old_flags = old_flags = (node.master_candidate,
5456 node.drained, node.offline)
5457 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5458 self.old_role = old_role = self._F2R[old_flags]
5460 # Check for ineffective changes
5461 for attr in self._FLAGS:
5462 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5463 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5464 setattr(self.op, attr, None)
5466 # Past this point, any flag change to False means a transition
5467 # away from the respective state, as only real changes are kept
5469 # TODO: We might query the real power state if it supports OOB
5470 if _SupportsOob(self.cfg, node):
5471 if self.op.offline is False and not (node.powered or
5472 self.op.powered == True):
5473 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5474 " offline status can be reset") %
5476 elif self.op.powered is not None:
5477 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5478 " as it does not support out-of-band"
5479 " handling") % self.op.node_name)
5481 # If we're being deofflined/drained, we'll MC ourself if needed
5482 if (self.op.drained == False or self.op.offline == False or
5483 (self.op.master_capable and not node.master_capable)):
5484 if _DecideSelfPromotion(self):
5485 self.op.master_candidate = True
5486 self.LogInfo("Auto-promoting node to master candidate")
5488 # If we're no longer master capable, we'll demote ourselves from MC
5489 if self.op.master_capable == False and node.master_candidate:
5490 self.LogInfo("Demoting from master candidate")
5491 self.op.master_candidate = False
5494 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5495 if self.op.master_candidate:
5496 new_role = self._ROLE_CANDIDATE
5497 elif self.op.drained:
5498 new_role = self._ROLE_DRAINED
5499 elif self.op.offline:
5500 new_role = self._ROLE_OFFLINE
5501 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5502 # False is still in new flags, which means we're un-setting (the
5504 new_role = self._ROLE_REGULAR
5505 else: # no new flags, nothing, keep old role
5508 self.new_role = new_role
5510 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5511 # Trying to transition out of offline status
5512 # TODO: Use standard RPC runner, but make sure it works when the node is
5513 # still marked offline
5514 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5516 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5517 " to report its version: %s" %
5518 (node.name, result.fail_msg),
5521 self.LogWarning("Transitioning node from offline to online state"
5522 " without using re-add. Please make sure the node"
5525 if self.op.secondary_ip:
5526 # Ok even without locking, because this can't be changed by any LU
5527 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5528 master_singlehomed = master.secondary_ip == master.primary_ip
5529 if master_singlehomed and self.op.secondary_ip:
5530 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5531 " homed cluster", errors.ECODE_INVAL)
5533 assert not (frozenset(affected_instances) -
5534 self.owned_locks(locking.LEVEL_INSTANCE))
5537 if affected_instances:
5538 raise errors.OpPrereqError("Cannot change secondary IP address:"
5539 " offline node has instances (%s)"
5540 " configured to use it" %
5541 utils.CommaJoin(affected_instances.keys()))
5543 # On online nodes, check that no instances are running, and that
5544 # the node has the new ip and we can reach it.
5545 for instance in affected_instances.values():
5546 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5547 msg="cannot change secondary ip")
5549 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5550 if master.name != node.name:
5551 # check reachability from master secondary ip to new secondary ip
5552 if not netutils.TcpPing(self.op.secondary_ip,
5553 constants.DEFAULT_NODED_PORT,
5554 source=master.secondary_ip):
5555 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5556 " based ping to node daemon port",
5557 errors.ECODE_ENVIRON)
5559 if self.op.ndparams:
5560 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5561 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5562 self.new_ndparams = new_ndparams
5564 def Exec(self, feedback_fn):
5569 old_role = self.old_role
5570 new_role = self.new_role
5574 if self.op.ndparams:
5575 node.ndparams = self.new_ndparams
5577 if self.op.powered is not None:
5578 node.powered = self.op.powered
5580 for attr in ["master_capable", "vm_capable"]:
5581 val = getattr(self.op, attr)
5583 setattr(node, attr, val)
5584 result.append((attr, str(val)))
5586 if new_role != old_role:
5587 # Tell the node to demote itself, if no longer MC and not offline
5588 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5589 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5591 self.LogWarning("Node failed to demote itself: %s", msg)
5593 new_flags = self._R2F[new_role]
5594 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5596 result.append((desc, str(nf)))
5597 (node.master_candidate, node.drained, node.offline) = new_flags
5599 # we locked all nodes, we adjust the CP before updating this node
5601 _AdjustCandidatePool(self, [node.name])
5603 if self.op.secondary_ip:
5604 node.secondary_ip = self.op.secondary_ip
5605 result.append(("secondary_ip", self.op.secondary_ip))
5607 # this will trigger configuration file update, if needed
5608 self.cfg.Update(node, feedback_fn)
5610 # this will trigger job queue propagation or cleanup if the mc
5612 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5613 self.context.ReaddNode(node)
5615 return result
5618 class LUNodePowercycle(NoHooksLU):
5619 """Powercycles a node.
5624 def CheckArguments(self):
5625 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5626 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5627 raise errors.OpPrereqError("The node is the master and the force"
5628 " parameter was not set",
5631 def ExpandNames(self):
5632 """Locking for PowercycleNode.
5634 This is a last-resort option and shouldn't block on other
5635 jobs. Therefore, we grab no locks.
5638 self.needed_locks = {}
5640 def Exec(self, feedback_fn):
5644 result = self.rpc.call_node_powercycle(self.op.node_name,
5645 self.cfg.GetHypervisorType())
5646 result.Raise("Failed to schedule the reboot")
5647 return result.payload
5650 class LUClusterQuery(NoHooksLU):
5651 """Query cluster configuration.
5656 def ExpandNames(self):
5657 self.needed_locks = {}
5659 def Exec(self, feedback_fn):
5660 """Return cluster config.
5663 cluster = self.cfg.GetClusterInfo()
5666 # Filter just for enabled hypervisors
5667 for os_name, hv_dict in cluster.os_hvp.items():
5668 os_hvp[os_name] = {}
5669 for hv_name, hv_params in hv_dict.items():
5670 if hv_name in cluster.enabled_hypervisors:
5671 os_hvp[os_name][hv_name] = hv_params
5673 # Convert ip_family to ip_version
5674 primary_ip_version = constants.IP4_VERSION
5675 if cluster.primary_ip_family == netutils.IP6Address.family:
5676 primary_ip_version = constants.IP6_VERSION
5679 "software_version": constants.RELEASE_VERSION,
5680 "protocol_version": constants.PROTOCOL_VERSION,
5681 "config_version": constants.CONFIG_VERSION,
5682 "os_api_version": max(constants.OS_API_VERSIONS),
5683 "export_version": constants.EXPORT_VERSION,
5684 "architecture": (platform.architecture()[0], platform.machine()),
5685 "name": cluster.cluster_name,
5686 "master": cluster.master_node,
5687 "default_hypervisor": cluster.enabled_hypervisors[0],
5688 "enabled_hypervisors": cluster.enabled_hypervisors,
5689 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5690 for hypervisor_name in cluster.enabled_hypervisors]),
5692 "beparams": cluster.beparams,
5693 "osparams": cluster.osparams,
5694 "nicparams": cluster.nicparams,
5695 "ndparams": cluster.ndparams,
5696 "candidate_pool_size": cluster.candidate_pool_size,
5697 "master_netdev": cluster.master_netdev,
5698 "master_netmask": cluster.master_netmask,
5699 "use_external_mip_script": cluster.use_external_mip_script,
5700 "volume_group_name": cluster.volume_group_name,
5701 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5702 "file_storage_dir": cluster.file_storage_dir,
5703 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5704 "maintain_node_health": cluster.maintain_node_health,
5705 "ctime": cluster.ctime,
5706 "mtime": cluster.mtime,
5707 "uuid": cluster.uuid,
5708 "tags": list(cluster.GetTags()),
5709 "uid_pool": cluster.uid_pool,
5710 "default_iallocator": cluster.default_iallocator,
5711 "reserved_lvs": cluster.reserved_lvs,
5712 "primary_ip_version": primary_ip_version,
5713 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5714 "hidden_os": cluster.hidden_os,
5715 "blacklisted_os": cluster.blacklisted_os,
5721 class LUClusterConfigQuery(NoHooksLU):
5722 """Return configuration values.
5726 _FIELDS_DYNAMIC = utils.FieldSet()
5727 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5728 "watcher_pause", "volume_group_name")
5730 def CheckArguments(self):
5731 _CheckOutputFields(static=self._FIELDS_STATIC,
5732 dynamic=self._FIELDS_DYNAMIC,
5733 selected=self.op.output_fields)
5735 def ExpandNames(self):
5736 self.needed_locks = {}
5738 def Exec(self, feedback_fn):
5739 """Dump a representation of the cluster config to the standard output.
5743 for field in self.op.output_fields:
5744 if field == "cluster_name":
5745 entry = self.cfg.GetClusterName()
5746 elif field == "master_node":
5747 entry = self.cfg.GetMasterNode()
5748 elif field == "drain_flag":
5749 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5750 elif field == "watcher_pause":
5751 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5752 elif field == "volume_group_name":
5753 entry = self.cfg.GetVGName()
5755 raise errors.ParameterError(field)
5756 values.append(entry)
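# Illustrative sketch (not part of the original module): a query restricted to
# the static fields accepted above could be built as follows (the opcode name
# OpClusterConfigQuery is an assumption here):
#
#   op = opcodes.OpClusterConfigQuery(
#     output_fields=["cluster_name", "master_node", "volume_group_name"])
#
# Exec then collects one entry per requested field, in the order given.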
5760 class LUInstanceActivateDisks(NoHooksLU):
5761 """Bring up an instance's disks.
5766 def ExpandNames(self):
5767 self._ExpandAndLockInstance()
5768 self.needed_locks[locking.LEVEL_NODE] = []
5769 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5771 def DeclareLocks(self, level):
5772 if level == locking.LEVEL_NODE:
5773 self._LockInstancesNodes()
5775 def CheckPrereq(self):
5776 """Check prerequisites.
5778 This checks that the instance is in the cluster.
5781 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5782 assert self.instance is not None, \
5783 "Cannot retrieve locked instance %s" % self.op.instance_name
5784 _CheckNodeOnline(self, self.instance.primary_node)
5786 def Exec(self, feedback_fn):
5787 """Activate the disks.
5790 disks_ok, disks_info = \
5791 _AssembleInstanceDisks(self, self.instance,
5792 ignore_size=self.op.ignore_size)
5794 raise errors.OpExecError("Cannot activate block devices")
5799 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5801 """Prepare the block devices for an instance.
5803 This sets up the block devices on all nodes.
5805 @type lu: L{LogicalUnit}
5806 @param lu: the logical unit on whose behalf we execute
5807 @type instance: L{objects.Instance}
5808 @param instance: the instance for whose disks we assemble
5809 @type disks: list of L{objects.Disk} or None
5810 @param disks: which disks to assemble (or all, if None)
5811 @type ignore_secondaries: boolean
5812 @param ignore_secondaries: if true, errors on secondary nodes
5813 won't result in an error return from the function
5814 @type ignore_size: boolean
5815 @param ignore_size: if true, the current known size of the disk
5816 will not be used during the disk activation, useful for cases
5817 when the size is wrong
5818 @return: False if the operation failed, otherwise a list of
5819 (host, instance_visible_name, node_visible_name)
5820 with the mapping from node devices to instance devices
5825 iname = instance.name
5826 disks = _ExpandCheckDisks(instance, disks)
5828 # With the two-pass mechanism we try to reduce the window of
5829 # opportunity for the race condition of switching DRBD to primary
5830 # before handshaking occurred, but we do not eliminate it
5832 # The proper fix would be to wait (with some limits) until the
5833 # connection has been made and drbd transitions from WFConnection
5834 # into any other network-connected state (Connected, SyncTarget,
5837 # 1st pass, assemble on all nodes in secondary mode
5838 for idx, inst_disk in enumerate(disks):
5839 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5841 node_disk = node_disk.Copy()
5842 node_disk.UnsetSize()
5843 lu.cfg.SetDiskID(node_disk, node)
5844 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5845 msg = result.fail_msg
5847 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5848 " (is_primary=False, pass=1): %s",
5849 inst_disk.iv_name, node, msg)
5850 if not ignore_secondaries:
5853 # FIXME: race condition on drbd migration to primary
5855 # 2nd pass, do only the primary node
5856 for idx, inst_disk in enumerate(disks):
5859 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5860 if node != instance.primary_node:
5863 node_disk = node_disk.Copy()
5864 node_disk.UnsetSize()
5865 lu.cfg.SetDiskID(node_disk, node)
5866 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5867 msg = result.fail_msg
5869 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5870 " (is_primary=True, pass=2): %s",
5871 inst_disk.iv_name, node, msg)
5874 dev_path = result.payload
5876 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5878 # leave the disks configured for the primary node
5879 # this is a workaround that would be better fixed by
5880 # improving the logical/physical id handling
5882 lu.cfg.SetDiskID(disk, instance.primary_node)
5884 return disks_ok, device_info
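# Illustrative sketch (not part of the original module): a typical caller, such
# as an activate-disks Exec method, consumes the (disks_ok, device_info) pair
# roughly as follows ("lu" and "instance" stand for an already-prepared
# LogicalUnit and objects.Instance):
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("Disk %s visible on %s as %s", iv_name, node, dev_path)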
5887 def _StartInstanceDisks(lu, instance, force):
5888 """Start the disks of an instance.
5891 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5892 ignore_secondaries=force)
5894 _ShutdownInstanceDisks(lu, instance)
5895 if force is not None and not force:
5896 lu.proc.LogWarning("", hint="If the message above refers to a"
5898 " you can retry the operation using '--force'.")
5899 raise errors.OpExecError("Disk consistency error")
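# Illustrative sketch (not part of the original module): "force" is forwarded
# to _AssembleInstanceDisks as ignore_secondaries, so a forced start tolerates
# degraded secondaries while a regular start does not:
#
#   _StartInstanceDisks(lu, instance, False)   # strict, fail on any node
#   _StartInstanceDisks(lu, instance, True)    # tolerate secondary failures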
5902 class LUInstanceDeactivateDisks(NoHooksLU):
5903 """Shutdown an instance's disks.
5908 def ExpandNames(self):
5909 self._ExpandAndLockInstance()
5910 self.needed_locks[locking.LEVEL_NODE] = []
5911 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5913 def DeclareLocks(self, level):
5914 if level == locking.LEVEL_NODE:
5915 self._LockInstancesNodes()
5917 def CheckPrereq(self):
5918 """Check prerequisites.
5920 This checks that the instance is in the cluster.
5923 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5924 assert self.instance is not None, \
5925 "Cannot retrieve locked instance %s" % self.op.instance_name
5927 def Exec(self, feedback_fn):
5928 """Deactivate the disks
5931 instance = self.instance
5933 _ShutdownInstanceDisks(self, instance)
5935 _SafeShutdownInstanceDisks(self, instance)
5938 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5939 """Shutdown block devices of an instance.
5941 This function checks if an instance is running, before calling
5942 _ShutdownInstanceDisks.
5945 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5946 _ShutdownInstanceDisks(lu, instance, disks=disks)
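# Illustrative sketch (not part of the original module): the only difference
# between the two shutdown helpers is the instance-state check:
#
#   _SafeShutdownInstanceDisks(lu, instance)   # refuses unless instance is down
#   _ShutdownInstanceDisks(lu, instance)       # shuts down unconditionally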
5949 def _ExpandCheckDisks(instance, disks):
5950 """Return the instance disks selected by the disks list
5952 @type disks: list of L{objects.Disk} or None
5953 @param disks: selected disks
5954 @rtype: list of L{objects.Disk}
5955 @return: selected instance disks to act on
5959 return instance.disks
5961 if not set(disks).issubset(instance.disks):
5962 raise errors.ProgrammerError("Can only act on disks belonging to the"
5967 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5968 """Shutdown block devices of an instance.
5970 This does the shutdown on all nodes of the instance.
5972 If ignore_primary is false, errors on the primary node are
5977 disks = _ExpandCheckDisks(instance, disks)
5980 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5981 lu.cfg.SetDiskID(top_disk, node)
5982 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5983 msg = result.fail_msg
5985 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5986 disk.iv_name, node, msg)
5987 if ((node == instance.primary_node and not ignore_primary) or
5988 (node != instance.primary_node and not result.offline)):
5993 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5994 """Checks if a node has enough free memory.
5996 This function checks if a given node has the needed amount of free
5997 memory. In case the node has less memory or we cannot get the
5998 information from the node, this function raises an OpPrereqError
6001 @type lu: C{LogicalUnit}
6002 @param lu: a logical unit from which we get configuration data
6004 @param node: the node to check
6005 @type reason: C{str}
6006 @param reason: string to use in the error message
6007 @type requested: C{int}
6008 @param requested: the amount of memory in MiB to check for
6009 @type hypervisor_name: C{str}
6010 @param hypervisor_name: the hypervisor to ask for memory stats
6011 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6012 we cannot check the node
6015 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6016 nodeinfo[node].Raise("Can't get data from node %s" % node,
6017 prereq=True, ecode=errors.ECODE_ENVIRON)
6018 free_mem = nodeinfo[node].payload.get("memory_free", None)
6019 if not isinstance(free_mem, int):
6020 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6021 " was '%s'" % (node, free_mem),
6022 errors.ECODE_ENVIRON)
6023 if requested > free_mem:
6024 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6025 " needed %s MiB, available %s MiB" %
6026 (node, reason, requested, free_mem),
6030 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6031 """Checks if nodes have enough free disk space in the all VGs.
6033 This function check if all given nodes have the needed amount of
6034 free disk. In case any node has less disk or we cannot get the
6035 information from the node, this function raise an OpPrereqError
6038 @type lu: C{LogicalUnit}
6039 @param lu: a logical unit from which we get configuration data
6040 @type nodenames: C{list}
6041 @param nodenames: the list of node names to check
6042 @type req_sizes: C{dict}
6043 @param req_sizes: the hash of vg and corresponding amount of disk in
6045 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6046 or we cannot check the node
6049 for vg, req_size in req_sizes.items():
6050 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
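# Illustrative sketch (not part of the original module): req_sizes maps each
# volume group to the total space, in MiB, needed on it; the node and VG names
# below are made-up example values.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"], {"xenvg": 2048})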
6053 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6054 """Checks if nodes have enough free disk space in the specified VG.
6056 This function checks if all given nodes have the needed amount of
6057 free disk. In case any node has less disk or we cannot get the
6058 information from the node, this function raises an OpPrereqError
6061 @type lu: C{LogicalUnit}
6062 @param lu: a logical unit from which we get configuration data
6063 @type nodenames: C{list}
6064 @param nodenames: the list of node names to check
6066 @param vg: the volume group to check
6067 @type requested: C{int}
6068 @param requested: the amount of disk in MiB to check for
6069 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6070 or we cannot check the node
6073 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6074 for node in nodenames:
6075 info = nodeinfo[node]
6076 info.Raise("Cannot get current information from node %s" % node,
6077 prereq=True, ecode=errors.ECODE_ENVIRON)
6078 vg_free = info.payload.get("vg_free", None)
6079 if not isinstance(vg_free, int):
6080 raise errors.OpPrereqError("Can't compute free disk space on node"
6081 " %s for vg %s, result was '%s'" %
6082 (node, vg, vg_free), errors.ECODE_ENVIRON)
6083 if requested > vg_free:
6084 raise errors.OpPrereqError("Not enough disk space on target node %s"
6085 " vg %s: required %d MiB, available %d MiB" %
6086 (node, vg, requested, vg_free),
6090 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6091 """Checks if nodes have enough physical CPUs
6093 This function checks if all given nodes have the needed number of
6094 physical CPUs. In case any node has fewer CPUs or we cannot get the
6095 information from the node, this function raises an OpPrereqError
6098 @type lu: C{LogicalUnit}
6099 @param lu: a logical unit from which we get configuration data
6100 @type nodenames: C{list}
6101 @param nodenames: the list of node names to check
6102 @type requested: C{int}
6103 @param requested: the minimum acceptable number of physical CPUs
6104 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6105 or we cannot check the node
6108 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6109 for node in nodenames:
6110 info = nodeinfo[node]
6111 info.Raise("Cannot get current information from node %s" % node,
6112 prereq=True, ecode=errors.ECODE_ENVIRON)
6113 num_cpus = info.payload.get("cpu_total", None)
6114 if not isinstance(num_cpus, int):
6115 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6116 " on node %s, result was '%s'" %
6117 (node, num_cpus), errors.ECODE_ENVIRON)
6118 if requested > num_cpus:
6119 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6120 "required" % (node, num_cpus, requested),
6124 class LUInstanceStartup(LogicalUnit):
6125 """Starts an instance.
6128 HPATH = "instance-start"
6129 HTYPE = constants.HTYPE_INSTANCE
6132 def CheckArguments(self):
6134 if self.op.beparams:
6135 # fill the beparams dict
6136 objects.UpgradeBeParams(self.op.beparams)
6137 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6139 def ExpandNames(self):
6140 self._ExpandAndLockInstance()
6142 def BuildHooksEnv(self):
6145 This runs on master, primary and secondary nodes of the instance.
6149 "FORCE": self.op.force,
6152 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6156 def BuildHooksNodes(self):
6157 """Build hooks nodes.
6160 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6163 def CheckPrereq(self):
6164 """Check prerequisites.
6166 This checks that the instance is in the cluster.
6169 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6170 assert self.instance is not None, \
6171 "Cannot retrieve locked instance %s" % self.op.instance_name
6174 if self.op.hvparams:
6175 # check hypervisor parameter syntax (locally)
6176 cluster = self.cfg.GetClusterInfo()
6177 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6178 filled_hvp = cluster.FillHV(instance)
6179 filled_hvp.update(self.op.hvparams)
6180 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6181 hv_type.CheckParameterSyntax(filled_hvp)
6182 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6184 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6186 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6188 if self.primary_offline and self.op.ignore_offline_nodes:
6189 self.proc.LogWarning("Ignoring offline primary node")
6191 if self.op.hvparams or self.op.beparams:
6192 self.proc.LogWarning("Overridden parameters are ignored")
6194 _CheckNodeOnline(self, instance.primary_node)
6196 bep = self.cfg.GetClusterInfo().FillBE(instance)
6198 # check bridges existence
6199 _CheckInstanceBridgesExist(self, instance)
6201 remote_info = self.rpc.call_instance_info(instance.primary_node,
6203 instance.hypervisor)
6204 remote_info.Raise("Error checking node %s" % instance.primary_node,
6205 prereq=True, ecode=errors.ECODE_ENVIRON)
6206 if not remote_info.payload: # not running already
6207 _CheckNodeFreeMemory(self, instance.primary_node,
6208 "starting instance %s" % instance.name,
6209 bep[constants.BE_MAXMEM], instance.hypervisor)
6211 def Exec(self, feedback_fn):
6212 """Start the instance.
6215 instance = self.instance
6216 force = self.op.force
6218 if not self.op.no_remember:
6219 self.cfg.MarkInstanceUp(instance.name)
6221 if self.primary_offline:
6222 assert self.op.ignore_offline_nodes
6223 self.proc.LogInfo("Primary node offline, marked instance as started")
6225 node_current = instance.primary_node
6227 _StartInstanceDisks(self, instance, force)
6230 self.rpc.call_instance_start(node_current,
6231 (instance, self.op.hvparams,
6233 self.op.startup_paused)
6234 msg = result.fail_msg
6236 _ShutdownInstanceDisks(self, instance)
6237 raise errors.OpExecError("Could not start instance: %s" % msg)
6240 class LUInstanceReboot(LogicalUnit):
6241 """Reboot an instance.
6244 HPATH = "instance-reboot"
6245 HTYPE = constants.HTYPE_INSTANCE
6248 def ExpandNames(self):
6249 self._ExpandAndLockInstance()
6251 def BuildHooksEnv(self):
6254 This runs on master, primary and secondary nodes of the instance.
6258 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6259 "REBOOT_TYPE": self.op.reboot_type,
6260 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6263 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6267 def BuildHooksNodes(self):
6268 """Build hooks nodes.
6271 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6274 def CheckPrereq(self):
6275 """Check prerequisites.
6277 This checks that the instance is in the cluster.
6280 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6281 assert self.instance is not None, \
6282 "Cannot retrieve locked instance %s" % self.op.instance_name
6283 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6284 _CheckNodeOnline(self, instance.primary_node)
6286 # check bridges existence
6287 _CheckInstanceBridgesExist(self, instance)
6289 def Exec(self, feedback_fn):
6290 """Reboot the instance.
6293 instance = self.instance
6294 ignore_secondaries = self.op.ignore_secondaries
6295 reboot_type = self.op.reboot_type
6297 remote_info = self.rpc.call_instance_info(instance.primary_node,
6299 instance.hypervisor)
6300 remote_info.Raise("Error checking node %s" % instance.primary_node)
6301 instance_running = bool(remote_info.payload)
6303 node_current = instance.primary_node
6305 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6306 constants.INSTANCE_REBOOT_HARD]:
6307 for disk in instance.disks:
6308 self.cfg.SetDiskID(disk, node_current)
6309 result = self.rpc.call_instance_reboot(node_current, instance,
6311 self.op.shutdown_timeout)
6312 result.Raise("Could not reboot instance")
6314 if instance_running:
6315 result = self.rpc.call_instance_shutdown(node_current, instance,
6316 self.op.shutdown_timeout)
6317 result.Raise("Could not shutdown instance for full reboot")
6318 _ShutdownInstanceDisks(self, instance)
6320 self.LogInfo("Instance %s was already stopped, starting now",
6322 _StartInstanceDisks(self, instance, ignore_secondaries)
6323 result = self.rpc.call_instance_start(node_current,
6324 (instance, None, None), False)
6325 msg = result.fail_msg
6327 _ShutdownInstanceDisks(self, instance)
6328 raise errors.OpExecError("Could not start instance for"
6329 " full reboot: %s" % msg)
6331 self.cfg.MarkInstanceUp(instance.name)
6334 class LUInstanceShutdown(LogicalUnit):
6335 """Shutdown an instance.
6338 HPATH = "instance-stop"
6339 HTYPE = constants.HTYPE_INSTANCE
6342 def ExpandNames(self):
6343 self._ExpandAndLockInstance()
6345 def BuildHooksEnv(self):
6348 This runs on master, primary and secondary nodes of the instance.
6351 env = _BuildInstanceHookEnvByObject(self, self.instance)
6352 env["TIMEOUT"] = self.op.timeout
6355 def BuildHooksNodes(self):
6356 """Build hooks nodes.
6359 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6362 def CheckPrereq(self):
6363 """Check prerequisites.
6365 This checks that the instance is in the cluster.
6368 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6369 assert self.instance is not None, \
6370 "Cannot retrieve locked instance %s" % self.op.instance_name
6372 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6374 self.primary_offline = \
6375 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6377 if self.primary_offline and self.op.ignore_offline_nodes:
6378 self.proc.LogWarning("Ignoring offline primary node")
6380 _CheckNodeOnline(self, self.instance.primary_node)
6382 def Exec(self, feedback_fn):
6383 """Shutdown the instance.
6386 instance = self.instance
6387 node_current = instance.primary_node
6388 timeout = self.op.timeout
6390 if not self.op.no_remember:
6391 self.cfg.MarkInstanceDown(instance.name)
6393 if self.primary_offline:
6394 assert self.op.ignore_offline_nodes
6395 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6397 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6398 msg = result.fail_msg
6400 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6402 _ShutdownInstanceDisks(self, instance)
6405 class LUInstanceReinstall(LogicalUnit):
6406 """Reinstall an instance.
6409 HPATH = "instance-reinstall"
6410 HTYPE = constants.HTYPE_INSTANCE
6413 def ExpandNames(self):
6414 self._ExpandAndLockInstance()
6416 def BuildHooksEnv(self):
6419 This runs on master, primary and secondary nodes of the instance.
6422 return _BuildInstanceHookEnvByObject(self, self.instance)
6424 def BuildHooksNodes(self):
6425 """Build hooks nodes.
6428 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6431 def CheckPrereq(self):
6432 """Check prerequisites.
6434 This checks that the instance is in the cluster and is not running.
6437 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6438 assert instance is not None, \
6439 "Cannot retrieve locked instance %s" % self.op.instance_name
6440 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6441 " offline, cannot reinstall")
6442 for node in instance.secondary_nodes:
6443 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6444 " cannot reinstall")
6446 if instance.disk_template == constants.DT_DISKLESS:
6447 raise errors.OpPrereqError("Instance '%s' has no disks" %
6448 self.op.instance_name,
6450 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6452 if self.op.os_type is not None:
6454 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6455 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6456 instance_os = self.op.os_type
6458 instance_os = instance.os
6460 nodelist = list(instance.all_nodes)
6462 if self.op.osparams:
6463 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6464 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6465 self.os_inst = i_osdict # the new dict (without defaults)
6469 self.instance = instance
6471 def Exec(self, feedback_fn):
6472 """Reinstall the instance.
6475 inst = self.instance
6477 if self.op.os_type is not None:
6478 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6479 inst.os = self.op.os_type
6480 # Write to configuration
6481 self.cfg.Update(inst, feedback_fn)
6483 _StartInstanceDisks(self, inst, None)
6485 feedback_fn("Running the instance OS create scripts...")
6486 # FIXME: pass debug option from opcode to backend
6487 result = self.rpc.call_instance_os_add(inst.primary_node,
6488 (inst, self.os_inst), True,
6489 self.op.debug_level)
6490 result.Raise("Could not install OS for instance %s on node %s" %
6491 (inst.name, inst.primary_node))
6493 _ShutdownInstanceDisks(self, inst)
6496 class LUInstanceRecreateDisks(LogicalUnit):
6497 """Recreate an instance's missing disks.
6500 HPATH = "instance-recreate-disks"
6501 HTYPE = constants.HTYPE_INSTANCE
6504 def CheckArguments(self):
6505 # normalise the disk list
6506 self.op.disks = sorted(frozenset(self.op.disks))
6508 def ExpandNames(self):
6509 self._ExpandAndLockInstance()
6510 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6512 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6513 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6515 self.needed_locks[locking.LEVEL_NODE] = []
6517 def DeclareLocks(self, level):
6518 if level == locking.LEVEL_NODE:
6519 # if we replace the nodes, we only need to lock the old primary,
6520 # otherwise we need to lock all nodes for disk re-creation
6521 primary_only = bool(self.op.nodes)
6522 self._LockInstancesNodes(primary_only=primary_only)
6523 elif level == locking.LEVEL_NODE_RES:
6525 self.needed_locks[locking.LEVEL_NODE_RES] = \
6526 self.needed_locks[locking.LEVEL_NODE][:]
6528 def BuildHooksEnv(self):
6531 This runs on master, primary and secondary nodes of the instance.
6534 return _BuildInstanceHookEnvByObject(self, self.instance)
6536 def BuildHooksNodes(self):
6537 """Build hooks nodes.
6540 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6543 def CheckPrereq(self):
6544 """Check prerequisites.
6546 This checks that the instance is in the cluster and is not running.
6549 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6550 assert instance is not None, \
6551 "Cannot retrieve locked instance %s" % self.op.instance_name
6553 if len(self.op.nodes) != len(instance.all_nodes):
6554 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6555 " %d replacement nodes were specified" %
6556 (instance.name, len(instance.all_nodes),
6557 len(self.op.nodes)),
6559 assert instance.disk_template != constants.DT_DRBD8 or \
6560 len(self.op.nodes) == 2
6561 assert instance.disk_template != constants.DT_PLAIN or \
6562 len(self.op.nodes) == 1
6563 primary_node = self.op.nodes[0]
6565 primary_node = instance.primary_node
6566 _CheckNodeOnline(self, primary_node)
6568 if instance.disk_template == constants.DT_DISKLESS:
6569 raise errors.OpPrereqError("Instance '%s' has no disks" %
6570 self.op.instance_name, errors.ECODE_INVAL)
6571 # if we replace nodes *and* the old primary is offline, we don't
6573 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6574 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6575 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6576 if not (self.op.nodes and old_pnode.offline):
6577 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6578 msg="cannot recreate disks")
6580 if not self.op.disks:
6581 self.op.disks = range(len(instance.disks))
6583 for idx in self.op.disks:
6584 if idx >= len(instance.disks):
6585 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6587 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6588 raise errors.OpPrereqError("Can't recreate disks partially and"
6589 " change the nodes at the same time",
6591 self.instance = instance
6593 def Exec(self, feedback_fn):
6594 """Recreate the disks.
6597 instance = self.instance
6599 assert (self.owned_locks(locking.LEVEL_NODE) ==
6600 self.owned_locks(locking.LEVEL_NODE_RES))
6603 mods = [] # keeps track of needed logical_id changes
6605 for idx, disk in enumerate(instance.disks):
6606 if idx not in self.op.disks: # disk idx has not been passed in
6609 # update secondaries for disks, if needed
6611 if disk.dev_type == constants.LD_DRBD8:
6612 # need to update the nodes and minors
6613 assert len(self.op.nodes) == 2
6614 assert len(disk.logical_id) == 6 # otherwise disk internals
6616 (_, _, old_port, _, _, old_secret) = disk.logical_id
6617 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6618 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6619 new_minors[0], new_minors[1], old_secret)
6620 assert len(disk.logical_id) == len(new_id)
6621 mods.append((idx, new_id))
6623 # now that we have passed all asserts above, we can apply the mods
6624 # in a single run (to avoid partial changes)
6625 for idx, new_id in mods:
6626 instance.disks[idx].logical_id = new_id
6628 # change primary node, if needed
6630 instance.primary_node = self.op.nodes[0]
6631 self.LogWarning("Changing the instance's nodes, you will have to"
6632 " remove any disks left on the older nodes manually")
6635 self.cfg.Update(instance, feedback_fn)
6637 _CreateDisks(self, instance, to_skip=to_skip)
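# Illustrative sketch (not part of the original module): for DRBD8 disks the
# six-element logical_id unpacked above has the layout
#
#   (node_a, node_b, port, minor_a, minor_b, secret) = disk.logical_id
#
# and only the nodes and minors are rewritten on recreation; port and secret
# are preserved.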
6640 class LUInstanceRename(LogicalUnit):
6641 """Rename an instance.
6644 HPATH = "instance-rename"
6645 HTYPE = constants.HTYPE_INSTANCE
6647 def CheckArguments(self):
6651 if self.op.ip_check and not self.op.name_check:
6652 # TODO: make the ip check more flexible and not depend on the name check
6653 raise errors.OpPrereqError("IP address check requires a name check",
6656 def BuildHooksEnv(self):
6659 This runs on master, primary and secondary nodes of the instance.
6662 env = _BuildInstanceHookEnvByObject(self, self.instance)
6663 env["INSTANCE_NEW_NAME"] = self.op.new_name
6666 def BuildHooksNodes(self):
6667 """Build hooks nodes.
6670 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6673 def CheckPrereq(self):
6674 """Check prerequisites.
6676 This checks that the instance is in the cluster and is not running.
6679 self.op.instance_name = _ExpandInstanceName(self.cfg,
6680 self.op.instance_name)
6681 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6682 assert instance is not None
6683 _CheckNodeOnline(self, instance.primary_node)
6684 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6685 msg="cannot rename")
6686 self.instance = instance
6688 new_name = self.op.new_name
6689 if self.op.name_check:
6690 hostname = netutils.GetHostname(name=new_name)
6691 if hostname.name != new_name:
6692 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6694 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6695 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6696 " same as given hostname '%s'") %
6697 (hostname.name, self.op.new_name),
6699 new_name = self.op.new_name = hostname.name
6700 if (self.op.ip_check and
6701 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6702 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6703 (hostname.ip, new_name),
6704 errors.ECODE_NOTUNIQUE)
6706 instance_list = self.cfg.GetInstanceList()
6707 if new_name in instance_list and new_name != instance.name:
6708 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6709 new_name, errors.ECODE_EXISTS)
6711 def Exec(self, feedback_fn):
6712 """Rename the instance.
6715 inst = self.instance
6716 old_name = inst.name
6718 rename_file_storage = False
6719 if (inst.disk_template in constants.DTS_FILEBASED and
6720 self.op.new_name != inst.name):
6721 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6722 rename_file_storage = True
6724 self.cfg.RenameInstance(inst.name, self.op.new_name)
6725 # Change the instance lock. This is definitely safe while we hold the BGL.
6726 # Otherwise the new lock would have to be added in acquired mode.
6728 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6729 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6731 # re-read the instance from the configuration after rename
6732 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6734 if rename_file_storage:
6735 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6736 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6737 old_file_storage_dir,
6738 new_file_storage_dir)
6739 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6740 " (but the instance has been renamed in Ganeti)" %
6741 (inst.primary_node, old_file_storage_dir,
6742 new_file_storage_dir))
6744 _StartInstanceDisks(self, inst, None)
6746 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6747 old_name, self.op.debug_level)
6748 msg = result.fail_msg
6750 msg = ("Could not run OS rename script for instance %s on node %s"
6751 " (but the instance has been renamed in Ganeti): %s" %
6752 (inst.name, inst.primary_node, msg))
6753 self.proc.LogWarning(msg)
6755 _ShutdownInstanceDisks(self, inst)
6760 class LUInstanceRemove(LogicalUnit):
6761 """Remove an instance.
6764 HPATH = "instance-remove"
6765 HTYPE = constants.HTYPE_INSTANCE
6768 def ExpandNames(self):
6769 self._ExpandAndLockInstance()
6770 self.needed_locks[locking.LEVEL_NODE] = []
6771 self.needed_locks[locking.LEVEL_NODE_RES] = []
6772 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6774 def DeclareLocks(self, level):
6775 if level == locking.LEVEL_NODE:
6776 self._LockInstancesNodes()
6777 elif level == locking.LEVEL_NODE_RES:
6779 self.needed_locks[locking.LEVEL_NODE_RES] = \
6780 self.needed_locks[locking.LEVEL_NODE][:]
6782 def BuildHooksEnv(self):
6785 This runs on master, primary and secondary nodes of the instance.
6788 env = _BuildInstanceHookEnvByObject(self, self.instance)
6789 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6792 def BuildHooksNodes(self):
6793 """Build hooks nodes.
6796 nl = [self.cfg.GetMasterNode()]
6797 nl_post = list(self.instance.all_nodes) + nl
6798 return (nl, nl_post)
6800 def CheckPrereq(self):
6801 """Check prerequisites.
6803 This checks that the instance is in the cluster.
6806 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6807 assert self.instance is not None, \
6808 "Cannot retrieve locked instance %s" % self.op.instance_name
6810 def Exec(self, feedback_fn):
6811 """Remove the instance.
6814 instance = self.instance
6815 logging.info("Shutting down instance %s on node %s",
6816 instance.name, instance.primary_node)
6818 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6819 self.op.shutdown_timeout)
6820 msg = result.fail_msg
6822 if self.op.ignore_failures:
6823 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6825 raise errors.OpExecError("Could not shutdown instance %s on"
6827 (instance.name, instance.primary_node, msg))
6829 assert (self.owned_locks(locking.LEVEL_NODE) ==
6830 self.owned_locks(locking.LEVEL_NODE_RES))
6831 assert not (set(instance.all_nodes) -
6832 self.owned_locks(locking.LEVEL_NODE)), \
6833 "Not owning correct locks"
6835 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6838 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6839 """Utility function to remove an instance.
6842 logging.info("Removing block devices for instance %s", instance.name)
6844 if not _RemoveDisks(lu, instance):
6845 if not ignore_failures:
6846 raise errors.OpExecError("Can't remove instance's disks")
6847 feedback_fn("Warning: can't remove instance's disks")
6849 logging.info("Removing instance %s out of cluster config", instance.name)
6851 lu.cfg.RemoveInstance(instance.name)
6853 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6854 "Instance lock removal conflict"
6856 # Remove lock for the instance
6857 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
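# Illustrative sketch (not part of the original module): callers choose whether
# disk-removal failures abort the removal or only produce a warning:
#
#   _RemoveInstance(lu, feedback_fn, instance, ignore_failures=True)
#
# With ignore_failures=True the instance is dropped from the configuration
# even if some of its disks could not be removed.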
6860 class LUInstanceQuery(NoHooksLU):
6861 """Logical unit for querying instances.
6864 # pylint: disable=W0142
6867 def CheckArguments(self):
6868 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6869 self.op.output_fields, self.op.use_locking)
6871 def ExpandNames(self):
6872 self.iq.ExpandNames(self)
6874 def DeclareLocks(self, level):
6875 self.iq.DeclareLocks(self, level)
6877 def Exec(self, feedback_fn):
6878 return self.iq.OldStyleQuery(self)
6881 class LUInstanceFailover(LogicalUnit):
6882 """Failover an instance.
6885 HPATH = "instance-failover"
6886 HTYPE = constants.HTYPE_INSTANCE
6889 def CheckArguments(self):
6890 """Check the arguments.
6893 self.iallocator = getattr(self.op, "iallocator", None)
6894 self.target_node = getattr(self.op, "target_node", None)
6896 def ExpandNames(self):
6897 self._ExpandAndLockInstance()
6899 if self.op.target_node is not None:
6900 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6902 self.needed_locks[locking.LEVEL_NODE] = []
6903 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6905 ignore_consistency = self.op.ignore_consistency
6906 shutdown_timeout = self.op.shutdown_timeout
6907 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6910 ignore_consistency=ignore_consistency,
6911 shutdown_timeout=shutdown_timeout)
6912 self.tasklets = [self._migrater]
6914 def DeclareLocks(self, level):
6915 if level == locking.LEVEL_NODE:
6916 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6917 if instance.disk_template in constants.DTS_EXT_MIRROR:
6918 if self.op.target_node is None:
6919 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6921 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6922 self.op.target_node]
6923 del self.recalculate_locks[locking.LEVEL_NODE]
6925 self._LockInstancesNodes()
6927 def BuildHooksEnv(self):
6930 This runs on master, primary and secondary nodes of the instance.
6933 instance = self._migrater.instance
6934 source_node = instance.primary_node
6935 target_node = self.op.target_node
6937 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6938 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6939 "OLD_PRIMARY": source_node,
6940 "NEW_PRIMARY": target_node,
6943 if instance.disk_template in constants.DTS_INT_MIRROR:
6944 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6945 env["NEW_SECONDARY"] = source_node
6947 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6949 env.update(_BuildInstanceHookEnvByObject(self, instance))
6953 def BuildHooksNodes(self):
6954 """Build hooks nodes.
6957 instance = self._migrater.instance
6958 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6959 return (nl, nl + [instance.primary_node])
6962 class LUInstanceMigrate(LogicalUnit):
6963 """Migrate an instance.
6965 This is migration without shutting down, compared to the failover,
6966 which is done with shutdown.
6969 HPATH = "instance-migrate"
6970 HTYPE = constants.HTYPE_INSTANCE
6973 def ExpandNames(self):
6974 self._ExpandAndLockInstance()
6976 if self.op.target_node is not None:
6977 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6979 self.needed_locks[locking.LEVEL_NODE] = []
6980 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6982 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6983 cleanup=self.op.cleanup,
6985 fallback=self.op.allow_failover)
6986 self.tasklets = [self._migrater]
6988 def DeclareLocks(self, level):
6989 if level == locking.LEVEL_NODE:
6990 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6991 if instance.disk_template in constants.DTS_EXT_MIRROR:
6992 if self.op.target_node is None:
6993 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6995 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6996 self.op.target_node]
6997 del self.recalculate_locks[locking.LEVEL_NODE]
6999 self._LockInstancesNodes()
7001 def BuildHooksEnv(self):
7004 This runs on master, primary and secondary nodes of the instance.
7007 instance = self._migrater.instance
7008 source_node = instance.primary_node
7009 target_node = self.op.target_node
7010 env = _BuildInstanceHookEnvByObject(self, instance)
7012 "MIGRATE_LIVE": self._migrater.live,
7013 "MIGRATE_CLEANUP": self.op.cleanup,
7014 "OLD_PRIMARY": source_node,
7015 "NEW_PRIMARY": target_node,
7018 if instance.disk_template in constants.DTS_INT_MIRROR:
7019 env["OLD_SECONDARY"] = target_node
7020 env["NEW_SECONDARY"] = source_node
7022 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7026 def BuildHooksNodes(self):
7027 """Build hooks nodes.
7030 instance = self._migrater.instance
7031 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7032 return (nl, nl + [instance.primary_node])
7035 class LUInstanceMove(LogicalUnit):
7036 """Move an instance by data-copying.
7039 HPATH = "instance-move"
7040 HTYPE = constants.HTYPE_INSTANCE
7043 def ExpandNames(self):
7044 self._ExpandAndLockInstance()
7045 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7046 self.op.target_node = target_node
7047 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7048 self.needed_locks[locking.LEVEL_NODE_RES] = []
7049 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7051 def DeclareLocks(self, level):
7052 if level == locking.LEVEL_NODE:
7053 self._LockInstancesNodes(primary_only=True)
7054 elif level == locking.LEVEL_NODE_RES:
7056 self.needed_locks[locking.LEVEL_NODE_RES] = \
7057 self.needed_locks[locking.LEVEL_NODE][:]
7059 def BuildHooksEnv(self):
7062 This runs on master, primary and secondary nodes of the instance.
7066 "TARGET_NODE": self.op.target_node,
7067 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7069 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7072 def BuildHooksNodes(self):
7073 """Build hooks nodes.
7077 self.cfg.GetMasterNode(),
7078 self.instance.primary_node,
7079 self.op.target_node,
7083 def CheckPrereq(self):
7084 """Check prerequisites.
7086 This checks that the instance is in the cluster.
7089 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7090 assert self.instance is not None, \
7091 "Cannot retrieve locked instance %s" % self.op.instance_name
7093 node = self.cfg.GetNodeInfo(self.op.target_node)
7094 assert node is not None, \
7095 "Cannot retrieve locked node %s" % self.op.target_node
7097 self.target_node = target_node = node.name
7099 if target_node == instance.primary_node:
7100 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7101 (instance.name, target_node),
7104 bep = self.cfg.GetClusterInfo().FillBE(instance)
7106 for idx, dsk in enumerate(instance.disks):
7107 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7108 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7109 " cannot copy" % idx, errors.ECODE_STATE)
7111 _CheckNodeOnline(self, target_node)
7112 _CheckNodeNotDrained(self, target_node)
7113 _CheckNodeVmCapable(self, target_node)
7115 if instance.admin_state == constants.ADMINST_UP:
7116 # check memory requirements on the secondary node
7117 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7118 instance.name, bep[constants.BE_MAXMEM],
7119 instance.hypervisor)
7121 self.LogInfo("Not checking memory on the secondary node as"
7122 " instance will not be started")
7124 # check bridge existence
7125 _CheckInstanceBridgesExist(self, instance, node=target_node)
7127 def Exec(self, feedback_fn):
7128 """Move an instance.
7130 The move is done by shutting it down on its present node, copying
7131 the data over (slow) and starting it on the new node.
7134 instance = self.instance
7136 source_node = instance.primary_node
7137 target_node = self.target_node
7139 self.LogInfo("Shutting down instance %s on source node %s",
7140 instance.name, source_node)
7142 assert (self.owned_locks(locking.LEVEL_NODE) ==
7143 self.owned_locks(locking.LEVEL_NODE_RES))
7145 result = self.rpc.call_instance_shutdown(source_node, instance,
7146 self.op.shutdown_timeout)
7147 msg = result.fail_msg
7149 if self.op.ignore_consistency:
7150 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7151 " Proceeding anyway. Please make sure node"
7152 " %s is down. Error details: %s",
7153 instance.name, source_node, source_node, msg)
7155 raise errors.OpExecError("Could not shutdown instance %s on"
7157 (instance.name, source_node, msg))
7159 # create the target disks
7161 _CreateDisks(self, instance, target_node=target_node)
7162 except errors.OpExecError:
7163 self.LogWarning("Device creation failed, reverting...")
7165 _RemoveDisks(self, instance, target_node=target_node)
7167 self.cfg.ReleaseDRBDMinors(instance.name)
7170 cluster_name = self.cfg.GetClusterInfo().cluster_name
7173 # activate, get path, copy the data over
7174 for idx, disk in enumerate(instance.disks):
7175 self.LogInfo("Copying data for disk %d", idx)
7176 result = self.rpc.call_blockdev_assemble(target_node, disk,
7177 instance.name, True, idx)
7179 self.LogWarning("Can't assemble newly created disk %d: %s",
7180 idx, result.fail_msg)
7181 errs.append(result.fail_msg)
7183 dev_path = result.payload
7184 result = self.rpc.call_blockdev_export(source_node, disk,
7185 target_node, dev_path,
7188 self.LogWarning("Can't copy data over for disk %d: %s",
7189 idx, result.fail_msg)
7190 errs.append(result.fail_msg)
7194 self.LogWarning("Some disks failed to copy, aborting")
7196 _RemoveDisks(self, instance, target_node=target_node)
7198 self.cfg.ReleaseDRBDMinors(instance.name)
7199 raise errors.OpExecError("Errors during disk copy: %s" %
7202 instance.primary_node = target_node
7203 self.cfg.Update(instance, feedback_fn)
7205 self.LogInfo("Removing the disks on the original node")
7206 _RemoveDisks(self, instance, target_node=source_node)
7208 # Only start the instance if it's marked as up
7209 if instance.admin_state == constants.ADMINST_UP:
7210 self.LogInfo("Starting instance %s on node %s",
7211 instance.name, target_node)
7213 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7214 ignore_secondaries=True)
7216 _ShutdownInstanceDisks(self, instance)
7217 raise errors.OpExecError("Can't activate the instance's disks")
7219 result = self.rpc.call_instance_start(target_node,
7220 (instance, None, None), False)
7221 msg = result.fail_msg
7223 _ShutdownInstanceDisks(self, instance)
7224 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7225 (instance.name, target_node, msg))
7228 class LUNodeMigrate(LogicalUnit):
7229 """Migrate all instances from a node.
7232 HPATH = "node-migrate"
7233 HTYPE = constants.HTYPE_NODE
7236 def CheckArguments(self):
7239 def ExpandNames(self):
7240 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7242 self.share_locks = _ShareAll()
7243 self.needed_locks = {
7244 locking.LEVEL_NODE: [self.op.node_name],
7247 def BuildHooksEnv(self):
7250 This runs on the master, the primary and all the secondaries.
7254 "NODE_NAME": self.op.node_name,
7257 def BuildHooksNodes(self):
7258 """Build hooks nodes.
7261 nl = [self.cfg.GetMasterNode()]
7264 def CheckPrereq(self):
7267 def Exec(self, feedback_fn):
7268 # Prepare jobs for migration instances
7270 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7273 iallocator=self.op.iallocator,
7274 target_node=self.op.target_node)]
7275 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7278 # TODO: Run iallocator in this opcode and pass correct placement options to
7279 # OpInstanceMigrate. Since other jobs can modify the cluster between
7280 # running the iallocator and the actual migration, a good consistency model
7281 # will have to be found.
7283 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7284 frozenset([self.op.node_name]))
7286 return ResultWithJobs(jobs)
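# Illustrative sketch (not part of the original module): "jobs" is a list of
# single-opcode job definitions, one per primary instance of the node, e.g.:
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2", ...)],
#   ]
#
# The instance names are made-up example values and "..." stands for the
# mode/iallocator/target_node arguments built in Exec above.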
7289 class TLMigrateInstance(Tasklet):
7290 """Tasklet class for instance migration.
7293 @ivar live: whether the migration will be done live or non-live;
7294 this variable is initialized only after CheckPrereq has run
7295 @type cleanup: boolean
7296 @ivar cleanup: Whether we clean up from a failed migration
7297 @type iallocator: string
7298 @ivar iallocator: The iallocator used to determine target_node
7299 @type target_node: string
7300 @ivar target_node: If given, the target_node to reallocate the instance to
7301 @type failover: boolean
7302 @ivar failover: Whether operation results in failover or migration
7303 @type fallback: boolean
7304 @ivar fallback: Whether fallback to failover is allowed if migration not
7306 @type ignore_consistency: boolean
7307 @ivar ignore_consistency: Whether we should ignore consistency between source
7309 @type shutdown_timeout: int
7310 @ivar shutdown_timeout: in case of failover, the timeout used for the shutdown
7315 _MIGRATION_POLL_INTERVAL = 1 # seconds
7316 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7318 def __init__(self, lu, instance_name, cleanup=False,
7319 failover=False, fallback=False,
7320 ignore_consistency=False,
7321 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7322 """Initializes this class.
7325 Tasklet.__init__(self, lu)
7328 self.instance_name = instance_name
7329 self.cleanup = cleanup
7330 self.live = False # will be overridden later
7331 self.failover = failover
7332 self.fallback = fallback
7333 self.ignore_consistency = ignore_consistency
7334 self.shutdown_timeout = shutdown_timeout
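# Illustrative sketch (not part of the original module): given the keyword
# signature above, a failover-style caller could construct the tasklet as
#
#   TLMigrateInstance(lu, instance_name,
#                     failover=True,
#                     ignore_consistency=op.ignore_consistency,
#                     shutdown_timeout=op.shutdown_timeout)
#
# failover=True here is an assumption about such a caller, not copied from the
# LUs above.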
7336 def CheckPrereq(self):
7337 """Check prerequisites.
7339 This checks that the instance is in the cluster.
7342 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7343 instance = self.cfg.GetInstanceInfo(instance_name)
7344 assert instance is not None
7345 self.instance = instance
7347 if (not self.cleanup and
7348 not instance.admin_state == constants.ADMINST_UP and
7349 not self.failover and self.fallback):
7350 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7351 " switching to failover")
7352 self.failover = True
7354 if instance.disk_template not in constants.DTS_MIRRORED:
7359 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7360 " %s" % (instance.disk_template, text),
7363 if instance.disk_template in constants.DTS_EXT_MIRROR:
7364 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7366 if self.lu.op.iallocator:
7367 self._RunAllocator()
7369 # We set self.target_node as it is required by
7371 self.target_node = self.lu.op.target_node
7373 # self.target_node is already populated, either directly or by the
7375 target_node = self.target_node
7376 if self.target_node == instance.primary_node:
7377 raise errors.OpPrereqError("Cannot migrate instance %s"
7378 " to its primary (%s)" %
7379 (instance.name, instance.primary_node))
7381 if len(self.lu.tasklets) == 1:
7382 # It is safe to release locks only when we're the only tasklet
7384 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7385 keep=[instance.primary_node, self.target_node])
7388 secondary_nodes = instance.secondary_nodes
7389 if not secondary_nodes:
7390 raise errors.ConfigurationError("No secondary node but using"
7391 " %s disk template" %
7392 instance.disk_template)
7393 target_node = secondary_nodes[0]
7394 if self.lu.op.iallocator or (self.lu.op.target_node and
7395 self.lu.op.target_node != target_node):
7397 text = "failed over"
7400 raise errors.OpPrereqError("Instances with disk template %s cannot"
7401 " be %s to arbitrary nodes"
7402 " (neither an iallocator nor a target"
7403 " node can be passed)" %
7404 (instance.disk_template, text),
7407 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7409 # check memory requirements on the secondary node
7410 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7411 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7412 instance.name, i_be[constants.BE_MAXMEM],
7413 instance.hypervisor)
7415 self.lu.LogInfo("Not checking memory on the secondary node as"
7416 " instance will not be started")
7418 # check bridge existence
7419 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7421 if not self.cleanup:
7422 _CheckNodeNotDrained(self.lu, target_node)
7423 if not self.failover:
7424 result = self.rpc.call_instance_migratable(instance.primary_node,
7426 if result.fail_msg and self.fallback:
7427 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7429 self.failover = True
7431 result.Raise("Can't migrate, please use failover",
7432 prereq=True, ecode=errors.ECODE_STATE)
7434 assert not (self.failover and self.cleanup)
7436 if not self.failover:
7437 if self.lu.op.live is not None and self.lu.op.mode is not None:
7438 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7439 " parameters are accepted",
7441 if self.lu.op.live is not None:
7443 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7445 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7446 # reset the 'live' parameter to None so that repeated
7447 # invocations of CheckPrereq do not raise an exception
7448 self.lu.op.live = None
7449 elif self.lu.op.mode is None:
7450 # read the default value from the hypervisor
7451 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7453 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7455 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7457 # Failover is never live
7460 def _RunAllocator(self):
7461 """Run the allocator based on input opcode.
7464 ial = IAllocator(self.cfg, self.rpc,
7465 mode=constants.IALLOCATOR_MODE_RELOC,
7466 name=self.instance_name,
7467 # TODO See why hail breaks with a single node below
7468 relocate_from=[self.instance.primary_node,
7469 self.instance.primary_node],
7472 ial.Run(self.lu.op.iallocator)
7475 raise errors.OpPrereqError("Can't compute nodes using"
7476 " iallocator '%s': %s" %
7477 (self.lu.op.iallocator, ial.info),
7479 if len(ial.result) != ial.required_nodes:
7480 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7481 " of nodes (%s), required %s" %
7482 (self.lu.op.iallocator, len(ial.result),
7483 ial.required_nodes), errors.ECODE_FAULT)
7484 self.target_node = ial.result[0]
7485 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7486 self.instance_name, self.lu.op.iallocator,
7487 utils.CommaJoin(ial.result))
7489 def _WaitUntilSync(self):
7490 """Poll with custom rpc for disk sync.
7492 This uses our own step-based rpc call.
7495 self.feedback_fn("* wait until resync is done")
7499 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7501 self.instance.disks)
7503 for node, nres in result.items():
7504 nres.Raise("Cannot resync disks on node %s" % node)
7505 node_done, node_percent = nres.payload
7506 all_done = all_done and node_done
7507 if node_percent is not None:
7508 min_percent = min(min_percent, node_percent)
7510 if min_percent < 100:
7511 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7514 def _EnsureSecondary(self, node):
7515 """Demote a node to secondary.
7518 self.feedback_fn("* switching node %s to secondary mode" % node)
7520 for dev in self.instance.disks:
7521 self.cfg.SetDiskID(dev, node)
7523 result = self.rpc.call_blockdev_close(node, self.instance.name,
7524 self.instance.disks)
7525 result.Raise("Cannot change disk to secondary on node %s" % node)
7527 def _GoStandalone(self):
7528 """Disconnect from the network.
7531 self.feedback_fn("* changing into standalone mode")
7532 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7533 self.instance.disks)
7534 for node, nres in result.items():
7535 nres.Raise("Cannot disconnect disks node %s" % node)
7537 def _GoReconnect(self, multimaster):
7538 """Reconnect to the network.
7544 msg = "single-master"
7545 self.feedback_fn("* changing disks into %s mode" % msg)
7546 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7547 self.instance.disks,
7548 self.instance.name, multimaster)
7549 for node, nres in result.items():
7550 nres.Raise("Cannot change disks config on node %s" % node)
7552 def _ExecCleanup(self):
7553 """Try to cleanup after a failed migration.
7555 The cleanup is done by:
7556 - check that the instance is running only on one node
7557 (and update the config if needed)
7558 - change disks on its secondary node to secondary
7559 - wait until disks are fully synchronized
7560 - disconnect from the network
7561 - change disks into single-master mode
7562 - wait again until disks are fully synchronized
7565 instance = self.instance
7566 target_node = self.target_node
7567 source_node = self.source_node
7569 # check running on only one node
7570 self.feedback_fn("* checking where the instance actually runs"
7571 " (if this hangs, the hypervisor might be in"
7573 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7574 for node, result in ins_l.items():
7575 result.Raise("Can't contact node %s" % node)
7577 runningon_source = instance.name in ins_l[source_node].payload
7578 runningon_target = instance.name in ins_l[target_node].payload
7580 if runningon_source and runningon_target:
7581 raise errors.OpExecError("Instance seems to be running on two nodes,"
7582 " or the hypervisor is confused; you will have"
7583 " to ensure manually that it runs only on one"
7584 " and restart this operation")
7586 if not (runningon_source or runningon_target):
7587 raise errors.OpExecError("Instance does not seem to be running at all;"
7588 " in this case it's safer to repair by"
7589 " running 'gnt-instance stop' to ensure disk"
7590 " shutdown, and then restarting it")
7592 if runningon_target:
7593 # the migration has actually succeeded, we need to update the config
7594 self.feedback_fn("* instance running on secondary node (%s),"
7595 " updating config" % target_node)
7596 instance.primary_node = target_node
7597 self.cfg.Update(instance, self.feedback_fn)
7598 demoted_node = source_node
7600 self.feedback_fn("* instance confirmed to be running on its"
7601 " primary node (%s)" % source_node)
7602 demoted_node = target_node
7604 if instance.disk_template in constants.DTS_INT_MIRROR:
7605 self._EnsureSecondary(demoted_node)
7607 self._WaitUntilSync()
7608 except errors.OpExecError:
7609 # we ignore errors here, since if the device is standalone, it
7610 # won't be able to sync
7612 self._GoStandalone()
7613 self._GoReconnect(False)
7614 self._WaitUntilSync()
7616 self.feedback_fn("* done")
7618 def _RevertDiskStatus(self):
7619 """Try to revert the disk status after a failed migration.
7622 target_node = self.target_node
7623 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7627 self._EnsureSecondary(target_node)
7628 self._GoStandalone()
7629 self._GoReconnect(False)
7630 self._WaitUntilSync()
7631 except errors.OpExecError, err:
7632 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7633 " please try to recover the instance manually;"
7634 " error '%s'" % str(err))
7636 def _AbortMigration(self):
7637 """Call the hypervisor code to abort a started migration.
7640 instance = self.instance
7641 target_node = self.target_node
7642 source_node = self.source_node
7643 migration_info = self.migration_info
7645 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7649 abort_msg = abort_result.fail_msg
7651 logging.error("Aborting migration failed on target node %s: %s",
7652 target_node, abort_msg)
7653 # Don't raise an exception here, as we still have to try to revert the
7654 # disk status, even if this step failed.
7656 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7657 instance, False, self.live)
7658 abort_msg = abort_result.fail_msg
7660 logging.error("Aborting migration failed on source node %s: %s",
7661 source_node, abort_msg)
7663 def _ExecMigration(self):
7664 """Migrate an instance.
7666 The migration is done by:
7667 - change the disks into dual-master mode
7668 - wait until disks are fully synchronized again
7669 - migrate the instance
7670 - change disks on the new secondary node (the old primary) to secondary
7671 - wait until disks are fully synchronized
7672 - change disks into single-master mode
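Rough call sequence (a sketch of the DRBD-backed case, with error handling
and the external-mirror shortcuts omitted):
  - _EnsureSecondary(target_node); _GoStandalone()
  - _GoReconnect(True); _WaitUntilSync(): switch to dual-master mode
  - rpc.call_accept_instance(...); rpc.call_instance_migrate(...)
  - poll rpc.call_instance_get_migration_status() until no longer ACTIVE
  - finalize on source and target, update primary_node in the config
  - _EnsureSecondary(source_node) and switch back to single-master mode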
7675 instance = self.instance
7676 target_node = self.target_node
7677 source_node = self.source_node
7679 # Check for hypervisor version mismatch and warn the user.
7680 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7681 None, self.instance.hypervisor)
7682 src_info = nodeinfo[source_node]
7683 dst_info = nodeinfo[target_node]
7685 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7686 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7687 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7688 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7689 if src_version != dst_version:
7690 self.feedback_fn("* warning: hypervisor version mismatch between"
7691 " source (%s) and target (%s) node" %
7692 (src_version, dst_version))
7694 self.feedback_fn("* checking disk consistency between source and target")
7695 for dev in instance.disks:
7696 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7697 raise errors.OpExecError("Disk %s is degraded or not fully"
7698 " synchronized on target node,"
7699 " aborting migration" % dev.iv_name)
7701 # First get the migration information from the remote node
7702 result = self.rpc.call_migration_info(source_node, instance)
7703 msg = result.fail_msg
7705 log_err = ("Failed fetching source migration information from %s: %s" %
7707 logging.error(log_err)
7708 raise errors.OpExecError(log_err)
7710 self.migration_info = migration_info = result.payload
7712 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7713 # Then switch the disks to master/master mode
7714 self._EnsureSecondary(target_node)
7715 self._GoStandalone()
7716 self._GoReconnect(True)
7717 self._WaitUntilSync()
7719 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7720 result = self.rpc.call_accept_instance(target_node,
7723 self.nodes_ip[target_node])
7725 msg = result.fail_msg
7727 logging.error("Instance pre-migration failed, trying to revert"
7728 " disk status: %s", msg)
7729 self.feedback_fn("Pre-migration failed, aborting")
7730 self._AbortMigration()
7731 self._RevertDiskStatus()
7732 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7733 (instance.name, msg))
7735 self.feedback_fn("* migrating instance to %s" % target_node)
7736 result = self.rpc.call_instance_migrate(source_node, instance,
7737 self.nodes_ip[target_node],
7739 msg = result.fail_msg
7741 logging.error("Instance migration failed, trying to revert"
7742 " disk status: %s", msg)
7743 self.feedback_fn("Migration failed, aborting")
7744 self._AbortMigration()
7745 self._RevertDiskStatus()
7746 raise errors.OpExecError("Could not migrate instance %s: %s" %
7747 (instance.name, msg))
7749 self.feedback_fn("* starting memory transfer")
7750 last_feedback = time.time()
7752 result = self.rpc.call_instance_get_migration_status(source_node,
7754 msg = result.fail_msg
7755 ms = result.payload # MigrationStatus instance
7756 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7757 logging.error("Instance migration failed, trying to revert"
7758 " disk status: %s", msg)
7759 self.feedback_fn("Migration failed, aborting")
7760 self._AbortMigration()
7761 self._RevertDiskStatus()
7762 raise errors.OpExecError("Could not migrate instance %s: %s" %
7763 (instance.name, msg))
7765 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7766 self.feedback_fn("* memory transfer complete")
7769 if (utils.TimeoutExpired(last_feedback,
7770 self._MIGRATION_FEEDBACK_INTERVAL) and
7771 ms.transferred_ram is not None):
7772 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7773 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7774 last_feedback = time.time()
7776 time.sleep(self._MIGRATION_POLL_INTERVAL)
7778 result = self.rpc.call_instance_finalize_migration_src(source_node,
7782 msg = result.fail_msg
7784 logging.error("Instance migration succeeded, but finalization failed"
7785 " on the source node: %s", msg)
7786 raise errors.OpExecError("Could not finalize instance migration: %s" %
7789 instance.primary_node = target_node
7791 # distribute new instance config to the other nodes
7792 self.cfg.Update(instance, self.feedback_fn)
7794 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7798 msg = result.fail_msg
7800 logging.error("Instance migration succeeded, but finalization failed"
7801 " on the target node: %s", msg)
7802 raise errors.OpExecError("Could not finalize instance migration: %s" %
7805 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7806 self._EnsureSecondary(source_node)
7807 self._WaitUntilSync()
7808 self._GoStandalone()
7809 self._GoReconnect(False)
7810 self._WaitUntilSync()
7812 self.feedback_fn("* done")
7814 def _ExecFailover(self):
7815 """Failover an instance.
7817 The failover is done by shutting it down on its present node and
7818 starting it on the secondary.
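In short (a sketch; see the body below for the actual error handling):
  - check disk consistency, unless ignored or the primary node is offline
  - rpc.call_instance_shutdown() on the source node
  - _ShutdownInstanceDisks(), then update instance.primary_node in the config
  - if the instance was up: _AssembleInstanceDisks() and
    rpc.call_instance_start() on the target node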
7821 instance = self.instance
7822 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7824 source_node = instance.primary_node
7825 target_node = self.target_node
7827 if instance.admin_state == constants.ADMINST_UP:
7828 self.feedback_fn("* checking disk consistency between source and target")
7829 for dev in instance.disks:
7830 # for drbd, these are drbd over lvm
7831 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7832 if primary_node.offline:
7833 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7835 (primary_node.name, dev.iv_name, target_node))
7836 elif not self.ignore_consistency:
7837 raise errors.OpExecError("Disk %s is degraded on target node,"
7838 " aborting failover" % dev.iv_name)
7840 self.feedback_fn("* not checking disk consistency as instance is not"
7843 self.feedback_fn("* shutting down instance on source node")
7844 logging.info("Shutting down instance %s on node %s",
7845 instance.name, source_node)
7847 result = self.rpc.call_instance_shutdown(source_node, instance,
7848 self.shutdown_timeout)
7849 msg = result.fail_msg
7851 if self.ignore_consistency or primary_node.offline:
7852 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7853 " proceeding anyway; please make sure node"
7854 " %s is down; error details: %s",
7855 instance.name, source_node, source_node, msg)
7857 raise errors.OpExecError("Could not shutdown instance %s on"
7859 (instance.name, source_node, msg))
7861 self.feedback_fn("* deactivating the instance's disks on source node")
7862 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7863 raise errors.OpExecError("Can't shut down the instance's disks")
7865 instance.primary_node = target_node
7866 # distribute new instance config to the other nodes
7867 self.cfg.Update(instance, self.feedback_fn)
7869 # Only start the instance if it's marked as up
7870 if instance.admin_state == constants.ADMINST_UP:
7871 self.feedback_fn("* activating the instance's disks on target node %s" %
7873 logging.info("Starting instance %s on node %s",
7874 instance.name, target_node)
7876 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7877 ignore_secondaries=True)
7879 _ShutdownInstanceDisks(self.lu, instance)
7880 raise errors.OpExecError("Can't activate the instance's disks")
7882 self.feedback_fn("* starting the instance on the target node %s" %
7884 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7886 msg = result.fail_msg
7888 _ShutdownInstanceDisks(self.lu, instance)
7889 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7890 (instance.name, target_node, msg))
7892 def Exec(self, feedback_fn):
7893 """Perform the migration.
7896 self.feedback_fn = feedback_fn
7897 self.source_node = self.instance.primary_node
7899 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7900 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7901 self.target_node = self.instance.secondary_nodes[0]
7902 # Otherwise self.target_node has been populated either
7903 # directly, or through an iallocator.
7905 self.all_nodes = [self.source_node, self.target_node]
7906 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7907 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7910 feedback_fn("Failover instance %s" % self.instance.name)
7911 self._ExecFailover()
7913 feedback_fn("Migrating instance %s" % self.instance.name)
7916 return self._ExecCleanup()
7918 return self._ExecMigration()
7921 def _CreateBlockDev(lu, node, instance, device, force_create,
7923 """Create a tree of block devices on a given node.
7925 If this device type has to be created on secondaries, create it and all its children.
7928 If not, just recurse to children keeping the same 'force' value.
7930 @param lu: the lu on whose behalf we execute
7931 @param node: the node on which to create the device
7932 @type instance: L{objects.Instance}
7933 @param instance: the instance which owns the device
7934 @type device: L{objects.Disk}
7935 @param device: the device to create
7936 @type force_create: boolean
7937 @param force_create: whether to force creation of this device; this
7938 will be changed to True whenever we find a device which has
7939 the CreateOnSecondary() attribute set
7940 @param info: the extra 'metadata' we should attach to the device
7941 (this will be represented as a LVM tag)
7942 @type force_open: boolean
7943 @param force_open: this parameter will be passed to the
7944 L{backend.BlockdevCreate} function where it specifies
7945 whether we run on primary or not, and it affects both
7946 the child assembly and the device's own Open() execution
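Typical invocation, as used by _CreateDisks() further below (the force
flags are derived from whether C{node} is the primary node):
  _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)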
7949 if device.CreateOnSecondary():
7953 for child in device.children:
7954 _CreateBlockDev(lu, node, instance, child, force_create,
7957 if not force_create:
7960 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7963 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7964 """Create a single block device on a given node.
7966 This will not recurse over children of the device, so they must be created in advance.
7969 @param lu: the lu on whose behalf we execute
7970 @param node: the node on which to create the device
7971 @type instance: L{objects.Instance}
7972 @param instance: the instance which owns the device
7973 @type device: L{objects.Disk}
7974 @param device: the device to create
7975 @param info: the extra 'metadata' we should attach to the device
7976 (this will be represented as a LVM tag)
7977 @type force_open: boolean
7978 @param force_open: this parameter will be passed to the
7979 L{backend.BlockdevCreate} function where it specifies
7980 whether we run on primary or not, and it affects both
7981 the child assembly and the device's own Open() execution
7984 lu.cfg.SetDiskID(device, node)
7985 result = lu.rpc.call_blockdev_create(node, device, device.size,
7986 instance.name, force_open, info)
7987 result.Raise("Can't create block device %s on"
7988 " node %s for instance %s" % (device, node, instance.name))
7989 if device.physical_id is None:
7990 device.physical_id = result.payload
7993 def _GenerateUniqueNames(lu, exts):
7994 """Generate a suitable LV name.
7996 This will generate a unique logical volume name for each of the given extensions.
8001 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8002 results.append("%s%s" % (new_id, val))
8006 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8007 iv_name, p_minor, s_minor):
8008 """Generate a drbd8 device complete with its children.
8011 assert len(vgnames) == len(names) == 2
8012 port = lu.cfg.AllocatePort()
8013 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8014 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8015 logical_id=(vgnames[0], names[0]))
8016 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8017 logical_id=(vgnames[1], names[1]))
8018 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8019 logical_id=(primary, secondary, port,
8022 children=[dev_data, dev_meta],
8027 def _GenerateDiskTemplate(lu, template_name,
8028 instance_name, primary_node,
8029 secondary_nodes, disk_info,
8030 file_storage_dir, file_driver,
8031 base_index, feedback_fn):
8032 """Generate the entire disk layout for a given template type.
8035 #TODO: compute space requirements
8037 vgname = lu.cfg.GetVGName()
8038 disk_count = len(disk_info)
8040 if template_name == constants.DT_DISKLESS:
8042 elif template_name == constants.DT_PLAIN:
8043 if len(secondary_nodes) != 0:
8044 raise errors.ProgrammerError("Wrong template configuration")
8046 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8047 for i in range(disk_count)])
8048 for idx, disk in enumerate(disk_info):
8049 disk_index = idx + base_index
8050 vg = disk.get(constants.IDISK_VG, vgname)
8051 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8052 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8053 size=disk[constants.IDISK_SIZE],
8054 logical_id=(vg, names[idx]),
8055 iv_name="disk/%d" % disk_index,
8056 mode=disk[constants.IDISK_MODE])
8057 disks.append(disk_dev)
8058 elif template_name == constants.DT_DRBD8:
8059 if len(secondary_nodes) != 1:
8060 raise errors.ProgrammerError("Wrong template configuration")
8061 remote_node = secondary_nodes[0]
8062 minors = lu.cfg.AllocateDRBDMinor(
8063 [primary_node, remote_node] * len(disk_info), instance_name)
8066 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8067 for i in range(disk_count)]):
8068 names.append(lv_prefix + "_data")
8069 names.append(lv_prefix + "_meta")
8070 for idx, disk in enumerate(disk_info):
8071 disk_index = idx + base_index
8072 data_vg = disk.get(constants.IDISK_VG, vgname)
8073 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8074 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8075 disk[constants.IDISK_SIZE],
8077 names[idx * 2:idx * 2 + 2],
8078 "disk/%d" % disk_index,
8079 minors[idx * 2], minors[idx * 2 + 1])
8080 disk_dev.mode = disk[constants.IDISK_MODE]
8081 disks.append(disk_dev)
8082 elif template_name == constants.DT_FILE:
8083 if len(secondary_nodes) != 0:
8084 raise errors.ProgrammerError("Wrong template configuration")
8086 opcodes.RequireFileStorage()
8088 for idx, disk in enumerate(disk_info):
8089 disk_index = idx + base_index
8090 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8091 size=disk[constants.IDISK_SIZE],
8092 iv_name="disk/%d" % disk_index,
8093 logical_id=(file_driver,
8094 "%s/disk%d" % (file_storage_dir,
8096 mode=disk[constants.IDISK_MODE])
8097 disks.append(disk_dev)
8098 elif template_name == constants.DT_SHARED_FILE:
8099 if len(secondary_nodes) != 0:
8100 raise errors.ProgrammerError("Wrong template configuration")
8102 opcodes.RequireSharedFileStorage()
8104 for idx, disk in enumerate(disk_info):
8105 disk_index = idx + base_index
8106 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8107 size=disk[constants.IDISK_SIZE],
8108 iv_name="disk/%d" % disk_index,
8109 logical_id=(file_driver,
8110 "%s/disk%d" % (file_storage_dir,
8112 mode=disk[constants.IDISK_MODE])
8113 disks.append(disk_dev)
8114 elif template_name == constants.DT_BLOCK:
8115 if len(secondary_nodes) != 0:
8116 raise errors.ProgrammerError("Wrong template configuration")
8118 for idx, disk in enumerate(disk_info):
8119 disk_index = idx + base_index
8120 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8121 size=disk[constants.IDISK_SIZE],
8122 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8123 disk[constants.IDISK_ADOPT]),
8124 iv_name="disk/%d" % disk_index,
8125 mode=disk[constants.IDISK_MODE])
8126 disks.append(disk_dev)
8129 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8133 def _GetInstanceInfoText(instance):
8134 """Compute that text that should be added to the disk's metadata.
8137 return "originstname+%s" % instance.name
8140 def _CalcEta(time_taken, written, total_size):
8141 """Calculates the ETA based on size written and total size.
8143 @param time_taken: The time taken so far
8144 @param written: amount written so far
8145 @param total_size: The total size of data to be written
8146 @return: The remaining time in seconds
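Worked example (illustrative numbers): 300 s spent writing 1024 MiB of a
4096 MiB disk gives avg_time = 300/1024 s per MiB, so the remaining
3072 MiB yield an ETA of about 900 s.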
8149 avg_time = time_taken / float(written)
8150 return (total_size - written) * avg_time
8153 def _WipeDisks(lu, instance):
8154 """Wipes instance disks.
8156 @type lu: L{LogicalUnit}
8157 @param lu: the logical unit on whose behalf we execute
8158 @type instance: L{objects.Instance}
8159 @param instance: the instance whose disks we should wipe
8160 @return: the success of the wipe
8163 node = instance.primary_node
8165 for device in instance.disks:
8166 lu.cfg.SetDiskID(device, node)
8168 logging.info("Pause sync of instance %s disks", instance.name)
8169 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8171 for idx, success in enumerate(result.payload):
8173 logging.warn("pause-sync of instance %s for disks %d failed",
8177 for idx, device in enumerate(instance.disks):
8178 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8179 # MAX_WIPE_CHUNK at max
8180 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8181 constants.MIN_WIPE_CHUNK_PERCENT)
8182 # we _must_ make this an int, otherwise rounding errors will
8184 wipe_chunk_size = int(wipe_chunk_size)
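# Example (assuming MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB;
# see constants.py for the actual values): a 20480 MiB disk would use the
# 1024 MiB cap, while a 5000 MiB disk would use a 500 MiB chunk.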
8186 lu.LogInfo("* Wiping disk %d", idx)
8187 logging.info("Wiping disk %d for instance %s, node %s using"
8188 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8193 start_time = time.time()
8195 while offset < size:
8196 wipe_size = min(wipe_chunk_size, size - offset)
8197 logging.debug("Wiping disk %d, offset %s, chunk %s",
8198 idx, offset, wipe_size)
8199 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8200 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8201 (idx, offset, wipe_size))
8204 if now - last_output >= 60:
8205 eta = _CalcEta(now - start_time, offset, size)
8206 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8207 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8210 logging.info("Resume sync of instance %s disks", instance.name)
8212 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8214 for idx, success in enumerate(result.payload):
8216 lu.LogWarning("Resume sync of disk %d failed, please have a"
8217 " look at the status and troubleshoot the issue", idx)
8218 logging.warn("resume-sync of instance %s for disks %d failed",
8222 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8223 """Create all disks for an instance.
8225 This abstracts away some work from AddInstance.
8227 @type lu: L{LogicalUnit}
8228 @param lu: the logical unit on whose behalf we execute
8229 @type instance: L{objects.Instance}
8230 @param instance: the instance whose disks we should create
8232 @param to_skip: list of indices to skip
8233 @type target_node: string
8234 @param target_node: if passed, overrides the target node for creation
8236 @return: the success of the creation
8239 info = _GetInstanceInfoText(instance)
8240 if target_node is None:
8241 pnode = instance.primary_node
8242 all_nodes = instance.all_nodes
8247 if instance.disk_template in constants.DTS_FILEBASED:
8248 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8249 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8251 result.Raise("Failed to create directory '%s' on"
8252 " node %s" % (file_storage_dir, pnode))
8254 # Note: this needs to be kept in sync with adding of disks in
8255 # LUInstanceSetParams
8256 for idx, device in enumerate(instance.disks):
8257 if to_skip and idx in to_skip:
8259 logging.info("Creating volume %s for instance %s",
8260 device.iv_name, instance.name)
8262 for node in all_nodes:
8263 f_create = node == pnode
8264 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8267 def _RemoveDisks(lu, instance, target_node=None):
8268 """Remove all disks for an instance.
8270 This abstracts away some work from `AddInstance()` and
8271 `RemoveInstance()`. Note that in case some of the devices couldn't
8272 be removed, the removal will continue with the other ones (compare
8273 with `_CreateDisks()`).
8275 @type lu: L{LogicalUnit}
8276 @param lu: the logical unit on whose behalf we execute
8277 @type instance: L{objects.Instance}
8278 @param instance: the instance whose disks we should remove
8279 @type target_node: string
8280 @param target_node: used to override the node on which to remove the disks
8282 @return: the success of the removal
8285 logging.info("Removing block devices for instance %s", instance.name)
8288 for device in instance.disks:
8290 edata = [(target_node, device)]
8292 edata = device.ComputeNodeTree(instance.primary_node)
8293 for node, disk in edata:
8294 lu.cfg.SetDiskID(disk, node)
8295 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8297 lu.LogWarning("Could not remove block device %s on node %s,"
8298 " continuing anyway: %s", device.iv_name, node, msg)
8301 # if this is a DRBD disk, return its port to the pool
8302 if device.dev_type in constants.LDS_DRBD:
8303 tcp_port = device.logical_id[2]
8304 lu.cfg.AddTcpUdpPort(tcp_port)
8306 if instance.disk_template == constants.DT_FILE:
8307 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8311 tgt = instance.primary_node
8312 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8314 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8315 file_storage_dir, instance.primary_node, result.fail_msg)
8321 def _ComputeDiskSizePerVG(disk_template, disks):
8322 """Compute disk size requirements in the volume group
8325 def _compute(disks, payload):
8326 """Universal algorithm.
8331 vgs[disk[constants.IDISK_VG]] = \
8332 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8336 # Required free disk space as a function of disk and swap space
8338 constants.DT_DISKLESS: {},
8339 constants.DT_PLAIN: _compute(disks, 0),
8340 # 128 MB are added for drbd metadata for each disk
8341 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8342 constants.DT_FILE: {},
8343 constants.DT_SHARED_FILE: {},
8346 if disk_template not in req_size_dict:
8347 raise errors.ProgrammerError("Disk template '%s' size requirement"
8348 " is unknown" % disk_template)
8350 return req_size_dict[disk_template]
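# Illustrative result of _ComputeDiskSizePerVG for two 1024 MiB DRBD8 disks
# in volume group "xenvg" (name purely illustrative):
# {"xenvg": 2 * (1024 + DRBD_META_SIZE)} MiB; diskless and file-based
# templates need no volume group space.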
8353 def _ComputeDiskSize(disk_template, disks):
8354 """Compute disk size requirements in the volume group
8357 # Required free disk space as a function of disk and swap space
8359 constants.DT_DISKLESS: None,
8360 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8361 # 128 MB are added for drbd metadata for each disk
8363 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8364 constants.DT_FILE: None,
8365 constants.DT_SHARED_FILE: 0,
8366 constants.DT_BLOCK: 0,
8369 if disk_template not in req_size_dict:
8370 raise errors.ProgrammerError("Disk template '%s' size requirement"
8371 " is unknown" % disk_template)
8373 return req_size_dict[disk_template]
8376 def _FilterVmNodes(lu, nodenames):
8377 """Filters out non-vm_capable nodes from a list.
8379 @type lu: L{LogicalUnit}
8380 @param lu: the logical unit for which we check
8381 @type nodenames: list
8382 @param nodenames: the list of nodes on which we should check
8384 @return: the list of vm-capable nodes
8387 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8388 return [name for name in nodenames if name not in vm_nodes]
8391 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8392 """Hypervisor parameter validation.
8394 This function abstracts the hypervisor parameter validation to be
8395 used in both instance create and instance modify.
8397 @type lu: L{LogicalUnit}
8398 @param lu: the logical unit for which we check
8399 @type nodenames: list
8400 @param nodenames: the list of nodes on which we should check
8401 @type hvname: string
8402 @param hvname: the name of the hypervisor we should use
8403 @type hvparams: dict
8404 @param hvparams: the parameters which we need to check
8405 @raise errors.OpPrereqError: if the parameters are not valid
8408 nodenames = _FilterVmNodes(lu, nodenames)
8410 cluster = lu.cfg.GetClusterInfo()
8411 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8413 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8414 for node in nodenames:
8418 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8421 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8422 """OS parameters validation.
8424 @type lu: L{LogicalUnit}
8425 @param lu: the logical unit for which we check
8426 @type required: boolean
8427 @param required: whether the validation should fail if the OS is not
8429 @type nodenames: list
8430 @param nodenames: the list of nodes on which we should check
8431 @type osname: string
8432 @param osname: the name of the OS we should check
8433 @type osparams: dict
8434 @param osparams: the parameters which we need to check
8435 @raise errors.OpPrereqError: if the parameters are not valid
8438 nodenames = _FilterVmNodes(lu, nodenames)
8439 result = lu.rpc.call_os_validate(nodenames, required, osname,
8440 [constants.OS_VALIDATE_PARAMETERS],
8442 for node, nres in result.items():
8443 # we don't check for offline cases since this should be run only
8444 # against the master node and/or an instance's nodes
8445 nres.Raise("OS Parameters validation failed on node %s" % node)
8446 if not nres.payload:
8447 lu.LogInfo("OS %s not found on node %s, validation skipped",
8451 class LUInstanceCreate(LogicalUnit):
8452 """Create an instance.
8455 HPATH = "instance-add"
8456 HTYPE = constants.HTYPE_INSTANCE
8459 def CheckArguments(self):
8463 # do not require name_check to ease forward/backward compatibility
8465 if self.op.no_install and self.op.start:
8466 self.LogInfo("No-installation mode selected, disabling startup")
8467 self.op.start = False
8468 # validate/normalize the instance name
8469 self.op.instance_name = \
8470 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8472 if self.op.ip_check and not self.op.name_check:
8473 # TODO: make the ip check more flexible and not depend on the name check
8474 raise errors.OpPrereqError("Cannot do IP address check without a name"
8475 " check", errors.ECODE_INVAL)
8477 # check nics' parameter names
8478 for nic in self.op.nics:
8479 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8481 # check disks. parameter names and consistent adopt/no-adopt strategy
8482 has_adopt = has_no_adopt = False
8483 for disk in self.op.disks:
8484 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8485 if constants.IDISK_ADOPT in disk:
8489 if has_adopt and has_no_adopt:
8490 raise errors.OpPrereqError("Either all disks are adopted or none is",
8493 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8494 raise errors.OpPrereqError("Disk adoption is not supported for the"
8495 " '%s' disk template" %
8496 self.op.disk_template,
8498 if self.op.iallocator is not None:
8499 raise errors.OpPrereqError("Disk adoption not allowed with an"
8500 " iallocator script", errors.ECODE_INVAL)
8501 if self.op.mode == constants.INSTANCE_IMPORT:
8502 raise errors.OpPrereqError("Disk adoption not allowed for"
8503 " instance import", errors.ECODE_INVAL)
8505 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8506 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8507 " but no 'adopt' parameter given" %
8508 self.op.disk_template,
8511 self.adopt_disks = has_adopt
8513 # instance name verification
8514 if self.op.name_check:
8515 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8516 self.op.instance_name = self.hostname1.name
8517 # used in CheckPrereq for ip ping check
8518 self.check_ip = self.hostname1.ip
8520 self.check_ip = None
8522 # file storage checks
8523 if (self.op.file_driver and
8524 not self.op.file_driver in constants.FILE_DRIVER):
8525 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8526 self.op.file_driver, errors.ECODE_INVAL)
8528 if self.op.disk_template == constants.DT_FILE:
8529 opcodes.RequireFileStorage()
8530 elif self.op.disk_template == constants.DT_SHARED_FILE:
8531 opcodes.RequireSharedFileStorage()
8533 ### Node/iallocator related checks
8534 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8536 if self.op.pnode is not None:
8537 if self.op.disk_template in constants.DTS_INT_MIRROR:
8538 if self.op.snode is None:
8539 raise errors.OpPrereqError("The networked disk templates need"
8540 " a mirror node", errors.ECODE_INVAL)
8542 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8544 self.op.snode = None
8546 self._cds = _GetClusterDomainSecret()
8548 if self.op.mode == constants.INSTANCE_IMPORT:
8549 # On import force_variant must be True, because if we forced it at
8550 # initial install, our only chance when importing it back is that it
8552 self.op.force_variant = True
8554 if self.op.no_install:
8555 self.LogInfo("No-installation mode has no effect during import")
8557 elif self.op.mode == constants.INSTANCE_CREATE:
8558 if self.op.os_type is None:
8559 raise errors.OpPrereqError("No guest OS specified",
8561 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8562 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8563 " installation" % self.op.os_type,
8565 if self.op.disk_template is None:
8566 raise errors.OpPrereqError("No disk template specified",
8569 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8570 # Check handshake to ensure both clusters have the same domain secret
8571 src_handshake = self.op.source_handshake
8572 if not src_handshake:
8573 raise errors.OpPrereqError("Missing source handshake",
8576 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8579 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8582 # Load and check source CA
8583 self.source_x509_ca_pem = self.op.source_x509_ca
8584 if not self.source_x509_ca_pem:
8585 raise errors.OpPrereqError("Missing source X509 CA",
8589 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8591 except OpenSSL.crypto.Error, err:
8592 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8593 (err, ), errors.ECODE_INVAL)
8595 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8596 if errcode is not None:
8597 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8600 self.source_x509_ca = cert
8602 src_instance_name = self.op.source_instance_name
8603 if not src_instance_name:
8604 raise errors.OpPrereqError("Missing source instance name",
8607 self.source_instance_name = \
8608 netutils.GetHostname(name=src_instance_name).name
8611 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8612 self.op.mode, errors.ECODE_INVAL)
8614 def ExpandNames(self):
8615 """ExpandNames for CreateInstance.
8617 Figure out the right locks for instance creation.
8620 self.needed_locks = {}
8622 instance_name = self.op.instance_name
8623 # this is just a preventive check, but someone might still add this
8624 # instance in the meantime, and creation will fail at lock-add time
8625 if instance_name in self.cfg.GetInstanceList():
8626 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8627 instance_name, errors.ECODE_EXISTS)
8629 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8631 if self.op.iallocator:
8632 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8633 # specifying a group on instance creation and then selecting nodes from
8635 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8636 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8638 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8639 nodelist = [self.op.pnode]
8640 if self.op.snode is not None:
8641 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8642 nodelist.append(self.op.snode)
8643 self.needed_locks[locking.LEVEL_NODE] = nodelist
8644 # Lock resources of instance's primary and secondary nodes (copy to
8645 # prevent accidental modification)
8646 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8648 # in case of import lock the source node too
8649 if self.op.mode == constants.INSTANCE_IMPORT:
8650 src_node = self.op.src_node
8651 src_path = self.op.src_path
8653 if src_path is None:
8654 self.op.src_path = src_path = self.op.instance_name
8656 if src_node is None:
8657 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8658 self.op.src_node = None
8659 if os.path.isabs(src_path):
8660 raise errors.OpPrereqError("Importing an instance from a path"
8661 " requires a source node option",
8664 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8665 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8666 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8667 if not os.path.isabs(src_path):
8668 self.op.src_path = src_path = \
8669 utils.PathJoin(constants.EXPORT_DIR, src_path)
8671 def _RunAllocator(self):
8672 """Run the allocator based on input opcode.
8675 nics = [n.ToDict() for n in self.nics]
8676 ial = IAllocator(self.cfg, self.rpc,
8677 mode=constants.IALLOCATOR_MODE_ALLOC,
8678 name=self.op.instance_name,
8679 disk_template=self.op.disk_template,
8682 vcpus=self.be_full[constants.BE_VCPUS],
8683 memory=self.be_full[constants.BE_MAXMEM],
8686 hypervisor=self.op.hypervisor,
8689 ial.Run(self.op.iallocator)
8692 raise errors.OpPrereqError("Can't compute nodes using"
8693 " iallocator '%s': %s" %
8694 (self.op.iallocator, ial.info),
8696 if len(ial.result) != ial.required_nodes:
8697 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8698 " of nodes (%s), required %s" %
8699 (self.op.iallocator, len(ial.result),
8700 ial.required_nodes), errors.ECODE_FAULT)
8701 self.op.pnode = ial.result[0]
8702 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8703 self.op.instance_name, self.op.iallocator,
8704 utils.CommaJoin(ial.result))
8705 if ial.required_nodes == 2:
8706 self.op.snode = ial.result[1]
8708 def BuildHooksEnv(self):
8711 This runs on master, primary and secondary nodes of the instance.
8715 "ADD_MODE": self.op.mode,
8717 if self.op.mode == constants.INSTANCE_IMPORT:
8718 env["SRC_NODE"] = self.op.src_node
8719 env["SRC_PATH"] = self.op.src_path
8720 env["SRC_IMAGES"] = self.src_images
8722 env.update(_BuildInstanceHookEnv(
8723 name=self.op.instance_name,
8724 primary_node=self.op.pnode,
8725 secondary_nodes=self.secondaries,
8726 status=self.op.start,
8727 os_type=self.op.os_type,
8728 minmem=self.be_full[constants.BE_MINMEM],
8729 maxmem=self.be_full[constants.BE_MAXMEM],
8730 vcpus=self.be_full[constants.BE_VCPUS],
8731 nics=_NICListToTuple(self, self.nics),
8732 disk_template=self.op.disk_template,
8733 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8734 for d in self.disks],
8737 hypervisor_name=self.op.hypervisor,
8743 def BuildHooksNodes(self):
8744 """Build hooks nodes.
8747 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8750 def _ReadExportInfo(self):
8751 """Reads the export information from disk.
8753 It will override the opcode source node and path with the actual
8754 information, if these two were not specified before.
8756 @return: the export information
8759 assert self.op.mode == constants.INSTANCE_IMPORT
8761 src_node = self.op.src_node
8762 src_path = self.op.src_path
8764 if src_node is None:
8765 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8766 exp_list = self.rpc.call_export_list(locked_nodes)
8768 for node in exp_list:
8769 if exp_list[node].fail_msg:
8771 if src_path in exp_list[node].payload:
8773 self.op.src_node = src_node = node
8774 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8778 raise errors.OpPrereqError("No export found for relative path %s" %
8779 src_path, errors.ECODE_INVAL)
8781 _CheckNodeOnline(self, src_node)
8782 result = self.rpc.call_export_info(src_node, src_path)
8783 result.Raise("No export or invalid export found in dir %s" % src_path)
8785 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8786 if not export_info.has_section(constants.INISECT_EXP):
8787 raise errors.ProgrammerError("Corrupted export config",
8788 errors.ECODE_ENVIRON)
8790 ei_version = export_info.get(constants.INISECT_EXP, "version")
8791 if (int(ei_version) != constants.EXPORT_VERSION):
8792 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8793 (ei_version, constants.EXPORT_VERSION),
8794 errors.ECODE_ENVIRON)
8797 def _ReadExportParams(self, einfo):
8798 """Use export parameters as defaults.
8800 In case the opcode doesn't specify (as in override) some instance
8801 parameters, then try to use them from the export information, if
8805 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8807 if self.op.disk_template is None:
8808 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8809 self.op.disk_template = einfo.get(constants.INISECT_INS,
8811 if self.op.disk_template not in constants.DISK_TEMPLATES:
8812 raise errors.OpPrereqError("Disk template specified in configuration"
8813 " file is not one of the allowed values:"
8814 " %s" % " ".join(constants.DISK_TEMPLATES))
8816 raise errors.OpPrereqError("No disk template specified and the export"
8817 " is missing the disk_template information",
8820 if not self.op.disks:
8822 # TODO: import the disk iv_name too
8823 for idx in range(constants.MAX_DISKS):
8824 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8825 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8826 disks.append({constants.IDISK_SIZE: disk_sz})
8827 self.op.disks = disks
8828 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8829 raise errors.OpPrereqError("No disk info specified and the export"
8830 " is missing the disk information",
8833 if not self.op.nics:
8835 for idx in range(constants.MAX_NICS):
8836 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8838 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8839 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8846 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8847 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8849 if (self.op.hypervisor is None and
8850 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8851 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8853 if einfo.has_section(constants.INISECT_HYP):
8854 # use the export parameters but do not override the ones
8855 # specified by the user
8856 for name, value in einfo.items(constants.INISECT_HYP):
8857 if name not in self.op.hvparams:
8858 self.op.hvparams[name] = value
8860 if einfo.has_section(constants.INISECT_BEP):
8861 # use the parameters, without overriding
8862 for name, value in einfo.items(constants.INISECT_BEP):
8863 if name not in self.op.beparams:
8864 self.op.beparams[name] = value
8865 # Compatibility for the old "memory" be param
8866 if name == constants.BE_MEMORY:
8867 if constants.BE_MAXMEM not in self.op.beparams:
8868 self.op.beparams[constants.BE_MAXMEM] = value
8869 if constants.BE_MINMEM not in self.op.beparams:
8870 self.op.beparams[constants.BE_MINMEM] = value
8872 # try to read the parameters old style, from the main section
8873 for name in constants.BES_PARAMETERS:
8874 if (name not in self.op.beparams and
8875 einfo.has_option(constants.INISECT_INS, name)):
8876 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8878 if einfo.has_section(constants.INISECT_OSP):
8879 # use the parameters, without overriding
8880 for name, value in einfo.items(constants.INISECT_OSP):
8881 if name not in self.op.osparams:
8882 self.op.osparams[name] = value
8884 def _RevertToDefaults(self, cluster):
8885 """Revert the instance parameters to the default values.
8889 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8890 for name in self.op.hvparams.keys():
8891 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8892 del self.op.hvparams[name]
8894 be_defs = cluster.SimpleFillBE({})
8895 for name in self.op.beparams.keys():
8896 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8897 del self.op.beparams[name]
8899 nic_defs = cluster.SimpleFillNIC({})
8900 for nic in self.op.nics:
8901 for name in constants.NICS_PARAMETERS:
8902 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8905 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8906 for name in self.op.osparams.keys():
8907 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8908 del self.op.osparams[name]
8910 def _CalculateFileStorageDir(self):
8911 """Calculate final instance file storage dir.
8914 # file storage dir calculation/check
8915 self.instance_file_storage_dir = None
8916 if self.op.disk_template in constants.DTS_FILEBASED:
8917 # build the full file storage dir path
8920 if self.op.disk_template == constants.DT_SHARED_FILE:
8921 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8923 get_fsd_fn = self.cfg.GetFileStorageDir
8925 cfg_storagedir = get_fsd_fn()
8926 if not cfg_storagedir:
8927 raise errors.OpPrereqError("Cluster file storage dir not defined")
8928 joinargs.append(cfg_storagedir)
8930 if self.op.file_storage_dir is not None:
8931 joinargs.append(self.op.file_storage_dir)
8933 joinargs.append(self.op.instance_name)
8935 # pylint: disable=W0142
8936 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8938 def CheckPrereq(self):
8939 """Check prerequisites.
8942 self._CalculateFileStorageDir()
8944 if self.op.mode == constants.INSTANCE_IMPORT:
8945 export_info = self._ReadExportInfo()
8946 self._ReadExportParams(export_info)
8948 if (not self.cfg.GetVGName() and
8949 self.op.disk_template not in constants.DTS_NOT_LVM):
8950 raise errors.OpPrereqError("Cluster does not support lvm-based"
8951 " instances", errors.ECODE_STATE)
8953 if (self.op.hypervisor is None or
8954 self.op.hypervisor == constants.VALUE_AUTO):
8955 self.op.hypervisor = self.cfg.GetHypervisorType()
8957 cluster = self.cfg.GetClusterInfo()
8958 enabled_hvs = cluster.enabled_hypervisors
8959 if self.op.hypervisor not in enabled_hvs:
8960 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8961 " cluster (%s)" % (self.op.hypervisor,
8962 ",".join(enabled_hvs)),
8965 # Check tag validity
8966 for tag in self.op.tags:
8967 objects.TaggableObject.ValidateTag(tag)
8969 # check hypervisor parameter syntax (locally)
8970 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8971 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8973 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8974 hv_type.CheckParameterSyntax(filled_hvp)
8975 self.hv_full = filled_hvp
8976 # check that we don't specify global parameters on an instance
8977 _CheckGlobalHvParams(self.op.hvparams)
8979 # fill and remember the beparams dict
8980 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8981 for param, value in self.op.beparams.iteritems():
8982 if value == constants.VALUE_AUTO:
8983 self.op.beparams[param] = default_beparams[param]
8984 objects.UpgradeBeParams(self.op.beparams)
8985 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8986 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8988 # build os parameters
8989 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8991 # now that hvp/bep are in final format, let's reset to defaults,
8993 if self.op.identify_defaults:
8994 self._RevertToDefaults(cluster)
8998 for idx, nic in enumerate(self.op.nics):
8999 nic_mode_req = nic.get(constants.INIC_MODE, None)
9000 nic_mode = nic_mode_req
9001 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9002 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9004 # in routed mode, for the first nic, the default ip is 'auto'
9005 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9006 default_ip_mode = constants.VALUE_AUTO
9008 default_ip_mode = constants.VALUE_NONE
9010 # ip validity checks
9011 ip = nic.get(constants.INIC_IP, default_ip_mode)
9012 if ip is None or ip.lower() == constants.VALUE_NONE:
9014 elif ip.lower() == constants.VALUE_AUTO:
9015 if not self.op.name_check:
9016 raise errors.OpPrereqError("IP address set to auto but name checks"
9017 " have been skipped",
9019 nic_ip = self.hostname1.ip
9021 if not netutils.IPAddress.IsValid(ip):
9022 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9026 # TODO: check the ip address for uniqueness
9027 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9028 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9031 # MAC address verification
9032 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9033 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9034 mac = utils.NormalizeAndValidateMac(mac)
9037 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9038 except errors.ReservationError:
9039 raise errors.OpPrereqError("MAC address %s already in use"
9040 " in cluster" % mac,
9041 errors.ECODE_NOTUNIQUE)
9043 # Build nic parameters
9044 link = nic.get(constants.INIC_LINK, None)
9045 if link == constants.VALUE_AUTO:
9046 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9049 nicparams[constants.NIC_MODE] = nic_mode
9051 nicparams[constants.NIC_LINK] = link
9053 check_params = cluster.SimpleFillNIC(nicparams)
9054 objects.NIC.CheckParameterSyntax(check_params)
9055 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9057 # disk checks/pre-build
9058 default_vg = self.cfg.GetVGName()
9060 for disk in self.op.disks:
9061 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9062 if mode not in constants.DISK_ACCESS_SET:
9063 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9064 mode, errors.ECODE_INVAL)
9065 size = disk.get(constants.IDISK_SIZE, None)
9067 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9070 except (TypeError, ValueError):
9071 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9074 data_vg = disk.get(constants.IDISK_VG, default_vg)
9076 constants.IDISK_SIZE: size,
9077 constants.IDISK_MODE: mode,
9078 constants.IDISK_VG: data_vg,
9079 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9081 if constants.IDISK_ADOPT in disk:
9082 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9083 self.disks.append(new_disk)
9085 if self.op.mode == constants.INSTANCE_IMPORT:
9087 for idx in range(len(self.disks)):
9088 option = "disk%d_dump" % idx
9089 if export_info.has_option(constants.INISECT_INS, option):
9090 # FIXME: are the old os-es, disk sizes, etc. useful?
9091 export_name = export_info.get(constants.INISECT_INS, option)
9092 image = utils.PathJoin(self.op.src_path, export_name)
9093 disk_images.append(image)
9095 disk_images.append(False)
9097 self.src_images = disk_images
9099 old_name = export_info.get(constants.INISECT_INS, "name")
9100 if self.op.instance_name == old_name:
9101 for idx, nic in enumerate(self.nics):
9102 if nic.mac == constants.VALUE_AUTO:
9103 nic_mac_ini = "nic%d_mac" % idx
9104 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9106 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9108 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9109 if self.op.ip_check:
9110 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9111 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9112 (self.check_ip, self.op.instance_name),
9113 errors.ECODE_NOTUNIQUE)
9115 #### mac address generation
9116 # By generating here the mac address both the allocator and the hooks get
9117 # the real final mac address rather than the 'auto' or 'generate' value.
9118 # There is a race condition between the generation and the instance object
9119 # creation, which means that we know the mac is valid now, but we're not
9120 # sure it will be when we actually add the instance. If things go bad
9121 # adding the instance will abort because of a duplicate mac, and the
9122 # creation job will fail.
9123 for nic in self.nics:
9124 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9125 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9129 if self.op.iallocator is not None:
9130 self._RunAllocator()
9132 # Release all unneeded node locks
9133 _ReleaseLocks(self, locking.LEVEL_NODE,
9134 keep=filter(None, [self.op.pnode, self.op.snode,
9137 #### node related checks
9139 # check primary node
9140 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9141 assert self.pnode is not None, \
9142 "Cannot retrieve locked node %s" % self.op.pnode
9144 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9145 pnode.name, errors.ECODE_STATE)
9147 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9148 pnode.name, errors.ECODE_STATE)
9149 if not pnode.vm_capable:
9150 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9151 " '%s'" % pnode.name, errors.ECODE_STATE)
9153 self.secondaries = []
9155 # mirror node verification
9156 if self.op.disk_template in constants.DTS_INT_MIRROR:
9157 if self.op.snode == pnode.name:
9158 raise errors.OpPrereqError("The secondary node cannot be the"
9159 " primary node", errors.ECODE_INVAL)
9160 _CheckNodeOnline(self, self.op.snode)
9161 _CheckNodeNotDrained(self, self.op.snode)
9162 _CheckNodeVmCapable(self, self.op.snode)
9163 self.secondaries.append(self.op.snode)
9165 nodenames = [pnode.name] + self.secondaries
9167 if not self.adopt_disks:
9168 # Check lv size requirements, if not adopting
9169 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9170 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9172 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9173 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9174 disk[constants.IDISK_ADOPT])
9175 for disk in self.disks])
9176 if len(all_lvs) != len(self.disks):
9177 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9179 for lv_name in all_lvs:
9181 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9182 # to ReserveLV use the same syntax
9183 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9184 except errors.ReservationError:
9185 raise errors.OpPrereqError("LV named %s used by another instance" %
9186 lv_name, errors.ECODE_NOTUNIQUE)
9188 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9189 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9191 node_lvs = self.rpc.call_lv_list([pnode.name],
9192 vg_names.payload.keys())[pnode.name]
9193 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9194 node_lvs = node_lvs.payload
9196 delta = all_lvs.difference(node_lvs.keys())
9198 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9199 utils.CommaJoin(delta),
9201 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9203 raise errors.OpPrereqError("Online logical volumes found, cannot"
9204 " adopt: %s" % utils.CommaJoin(online_lvs),
9206 # update the size of each disk based on what is found
9207 for dsk in self.disks:
9208 dsk[constants.IDISK_SIZE] = \
9209 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9210 dsk[constants.IDISK_ADOPT])][0]))
9212 elif self.op.disk_template == constants.DT_BLOCK:
9213 # Normalize and de-duplicate device paths
9214 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9215 for disk in self.disks])
9216 if len(all_disks) != len(self.disks):
9217 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9219 baddisks = [d for d in all_disks
9220 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9222 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9223 " cannot be adopted" %
9224 (", ".join(baddisks),
9225 constants.ADOPTABLE_BLOCKDEV_ROOT),
9228 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9229 list(all_disks))[pnode.name]
9230 node_disks.Raise("Cannot get block device information from node %s" %
9232 node_disks = node_disks.payload
9233 delta = all_disks.difference(node_disks.keys())
9235 raise errors.OpPrereqError("Missing block device(s): %s" %
9236 utils.CommaJoin(delta),
9238 for dsk in self.disks:
9239 dsk[constants.IDISK_SIZE] = \
9240 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9242 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9244 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9245 # check OS parameters (remotely)
9246 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9248 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9250 # memory check on primary node
9251 #TODO(dynmem): use MINMEM for checking
9253 _CheckNodeFreeMemory(self, self.pnode.name,
9254 "creating instance %s" % self.op.instance_name,
9255 self.be_full[constants.BE_MAXMEM],
9258 self.dry_run_result = list(nodenames)
9260 def Exec(self, feedback_fn):
9261 """Create and add the instance to the cluster.
9264 instance = self.op.instance_name
9265 pnode_name = self.pnode.name
9267 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9268 self.owned_locks(locking.LEVEL_NODE)), \
9269 "Node locks differ from node resource locks"
9271 ht_kind = self.op.hypervisor
9272     if ht_kind in constants.HTS_REQ_PORT:
9273       network_port = self.cfg.AllocatePort()
9274     else:
9275       network_port = None
9277 disks = _GenerateDiskTemplate(self,
9278 self.op.disk_template,
9279 instance, pnode_name,
9282 self.instance_file_storage_dir,
9283 self.op.file_driver,
9287 iobj = objects.Instance(name=instance, os=self.op.os_type,
9288 primary_node=pnode_name,
9289 nics=self.nics, disks=disks,
9290 disk_template=self.op.disk_template,
9291 admin_state=constants.ADMINST_DOWN,
9292 network_port=network_port,
9293 beparams=self.op.beparams,
9294 hvparams=self.op.hvparams,
9295 hypervisor=self.op.hypervisor,
9296 osparams=self.op.osparams,
9300 for tag in self.op.tags:
9303 if self.adopt_disks:
9304 if self.op.disk_template == constants.DT_PLAIN:
9305 # rename LVs to the newly-generated names; we need to construct
9306 # 'fake' LV disks with the old data, plus the new unique_id
9307         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9308         rename_to = []
9309         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9310 rename_to.append(t_dsk.logical_id)
9311 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9312 self.cfg.SetDiskID(t_dsk, pnode_name)
9313 result = self.rpc.call_blockdev_rename(pnode_name,
9314 zip(tmp_disks, rename_to))
9315         result.Raise("Failed to rename adopted LVs")
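# After the rename the adopted volumes carry the freshly generated unique
# names, so the rest of the creation path can treat them exactly like disks
# it created itself.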
9317       feedback_fn("* creating instance disks...")
9318       try:
9319         _CreateDisks(self, iobj)
9320       except errors.OpExecError:
9321         self.LogWarning("Device creation failed, reverting...")
9322         try:
9323           _RemoveDisks(self, iobj)
9324         finally:
9325           self.cfg.ReleaseDRBDMinors(instance)
9326           raise
9328 feedback_fn("adding instance %s to cluster config" % instance)
9330 self.cfg.AddInstance(iobj, self.proc.GetECId())
9332 # Declare that we don't want to remove the instance lock anymore, as we've
9333 # added the instance to the config
9334 del self.remove_locks[locking.LEVEL_INSTANCE]
9336 if self.op.mode == constants.INSTANCE_IMPORT:
9337 # Release unused nodes
9338       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9339     else:
9340       # Release all node locks
9341       _ReleaseLocks(self, locking.LEVEL_NODE)
9344     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9345       feedback_fn("* wiping instance disks...")
9346       try:
9347         _WipeDisks(self, iobj)
9348       except errors.OpExecError, err:
9349         logging.exception("Wiping disks failed")
9350         self.LogWarning("Wiping instance disks failed (%s)", err)
9351         disk_abort = True
9352     else:
9353       disk_abort = False
9354     if disk_abort:
9355       pass  # Something is already wrong with the disks, don't do anything else
9356     elif self.op.wait_for_sync:
9357 disk_abort = not _WaitForSync(self, iobj)
9358 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9359 # make sure the disks are not degraded (still sync-ing is ok)
9360 feedback_fn("* checking mirrors status")
9361 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9362     else:
9363       disk_abort = False
9365     if disk_abort:
9366       _RemoveDisks(self, iobj)
9367       self.cfg.RemoveInstance(iobj.name)
9368       # Make sure the instance lock gets removed
9369       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9370       raise errors.OpExecError("There are some degraded disks for"
9371                                " this instance")
9373 # Release all node resource locks
9374 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9376 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9377 if self.op.mode == constants.INSTANCE_CREATE:
9378 if not self.op.no_install:
9379 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9380 not self.op.wait_for_sync)
9382 feedback_fn("* pausing disk sync to install instance OS")
9383 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9385 for idx, success in enumerate(result.payload):
9387 logging.warn("pause-sync of instance %s for disk %d failed",
9390 feedback_fn("* running the instance OS create scripts...")
9391 # FIXME: pass debug option from opcode to backend
9392           os_add_result = \
9393             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9394 self.op.debug_level)
9396 feedback_fn("* resuming disk sync")
9397 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9399 for idx, success in enumerate(result.payload):
9401 logging.warn("resume-sync of instance %s for disk %d failed",
9404 os_add_result.Raise("Could not add os for instance %s"
9405 " on node %s" % (instance, pnode_name))
9407 elif self.op.mode == constants.INSTANCE_IMPORT:
9408 feedback_fn("* running the instance OS import scripts...")
9412 for idx, image in enumerate(self.src_images):
9416 # FIXME: pass debug option from opcode to backend
9417 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9418 constants.IEIO_FILE, (image, ),
9419 constants.IEIO_SCRIPT,
9420 (iobj.disks[idx], idx),
9422 transfers.append(dt)
9425 masterd.instance.TransferInstanceData(self, feedback_fn,
9426 self.op.src_node, pnode_name,
9427 self.pnode.secondary_ip,
9429 if not compat.all(import_result):
9430 self.LogWarning("Some disks for instance %s on node %s were not"
9431 " imported successfully" % (instance, pnode_name))
9433 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9434 feedback_fn("* preparing remote import...")
9435 # The source cluster will stop the instance before attempting to make a
9436 # connection. In some cases stopping an instance can take a long time,
9437 # hence the shutdown timeout is added to the connection timeout.
9438 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9439 self.op.source_shutdown_timeout)
9440 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9442 assert iobj.primary_node == self.pnode.name
9444 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9445 self.source_x509_ca,
9446 self._cds, timeouts)
9447 if not compat.all(disk_results):
9448 # TODO: Should the instance still be started, even if some disks
9449 # failed to import (valid for local imports, too)?
9450 self.LogWarning("Some disks for instance %s on node %s were not"
9451 " imported successfully" % (instance, pnode_name))
9453 # Run rename script on newly imported instance
9454 assert iobj.name == instance
9455 feedback_fn("Running rename script for %s" % instance)
9456 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9457 self.source_instance_name,
9458 self.op.debug_level)
9460 self.LogWarning("Failed to run rename script for %s on node"
9461 " %s: %s" % (instance, pnode_name, result.fail_msg))
9464 # also checked in the prereq part
9465 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9468 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9471 iobj.admin_state = constants.ADMINST_UP
9472 self.cfg.Update(iobj, feedback_fn)
9473 logging.info("Starting instance %s on node %s", instance, pnode_name)
9474 feedback_fn("* starting instance...")
9475 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9477 result.Raise("Could not start instance")
9479 return list(iobj.all_nodes)
9482 class LUInstanceConsole(NoHooksLU):
9483 """Connect to an instance's console.
9485 This is somewhat special in that it returns the command line that
9486   you need to run on the master node in order to connect to the console.
9492 def ExpandNames(self):
9493 self.share_locks = _ShareAll()
9494 self._ExpandAndLockInstance()
9496 def CheckPrereq(self):
9497 """Check prerequisites.
9499 This checks that the instance is in the cluster.
9502 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9503 assert self.instance is not None, \
9504 "Cannot retrieve locked instance %s" % self.op.instance_name
9505 _CheckNodeOnline(self, self.instance.primary_node)
9507 def Exec(self, feedback_fn):
9508 """Connect to the console of an instance
9511 instance = self.instance
9512 node = instance.primary_node
9514 node_insts = self.rpc.call_instance_list([node],
9515 [instance.hypervisor])[node]
9516 node_insts.Raise("Can't get node information from %s" % node)
9518 if instance.name not in node_insts.payload:
9519 if instance.admin_state == constants.ADMINST_UP:
9520 state = constants.INSTST_ERRORDOWN
9521 elif instance.admin_state == constants.ADMINST_DOWN:
9522 state = constants.INSTST_ADMINDOWN
9524 state = constants.INSTST_ADMINOFFLINE
9525 raise errors.OpExecError("Instance %s is not running (state %s)" %
9526 (instance.name, state))
9528 logging.debug("Connecting to console of %s on %s", instance.name, node)
9530 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9533 def _GetInstanceConsole(cluster, instance):
9534 """Returns console information for an instance.
9536 @type cluster: L{objects.Cluster}
9537 @type instance: L{objects.Instance}
9541 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9542 # beparams and hvparams are passed separately, to avoid editing the
9543 # instance and then saving the defaults in the instance itself.
9544 hvparams = cluster.FillHV(instance)
9545 beparams = cluster.FillBE(instance)
9546 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9548 assert console.instance == instance.name
9549 assert console.Validate()
9551 return console.ToDict()
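# The console object is returned serialized; its exact fields (command to run,
# host/port to connect to, etc.) depend on the hypervisor's GetInstanceConsole
# implementation, and the client side uses them to open the actual console.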
9554 class LUInstanceReplaceDisks(LogicalUnit):
9555 """Replace the disks of an instance.
9558 HPATH = "mirrors-replace"
9559 HTYPE = constants.HTYPE_INSTANCE
9562 def CheckArguments(self):
9563     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9564                                   self.op.iallocator)
9566 def ExpandNames(self):
9567 self._ExpandAndLockInstance()
9569 assert locking.LEVEL_NODE not in self.needed_locks
9570 assert locking.LEVEL_NODE_RES not in self.needed_locks
9571 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9573 assert self.op.iallocator is None or self.op.remote_node is None, \
9574 "Conflicting options"
9576 if self.op.remote_node is not None:
9577 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9579 # Warning: do not remove the locking of the new secondary here
9580 # unless DRBD8.AddChildren is changed to work in parallel;
9581 # currently it doesn't since parallel invocations of
9582 # FindUnusedMinor will conflict
9583 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9584       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9585     else:
9586       self.needed_locks[locking.LEVEL_NODE] = []
9587 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9589 if self.op.iallocator is not None:
9590 # iallocator will select a new node in the same group
9591 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9593 self.needed_locks[locking.LEVEL_NODE_RES] = []
9595 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9596 self.op.iallocator, self.op.remote_node,
9597 self.op.disks, False, self.op.early_release)
9599 self.tasklets = [self.replacer]
9601 def DeclareLocks(self, level):
9602 if level == locking.LEVEL_NODEGROUP:
9603 assert self.op.remote_node is None
9604 assert self.op.iallocator is not None
9605 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9607 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9608 # Lock all groups used by instance optimistically; this requires going
9609 # via the node before it's locked, requiring verification later on
9610 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9611 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9613 elif level == locking.LEVEL_NODE:
9614 if self.op.iallocator is not None:
9615 assert self.op.remote_node is None
9616 assert not self.needed_locks[locking.LEVEL_NODE]
9618 # Lock member nodes of all locked groups
9619 self.needed_locks[locking.LEVEL_NODE] = [node_name
9620 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9621           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9622       else:
9623         self._LockInstancesNodes()
9624 elif level == locking.LEVEL_NODE_RES:
9626 self.needed_locks[locking.LEVEL_NODE_RES] = \
9627 self.needed_locks[locking.LEVEL_NODE]
9629 def BuildHooksEnv(self):
9632 This runs on the master, the primary and all the secondaries.
9635 instance = self.replacer.instance
9637 "MODE": self.op.mode,
9638 "NEW_SECONDARY": self.op.remote_node,
9639 "OLD_SECONDARY": instance.secondary_nodes[0],
9641 env.update(_BuildInstanceHookEnvByObject(self, instance))
9644 def BuildHooksNodes(self):
9645 """Build hooks nodes.
9648 instance = self.replacer.instance
9650 self.cfg.GetMasterNode(),
9651 instance.primary_node,
9653 if self.op.remote_node is not None:
9654 nl.append(self.op.remote_node)
9657 def CheckPrereq(self):
9658 """Check prerequisites.
9661 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9662 self.op.iallocator is None)
9664 # Verify if node group locks are still correct
9665 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9667 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9669 return LogicalUnit.CheckPrereq(self)
9672 class TLReplaceDisks(Tasklet):
9673 """Replaces disks for an instance.
9675 Note: Locking is not within the scope of this class.
9678 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9679 disks, delay_iallocator, early_release):
9680 """Initializes this class.
9683 Tasklet.__init__(self, lu)
9686     self.instance_name = instance_name
9687     self.mode = mode
9688     self.iallocator_name = iallocator_name
9689     self.remote_node = remote_node
9690     self.disks = disks
9691     self.delay_iallocator = delay_iallocator
9692     self.early_release = early_release
9695 self.instance = None
9696 self.new_node = None
9697 self.target_node = None
9698 self.other_node = None
9699 self.remote_node_info = None
9700 self.node_secondary_ip = None
9702   @staticmethod
9703   def CheckArguments(mode, remote_node, iallocator):
9704 """Helper function for users of this class.
9707 # check for valid parameter combination
9708 if mode == constants.REPLACE_DISK_CHG:
9709 if remote_node is None and iallocator is None:
9710 raise errors.OpPrereqError("When changing the secondary either an"
9711 " iallocator script must be used or the"
9712 " new node given", errors.ECODE_INVAL)
9714 if remote_node is not None and iallocator is not None:
9715 raise errors.OpPrereqError("Give either the iallocator or the new"
9716 " secondary, not both", errors.ECODE_INVAL)
9718 elif remote_node is not None or iallocator is not None:
9719 # Not replacing the secondary
9720 raise errors.OpPrereqError("The iallocator and new node options can"
9721 " only be used when changing the"
9722 " secondary node", errors.ECODE_INVAL)
9725 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9726 """Compute a new secondary node using an IAllocator.
9729 ial = IAllocator(lu.cfg, lu.rpc,
9730 mode=constants.IALLOCATOR_MODE_RELOC,
9732 relocate_from=list(relocate_from))
9734 ial.Run(iallocator_name)
9737 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9738 " %s" % (iallocator_name, ial.info),
9741 if len(ial.result) != ial.required_nodes:
9742 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9743 " of nodes (%s), required %s" %
9745 len(ial.result), ial.required_nodes),
9748 remote_node_name = ial.result[0]
9750 lu.LogInfo("Selected new secondary for instance '%s': %s",
9751 instance_name, remote_node_name)
9753 return remote_node_name
9755 def _FindFaultyDisks(self, node_name):
9756 """Wrapper for L{_FindFaultyInstanceDisks}.
9759 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9762 def _CheckDisksActivated(self, instance):
9763 """Checks if the instance disks are activated.
9765 @param instance: The instance to check disks
9766 @return: True if they are activated, False otherwise
9769 nodes = instance.all_nodes
9771 for idx, dev in enumerate(instance.disks):
9773 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9774 self.cfg.SetDiskID(dev, node)
9776 result = self.rpc.call_blockdev_find(node, dev)
9780 elif result.fail_msg or not result.payload:
9785 def CheckPrereq(self):
9786 """Check prerequisites.
9788 This checks that the instance is in the cluster.
9791 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9792 assert instance is not None, \
9793 "Cannot retrieve locked instance %s" % self.instance_name
9795 if instance.disk_template != constants.DT_DRBD8:
9796 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9797 " instances", errors.ECODE_INVAL)
9799 if len(instance.secondary_nodes) != 1:
9800 raise errors.OpPrereqError("The instance has a strange layout,"
9801 " expected one secondary but found %d" %
9802 len(instance.secondary_nodes),
9805 if not self.delay_iallocator:
9806 self._CheckPrereq2()
9808 def _CheckPrereq2(self):
9809 """Check prerequisites, second part.
9811 This function should always be part of CheckPrereq. It was separated and is
9812 now called from Exec because during node evacuation iallocator was only
9813     called with an unmodified cluster model, not taking planned changes into account.
9817 instance = self.instance
9818 secondary_node = instance.secondary_nodes[0]
9820 if self.iallocator_name is None:
9821 remote_node = self.remote_node
9823 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9824 instance.name, instance.secondary_nodes)
9826 if remote_node is None:
9827 self.remote_node_info = None
9829 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9830 "Remote node '%s' is not locked" % remote_node
9832 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9833 assert self.remote_node_info is not None, \
9834 "Cannot retrieve locked node %s" % remote_node
9836 if remote_node == self.instance.primary_node:
9837 raise errors.OpPrereqError("The specified node is the primary node of"
9838 " the instance", errors.ECODE_INVAL)
9840 if remote_node == secondary_node:
9841 raise errors.OpPrereqError("The specified node is already the"
9842 " secondary node of the instance",
9845 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9846 constants.REPLACE_DISK_CHG):
9847 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9850 if self.mode == constants.REPLACE_DISK_AUTO:
9851 if not self._CheckDisksActivated(instance):
9852 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9853 " first" % self.instance_name,
9855 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9856 faulty_secondary = self._FindFaultyDisks(secondary_node)
9858 if faulty_primary and faulty_secondary:
9859 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9860 " one node and can not be repaired"
9861 " automatically" % self.instance_name,
9865 self.disks = faulty_primary
9866 self.target_node = instance.primary_node
9867 self.other_node = secondary_node
9868 check_nodes = [self.target_node, self.other_node]
9869 elif faulty_secondary:
9870 self.disks = faulty_secondary
9871 self.target_node = secondary_node
9872 self.other_node = instance.primary_node
9873 check_nodes = [self.target_node, self.other_node]
9879 # Non-automatic modes
9880 if self.mode == constants.REPLACE_DISK_PRI:
9881 self.target_node = instance.primary_node
9882 self.other_node = secondary_node
9883 check_nodes = [self.target_node, self.other_node]
9885 elif self.mode == constants.REPLACE_DISK_SEC:
9886 self.target_node = secondary_node
9887 self.other_node = instance.primary_node
9888 check_nodes = [self.target_node, self.other_node]
9890 elif self.mode == constants.REPLACE_DISK_CHG:
9891 self.new_node = remote_node
9892 self.other_node = instance.primary_node
9893 self.target_node = secondary_node
9894 check_nodes = [self.new_node, self.other_node]
9896 _CheckNodeNotDrained(self.lu, remote_node)
9897 _CheckNodeVmCapable(self.lu, remote_node)
9899 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9900 assert old_node_info is not None
9901 if old_node_info.offline and not self.early_release:
9902 # doesn't make sense to delay the release
9903 self.early_release = True
9904 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9905 " early-release mode", secondary_node)
9907     else:
9908       raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9909                                    self.mode)
9911     # If not specified, all disks should be replaced
9912     if not self.disks:
9913       self.disks = range(len(self.instance.disks))
9915 for node in check_nodes:
9916 _CheckNodeOnline(self.lu, node)
9918     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9919                                                           self.other_node,
9920                                                           self.target_node]
9921                               if node_name is not None)
9923 # Release unneeded node and node resource locks
9924 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9925 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9927 # Release any owned node group
9928 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9929 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9931 # Check whether disks are valid
9932 for disk_idx in self.disks:
9933 instance.FindDisk(disk_idx)
9935 # Get secondary node IP addresses
9936 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9937 in self.cfg.GetMultiNodeInfo(touched_nodes))
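# The secondary IPs collected here are later passed to the DRBD
# disconnect/attach RPCs when the replacement reconfigures the mirrors.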
9939 def Exec(self, feedback_fn):
9940 """Execute disk replacement.
9942 This dispatches the disk replacement to the appropriate handler.
9945 if self.delay_iallocator:
9946 self._CheckPrereq2()
9949 # Verify owned locks before starting operation
9950 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9951 assert set(owned_nodes) == set(self.node_secondary_ip), \
9952 ("Incorrect node locks, owning %s, expected %s" %
9953 (owned_nodes, self.node_secondary_ip.keys()))
9954 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9955 self.lu.owned_locks(locking.LEVEL_NODE_RES))
9957 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9958 assert list(owned_instances) == [self.instance_name], \
9959 "Instance '%s' not locked" % self.instance_name
9961 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9962 "Should not own any node group lock at this point"
9964     if not self.disks:
9965       feedback_fn("No disks need replacement")
9966       return
9968 feedback_fn("Replacing disk(s) %s for %s" %
9969 (utils.CommaJoin(self.disks), self.instance.name))
9971 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9973     # Activate the instance disks if we're replacing them on a down instance
9974     if activate_disks:
9975       _StartInstanceDisks(self.lu, self.instance, True)
9978 # Should we replace the secondary node?
9979 if self.new_node is not None:
9980 fn = self._ExecDrbd8Secondary
9981       else:
9982         fn = self._ExecDrbd8DiskOnly
9984 result = fn(feedback_fn)
9986 # Deactivate the instance disks if we're replacing them on a
9989 _SafeShutdownInstanceDisks(self.lu, self.instance)
9991 assert not self.lu.owned_locks(locking.LEVEL_NODE)
9994 # Verify owned locks
9995 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9996 nodes = frozenset(self.node_secondary_ip)
9997 assert ((self.early_release and not owned_nodes) or
9998 (not self.early_release and not (set(owned_nodes) - nodes))), \
9999 ("Not owning the correct locks, early_release=%s, owned=%r,"
10000 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10004 def _CheckVolumeGroup(self, nodes):
10005 self.lu.LogInfo("Checking volume groups")
10007 vgname = self.cfg.GetVGName()
10009 # Make sure volume group exists on all involved nodes
10010 results = self.rpc.call_vg_list(nodes)
10012 raise errors.OpExecError("Can't list volume groups on the nodes")
10015 res = results[node]
10016 res.Raise("Error checking node %s" % node)
10017 if vgname not in res.payload:
10018 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10021 def _CheckDisksExistence(self, nodes):
10022 # Check disk existence
10023 for idx, dev in enumerate(self.instance.disks):
10024 if idx not in self.disks:
10028 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10029 self.cfg.SetDiskID(dev, node)
10031 result = self.rpc.call_blockdev_find(node, dev)
10033 msg = result.fail_msg
10034 if msg or not result.payload:
10036 msg = "disk not found"
10037 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10040 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10041 for idx, dev in enumerate(self.instance.disks):
10042 if idx not in self.disks:
10045 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10048 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10050 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10051 " replace disks for instance %s" %
10052 (node_name, self.instance.name))
10054 def _CreateNewStorage(self, node_name):
10055 """Create new storage on the primary or secondary node.
10057 This is only used for same-node replaces, not for changing the
10058 secondary node, hence we don't want to modify the existing disk.
10063 for idx, dev in enumerate(self.instance.disks):
10064 if idx not in self.disks:
10067 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10069 self.cfg.SetDiskID(dev, node_name)
10071 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10072 names = _GenerateUniqueNames(self.lu, lv_names)
10074 vg_data = dev.children[0].logical_id[0]
10075 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10076 logical_id=(vg_data, names[0]))
10077 vg_meta = dev.children[1].logical_id[0]
10078 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10079 logical_id=(vg_meta, names[1]))
10081 new_lvs = [lv_data, lv_meta]
10082 old_lvs = [child.Copy() for child in dev.children]
10083 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10085 # we pass force_create=True to force the LVM creation
10086 for new_lv in new_lvs:
10087 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10088 _GetInstanceInfoText(self.instance), False)
10092 def _CheckDevices(self, node_name, iv_names):
10093 for name, (dev, _, _) in iv_names.iteritems():
10094 self.cfg.SetDiskID(dev, node_name)
10096 result = self.rpc.call_blockdev_find(node_name, dev)
10098 msg = result.fail_msg
10099 if msg or not result.payload:
10101 msg = "disk not found"
10102 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10105 if result.payload.is_degraded:
10106 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10108 def _RemoveOldStorage(self, node_name, iv_names):
10109 for name, (_, old_lvs, _) in iv_names.iteritems():
10110 self.lu.LogInfo("Remove logical volumes for %s" % name)
10113 self.cfg.SetDiskID(lv, node_name)
10115 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10117 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10118 hint="remove unused LVs manually")
10120 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10121 """Replace a disk on the primary or secondary for DRBD 8.
10123 The algorithm for replace is quite complicated:
10125 1. for each disk to be replaced:
10127 1. create new LVs on the target node with unique names
10128 1. detach old LVs from the drbd device
10129 1. rename old LVs to name_replaced.<time_t>
10130 1. rename new LVs to old LVs
10131 1. attach the new LVs (with the old names now) to the drbd device
10133 1. wait for sync across all devices
10135 1. for each modified disk:
10137       1. remove old LVs (which have the name name_replaced.<time_t>)
10139 Failures are not very well handled.
10141     """
10143     steps_total = 6
10144     # Step: check device activation
10145 self.lu.LogStep(1, steps_total, "Check device existence")
10146 self._CheckDisksExistence([self.other_node, self.target_node])
10147 self._CheckVolumeGroup([self.target_node, self.other_node])
10149 # Step: check other node consistency
10150 self.lu.LogStep(2, steps_total, "Check peer consistency")
10151 self._CheckDisksConsistency(self.other_node,
10152 self.other_node == self.instance.primary_node,
10155 # Step: create new storage
10156 self.lu.LogStep(3, steps_total, "Allocate new storage")
10157 iv_names = self._CreateNewStorage(self.target_node)
10159 # Step: for each lv, detach+rename*2+attach
10160 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
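# The general idea of this step: for every affected disk, detach the old LVs
# from the DRBD device, swap the names of the old and new LVs, and re-attach,
# so the DRBD device itself (and thus the instance) keeps running throughout.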
10161 for dev, old_lvs, new_lvs in iv_names.itervalues():
10162 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10164 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10166 result.Raise("Can't detach drbd from local storage on node"
10167 " %s for device %s" % (self.target_node, dev.iv_name))
10169 #cfg.Update(instance)
10171 # ok, we created the new LVs, so now we know we have the needed
10172 # storage; as such, we proceed on the target node to rename
10173 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10174 # using the assumption that logical_id == physical_id (which in
10175 # turn is the unique_id on that node)
10177 # FIXME(iustin): use a better name for the replaced LVs
10178 temp_suffix = int(time.time())
10179 ren_fn = lambda d, suff: (d.physical_id[0],
10180 d.physical_id[1] + "_replaced-%s" % suff)
10182 # Build the rename list based on what LVs exist on the node
10183 rename_old_to_new = []
10184 for to_ren in old_lvs:
10185 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10186 if not result.fail_msg and result.payload:
10188 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10190 self.lu.LogInfo("Renaming the old LVs on the target node")
10191       result = self.rpc.call_blockdev_rename(self.target_node,
10192                                              rename_old_to_new)
10193       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10195 # Now we rename the new LVs to the old LVs
10196 self.lu.LogInfo("Renaming the new LVs on the target node")
10197 rename_new_to_old = [(new, old.physical_id)
10198 for old, new in zip(old_lvs, new_lvs)]
10199       result = self.rpc.call_blockdev_rename(self.target_node,
10200                                              rename_new_to_old)
10201       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10203 # Intermediate steps of in memory modifications
10204 for old, new in zip(old_lvs, new_lvs):
10205 new.logical_id = old.logical_id
10206 self.cfg.SetDiskID(new, self.target_node)
10208 # We need to modify old_lvs so that removal later removes the
10209 # right LVs, not the newly added ones; note that old_lvs is a
10211 for disk in old_lvs:
10212 disk.logical_id = ren_fn(disk, temp_suffix)
10213 self.cfg.SetDiskID(disk, self.target_node)
10215 # Now that the new lvs have the old name, we can add them to the device
10216 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10217 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10219 msg = result.fail_msg
10221 for new_lv in new_lvs:
10222 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10225 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10226 hint=("cleanup manually the unused logical"
10228 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10230 cstep = itertools.count(5)
10232 if self.early_release:
10233 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10234 self._RemoveOldStorage(self.target_node, iv_names)
10235 # TODO: Check if releasing locks early still makes sense
10236 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10237     else:
10238       # Release all resource locks except those used by the instance
10239 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10240 keep=self.node_secondary_ip.keys())
10242 # Release all node locks while waiting for sync
10243 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10245 # TODO: Can the instance lock be downgraded here? Take the optional disk
10246 # shutdown in the caller into consideration.
10249 # This can fail as the old devices are degraded and _WaitForSync
10250 # does a combined result over all disks, so we don't check its return value
10251 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10252 _WaitForSync(self.lu, self.instance)
10254 # Check all devices manually
10255 self._CheckDevices(self.instance.primary_node, iv_names)
10257 # Step: remove old storage
10258 if not self.early_release:
10259 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10260 self._RemoveOldStorage(self.target_node, iv_names)
10262 def _ExecDrbd8Secondary(self, feedback_fn):
10263 """Replace the secondary node for DRBD 8.
10265 The algorithm for replace is quite complicated:
10266 - for all disks of the instance:
10267 - create new LVs on the new node with same names
10268 - shutdown the drbd device on the old secondary
10269 - disconnect the drbd network on the primary
10270 - create the drbd device on the new secondary
10271 - network attach the drbd on the primary, using an artifice:
10272 the drbd code for Attach() will connect to the network if it
10273 finds a device which is connected to the good local disks but
10274 not network enabled
10275 - wait for sync across all devices
10276 - remove all disks from the old secondary
10278 Failures are not very well handled.
10280     """
10281     steps_total = 6
10283     pnode = self.instance.primary_node
10285 # Step: check device activation
10286 self.lu.LogStep(1, steps_total, "Check device existence")
10287 self._CheckDisksExistence([self.instance.primary_node])
10288 self._CheckVolumeGroup([self.instance.primary_node])
10290 # Step: check other node consistency
10291 self.lu.LogStep(2, steps_total, "Check peer consistency")
10292 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10294 # Step: create new storage
10295 self.lu.LogStep(3, steps_total, "Allocate new storage")
10296 for idx, dev in enumerate(self.instance.disks):
10297 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10298 (self.new_node, idx))
10299 # we pass force_create=True to force LVM creation
10300 for new_lv in dev.children:
10301 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10302 _GetInstanceInfoText(self.instance), False)
10304     # Step 4: drbd minors and drbd setup changes
10305 # after this, we must manually remove the drbd minors on both the
10306 # error and the success paths
10307 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10308 minors = self.cfg.AllocateDRBDMinor([self.new_node
10309 for dev in self.instance.disks],
10310 self.instance.name)
10311 logging.debug("Allocated minors %r", minors)
10314 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10315 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10316 (self.new_node, idx))
10317 # create new devices on new_node; note that we create two IDs:
10318 # one without port, so the drbd will be activated without
10319 # networking information on the new node at this stage, and one
10320 # with network, for the latter activation in step 4
10321 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10322       if self.instance.primary_node == o_node1:
10323         p_minor = o_minor1
10324       else:
10325         assert self.instance.primary_node == o_node2, "Three-node instance?"
10326         p_minor = o_minor2
10328 new_alone_id = (self.instance.primary_node, self.new_node, None,
10329 p_minor, new_minor, o_secret)
10330 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10331 p_minor, new_minor, o_secret)
10333 iv_names[idx] = (dev, dev.children, new_net_id)
10334 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10336 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10337 logical_id=new_alone_id,
10338 children=dev.children,
10341 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10342 _GetInstanceInfoText(self.instance), False)
10343 except errors.GenericError:
10344 self.cfg.ReleaseDRBDMinors(self.instance.name)
10347 # We have new devices, shutdown the drbd on the old secondary
10348 for idx, dev in enumerate(self.instance.disks):
10349 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10350 self.cfg.SetDiskID(dev, self.target_node)
10351 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10353         self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
10354                            " node: %s" % (idx, msg),
10355 hint=("Please cleanup this device manually as"
10356 " soon as possible"))
10358 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10359 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10360 self.instance.disks)[pnode]
10362 msg = result.fail_msg
10364 # detaches didn't succeed (unlikely)
10365 self.cfg.ReleaseDRBDMinors(self.instance.name)
10366 raise errors.OpExecError("Can't detach the disks from the network on"
10367 " old node: %s" % (msg,))
10369 # if we managed to detach at least one, we update all the disks of
10370 # the instance to point to the new secondary
10371 self.lu.LogInfo("Updating instance configuration")
10372 for dev, _, new_logical_id in iv_names.itervalues():
10373 dev.logical_id = new_logical_id
10374 self.cfg.SetDiskID(dev, self.instance.primary_node)
10376 self.cfg.Update(self.instance, feedback_fn)
10378 # Release all node locks (the configuration has been updated)
10379 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10381 # and now perform the drbd attach
10382 self.lu.LogInfo("Attaching primary drbds to new secondary"
10383 " (standalone => connected)")
10384 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10386 self.node_secondary_ip,
10387 self.instance.disks,
10388 self.instance.name,
10390 for to_node, to_result in result.items():
10391 msg = to_result.fail_msg
10393 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10395 hint=("please do a gnt-instance info to see the"
10396 " status of disks"))
10398 cstep = itertools.count(5)
10400 if self.early_release:
10401 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10402 self._RemoveOldStorage(self.target_node, iv_names)
10403 # TODO: Check if releasing locks early still makes sense
10404 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10405     else:
10406       # Release all resource locks except those used by the instance
10407 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10408 keep=self.node_secondary_ip.keys())
10410 # TODO: Can the instance lock be downgraded here? Take the optional disk
10411 # shutdown in the caller into consideration.
10414 # This can fail as the old devices are degraded and _WaitForSync
10415 # does a combined result over all disks, so we don't check its return value
10416 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10417 _WaitForSync(self.lu, self.instance)
10419 # Check all devices manually
10420 self._CheckDevices(self.instance.primary_node, iv_names)
10422 # Step: remove old storage
10423 if not self.early_release:
10424 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10425 self._RemoveOldStorage(self.target_node, iv_names)
10428 class LURepairNodeStorage(NoHooksLU):
10429 """Repairs the volume group on a node.
10434 def CheckArguments(self):
10435 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10437 storage_type = self.op.storage_type
10439 if (constants.SO_FIX_CONSISTENCY not in
10440 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10441 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10442 " repaired" % storage_type,
10443 errors.ECODE_INVAL)
10445 def ExpandNames(self):
10446 self.needed_locks = {
10447 locking.LEVEL_NODE: [self.op.node_name],
10450 def _CheckFaultyDisks(self, instance, node_name):
10451 """Ensure faulty disks abort the opcode or at least warn."""
10453 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10455 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10456 " node '%s'" % (instance.name, node_name),
10457 errors.ECODE_STATE)
10458 except errors.OpPrereqError, err:
10459 if self.op.ignore_consistency:
10460 self.proc.LogWarning(str(err.args[0]))
10464 def CheckPrereq(self):
10465 """Check prerequisites.
10468 # Check whether any instance on this node has faulty disks
10469 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10470 if inst.admin_state != constants.ADMINST_UP:
10472 check_nodes = set(inst.all_nodes)
10473 check_nodes.discard(self.op.node_name)
10474 for inst_node_name in check_nodes:
10475 self._CheckFaultyDisks(inst, inst_node_name)
10477 def Exec(self, feedback_fn):
10478 feedback_fn("Repairing storage unit '%s' on %s ..." %
10479 (self.op.name, self.op.node_name))
10481 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10482 result = self.rpc.call_storage_execute(self.op.node_name,
10483 self.op.storage_type, st_args,
10485 constants.SO_FIX_CONSISTENCY)
10486 result.Raise("Failed to repair storage unit '%s' on %s" %
10487 (self.op.name, self.op.node_name))
10490 class LUNodeEvacuate(NoHooksLU):
10491 """Evacuates instances off a list of nodes.
10496 _MODE2IALLOCATOR = {
10497 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10498 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10499     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10500     }
10501 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10502 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10503 constants.IALLOCATOR_NEVAC_MODES)
10505 def CheckArguments(self):
10506 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10508 def ExpandNames(self):
10509 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10511 if self.op.remote_node is not None:
10512 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10513 assert self.op.remote_node
10515 if self.op.remote_node == self.op.node_name:
10516 raise errors.OpPrereqError("Can not use evacuated node as a new"
10517 " secondary node", errors.ECODE_INVAL)
10519 if self.op.mode != constants.NODE_EVAC_SEC:
10520 raise errors.OpPrereqError("Without the use of an iallocator only"
10521 " secondary instances can be evacuated",
10522 errors.ECODE_INVAL)
10525 self.share_locks = _ShareAll()
10526 self.needed_locks = {
10527 locking.LEVEL_INSTANCE: [],
10528 locking.LEVEL_NODEGROUP: [],
10529 locking.LEVEL_NODE: [],
10532 # Determine nodes (via group) optimistically, needs verification once locks
10533 # have been acquired
10534 self.lock_nodes = self._DetermineNodes()
10536 def _DetermineNodes(self):
10537 """Gets the list of nodes to operate on.
10540 if self.op.remote_node is None:
10541 # Iallocator will choose any node(s) in the same group
10542 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10544 group_nodes = frozenset([self.op.remote_node])
10546 # Determine nodes to be locked
10547 return set([self.op.node_name]) | group_nodes
10549 def _DetermineInstances(self):
10550 """Builds list of instances to operate on.
10553 assert self.op.mode in constants.NODE_EVAC_MODES
10555 if self.op.mode == constants.NODE_EVAC_PRI:
10556 # Primary instances only
10557 inst_fn = _GetNodePrimaryInstances
10558 assert self.op.remote_node is None, \
10559 "Evacuating primary instances requires iallocator"
10560 elif self.op.mode == constants.NODE_EVAC_SEC:
10561 # Secondary instances only
10562 inst_fn = _GetNodeSecondaryInstances
10565 assert self.op.mode == constants.NODE_EVAC_ALL
10566 inst_fn = _GetNodeInstances
10567 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10569 raise errors.OpPrereqError("Due to an issue with the iallocator"
10570 " interface it is not possible to evacuate"
10571 " all instances at once; specify explicitly"
10572 " whether to evacuate primary or secondary"
10574 errors.ECODE_INVAL)
10576 return inst_fn(self.cfg, self.op.node_name)
10578 def DeclareLocks(self, level):
10579 if level == locking.LEVEL_INSTANCE:
10580 # Lock instances optimistically, needs verification once node and group
10581 # locks have been acquired
10582 self.needed_locks[locking.LEVEL_INSTANCE] = \
10583 set(i.name for i in self._DetermineInstances())
10585 elif level == locking.LEVEL_NODEGROUP:
10586 # Lock node groups for all potential target nodes optimistically, needs
10587 # verification once nodes have been acquired
10588 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10589 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10591 elif level == locking.LEVEL_NODE:
10592 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
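# All of the above is optimistic: instances, groups and nodes are looked up
# before their locks are held, so CheckPrereq re-checks that nothing changed
# in the meantime and asks the caller to retry the operation if it did.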
10594 def CheckPrereq(self):
10596 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10597 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10598 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10600 need_nodes = self._DetermineNodes()
10602 if not owned_nodes.issuperset(need_nodes):
10603 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10604 " locks were acquired, current nodes are"
10605 " are '%s', used to be '%s'; retry the"
10607 (self.op.node_name,
10608 utils.CommaJoin(need_nodes),
10609 utils.CommaJoin(owned_nodes)),
10610 errors.ECODE_STATE)
10612 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10613 if owned_groups != wanted_groups:
10614 raise errors.OpExecError("Node groups changed since locks were acquired,"
10615 " current groups are '%s', used to be '%s';"
10616 " retry the operation" %
10617 (utils.CommaJoin(wanted_groups),
10618 utils.CommaJoin(owned_groups)))
10620 # Determine affected instances
10621 self.instances = self._DetermineInstances()
10622 self.instance_names = [i.name for i in self.instances]
10624 if set(self.instance_names) != owned_instances:
10625 raise errors.OpExecError("Instances on node '%s' changed since locks"
10626 " were acquired, current instances are '%s',"
10627 " used to be '%s'; retry the operation" %
10628 (self.op.node_name,
10629 utils.CommaJoin(self.instance_names),
10630 utils.CommaJoin(owned_instances)))
10632 if self.instance_names:
10633 self.LogInfo("Evacuating instances from node '%s': %s",
10634                    self.op.node_name,
10635                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10636     else:
10637       self.LogInfo("No instances to evacuate from node '%s'",
10638                    self.op.node_name)
10640 if self.op.remote_node is not None:
10641 for i in self.instances:
10642 if i.primary_node == self.op.remote_node:
10643 raise errors.OpPrereqError("Node %s is the primary node of"
10644 " instance %s, cannot use it as"
10646 (self.op.remote_node, i.name),
10647 errors.ECODE_INVAL)
10649 def Exec(self, feedback_fn):
10650 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10652 if not self.instance_names:
10653       # No instances to evacuate
10654       jobs = []
10656 elif self.op.iallocator is not None:
10657 # TODO: Implement relocation to other group
10658 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10659 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10660 instances=list(self.instance_names))
10662 ial.Run(self.op.iallocator)
10664 if not ial.success:
10665 raise errors.OpPrereqError("Can't compute node evacuation using"
10666 " iallocator '%s': %s" %
10667 (self.op.iallocator, ial.info),
10668 errors.ECODE_NORES)
10670 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10672 elif self.op.remote_node is not None:
10673 assert self.op.mode == constants.NODE_EVAC_SEC
10674       jobs = [
10675         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10676                                         remote_node=self.op.remote_node,
10677                                         disks=[],
10678                                         mode=constants.REPLACE_DISK_CHG,
10679                                         early_release=self.op.early_release)]
10680         for instance_name in self.instance_names
10681         ]
10683     else:
10684       raise errors.ProgrammerError("No iallocator or remote node")
10686 return ResultWithJobs(jobs)
10689 def _SetOpEarlyRelease(early_release, op):
10690 """Sets C{early_release} flag on opcodes if available.
10694 op.early_release = early_release
10695 except AttributeError:
10696 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10701 def _NodeEvacDest(use_nodes, group, nodes):
10702 """Returns group or nodes depending on caller's choice.
10706 return utils.CommaJoin(nodes)
10711 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10712 """Unpacks the result of change-group and node-evacuate iallocator requests.
10714 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10715 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10717 @type lu: L{LogicalUnit}
10718 @param lu: Logical unit instance
10719 @type alloc_result: tuple/list
10720 @param alloc_result: Result from iallocator
10721 @type early_release: bool
10722 @param early_release: Whether to release locks early if possible
10723 @type use_nodes: bool
10724 @param use_nodes: Whether to display node names instead of groups
10727 (moved, failed, jobs) = alloc_result
10730 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10731 for (name, reason) in failed)
10732 lu.LogWarning("Unable to evacuate instances %s", failreason)
10733 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10736 lu.LogInfo("Instances to be moved: %s",
10737 utils.CommaJoin("%s (to %s)" %
10738 (name, _NodeEvacDest(use_nodes, group, nodes))
10739 for (name, group, nodes) in moved))
10741 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10742               map(opcodes.OpCode.LoadOpCode, ops))
10743           for ops in jobs]
10746 class LUInstanceGrowDisk(LogicalUnit):
10747 """Grow a disk of an instance.
10750 HPATH = "disk-grow"
10751 HTYPE = constants.HTYPE_INSTANCE
10754 def ExpandNames(self):
10755 self._ExpandAndLockInstance()
10756 self.needed_locks[locking.LEVEL_NODE] = []
10757 self.needed_locks[locking.LEVEL_NODE_RES] = []
10758 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10760 def DeclareLocks(self, level):
10761 if level == locking.LEVEL_NODE:
10762 self._LockInstancesNodes()
10763 elif level == locking.LEVEL_NODE_RES:
10765 self.needed_locks[locking.LEVEL_NODE_RES] = \
10766 self.needed_locks[locking.LEVEL_NODE][:]
10768 def BuildHooksEnv(self):
10769 """Build hooks env.
10771 This runs on the master, the primary and all the secondaries.
10775 "DISK": self.op.disk,
10776 "AMOUNT": self.op.amount,
10778 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10781 def BuildHooksNodes(self):
10782 """Build hooks nodes.
10785 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10788 def CheckPrereq(self):
10789 """Check prerequisites.
10791 This checks that the instance is in the cluster.
10794 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10795 assert instance is not None, \
10796 "Cannot retrieve locked instance %s" % self.op.instance_name
10797 nodenames = list(instance.all_nodes)
10798 for node in nodenames:
10799 _CheckNodeOnline(self, node)
10801 self.instance = instance
10803 if instance.disk_template not in constants.DTS_GROWABLE:
10804 raise errors.OpPrereqError("Instance's disk layout does not support"
10805 " growing", errors.ECODE_INVAL)
10807 self.disk = instance.FindDisk(self.op.disk)
10809 if instance.disk_template not in (constants.DT_FILE,
10810 constants.DT_SHARED_FILE):
10811 # TODO: check the free disk space for file, when that feature will be
10813 _CheckNodesFreeDiskPerVG(self, nodenames,
10814 self.disk.ComputeGrowth(self.op.amount))
10816 def Exec(self, feedback_fn):
10817 """Execute disk grow.
10820 instance = self.instance
10823 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10824 assert (self.owned_locks(locking.LEVEL_NODE) ==
10825 self.owned_locks(locking.LEVEL_NODE_RES))
10827 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10829 raise errors.OpExecError("Cannot activate block device to grow")
10831 feedback_fn("Growing disk %s of instance '%s' by %s" %
10832 (self.op.disk, instance.name,
10833 utils.FormatUnit(self.op.amount, "h")))
10835 # First run all grow ops in dry-run mode
10836 for node in instance.all_nodes:
10837 self.cfg.SetDiskID(disk, node)
10838 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10839 result.Raise("Grow request failed to node %s" % node)
10841 # We know that (as far as we can test) operations across different
10842 # nodes will succeed, time to run it for real
10843 for node in instance.all_nodes:
10844 self.cfg.SetDiskID(disk, node)
10845 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10846 result.Raise("Grow request failed to node %s" % node)
10848 # TODO: Rewrite code to work properly
10849 # DRBD goes into sync mode for a short amount of time after executing the
10850 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10851 # calling "resize" in sync mode fails. Sleeping for a short amount of
10852     # time is a work-around.
10854     time.sleep(5)
10855 disk.RecordGrow(self.op.amount)
10856 self.cfg.Update(instance, feedback_fn)
10858 # Changes have been recorded, release node lock
10859 _ReleaseLocks(self, locking.LEVEL_NODE)
10861 # Downgrade lock while waiting for sync
10862 self.glm.downgrade(locking.LEVEL_INSTANCE)
10864 if self.op.wait_for_sync:
10865 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10867 self.proc.LogWarning("Disk sync-ing has not returned a good"
10868 " status; please check the instance")
10869 if instance.admin_state != constants.ADMINST_UP:
10870 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10871 elif instance.admin_state != constants.ADMINST_UP:
10872 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10873 " not supposed to be running because no wait for"
10874 " sync mode was requested")
10876 assert self.owned_locks(locking.LEVEL_NODE_RES)
10877 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10880 class LUInstanceQueryData(NoHooksLU):
10881 """Query runtime instance data.
10886 def ExpandNames(self):
10887 self.needed_locks = {}
10889 # Use locking if requested or when non-static information is wanted
10890 if not (self.op.static or self.op.use_locking):
10891 self.LogWarning("Non-static data requested, locks need to be acquired")
10892 self.op.use_locking = True
10894 if self.op.instances or not self.op.use_locking:
10895 # Expand instance names right here
10896 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10898 # Will use acquired locks
10899 self.wanted_names = None
10901 if self.op.use_locking:
10902 self.share_locks = _ShareAll()
10904 if self.wanted_names is None:
10905 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10907 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10909 self.needed_locks[locking.LEVEL_NODE] = []
10910 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10912 def DeclareLocks(self, level):
10913 if self.op.use_locking and level == locking.LEVEL_NODE:
10914 self._LockInstancesNodes()
10916 def CheckPrereq(self):
10917 """Check prerequisites.
10919 This only checks the optional instance list against the existing names.
10922 if self.wanted_names is None:
10923 assert self.op.use_locking, "Locking was not used"
10924 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10926 self.wanted_instances = \
10927 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10929 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10930 """Returns the status of a block device
10933 if self.op.static or not node:
10936 self.cfg.SetDiskID(dev, node)
10938 result = self.rpc.call_blockdev_find(node, dev)
10942 result.Raise("Can't compute disk status for %s" % instance_name)
10944 status = result.payload
10948 return (status.dev_path, status.major, status.minor,
10949 status.sync_percent, status.estimated_time,
10950 status.is_degraded, status.ldisk_status)
10952 def _ComputeDiskStatus(self, instance, snode, dev):
10953 """Compute block device status.
10956 if dev.dev_type in constants.LDS_DRBD:
10957 # we change the snode then (otherwise we use the one passed in)
10958 if dev.logical_id[0] == instance.primary_node:
10959 snode = dev.logical_id[1]
10961 snode = dev.logical_id[0]
10963 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10964 instance.name, dev)
10965 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10968     dev_children = map(compat.partial(self._ComputeDiskStatus, instance, snode),
10969                        dev.children)
10974     return {
10975       "iv_name": dev.iv_name,
10976 "dev_type": dev.dev_type,
10977 "logical_id": dev.logical_id,
10978 "physical_id": dev.physical_id,
10979 "pstatus": dev_pstatus,
10980 "sstatus": dev_sstatus,
10981 "children": dev_children,
10986 def Exec(self, feedback_fn):
10987 """Gather and return data"""
10989     result = {}
10990     cluster = self.cfg.GetClusterInfo()
10992 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10993 for i in self.wanted_instances)
10994 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10995 if self.op.static or pnode.offline:
10996 remote_state = None
10998 self.LogWarning("Primary node %s is marked offline, returning static"
10999 " information only for instance %s" %
11000 (pnode.name, instance.name))
11002 remote_info = self.rpc.call_instance_info(instance.primary_node,
11004 instance.hypervisor)
11005 remote_info.Raise("Error checking node %s" % instance.primary_node)
11006 remote_info = remote_info.payload
11007 if remote_info and "state" in remote_info:
11008 remote_state = "up"
11010 if instance.admin_state == constants.ADMINST_UP:
11011 remote_state = "down"
11013 remote_state = instance.admin_state
11015 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11018 result[instance.name] = {
11019 "name": instance.name,
11020 "config_state": instance.admin_state,
11021 "run_state": remote_state,
11022 "pnode": instance.primary_node,
11023 "snodes": instance.secondary_nodes,
11025 # this happens to be the same format used for hooks
11026 "nics": _NICListToTuple(self, instance.nics),
11027 "disk_template": instance.disk_template,
11029 "hypervisor": instance.hypervisor,
11030 "network_port": instance.network_port,
11031 "hv_instance": instance.hvparams,
11032 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11033 "be_instance": instance.beparams,
11034 "be_actual": cluster.FillBE(instance),
11035 "os_instance": instance.osparams,
11036 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11037 "serial_no": instance.serial_no,
11038 "mtime": instance.mtime,
11039 "ctime": instance.ctime,
11040 "uuid": instance.uuid,
11046 class LUInstanceSetParams(LogicalUnit):
11047 """Modifies an instances's parameters.
11050 HPATH = "instance-modify"
11051 HTYPE = constants.HTYPE_INSTANCE
11054 def CheckArguments(self):
11055 if not (self.op.nics or self.op.disks or self.op.disk_template or
11056 self.op.hvparams or self.op.beparams or self.op.os_name or
11057 self.op.online_inst or self.op.offline_inst):
11058 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11060 if self.op.hvparams:
11061 _CheckGlobalHvParams(self.op.hvparams)
11065 for disk_op, disk_dict in self.op.disks:
11066 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11067 if disk_op == constants.DDM_REMOVE:
11068 disk_addremove += 1
11070 elif disk_op == constants.DDM_ADD:
11071 disk_addremove += 1
11073 if not isinstance(disk_op, int):
11074 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11075 if not isinstance(disk_dict, dict):
11076 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11077 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11079 if disk_op == constants.DDM_ADD:
11080 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11081 if mode not in constants.DISK_ACCESS_SET:
11082 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11083 errors.ECODE_INVAL)
11084 size = disk_dict.get(constants.IDISK_SIZE, None)
11086 raise errors.OpPrereqError("Required disk parameter size missing",
11087 errors.ECODE_INVAL)
11090 except (TypeError, ValueError), err:
11091 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11092 str(err), errors.ECODE_INVAL)
11093 disk_dict[constants.IDISK_SIZE] = size
11095 # modification of disk
11096 if constants.IDISK_SIZE in disk_dict:
11097 raise errors.OpPrereqError("Disk size change not possible, use"
11098 " grow-disk", errors.ECODE_INVAL)
11100 if disk_addremove > 1:
11101 raise errors.OpPrereqError("Only one disk add or remove operation"
11102 " supported at a time", errors.ECODE_INVAL)
11104 if self.op.disks and self.op.disk_template is not None:
11105 raise errors.OpPrereqError("Disk template conversion and other disk"
11106 " changes not supported at the same time",
11107 errors.ECODE_INVAL)
11109 if (self.op.disk_template and
11110 self.op.disk_template in constants.DTS_INT_MIRROR and
11111 self.op.remote_node is None):
11112 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11113 " one requires specifying a secondary node",
11114 errors.ECODE_INVAL)
11118 for nic_op, nic_dict in self.op.nics:
11119 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11120 if nic_op == constants.DDM_REMOVE:
11123 elif nic_op == constants.DDM_ADD:
11126 if not isinstance(nic_op, int):
11127 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11128 if not isinstance(nic_dict, dict):
11129 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11130 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11132 # nic_dict should be a dict
11133 nic_ip = nic_dict.get(constants.INIC_IP, None)
11134 if nic_ip is not None:
11135 if nic_ip.lower() == constants.VALUE_NONE:
11136 nic_dict[constants.INIC_IP] = None
11138 if not netutils.IPAddress.IsValid(nic_ip):
11139 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11140 errors.ECODE_INVAL)
11142 nic_bridge = nic_dict.get("bridge", None)
11143 nic_link = nic_dict.get(constants.INIC_LINK, None)
11144 if nic_bridge and nic_link:
11145 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11146 " at the same time", errors.ECODE_INVAL)
11147 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11148 nic_dict["bridge"] = None
11149 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11150 nic_dict[constants.INIC_LINK] = None
11152 if nic_op == constants.DDM_ADD:
11153 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11154 if nic_mac is None:
11155 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11157 if constants.INIC_MAC in nic_dict:
11158 nic_mac = nic_dict[constants.INIC_MAC]
11159 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11160 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11162 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11163 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11164 " modifying an existing nic",
11165 errors.ECODE_INVAL)
11167 if nic_addremove > 1:
11168 raise errors.OpPrereqError("Only one NIC add or remove operation"
11169 " supported at a time", errors.ECODE_INVAL)
11171 def ExpandNames(self):
11172 self._ExpandAndLockInstance()
11173 # Can't even acquire node locks in shared mode as upcoming changes in
11174 # Ganeti 2.6 will start to modify the node object on disk conversion
11175 self.needed_locks[locking.LEVEL_NODE] = []
11176 self.needed_locks[locking.LEVEL_NODE_RES] = []
11177 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11179 def DeclareLocks(self, level):
11180 if level == locking.LEVEL_NODE:
11181 self._LockInstancesNodes()
11182 if self.op.disk_template and self.op.remote_node:
11183 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11184 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11185 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11187 self.needed_locks[locking.LEVEL_NODE_RES] = \
11188 self.needed_locks[locking.LEVEL_NODE][:]
11190 def BuildHooksEnv(self):
11191 """Build hooks env.
11193 This runs on the master, primary and secondaries.
11197 if constants.BE_MINMEM in self.be_new:
11198 args["minmem"] = self.be_new[constants.BE_MINMEM]
11199 if constants.BE_MAXMEM in self.be_new:
11200 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11201 if constants.BE_VCPUS in self.be_new:
11202 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11203 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11204 # information at all.
11207 nic_override = dict(self.op.nics)
11208 for idx, nic in enumerate(self.instance.nics):
11209 if idx in nic_override:
11210 this_nic_override = nic_override[idx]
11212 this_nic_override = {}
11213 if constants.INIC_IP in this_nic_override:
11214 ip = this_nic_override[constants.INIC_IP]
11217 if constants.INIC_MAC in this_nic_override:
11218 mac = this_nic_override[constants.INIC_MAC]
11221 if idx in self.nic_pnew:
11222 nicparams = self.nic_pnew[idx]
11224 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11225 mode = nicparams[constants.NIC_MODE]
11226 link = nicparams[constants.NIC_LINK]
11227 args["nics"].append((ip, mac, mode, link))
11228 if constants.DDM_ADD in nic_override:
11229 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11230 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11231 nicparams = self.nic_pnew[constants.DDM_ADD]
11232 mode = nicparams[constants.NIC_MODE]
11233 link = nicparams[constants.NIC_LINK]
11234 args["nics"].append((ip, mac, mode, link))
11235 elif constants.DDM_REMOVE in nic_override:
11236 del args["nics"][-1]
11238 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11239 if self.op.disk_template:
11240 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11244 def BuildHooksNodes(self):
11245 """Build hooks nodes.
11248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11251 def CheckPrereq(self):
11252 """Check prerequisites.
11254 This only checks the instance list against the existing names.
11257 # checking the new params on the primary/secondary nodes
11259 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11260 cluster = self.cluster = self.cfg.GetClusterInfo()
11261 assert self.instance is not None, \
11262 "Cannot retrieve locked instance %s" % self.op.instance_name
11263 pnode = instance.primary_node
11264 nodelist = list(instance.all_nodes)
11267 if self.op.os_name and not self.op.force:
11268 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11269 self.op.force_variant)
11270 instance_os = self.op.os_name
11272 instance_os = instance.os
11274 if self.op.disk_template:
11275 if instance.disk_template == self.op.disk_template:
11276 raise errors.OpPrereqError("Instance already has disk template %s" %
11277 instance.disk_template, errors.ECODE_INVAL)
11279 if (instance.disk_template,
11280 self.op.disk_template) not in self._DISK_CONVERSIONS:
11281 raise errors.OpPrereqError("Unsupported disk template conversion from"
11282 " %s to %s" % (instance.disk_template,
11283 self.op.disk_template),
11284 errors.ECODE_INVAL)
11285 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11286 msg="cannot change disk template")
11287 if self.op.disk_template in constants.DTS_INT_MIRROR:
11288 if self.op.remote_node == pnode:
11289 raise errors.OpPrereqError("Given new secondary node %s is the same"
11290 " as the primary node of the instance" %
11291 self.op.remote_node, errors.ECODE_STATE)
11292 _CheckNodeOnline(self, self.op.remote_node)
11293 _CheckNodeNotDrained(self, self.op.remote_node)
11294 # FIXME: here we assume that the old instance type is DT_PLAIN
11295 assert instance.disk_template == constants.DT_PLAIN
11296 disks = [{constants.IDISK_SIZE: d.size,
11297 constants.IDISK_VG: d.logical_id[0]}
11298 for d in instance.disks]
11299 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11300 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11302 # hvparams processing
11303 if self.op.hvparams:
11304 hv_type = instance.hypervisor
11305 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11306 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11307 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11310 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11311 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11312 self.hv_proposed = self.hv_new = hv_new # the new actual values
11313 self.hv_inst = i_hvdict # the new dict (without defaults)
11315 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11317 self.hv_new = self.hv_inst = {}
11319 # beparams processing
11320 if self.op.beparams:
11321 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11323 objects.UpgradeBeParams(i_bedict)
11324 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11325 be_new = cluster.SimpleFillBE(i_bedict)
11326 self.be_proposed = self.be_new = be_new # the new actual values
11327 self.be_inst = i_bedict # the new dict (without defaults)
11329 self.be_new = self.be_inst = {}
11330 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11331 be_old = cluster.FillBE(instance)
11333 # CPU param validation -- checking every time a parameter is
11334 # changed to cover all cases where either CPU mask or vcpus have changed
11336 if (constants.BE_VCPUS in self.be_proposed and
11337 constants.HV_CPU_MASK in self.hv_proposed):
11339 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11340 # Verify mask is consistent with number of vCPUs. Can skip this
11341 # test if only 1 entry in the CPU mask, which means same mask
11342 # is applied to all vCPUs.
11343 if (len(cpu_list) > 1 and
11344 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11345 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11347 (self.be_proposed[constants.BE_VCPUS],
11348 self.hv_proposed[constants.HV_CPU_MASK]),
11349 errors.ECODE_INVAL)
11351 # Only perform this test if a new CPU mask is given
11352 if constants.HV_CPU_MASK in self.hv_new:
11353 # Calculate the largest CPU number requested
11354 max_requested_cpu = max(map(max, cpu_list))
11355 # Check that all of the instance's nodes have enough physical CPUs to
11356 # satisfy the requested CPU mask
11357 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11358 max_requested_cpu + 1, instance.hypervisor)
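# Illustrative sketch (not part of the original code), assuming the usual
# colon-separated multi-CPU-mask syntax: a mask such as "0-1:3:4" yields one
# entry per virtual CPU ([0, 1], [3] and [4]), so it is only consistent with
# BE_VCPUS == 3, and the largest CPU number requested (4) means every node
# must have at least 5 physical CPUs.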
11360 # osparams processing
11361 if self.op.osparams:
11362 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11363 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11364 self.os_inst = i_osdict # the new dict (without defaults)
11370 #TODO(dynmem): do the appropriate check involving MINMEM
11371 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11372 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11373 mem_check_list = [pnode]
11374 if be_new[constants.BE_AUTO_BALANCE]:
11375 # either we changed auto_balance to yes or it was from before
11376 mem_check_list.extend(instance.secondary_nodes)
11377 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11378 instance.hypervisor)
11379 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11380 instance.hypervisor)
11381 pninfo = nodeinfo[pnode]
11382 msg = pninfo.fail_msg
11384 # Assume the primary node is unreachable and go ahead
11385 self.warn.append("Can't get info from primary node %s: %s" %
11387 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11388 self.warn.append("Node data from primary node %s doesn't contain"
11389 " free memory information" % pnode)
11390 elif instance_info.fail_msg:
11391 self.warn.append("Can't get instance runtime information: %s" %
11392 instance_info.fail_msg)
11394 if instance_info.payload:
11395 current_mem = int(instance_info.payload["memory"])
11397 # Assume instance not running
11398 # (there is a slight race condition here, but it's not very probable,
11399 # and we have no other way to check)
11401 #TODO(dynmem): do the appropriate check involving MINMEM
11402 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11403 pninfo.payload["memory_free"])
11405 raise errors.OpPrereqError("This change will prevent the instance"
11406 " from starting, due to %d MB of memory"
11407 " missing on its primary node" % miss_mem,
11408 errors.ECODE_NORES)
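# Illustrative numeric example (not part of the original code, hypothetical
# values): raising maxmem to 4096 MB for an instance currently using 1024 MB
# on a primary node reporting 2048 MB free gives
#   miss_mem = 4096 - 1024 - 2048 = 1024 MB
# which is positive, so the change would be rejected here.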
11410 if be_new[constants.BE_AUTO_BALANCE]:
11411 for node, nres in nodeinfo.items():
11412 if node not in instance.secondary_nodes:
11414 nres.Raise("Can't get info from secondary node %s" % node,
11415 prereq=True, ecode=errors.ECODE_STATE)
11416 if not isinstance(nres.payload.get("memory_free", None), int):
11417 raise errors.OpPrereqError("Secondary node %s didn't return free"
11418 " memory information" % node,
11419 errors.ECODE_STATE)
11420 #TODO(dynmem): do the appropriate check involving MINMEM
11421 elif be_new[constants.BE_MAXMEM] > nres.payload["memory_free"]:
11422 raise errors.OpPrereqError("This change will prevent the instance"
11423 " from failover to its secondary node"
11424 " %s, due to not enough memory" % node,
11425 errors.ECODE_STATE)
11429 self.nic_pinst = {}
11430 for nic_op, nic_dict in self.op.nics:
11431 if nic_op == constants.DDM_REMOVE:
11432 if not instance.nics:
11433 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11434 errors.ECODE_INVAL)
11436 if nic_op != constants.DDM_ADD:
11438 if not instance.nics:
11439 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11440 " no NICs" % nic_op,
11441 errors.ECODE_INVAL)
11442 if nic_op < 0 or nic_op >= len(instance.nics):
11443 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11445 (nic_op, len(instance.nics) - 1),
11446 errors.ECODE_INVAL)
11447 old_nic_params = instance.nics[nic_op].nicparams
11448 old_nic_ip = instance.nics[nic_op].ip
11450 old_nic_params = {}
11453 update_params_dict = dict([(key, nic_dict[key])
11454 for key in constants.NICS_PARAMETERS
11455 if key in nic_dict])
11457 if "bridge" in nic_dict:
11458 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11460 new_nic_params = _GetUpdatedParams(old_nic_params,
11461 update_params_dict)
11462 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11463 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11464 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11465 self.nic_pinst[nic_op] = new_nic_params
11466 self.nic_pnew[nic_op] = new_filled_nic_params
11467 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11469 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11470 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11471 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11473 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11475 self.warn.append(msg)
11477 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11478 if new_nic_mode == constants.NIC_MODE_ROUTED:
11479 if constants.INIC_IP in nic_dict:
11480 nic_ip = nic_dict[constants.INIC_IP]
11482 nic_ip = old_nic_ip
11484 raise errors.OpPrereqError("Cannot set the nic ip to None"
11485 " on a routed nic", errors.ECODE_INVAL)
11486 if constants.INIC_MAC in nic_dict:
11487 nic_mac = nic_dict[constants.INIC_MAC]
11488 if nic_mac is None:
11489 raise errors.OpPrereqError("Cannot set the nic mac to None",
11490 errors.ECODE_INVAL)
11491 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11492 # otherwise generate the mac
11493 nic_dict[constants.INIC_MAC] = \
11494 self.cfg.GenerateMAC(self.proc.GetECId())
11496 # or validate/reserve the current one
11498 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11499 except errors.ReservationError:
11500 raise errors.OpPrereqError("MAC address %s already in use"
11501 " in cluster" % nic_mac,
11502 errors.ECODE_NOTUNIQUE)
11505 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11506 raise errors.OpPrereqError("Disk operations not supported for"
11507 " diskless instances",
11508 errors.ECODE_INVAL)
11509 for disk_op, _ in self.op.disks:
11510 if disk_op == constants.DDM_REMOVE:
11511 if len(instance.disks) == 1:
11512 raise errors.OpPrereqError("Cannot remove the last disk of"
11513 " an instance", errors.ECODE_INVAL)
11514 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11515 msg="cannot remove disks")
11517 if (disk_op == constants.DDM_ADD and
11518 len(instance.disks) >= constants.MAX_DISKS):
11519 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11520 " add more" % constants.MAX_DISKS,
11521 errors.ECODE_STATE)
11522 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11524 if disk_op < 0 or disk_op >= len(instance.disks):
11525 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11527 (disk_op, len(instance.disks)),
11528 errors.ECODE_INVAL)
11530 # disabling the instance
11531 if self.op.offline_inst:
11532 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11533 msg="cannot change instance state to offline")
11535 # enabling the instance
11536 if self.op.online_inst:
11537 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11538 msg="cannot make instance go online")
11540 def _ConvertPlainToDrbd(self, feedback_fn):
11541 """Converts an instance from plain to drbd.
11544 feedback_fn("Converting template to drbd")
11545 instance = self.instance
11546 pnode = instance.primary_node
11547 snode = self.op.remote_node
11549 assert instance.disk_template == constants.DT_PLAIN
11551 # create a fake disk info for _GenerateDiskTemplate
11552 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11553 constants.IDISK_VG: d.logical_id[0]}
11554 for d in instance.disks]
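# Illustrative sketch (not part of the original code): for a single 10 GiB
# plain disk in volume group "xenvg" (hypothetical values) the fake disk info
# built above would be
#   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]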
11555 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11556 instance.name, pnode, [snode],
11557 disk_info, None, None, 0, feedback_fn)
11558 info = _GetInstanceInfoText(instance)
11559 feedback_fn("Creating aditional volumes...")
11560 # first, create the missing data and meta devices
11561 for disk in new_disks:
11562 # unfortunately this is... not too nice
11563 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11565 for child in disk.children:
11566 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11567 # at this stage, all new LVs have been created, we can rename the old ones
11569 feedback_fn("Renaming original volumes...")
11570 rename_list = [(o, n.children[0].logical_id)
11571 for (o, n) in zip(instance.disks, new_disks)]
11572 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11573 result.Raise("Failed to rename original LVs")
11575 feedback_fn("Initializing DRBD devices...")
11576 # all child devices are in place, we can now create the DRBD devices
11577 for disk in new_disks:
11578 for node in [pnode, snode]:
11579 f_create = node == pnode
11580 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11582 # at this point, the instance has been modified
11583 instance.disk_template = constants.DT_DRBD8
11584 instance.disks = new_disks
11585 self.cfg.Update(instance, feedback_fn)
11587 # Release node locks while waiting for sync
11588 _ReleaseLocks(self, locking.LEVEL_NODE)
11590 # disks are created, waiting for sync
11591 disk_abort = not _WaitForSync(self, instance,
11592 oneshot=not self.op.wait_for_sync)
11594 raise errors.OpExecError("There are some degraded disks for"
11595 " this instance, please cleanup manually")
11597 # Node resource locks will be released by caller
11599 def _ConvertDrbdToPlain(self, feedback_fn):
11600 """Converts an instance from drbd to plain.
11603 instance = self.instance
11605 assert len(instance.secondary_nodes) == 1
11606 assert instance.disk_template == constants.DT_DRBD8
11608 pnode = instance.primary_node
11609 snode = instance.secondary_nodes[0]
11610 feedback_fn("Converting template to plain")
11612 old_disks = instance.disks
11613 new_disks = [d.children[0] for d in old_disks]
11615 # copy over size and mode
11616 for parent, child in zip(old_disks, new_disks):
11617 child.size = parent.size
11618 child.mode = parent.mode
11620 # update instance structure
11621 instance.disks = new_disks
11622 instance.disk_template = constants.DT_PLAIN
11623 self.cfg.Update(instance, feedback_fn)
11625 # Release locks in case removing disks takes a while
11626 _ReleaseLocks(self, locking.LEVEL_NODE)
11628 feedback_fn("Removing volumes on the secondary node...")
11629 for disk in old_disks:
11630 self.cfg.SetDiskID(disk, snode)
11631 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11633 self.LogWarning("Could not remove block device %s on node %s,"
11634 " continuing anyway: %s", disk.iv_name, snode, msg)
11636 feedback_fn("Removing unneeded volumes on the primary node...")
11637 for idx, disk in enumerate(old_disks):
11638 meta = disk.children[1]
11639 self.cfg.SetDiskID(meta, pnode)
11640 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11642 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11643 " continuing anyway: %s", idx, pnode, msg)
11645 # this is a DRBD disk, return its port to the pool
11646 for disk in old_disks:
11647 tcp_port = disk.logical_id[2]
11648 self.cfg.AddTcpUdpPort(tcp_port)
11650 # Node resource locks will be released by caller
11652 def Exec(self, feedback_fn):
11653 """Modifies an instance.
11655 All parameters take effect only at the next restart of the instance.
11658 # Process here the warnings from CheckPrereq, as we don't have a
11659 # feedback_fn there.
11660 for warn in self.warn:
11661 feedback_fn("WARNING: %s" % warn)
11663 assert ((self.op.disk_template is None) ^
11664 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11665 "Not owning any node resource locks"
11668 instance = self.instance
11670 for disk_op, disk_dict in self.op.disks:
11671 if disk_op == constants.DDM_REMOVE:
11672 # remove the last disk
11673 device = instance.disks.pop()
11674 device_idx = len(instance.disks)
11675 for node, disk in device.ComputeNodeTree(instance.primary_node):
11676 self.cfg.SetDiskID(disk, node)
11677 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11679 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11680 " continuing anyway", device_idx, node, msg)
11681 result.append(("disk/%d" % device_idx, "remove"))
11683 # if this is a DRBD disk, return its port to the pool
11684 if device.dev_type in constants.LDS_DRBD:
11685 tcp_port = device.logical_id[2]
11686 self.cfg.AddTcpUdpPort(tcp_port)
11687 elif disk_op == constants.DDM_ADD:
11689 if instance.disk_template in (constants.DT_FILE,
11690 constants.DT_SHARED_FILE):
11691 file_driver, file_path = instance.disks[0].logical_id
11692 file_path = os.path.dirname(file_path)
11694 file_driver = file_path = None
11695 disk_idx_base = len(instance.disks)
11696 new_disk = _GenerateDiskTemplate(self,
11697 instance.disk_template,
11698 instance.name, instance.primary_node,
11699 instance.secondary_nodes,
11703 disk_idx_base, feedback_fn)[0]
11704 instance.disks.append(new_disk)
11705 info = _GetInstanceInfoText(instance)
11707 logging.info("Creating volume %s for instance %s",
11708 new_disk.iv_name, instance.name)
11709 # Note: this needs to be kept in sync with _CreateDisks
11711 for node in instance.all_nodes:
11712 f_create = node == instance.primary_node
11714 _CreateBlockDev(self, node, instance, new_disk,
11715 f_create, info, f_create)
11716 except errors.OpExecError, err:
11717 self.LogWarning("Failed to create volume %s (%s) on"
11719 new_disk.iv_name, new_disk, node, err)
11720 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11721 (new_disk.size, new_disk.mode)))
11723 # change a given disk
11724 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11725 result.append(("disk.mode/%d" % disk_op,
11726 disk_dict[constants.IDISK_MODE]))
11728 if self.op.disk_template:
11730 check_nodes = set(instance.all_nodes)
11731 if self.op.remote_node:
11732 check_nodes.add(self.op.remote_node)
11733 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11734 owned = self.owned_locks(level)
11735 assert not (check_nodes - owned), \
11736 ("Not owning the correct locks, owning %r, expected at least %r" %
11737 (owned, check_nodes))
11739 r_shut = _ShutdownInstanceDisks(self, instance)
11741 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11742 " proceed with disk template conversion")
11743 mode = (instance.disk_template, self.op.disk_template)
11745 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11747 self.cfg.ReleaseDRBDMinors(instance.name)
11749 result.append(("disk_template", self.op.disk_template))
11751 assert instance.disk_template == self.op.disk_template, \
11752 ("Expected disk template '%s', found '%s'" %
11753 (self.op.disk_template, instance.disk_template))
11755 # Release node and resource locks if there are any (they might already have
11756 # been released during disk conversion)
11757 _ReleaseLocks(self, locking.LEVEL_NODE)
11758 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11761 for nic_op, nic_dict in self.op.nics:
11762 if nic_op == constants.DDM_REMOVE:
11763 # remove the last nic
11764 del instance.nics[-1]
11765 result.append(("nic.%d" % len(instance.nics), "remove"))
11766 elif nic_op == constants.DDM_ADD:
11767 # mac and bridge should be set by now
11768 mac = nic_dict[constants.INIC_MAC]
11769 ip = nic_dict.get(constants.INIC_IP, None)
11770 nicparams = self.nic_pinst[constants.DDM_ADD]
11771 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11772 instance.nics.append(new_nic)
11773 result.append(("nic.%d" % (len(instance.nics) - 1),
11774 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11775 (new_nic.mac, new_nic.ip,
11776 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11777 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11780 for key in (constants.INIC_MAC, constants.INIC_IP):
11781 if key in nic_dict:
11782 setattr(instance.nics[nic_op], key, nic_dict[key])
11783 if nic_op in self.nic_pinst:
11784 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11785 for key, val in nic_dict.iteritems():
11786 result.append(("nic.%s/%d" % (key, nic_op), val))
11789 if self.op.hvparams:
11790 instance.hvparams = self.hv_inst
11791 for key, val in self.op.hvparams.iteritems():
11792 result.append(("hv/%s" % key, val))
11795 if self.op.beparams:
11796 instance.beparams = self.be_inst
11797 for key, val in self.op.beparams.iteritems():
11798 result.append(("be/%s" % key, val))
11801 if self.op.os_name:
11802 instance.os = self.op.os_name
11805 if self.op.osparams:
11806 instance.osparams = self.os_inst
11807 for key, val in self.op.osparams.iteritems():
11808 result.append(("os/%s" % key, val))
11810 # online/offline instance
11811 if self.op.online_inst:
11812 self.cfg.MarkInstanceDown(instance.name)
11813 result.append(("admin_state", constants.ADMINST_DOWN))
11814 if self.op.offline_inst:
11815 self.cfg.MarkInstanceOffline(instance.name)
11816 result.append(("admin_state", constants.ADMINST_OFFLINE))
11818 self.cfg.Update(instance, feedback_fn)
11820 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11821 self.owned_locks(locking.LEVEL_NODE)), \
11822 "All node locks should have been released by now"
11826 _DISK_CONVERSIONS = {
11827 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11828 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
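# Illustrative note (not part of the original code): Exec() dispatches the
# template conversion through this mapping, e.g.
#   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)](self,
#                                                                    feedback_fn)
# so only plain<->drbd conversions are supported.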
11832 class LUInstanceChangeGroup(LogicalUnit):
11833 HPATH = "instance-change-group"
11834 HTYPE = constants.HTYPE_INSTANCE
11837 def ExpandNames(self):
11838 self.share_locks = _ShareAll()
11839 self.needed_locks = {
11840 locking.LEVEL_NODEGROUP: [],
11841 locking.LEVEL_NODE: [],
11844 self._ExpandAndLockInstance()
11846 if self.op.target_groups:
11847 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11848 self.op.target_groups)
11850 self.req_target_uuids = None
11852 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11854 def DeclareLocks(self, level):
11855 if level == locking.LEVEL_NODEGROUP:
11856 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11858 if self.req_target_uuids:
11859 lock_groups = set(self.req_target_uuids)
11861 # Lock all groups used by instance optimistically; this requires going
11862 # via the node before it's locked, requiring verification later on
11863 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11864 lock_groups.update(instance_groups)
11866 # No target groups, need to lock all of them
11867 lock_groups = locking.ALL_SET
11869 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11871 elif level == locking.LEVEL_NODE:
11872 if self.req_target_uuids:
11873 # Lock all nodes used by instances
11874 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11875 self._LockInstancesNodes()
11877 # Lock all nodes in all potential target groups
11878 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11879 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11880 member_nodes = [node_name
11881 for group in lock_groups
11882 for node_name in self.cfg.GetNodeGroup(group).members]
11883 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11885 # Lock all nodes as all groups are potential targets
11886 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11888 def CheckPrereq(self):
11889 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11890 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11891 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11893 assert (self.req_target_uuids is None or
11894 owned_groups.issuperset(self.req_target_uuids))
11895 assert owned_instances == set([self.op.instance_name])
11897 # Get instance information
11898 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11900 # Check if node groups for locked instance are still correct
11901 assert owned_nodes.issuperset(self.instance.all_nodes), \
11902 ("Instance %s's nodes changed while we kept the lock" %
11903 self.op.instance_name)
11905 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11908 if self.req_target_uuids:
11909 # User requested specific target groups
11910 self.target_uuids = self.req_target_uuids
11912 # All groups except those used by the instance are potential targets
11913 self.target_uuids = owned_groups - inst_groups
11915 conflicting_groups = self.target_uuids & inst_groups
11916 if conflicting_groups:
11917 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11918 " used by the instance '%s'" %
11919 (utils.CommaJoin(conflicting_groups),
11920 self.op.instance_name),
11921 errors.ECODE_INVAL)
11923 if not self.target_uuids:
11924 raise errors.OpPrereqError("There are no possible target groups",
11925 errors.ECODE_INVAL)
11927 def BuildHooksEnv(self):
11928 """Build hooks env.
11931 assert self.target_uuids
11934 "TARGET_GROUPS": " ".join(self.target_uuids),
11937 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11941 def BuildHooksNodes(self):
11942 """Build hooks nodes.
11945 mn = self.cfg.GetMasterNode()
11946 return ([mn], [mn])
11948 def Exec(self, feedback_fn):
11949 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11951 assert instances == [self.op.instance_name], "Instance not locked"
11953 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11954 instances=instances, target_groups=list(self.target_uuids))
11956 ial.Run(self.op.iallocator)
11958 if not ial.success:
11959 raise errors.OpPrereqError("Can't compute solution for changing group of"
11960 " instance '%s' using iallocator '%s': %s" %
11961 (self.op.instance_name, self.op.iallocator,
11963 errors.ECODE_NORES)
11965 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11967 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11968 " instance '%s'", len(jobs), self.op.instance_name)
11970 return ResultWithJobs(jobs)
11973 class LUBackupQuery(NoHooksLU):
11974 """Query the exports list
11979 def ExpandNames(self):
11980 self.needed_locks = {}
11981 self.share_locks[locking.LEVEL_NODE] = 1
11982 if not self.op.nodes:
11983 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11985 self.needed_locks[locking.LEVEL_NODE] = \
11986 _GetWantedNodes(self, self.op.nodes)
11988 def Exec(self, feedback_fn):
11989 """Compute the list of all the exported system images.
11992 @return: a dictionary with the structure node->(export-list)
11993 where export-list is a list of the instances exported on
11997 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11998 rpcresult = self.rpc.call_export_list(self.nodes)
12000 for node in rpcresult:
12001 if rpcresult[node].fail_msg:
12002 result[node] = False
12004 result[node] = rpcresult[node].payload
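# Illustrative sketch (not part of the original code): with hypothetical node
# and instance names the returned mapping could look like
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}
# where False marks a node whose export list could not be queried.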
12009 class LUBackupPrepare(NoHooksLU):
12010 """Prepares an instance for an export and returns useful information.
12015 def ExpandNames(self):
12016 self._ExpandAndLockInstance()
12018 def CheckPrereq(self):
12019 """Check prerequisites.
12022 instance_name = self.op.instance_name
12024 self.instance = self.cfg.GetInstanceInfo(instance_name)
12025 assert self.instance is not None, \
12026 "Cannot retrieve locked instance %s" % self.op.instance_name
12027 _CheckNodeOnline(self, self.instance.primary_node)
12029 self._cds = _GetClusterDomainSecret()
12031 def Exec(self, feedback_fn):
12032 """Prepares an instance for an export.
12035 instance = self.instance
12037 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12038 salt = utils.GenerateSecret(8)
12040 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12041 result = self.rpc.call_x509_cert_create(instance.primary_node,
12042 constants.RIE_CERT_VALIDITY)
12043 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12045 (name, cert_pem) = result.payload
12047 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12051 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12052 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12054 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12060 class LUBackupExport(LogicalUnit):
12061 """Export an instance to an image in the cluster.
12064 HPATH = "instance-export"
12065 HTYPE = constants.HTYPE_INSTANCE
12068 def CheckArguments(self):
12069 """Check the arguments.
12072 self.x509_key_name = self.op.x509_key_name
12073 self.dest_x509_ca_pem = self.op.destination_x509_ca
12075 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12076 if not self.x509_key_name:
12077 raise errors.OpPrereqError("Missing X509 key name for encryption",
12078 errors.ECODE_INVAL)
12080 if not self.dest_x509_ca_pem:
12081 raise errors.OpPrereqError("Missing destination X509 CA",
12082 errors.ECODE_INVAL)
12084 def ExpandNames(self):
12085 self._ExpandAndLockInstance()
12087 # Lock all nodes for local exports
12088 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12089 # FIXME: lock only instance primary and destination node
12091 # Sad but true, for now we have to lock all nodes, as we don't know where
12092 # the previous export might be, and in this LU we search for it and
12093 # remove it from its current node. In the future we could fix this by:
12094 # - making a tasklet to search (share-lock all), then create the
12095 # new one, then one to remove, after
12096 # - removing the removal operation altogether
12097 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12099 def DeclareLocks(self, level):
12100 """Last minute lock declaration."""
12101 # All nodes are locked anyway, so nothing to do here.
12103 def BuildHooksEnv(self):
12104 """Build hooks env.
12106 This will run on the master, primary node and target node.
12110 "EXPORT_MODE": self.op.mode,
12111 "EXPORT_NODE": self.op.target_node,
12112 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12113 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12114 # TODO: Generic function for boolean env variables
12115 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12118 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12122 def BuildHooksNodes(self):
12123 """Build hooks nodes.
12126 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12128 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12129 nl.append(self.op.target_node)
12133 def CheckPrereq(self):
12134 """Check prerequisites.
12136 This checks that the instance and node names are valid.
12139 instance_name = self.op.instance_name
12141 self.instance = self.cfg.GetInstanceInfo(instance_name)
12142 assert self.instance is not None, \
12143 "Cannot retrieve locked instance %s" % self.op.instance_name
12144 _CheckNodeOnline(self, self.instance.primary_node)
12146 if (self.op.remove_instance and
12147 self.instance.admin_state == constants.ADMINST_UP and
12148 not self.op.shutdown):
12149 raise errors.OpPrereqError("Can not remove instance without shutting it"
12152 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12153 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12154 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12155 assert self.dst_node is not None
12157 _CheckNodeOnline(self, self.dst_node.name)
12158 _CheckNodeNotDrained(self, self.dst_node.name)
12161 self.dest_disk_info = None
12162 self.dest_x509_ca = None
12164 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12165 self.dst_node = None
12167 if len(self.op.target_node) != len(self.instance.disks):
12168 raise errors.OpPrereqError(("Received destination information for %s"
12169 " disks, but instance %s has %s disks") %
12170 (len(self.op.target_node), instance_name,
12171 len(self.instance.disks)),
12172 errors.ECODE_INVAL)
12174 cds = _GetClusterDomainSecret()
12176 # Check X509 key name
12178 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12179 except (TypeError, ValueError), err:
12180 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12182 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12183 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12184 errors.ECODE_INVAL)
12186 # Load and verify CA
12188 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12189 except OpenSSL.crypto.Error, err:
12190 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12191 (err, ), errors.ECODE_INVAL)
12193 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12194 if errcode is not None:
12195 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12196 (msg, ), errors.ECODE_INVAL)
12198 self.dest_x509_ca = cert
12200 # Verify target information
12202 for idx, disk_data in enumerate(self.op.target_node):
12204 (host, port, magic) = \
12205 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12206 except errors.GenericError, err:
12207 raise errors.OpPrereqError("Target info for disk %s: %s" %
12208 (idx, err), errors.ECODE_INVAL)
12210 disk_info.append((host, port, magic))
12212 assert len(disk_info) == len(self.op.target_node)
12213 self.dest_disk_info = disk_info
12216 raise errors.ProgrammerError("Unhandled export mode %r" %
12219 # instance disk type verification
12220 # TODO: Implement export support for file-based disks
12221 for disk in self.instance.disks:
12222 if disk.dev_type == constants.LD_FILE:
12223 raise errors.OpPrereqError("Export not supported for instances with"
12224 " file-based disks", errors.ECODE_INVAL)
12226 def _CleanupExports(self, feedback_fn):
12227 """Removes exports of current instance from all other nodes.
12229 If an instance in a cluster with nodes A..D was exported to node C, its
12230 exports will be removed from the nodes A, B and D.
12233 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12235 nodelist = self.cfg.GetNodeList()
12236 nodelist.remove(self.dst_node.name)
12238 # on one-node clusters nodelist will be empty after the removal
12239 # if we proceed the backup would be removed because OpBackupQuery
12240 # substitutes an empty list with the full cluster node list.
12241 iname = self.instance.name
12243 feedback_fn("Removing old exports for instance %s" % iname)
12244 exportlist = self.rpc.call_export_list(nodelist)
12245 for node in exportlist:
12246 if exportlist[node].fail_msg:
12248 if iname in exportlist[node].payload:
12249 msg = self.rpc.call_export_remove(node, iname).fail_msg
12251 self.LogWarning("Could not remove older export for instance %s"
12252 " on node %s: %s", iname, node, msg)
12254 def Exec(self, feedback_fn):
12255 """Export an instance to an image in the cluster.
12258 assert self.op.mode in constants.EXPORT_MODES
12260 instance = self.instance
12261 src_node = instance.primary_node
12263 if self.op.shutdown:
12264 # shutdown the instance, but not the disks
12265 feedback_fn("Shutting down instance %s" % instance.name)
12266 result = self.rpc.call_instance_shutdown(src_node, instance,
12267 self.op.shutdown_timeout)
12268 # TODO: Maybe ignore failures if ignore_remove_failures is set
12269 result.Raise("Could not shutdown instance %s on"
12270 " node %s" % (instance.name, src_node))
12272 # set the disks ID correctly since call_instance_start needs the
12273 # correct drbd minor to create the symlinks
12274 for disk in instance.disks:
12275 self.cfg.SetDiskID(disk, src_node)
12277 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12280 # Activate the instance disks if we're exporting a stopped instance
12281 feedback_fn("Activating disks for %s" % instance.name)
12282 _StartInstanceDisks(self, instance, None)
12285 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12288 helper.CreateSnapshots()
12290 if (self.op.shutdown and
12291 instance.admin_state == constants.ADMINST_UP and
12292 not self.op.remove_instance):
12293 assert not activate_disks
12294 feedback_fn("Starting instance %s" % instance.name)
12295 result = self.rpc.call_instance_start(src_node,
12296 (instance, None, None), False)
12297 msg = result.fail_msg
12299 feedback_fn("Failed to start instance: %s" % msg)
12300 _ShutdownInstanceDisks(self, instance)
12301 raise errors.OpExecError("Could not start instance: %s" % msg)
12303 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12304 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12305 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12306 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12307 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12309 (key_name, _, _) = self.x509_key_name
12312 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12315 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12316 key_name, dest_ca_pem,
12321 # Check for backwards compatibility
12322 assert len(dresults) == len(instance.disks)
12323 assert compat.all(isinstance(i, bool) for i in dresults), \
12324 "Not all results are boolean: %r" % dresults
12328 feedback_fn("Deactivating disks for %s" % instance.name)
12329 _ShutdownInstanceDisks(self, instance)
12331 if not (compat.all(dresults) and fin_resu):
12334 failures.append("export finalization")
12335 if not compat.all(dresults):
12336 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12338 failures.append("disk export: disk(s) %s" % fdsk)
12340 raise errors.OpExecError("Export failed, errors in %s" %
12341 utils.CommaJoin(failures))
12343 # At this point, the export was successful, we can cleanup/finish
12345 # Remove instance if requested
12346 if self.op.remove_instance:
12347 feedback_fn("Removing instance %s" % instance.name)
12348 _RemoveInstance(self, feedback_fn, instance,
12349 self.op.ignore_remove_failures)
12351 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12352 self._CleanupExports(feedback_fn)
12354 return fin_resu, dresults
12357 class LUBackupRemove(NoHooksLU):
12358 """Remove exports related to the named instance.
12363 def ExpandNames(self):
12364 self.needed_locks = {}
12365 # We need all nodes to be locked in order for RemoveExport to work, but we
12366 # don't need to lock the instance itself, as nothing will happen to it (and
12367 # we can remove exports also for a removed instance)
12368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12370 def Exec(self, feedback_fn):
12371 """Remove any export.
12374 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12375 # If the instance was not found we'll try with the name that was passed in.
12376 # This will only work if it was an FQDN, though.
12378 if not instance_name:
12380 instance_name = self.op.instance_name
12382 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12383 exportlist = self.rpc.call_export_list(locked_nodes)
12385 for node in exportlist:
12386 msg = exportlist[node].fail_msg
12388 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12390 if instance_name in exportlist[node].payload:
12392 result = self.rpc.call_export_remove(node, instance_name)
12393 msg = result.fail_msg
12395 logging.error("Could not remove export for instance %s"
12396 " on node %s: %s", instance_name, node, msg)
12398 if fqdn_warn and not found:
12399 feedback_fn("Export not found. If trying to remove an export belonging"
12400 " to a deleted instance please use its Fully Qualified"
12404 class LUGroupAdd(LogicalUnit):
12405 """Logical unit for creating node groups.
12408 HPATH = "group-add"
12409 HTYPE = constants.HTYPE_GROUP
12412 def ExpandNames(self):
12413 # We need the new group's UUID here so that we can create and acquire the
12414 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12415 # that it should not check whether the UUID exists in the configuration.
12416 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12417 self.needed_locks = {}
12418 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12420 def CheckPrereq(self):
12421 """Check prerequisites.
12423 This checks that the given group name is not an existing node group
12428 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12429 except errors.OpPrereqError:
12432 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12433 " node group (UUID: %s)" %
12434 (self.op.group_name, existing_uuid),
12435 errors.ECODE_EXISTS)
12437 if self.op.ndparams:
12438 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12440 def BuildHooksEnv(self):
12441 """Build hooks env.
12445 "GROUP_NAME": self.op.group_name,
12448 def BuildHooksNodes(self):
12449 """Build hooks nodes.
12452 mn = self.cfg.GetMasterNode()
12453 return ([mn], [mn])
12455 def Exec(self, feedback_fn):
12456 """Add the node group to the cluster.
12459 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12460 uuid=self.group_uuid,
12461 alloc_policy=self.op.alloc_policy,
12462 ndparams=self.op.ndparams)
12464 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12465 del self.remove_locks[locking.LEVEL_NODEGROUP]
12468 class LUGroupAssignNodes(NoHooksLU):
12469 """Logical unit for assigning nodes to groups.
12474 def ExpandNames(self):
12475 # These raise errors.OpPrereqError on their own:
12476 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12477 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12479 # We want to lock all the affected nodes and groups. We have readily
12480 # available the list of nodes, and the *destination* group. To gather the
12481 # list of "source" groups, we need to fetch node information later on.
12482 self.needed_locks = {
12483 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12484 locking.LEVEL_NODE: self.op.nodes,
12487 def DeclareLocks(self, level):
12488 if level == locking.LEVEL_NODEGROUP:
12489 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12491 # Try to get all affected nodes' groups without having the group or node
12492 # lock yet. Needs verification later in the code flow.
12493 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12495 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12497 def CheckPrereq(self):
12498 """Check prerequisites.
12501 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12502 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12503 frozenset(self.op.nodes))
12505 expected_locks = (set([self.group_uuid]) |
12506 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12507 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12508 if actual_locks != expected_locks:
12509 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12510 " current groups are '%s', used to be '%s'" %
12511 (utils.CommaJoin(expected_locks),
12512 utils.CommaJoin(actual_locks)))
12514 self.node_data = self.cfg.GetAllNodesInfo()
12515 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12516 instance_data = self.cfg.GetAllInstancesInfo()
12518 if self.group is None:
12519 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12520 (self.op.group_name, self.group_uuid))
12522 (new_splits, previous_splits) = \
12523 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12524 for node in self.op.nodes],
12525 self.node_data, instance_data)
12528 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12530 if not self.op.force:
12531 raise errors.OpExecError("The following instances get split by this"
12532 " change and --force was not given: %s" %
12535 self.LogWarning("This operation will split the following instances: %s",
12538 if previous_splits:
12539 self.LogWarning("In addition, these already-split instances continue"
12540 " to be split across groups: %s",
12541 utils.CommaJoin(utils.NiceSort(previous_splits)))
12543 def Exec(self, feedback_fn):
12544 """Assign nodes to a new group.
12547 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12549 self.cfg.AssignGroupNodes(mods)
12552 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12553 """Check for split instances after a node assignment.
12555 This method considers a series of node assignments as an atomic operation,
12556 and returns information about split instances after applying the set of changes.
12559 In particular, it returns information about newly split instances, and
12560 instances that were already split, and remain so after the change.
12562 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
12565 @type changes: list of (node_name, new_group_uuid) pairs.
12566 @param changes: list of node assignments to consider.
12567 @param node_data: a dict with data for all nodes
12568 @param instance_data: a dict with all instances to consider
12569 @rtype: a two-tuple
12570 @return: a list of instances that were previously okay and end up split as a
12571 consequence of this change, and a list of instances that were previously
12572 split and remain split after this change.
12575 changed_nodes = dict((node, group) for node, group in changes
12576 if node_data[node].group != group)
12578 all_split_instances = set()
12579 previously_split_instances = set()
12581 def InstanceNodes(instance):
12582 return [instance.primary_node] + list(instance.secondary_nodes)
12584 for inst in instance_data.values():
12585 if inst.disk_template not in constants.DTS_INT_MIRROR:
12588 instance_nodes = InstanceNodes(inst)
12590 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12591 previously_split_instances.add(inst.name)
12593 if len(set(changed_nodes.get(node, node_data[node].group)
12594 for node in instance_nodes)) > 1:
12595 all_split_instances.add(inst.name)
12597 return (list(all_split_instances - previously_split_instances),
12598 list(previously_split_instances & all_split_instances))
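# Illustrative example (not part of the original code, hypothetical names):
# for a DRBD instance with nodes ("node1", "node2") both currently in group
# "g1", the assignment changes = [("node2", "g2-uuid")] makes the two node
# groups differ, so the instance name ends up in the first returned list
# (newly split); an instance already spanning two groups that the change does
# not reunite ends up in the second list.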
12601 class _GroupQuery(_QueryBase):
12602 FIELDS = query.GROUP_FIELDS
12604 def ExpandNames(self, lu):
12605 lu.needed_locks = {}
12607 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12608 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12611 self.wanted = [name_to_uuid[name]
12612 for name in utils.NiceSort(name_to_uuid.keys())]
12614 # Accept names to be either names or UUIDs.
12617 all_uuid = frozenset(self._all_groups.keys())
12619 for name in self.names:
12620 if name in all_uuid:
12621 self.wanted.append(name)
12622 elif name in name_to_uuid:
12623 self.wanted.append(name_to_uuid[name])
12625 missing.append(name)
12628 raise errors.OpPrereqError("Some groups do not exist: %s" %
12629 utils.CommaJoin(missing),
12630 errors.ECODE_NOENT)
12632 def DeclareLocks(self, lu, level):
12635 def _GetQueryData(self, lu):
12636 """Computes the list of node groups and their attributes.
12639 do_nodes = query.GQ_NODE in self.requested_data
12640 do_instances = query.GQ_INST in self.requested_data
12642 group_to_nodes = None
12643 group_to_instances = None
12645 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12646 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12647 # latter GetAllInstancesInfo() is not enough, for we have to go through
12648 # instance->node. Hence, we will need to process nodes even if we only need
12649 # instance information.
12650 if do_nodes or do_instances:
12651 all_nodes = lu.cfg.GetAllNodesInfo()
12652 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12655 for node in all_nodes.values():
12656 if node.group in group_to_nodes:
12657 group_to_nodes[node.group].append(node.name)
12658 node_to_group[node.name] = node.group
12661 all_instances = lu.cfg.GetAllInstancesInfo()
12662 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12664 for instance in all_instances.values():
12665 node = instance.primary_node
12666 if node in node_to_group:
12667 group_to_instances[node_to_group[node]].append(instance.name)
12670 # Do not pass on node information if it was not requested.
12671 group_to_nodes = None
12673 return query.GroupQueryData([self._all_groups[uuid]
12674 for uuid in self.wanted],
12675 group_to_nodes, group_to_instances)
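# Illustrative sketch (not part of the original code): when node and instance
# data are requested, the two mappings passed above could look like
#   group_to_nodes = {"group-uuid-1": ["node1.example.com"]}
#   group_to_instances = {"group-uuid-1": ["inst1.example.com"]}
# with hypothetical UUIDs and names.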
12678 class LUGroupQuery(NoHooksLU):
12679 """Logical unit for querying node groups.
12684 def CheckArguments(self):
12685 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12686 self.op.output_fields, False)
12688 def ExpandNames(self):
12689 self.gq.ExpandNames(self)
12691 def DeclareLocks(self, level):
12692 self.gq.DeclareLocks(self, level)
12694 def Exec(self, feedback_fn):
12695 return self.gq.OldStyleQuery(self)
12698 class LUGroupSetParams(LogicalUnit):
12699 """Modifies the parameters of a node group.
12702 HPATH = "group-modify"
12703 HTYPE = constants.HTYPE_GROUP
12706 def CheckArguments(self):
12709 self.op.alloc_policy,
12712 if all_changes.count(None) == len(all_changes):
12713 raise errors.OpPrereqError("Please pass at least one modification",
12714 errors.ECODE_INVAL)
12716 def ExpandNames(self):
12717 # This raises errors.OpPrereqError on its own:
12718 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12720 self.needed_locks = {
12721 locking.LEVEL_NODEGROUP: [self.group_uuid],
12724 def CheckPrereq(self):
12725 """Check prerequisites.
12728 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12730 if self.group is None:
12731 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12732 (self.op.group_name, self.group_uuid))
12734 if self.op.ndparams:
12735 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12736 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12737 self.new_ndparams = new_ndparams
12739 def BuildHooksEnv(self):
12740 """Build hooks env.
12744 "GROUP_NAME": self.op.group_name,
12745 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12748 def BuildHooksNodes(self):
12749 """Build hooks nodes.
12752 mn = self.cfg.GetMasterNode()
12753 return ([mn], [mn])
12755 def Exec(self, feedback_fn):
12756 """Modifies the node group.
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
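  # Illustrative note (assumption, not from the original source): the list
  # returned by Exec is a sequence of (parameter, new value) pairs, e.g.
  # [("ndparams", "{'oob_program': None}")], which callers such as the CLI
  # layer can print as a summary of what was changed.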
12772 class LUGroupRemove(LogicalUnit):
12773 HPATH = "group-remove"
12774 HTYPE = constants.HTYPE_GROUP
12777 def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
12779 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12780 self.needed_locks = {
12781 locking.LEVEL_NODEGROUP: [self.group_uuid],
12784 def CheckPrereq(self):
12785 """Check prerequisites.
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12792 # Verify that the group is empty.
12793 group_nodes = [node.name
12794 for node in self.cfg.GetAllNodesInfo().values()
12795 if node.group == self.group_uuid]
    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)
12804 # Verify the cluster would not be left group-less.
12805 if len(self.cfg.GetNodeGroupList()) == 1:
12806 raise errors.OpPrereqError("Group '%s' is the only group,"
12807 " cannot be removed" %
12808 self.op.group_name,
12809 errors.ECODE_STATE)
12811 def BuildHooksEnv(self):
12812 """Build hooks env.
12816 "GROUP_NAME": self.op.group_name,
12819 def BuildHooksNodes(self):
12820 """Build hooks nodes.
12823 mn = self.cfg.GetMasterNode()
12824 return ([mn], [mn])
12826 def Exec(self, feedback_fn):
12827 """Remove the node group.
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
12833 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12834 (self.op.group_name, self.group_uuid))
12836 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12839 class LUGroupRename(LogicalUnit):
12840 HPATH = "group-rename"
12841 HTYPE = constants.HTYPE_GROUP
12844 def ExpandNames(self):
12845 # This raises errors.OpPrereqError on its own:
12846 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12848 self.needed_locks = {
12849 locking.LEVEL_NODEGROUP: [self.group_uuid],
12852 def CheckPrereq(self):
12853 """Check prerequisites.
12855 Ensures requested new name is not yet used.
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)
12868 def BuildHooksEnv(self):
12869 """Build hooks env.
12873 "OLD_NAME": self.op.group_name,
12874 "NEW_NAME": self.op.new_name,
12877 def BuildHooksNodes(self):
12878 """Build hooks nodes.
12881 mn = self.cfg.GetMasterNode()
12883 all_nodes = self.cfg.GetAllNodesInfo()
12884 all_nodes.pop(mn, None)
    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
12888 if node.group == self.group_uuid)
12890 return (run_nodes, run_nodes)
12892 def Exec(self, feedback_fn):
12893 """Rename the node group.
12896 group = self.cfg.GetNodeGroup(self.group_uuid)
    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))
12902 group.name = self.op.new_name
12903 self.cfg.Update(group, feedback_fn)
12905 return self.op.new_name
12908 class LUGroupEvacuate(LogicalUnit):
12909 HPATH = "group-evacuate"
12910 HTYPE = constants.HTYPE_GROUP
12913 def ExpandNames(self):
12914 # This raises errors.OpPrereqError on its own:
12915 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12917 if self.op.target_groups:
12918 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12919 self.op.target_groups)
    else:
      self.req_target_uuids = []
12923 if self.group_uuid in self.req_target_uuids:
12924 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
12928 errors.ECODE_INVAL)
12930 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12932 self.share_locks = _ShareAll()
12933 self.needed_locks = {
12934 locking.LEVEL_INSTANCE: [],
12935 locking.LEVEL_NODEGROUP: [],
12936 locking.LEVEL_NODE: [],
12939 def DeclareLocks(self, level):
12940 if level == locking.LEVEL_INSTANCE:
12941 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12943 # Lock instances optimistically, needs verification once node and group
12944 # locks have been acquired
12945 self.needed_locks[locking.LEVEL_INSTANCE] = \
12946 self.cfg.GetNodeGroupInstances(self.group_uuid)
12948 elif level == locking.LEVEL_NODEGROUP:
12949 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12951 if self.req_target_uuids:
12952 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12954 # Lock all groups used by instances optimistically; this requires going
12955 # via the node before it's locked, requiring verification later on
12956 lock_groups.update(group_uuid
12957 for instance_name in
12958 self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
12965 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12967 elif level == locking.LEVEL_NODE:
12968 # This will only lock the nodes in the group to be evacuated which
12969 # contain actual instances
12970 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12971 self._LockInstancesNodes()
12973 # Lock all nodes in group to be evacuated and target groups
12974 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12975 assert self.group_uuid in owned_groups
12976 member_nodes = [node_name
12977 for group in owned_groups
12978 for node_name in self.cfg.GetNodeGroup(group).members]
12979 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
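  # A minimal sketch of the optimistic-locking pattern used above (for
  # illustration only): locks are declared from configuration data read
  # *before* the locks are held, so CheckPrereq has to re-verify the same
  # data once the locks have actually been acquired.
  #
  #   # DeclareLocks (optimistic):
  #   #   needed_locks[LEVEL_INSTANCE] = cfg.GetNodeGroupInstances(group_uuid)
  #   # CheckPrereq (verification):
  #   #   _CheckNodeGroupInstances(cfg, group_uuid, owned_instances)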
12981 def CheckPrereq(self):
12982 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12983 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12984 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12986 assert owned_groups.issuperset(self.req_target_uuids)
12987 assert self.group_uuid in owned_groups
12989 # Check if locked instances are still correct
12990 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12992 # Get instance information
12993 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12995 # Check if node groups for locked instances are still correct
12996 for instance_name in owned_instances:
12997 inst = self.instances[instance_name]
12998 assert owned_nodes.issuperset(inst.all_nodes), \
12999 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
13004 assert self.group_uuid in inst_groups, \
13005 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13007 if self.req_target_uuids:
13008 # User requested specific target groups
13009 self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
13012 self.target_uuids = [group_uuid for group_uuid in owned_groups
13013 if group_uuid != self.group_uuid]
13015 if not self.target_uuids:
13016 raise errors.OpPrereqError("There are no possible target groups",
13017 errors.ECODE_INVAL)
13019 def BuildHooksEnv(self):
13020 """Build hooks env.
13024 "GROUP_NAME": self.op.group_name,
13025 "TARGET_GROUPS": " ".join(self.target_uuids),
13028 def BuildHooksNodes(self):
13029 """Build hooks nodes.
13032 mn = self.cfg.GetMasterNode()
13034 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13036 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13038 return (run_nodes, run_nodes)
13040 def Exec(self, feedback_fn):
13041 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13043 assert self.group_uuid not in self.target_uuids
13045 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13046 instances=instances, target_groups=self.target_uuids)
13048 ial.Run(self.op.iallocator)
13050 if not ial.success:
13051 raise errors.OpPrereqError("Can't compute group evacuation using"
13052 " iallocator '%s': %s" %
13053 (self.op.iallocator, ial.info),
13054 errors.ECODE_NORES)
13056 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13058 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13059 len(jobs), self.op.group_name)
13061 return ResultWithJobs(jobs)
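  # Illustrative sketch of the returned value (not part of the module): each
  # element of "jobs" is itself a list of opcodes that will be submitted as
  # one job; the exact opcodes depend entirely on the iallocator result, e.g.:
  #
  #   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
  #           [opcodes.OpInstanceFailover(instance_name="inst2", ...)]]
  #   return ResultWithJobs(jobs)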
13064 class TagsLU(NoHooksLU): # pylint: disable=W0223
13065 """Generic tags LU.
13067 This is an abstract class which is the parent of all the other tags LUs.
13070 def ExpandNames(self):
13071 self.group_uuid = None
13072 self.needed_locks = {}
13073 if self.op.kind == constants.TAG_NODE:
13074 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13075 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13076 elif self.op.kind == constants.TAG_INSTANCE:
13077 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13078 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13079 elif self.op.kind == constants.TAG_NODEGROUP:
13080 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13082 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13083 # not possible to acquire the BGL based on opcode parameters)
13085 def CheckPrereq(self):
13086 """Check prerequisites.
13089 if self.op.kind == constants.TAG_CLUSTER:
13090 self.target = self.cfg.GetClusterInfo()
13091 elif self.op.kind == constants.TAG_NODE:
13092 self.target = self.cfg.GetNodeInfo(self.op.name)
13093 elif self.op.kind == constants.TAG_INSTANCE:
13094 self.target = self.cfg.GetInstanceInfo(self.op.name)
13095 elif self.op.kind == constants.TAG_NODEGROUP:
13096 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13098 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13099 str(self.op.kind), errors.ECODE_INVAL)
13102 class LUTagsGet(TagsLU):
13103 """Returns the tags of a given object.
13108 def ExpandNames(self):
13109 TagsLU.ExpandNames(self)
13111 # Share locks as this is only a read operation
13112 self.share_locks = _ShareAll()
13114 def Exec(self, feedback_fn):
13115 """Returns the tag list.
13118 return list(self.target.GetTags())
13121 class LUTagsSearch(NoHooksLU):
13122 """Searches the tags for a given pattern.
13127 def ExpandNames(self):
13128 self.needed_locks = {}
13130 def CheckPrereq(self):
13131 """Check prerequisites.
13133 This checks the pattern passed for validity by compiling it.
    try:
      self.re = re.compile(self.op.pattern)
13138 except re.error, err:
13139 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13140 (self.op.pattern, err), errors.ECODE_INVAL)
13142 def Exec(self, feedback_fn):
    """Returns the list of (path, tag) pairs matching the pattern.
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
13148 ilist = cfg.GetAllInstancesInfo().values()
13149 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13150 nlist = cfg.GetAllNodesInfo().values()
13151 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13152 tgts.extend(("/nodegroup/%s" % n.name, n)
13153 for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
13162 class LUTagsSet(TagsLU):
13163 """Sets a tag on a given object.
13168 def CheckPrereq(self):
13169 """Check prerequisites.
13171 This checks the type and length of the tag name and value.
13174 TagsLU.CheckPrereq(self)
13175 for tag in self.op.tags:
13176 objects.TaggableObject.ValidateTag(tag)
13178 def Exec(self, feedback_fn):
    try:
      for tag in self.op.tags:
13184 self.target.AddTag(tag)
13185 except errors.TagError, err:
13186 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13187 self.cfg.Update(self.target, feedback_fn)
13190 class LUTagsDel(TagsLU):
13191 """Delete a list of tags from a given object.
13196 def CheckPrereq(self):
13197 """Check prerequisites.
13199 This checks that we have the given tag.
13202 TagsLU.CheckPrereq(self)
13203 for tag in self.op.tags:
13204 objects.TaggableObject.ValidateTag(tag)
13205 del_tags = frozenset(self.op.tags)
13206 cur_tags = self.target.GetTags()
13208 diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13211 raise errors.OpPrereqError("Tag(s) %s not found" %
13212 (utils.CommaJoin(diff_names), ),
13213 errors.ECODE_NOENT)
13215 def Exec(self, feedback_fn):
13216 """Remove the tag from the object.
13219 for tag in self.op.tags:
13220 self.target.RemoveTag(tag)
13221 self.cfg.Update(self.target, feedback_fn)
13224 class LUTestDelay(NoHooksLU):
13225 """Sleep for a specified amount of time.
  This LU sleeps on the master and/or nodes for a specified amount of time.
13233 def ExpandNames(self):
13234 """Expand names and set required locks.
13236 This expands the node list, if any.
13239 self.needed_locks = {}
13240 if self.op.on_nodes:
13241 # _GetWantedNodes can be used here, but is not always appropriate to use
13242 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13243 # more information.
13244 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13245 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13247 def _TestDelay(self):
13248 """Do the actual sleep.
13251 if self.op.on_master:
13252 if not utils.TestDelay(self.op.duration):
13253 raise errors.OpExecError("Error during master delay test")
13254 if self.op.on_nodes:
13255 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13256 for node, node_result in result.items():
13257 node_result.Raise("Failure during rpc call to node %s" % node)
13259 def Exec(self, feedback_fn):
13260 """Execute the test delay opcode, with the wanted repetitions.
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
13272 class LUTestJqueue(NoHooksLU):
13273 """Utility LU to test some aspects of the job queue.
13278 # Must be lower than default timeout for WaitForJobChange to see whether it
13279 # notices changed jobs
13280 _CLIENT_CONNECT_TIMEOUT = 20.0
13281 _CLIENT_CONFIRM_TIMEOUT = 60.0
13284 def _NotifyUsingSocket(cls, cb, errcls):
13285 """Opens a Unix socket and waits for another program to connect.
13288 @param cb: Callback to send socket name to client
13289 @type errcls: class
13290 @param errcls: Exception class to use for errors
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
13296 tmpdir = tempfile.mkdtemp()
13298 tmpsock = utils.PathJoin(tmpdir, "sock")
13300 logging.debug("Creating temporary socket at %s", tmpsock)
13301 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13306 # Send details to client
13309 # Wait for client to connect before continuing
13310 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
    try:
      (conn, _) = sock.accept()
13313 except socket.error, err:
13314 raise errcls("Client didn't connect in time (%s)" % err)
13318 # Remove as soon as client is connected
13319 shutil.rmtree(tmpdir)
13321 # Wait for client to close
    try:
      # pylint: disable=E1101
      # Instance of '_socketobject' has no ... member
      conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
      conn.recv(1)
13328 except socket.error, err:
13329 raise errcls("Client failed to confirm notification (%s)" % err)
13333 def _SendNotification(self, test, arg, sockname):
13334 """Sends a notification to the client.
13337 @param test: Test name
13338 @param arg: Test argument (depends on test)
13339 @type sockname: string
13340 @param sockname: Socket path
13343 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13345 def _Notify(self, prereq, test, arg):
13346 """Notifies the client of a test.
13349 @param prereq: Whether this is a prereq-phase test
13351 @param test: Test name
13352 @param arg: Test argument (depends on test)
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg), errcls)
13364 def CheckArguments(self):
13365 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13366 self.expandnames_calls = 0
13368 def ExpandNames(self):
13369 checkargs_calls = getattr(self, "checkargs_calls", 0)
13370 if checkargs_calls < 1:
13371 raise errors.ProgrammerError("CheckArguments was not called")
13373 self.expandnames_calls += 1
13375 if self.op.notify_waitlock:
13376 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13378 self.LogInfo("Expanding names")
13380 # Get lock on master node (just to get a lock, not for a particular reason)
13381 self.needed_locks = {
13382 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13385 def Exec(self, feedback_fn):
13386 if self.expandnames_calls < 1:
13387 raise errors.ProgrammerError("ExpandNames was not called")
13389 if self.op.notify_exec:
13390 self._Notify(False, constants.JQT_EXEC, None)
13392 self.LogInfo("Executing")
13394 if self.op.log_messages:
13395 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13396 for idx, msg in enumerate(self.op.log_messages):
13397 self.LogInfo("Sending log message %s", idx + 1)
13398 feedback_fn(constants.JQT_MSGPREFIX + msg)
13399 # Report how many test messages have been sent
13400 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13403 raise errors.OpExecError("Opcode failure was requested")
13408 class IAllocator(object):
13409 """IAllocator framework.
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_text, out_text, in_data, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
13421 # pylint: disable=R0902
13422 # lots of instance attributes
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    self.mode = mode
13427 # init buffer variables
13428 self.in_text = self.out_text = self.in_data = self.out_data = None
13429 # init all input fields so that pylint is happy
13431 self.memory = self.disks = self.disk_template = None
13432 self.os = self.tags = self.nics = self.vcpus = None
13433 self.hypervisor = None
13434 self.relocate_from = None
13436 self.instances = None
13437 self.evac_mode = None
13438 self.target_groups = []
13440 self.required_nodes = None
13441 # init result fields
13442 self.success = self.info = self.result = None
    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
13450 keyset = [n for (n, _) in keydata]
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

13462 self._BuildInputData(compat.partial(fn, self), keydata)
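  # Illustrative sketch (assumed values, not from the original source): the
  # keyword arguments accepted by __init__ are exactly the keys listed in
  # _MODE_DATA for the chosen mode, e.g. for an allocation request:
  #
  #   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", memory=1024, vcpus=1,
  #                    disks=[{constants.IDISK_SIZE: 10240,
  #                            constants.IDISK_MODE: constants.DISK_RDWR}],
  #                    disk_template=constants.DT_DRBD8, os="debian-image",
  #                    tags=[], nics=[{}], hypervisor=None)
  #
  # Anything missing from, or extra to, that key set raises ProgrammerError.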
13464 def _ComputeClusterData(self):
13465 """Compute the generic allocator input data.
13467 This is the data that is independent of the actual operation.
13471 cluster_info = cfg.GetClusterInfo()
    data = {
      "version": constants.IALLOCATOR_VERSION,
13475 "cluster_name": cfg.GetClusterName(),
13476 "cluster_tags": list(cluster_info.GetTags()),
13477 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13478 # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
13481 iinfo = cfg.GetAllInstancesInfo().values()
13482 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13485 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13487 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13488 hypervisor_name = self.hypervisor
13489 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13490 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
13500 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13502 config_ndata = self._ComputeBasicNodeData(ninfo)
13503 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13504 i_list, config_ndata)
13505 assert len(data["nodes"]) == len(ninfo), \
13506 "Incomplete node data computed"
13508 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13510 self.in_data = data
13513 def _ComputeNodeGroupData(cfg):
13514 """Compute node groups data.
13517 ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
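  # The resulting structure is a plain dict keyed by group UUID, for example
  # (hypothetical UUID and values):
  #   {"uuid-a": {"name": "default", "alloc_policy": "preferred"}}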
13526 def _ComputeBasicNodeData(node_cfg):
13527 """Compute global node data.
13530 @returns: a dict of name: (node dict, node config)
13533 # fill in static (config-based) values
13534 node_results = dict((ninfo.name, {
13535 "tags": list(ninfo.GetTags()),
13536 "primary_ip": ninfo.primary_ip,
13537 "secondary_ip": ninfo.secondary_ip,
13538 "offline": ninfo.offline,
13539 "drained": ninfo.drained,
13540 "master_candidate": ninfo.master_candidate,
13541 "group": ninfo.group,
13542 "master_capable": ninfo.master_capable,
13543 "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())
13547 return node_results
13550 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13552 """Compute global node data.
13554 @param node_results: the basic node structures as filled from the config
13557 #TODO(dynmem): compute the right data on MAX and MIN memory
13558 # make a copy of the current dict
13559 node_results = dict(node_results)
13560 for nname, nresult in node_data.items():
13561 assert nname in node_results, "Missing basic data for node %s" % nname
13562 ninfo = node_cfg[nname]
13564 if not (ninfo.offline or ninfo.drained):
13565 nresult.Raise("Can't get data for node %s" % nname)
13566 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13568 remote_info = nresult.payload
13570 for attr in ["memory_total", "memory_free", "memory_dom0",
13571 "vg_size", "vg_free", "cpu_total"]:
13572 if attr not in remote_info:
13573 raise errors.OpExecError("Node '%s' didn't return attribute"
13574 " '%s'" % (nname, attr))
13575 if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
13579 # compute memory used by primary instances
13580 i_p_mem = i_p_up_mem = 0
13581 for iinfo, beinfo in i_list:
13582 if iinfo.primary_node == nname:
13583 i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13588 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13589 remote_info["memory_free"] -= max(0, i_mem_diff)
13591 if iinfo.admin_state == constants.ADMINST_UP:
13592 i_p_up_mem += beinfo[constants.BE_MAXMEM]
13594 # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
13597 "reserved_memory": remote_info["memory_dom0"],
13598 "free_memory": remote_info["memory_free"],
13599 "total_disk": remote_info["vg_size"],
13600 "free_disk": remote_info["vg_free"],
13601 "total_cpus": remote_info["cpu_total"],
13602 "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
13605 pnr_dyn.update(node_results[nname])
13606 node_results[nname] = pnr_dyn
13608 return node_results
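  # Worked example for the free-memory adjustment above (numbers are made up):
  # if an instance has BE_MAXMEM=1024 but the hypervisor reports it currently
  # using 768 MiB, then i_mem_diff = 1024 - 768 = 256 and the node's reported
  # "memory_free" is reduced by max(0, 256) = 256, so the allocator treats the
  # memory the instance could still grow into as unavailable.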
13611 def _ComputeInstanceData(cluster_info, i_list):
13612 """Compute global instance data.
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
13619 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
13626 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13627 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13628 nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
13645 instance_data[iinfo.name] = pir
13647 return instance_data
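  # Illustrative shape of one entry in the returned dict (values are
  # placeholders, not real data; keys follow the dict built above):
  #
  #   instance_data["inst1.example.com"] = {
  #     "tags": [], "admin_state": "up", "vcpus": 1, "memory": 1024,
  #     "nodes": ["node1", "node2"],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "plain", "hypervisor": "xen-pvm",
  #     "disk_space_total": 10240,
  #   }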
13649 def _AddNewInstance(self):
13650 """Add new instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.
13659 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13661 if self.disk_template in constants.DTS_INT_MIRROR:
13662 self.required_nodes = 2
13664 self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
13682 def _AddRelocateInstance(self):
13683 """Add relocate instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.
13692 instance = self.cfg.GetInstanceInfo(self.name)
13693 if instance is None:
13694 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13695 " IAllocator" % self.name)
13697 if instance.disk_template not in constants.DTS_MIRRORED:
13698 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13699 errors.ECODE_INVAL)
13701 if instance.disk_template in constants.DTS_INT_MIRROR and \
13702 len(instance.secondary_nodes) != 1:
13703 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13704 errors.ECODE_STATE)
13706 self.required_nodes = 1
13707 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
13718 def _AddNodeEvacuate(self):
13719 """Get data for node-evacuate requests.
    request = {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }
    return request
13727 def _AddChangeGroup(self):
    """Get data for group change requests.
    request = {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
    return request
13736 def _BuildInputData(self, fn, keydata):
13737 """Build input data structures.
13740 self._ComputeClusterData()
    request = fn()
    request["type"] = self.mode
13744 for keyname, keytype in keydata:
13745 if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
13748 val = request[keyname]
13749 if not keytype(val):
13750 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13751 " validation, value %s, expected"
13752 " type %s" % (keyname, val, keytype))
13753 self.in_data["request"] = request
13755 self.in_text = serializer.Dump(self.in_data)
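  # Illustrative sketch of the serialized request (field values assumed): the
  # "request" dict carries the mode plus the mode-specific keys validated
  # against keydata, and is embedded into the cluster-wide input under the
  # "request" key before being dumped to text, e.g.:
  #
  #   self.in_data["request"] = {"type": "relocate",
  #                              "name": "inst1.example.com",
  #                              "relocate_from": ["node2"],
  #                              "disk_space_total": 10368,
  #                              "required_nodes": 1}
  #   self.in_text = serializer.Dump(self.in_data)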
13757 _STRING_LIST = ht.TListOf(ht.TString)
13758 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13759 # pylint: disable=E1101
13760 # Class '...' has no 'OP_ID' member
13761 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13762 opcodes.OpInstanceMigrate.OP_ID,
13763 opcodes.OpInstanceReplaceDisks.OP_ID])
13767 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13768 ht.TItems([ht.TNonEmptyString,
13769 ht.TNonEmptyString,
13770 ht.TListOf(ht.TNonEmptyString),
13773 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13774 ht.TItems([ht.TNonEmptyString,
13777 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13778 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
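  # Reading of the validators above (structure only, example data assumed):
  # a node-evacuation result must be a 3-tuple of "moved", "failed" and
  # "jobs" entries, e.g.:
  #
  #   moved  = [["inst1", "target", ["node3", "node4"]]]   # 3 items each
  #   failed = [["inst2", "disk is degraded"]]             # name, reason
  #   jobs   = [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]]        # opcode dicts
  #
  # What the second element of a "moved" entry denotes is not visible in this
  # excerpt and is left unspecified here.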
13781 constants.IALLOCATOR_MODE_ALLOC:
13784 ("name", ht.TString),
13785 ("memory", ht.TInt),
13786 ("disks", ht.TListOf(ht.TDict)),
13787 ("disk_template", ht.TString),
13788 ("os", ht.TString),
13789 ("tags", _STRING_LIST),
13790 ("nics", ht.TListOf(ht.TDict)),
13791 ("vcpus", ht.TInt),
13792 ("hypervisor", ht.TString),
13794 constants.IALLOCATOR_MODE_RELOC:
13795 (_AddRelocateInstance,
13796 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13798 constants.IALLOCATOR_MODE_NODE_EVAC:
13799 (_AddNodeEvacuate, [
13800 ("instances", _STRING_LIST),
13801 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13803 constants.IALLOCATOR_MODE_CHG_GROUP:
13804 (_AddChangeGroup, [
13805 ("instances", _STRING_LIST),
13806 ("target_groups", _STRING_LIST),
13810 def Run(self, name, validate=True, call_fn=None):
13811 """Run an instance allocator and return the results.
13814 if call_fn is None:
13815 call_fn = self.rpc.call_iallocator_runner
13817 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13818 result.Raise("Failure while running the iallocator script")
13820 self.out_text = result.payload
    if validate:
      self._ValidateResult()
13824 def _ValidateResult(self):
13825 """Process the allocator results.
13827 This will process and if successful save the result in
13828 self.out_data and the other parameters.
    try:
      rdict = serializer.Load(self.out_text)
13833 except Exception, err:
13834 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13836 if not isinstance(rdict, dict):
13837 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
13840 if "nodes" in rdict and "result" not in rdict:
13841 rdict["result"] = rdict["nodes"]
13844 for key in "success", "info", "result":
13845 if key not in rdict:
13846 raise errors.OpExecError("Can't parse iallocator results:"
13847 " missing key '%s'" % key)
13848 setattr(self, key, rdict[key])
13850 if not self._result_check(self.result):
13851 raise errors.OpExecError("Iallocator returned invalid result,"
13852 " expected %s, got %s" %
13853 (self._result_check, self.result),
13854 errors.ECODE_INVAL)
13856 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13857 assert self.relocate_from is not None
13858 assert self.required_nodes == 1
13860 node2group = dict((name, ndata["group"])
13861 for (name, ndata) in self.in_data["nodes"].items())
13863 fn = compat.partial(self._NodesToGroups, node2group,
13864 self.in_data["nodegroups"])
13866 instance = self.cfg.GetInstanceInfo(self.name)
13867 request_groups = fn(self.relocate_from + [instance.primary_node])
13868 result_groups = fn(rdict["result"] + [instance.primary_node])
13870 if self.success and not set(result_groups).issubset(request_groups):
13871 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13872 " differ from original groups (%s)" %
13873 (utils.CommaJoin(result_groups),
13874 utils.CommaJoin(request_groups)))
13876 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13877 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13879 self.out_data = rdict
13882 def _NodesToGroups(node2group, groups, nodes):
13883 """Returns a list of unique group names for a list of nodes.
13885 @type node2group: dict
13886 @param node2group: Map from node name to group UUID
13888 @param groups: Group information
13890 @param nodes: Node names
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
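  # Example use of the helper above (hypothetical data): given
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  # _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"]) returns
  # ["default", "uuid-b"]: unknown nodes are skipped, a group that cannot be
  # resolved falls back to its UUID, and the result is sorted and de-duplicated.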
13915 class LUTestAllocator(NoHooksLU):
13916 """Run allocator tests.
13918 This LU runs the allocator tests
13921 def CheckPrereq(self):
13922 """Check prerequisites.
    This checks the opcode parameters depending on the direction and mode
    of the test.
13927 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13928 for attr in ["memory", "disks", "disk_template",
13929 "os", "tags", "nics", "vcpus"]:
13930 if not hasattr(self.op, attr):
13931 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13932 attr, errors.ECODE_INVAL)
13933 iname = self.cfg.ExpandInstanceName(self.op.name)
13934 if iname is not None:
13935 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13936 iname, errors.ECODE_EXISTS)
13937 if not isinstance(self.op.nics, list):
13938 raise errors.OpPrereqError("Invalid parameter 'nics'",
13939 errors.ECODE_INVAL)
13940 if not isinstance(self.op.disks, list):
13941 raise errors.OpPrereqError("Invalid parameter 'disks'",
13942 errors.ECODE_INVAL)
13943 for row in self.op.disks:
13944 if (not isinstance(row, dict) or
13945 constants.IDISK_SIZE not in row or
13946 not isinstance(row[constants.IDISK_SIZE], int) or
13947 constants.IDISK_MODE not in row or
13948 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13949 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13950 " parameter", errors.ECODE_INVAL)
13951 if self.op.hypervisor is None:
13952 self.op.hypervisor = self.cfg.GetHypervisorType()
13953 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13954 fname = _ExpandInstanceName(self.cfg, self.op.name)
13955 self.op.name = fname
13956 self.relocate_from = \
13957 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13958 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13959 constants.IALLOCATOR_MODE_NODE_EVAC):
13960 if not self.op.instances:
13961 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13962 self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)
13967 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13968 if self.op.allocator is None:
13969 raise errors.OpPrereqError("Missing allocator name",
13970 errors.ECODE_INVAL)
13971 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13972 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13973 self.op.direction, errors.ECODE_INVAL)
13975 def Exec(self, feedback_fn):
13976 """Run the allocator test.
13979 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13980 ial = IAllocator(self.cfg, self.rpc,
13983 memory=self.op.memory,
13984 disks=self.op.disks,
13985 disk_template=self.op.disk_template,
13989 vcpus=self.op.vcpus,
13990 hypervisor=self.op.hypervisor,
13992 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13993 ial = IAllocator(self.cfg, self.rpc,
13996 relocate_from=list(self.relocate_from),
13998 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13999 ial = IAllocator(self.cfg, self.rpc,
14001 instances=self.op.instances,
14002 target_groups=self.op.target_groups)
14003 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14004 ial = IAllocator(self.cfg, self.rpc,
14006 instances=self.op.instances,
14007 evac_mode=self.op.evac_mode)
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)
14012 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14013 result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
14020 #: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
14023 constants.QR_NODE: _NodeQuery,
14024 constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }
14028 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14031 def _GetQueryImplementation(name):
  """Returns the implementation for a query type.
14034 @param name: Query type, must be one of L{constants.QR_VIA_OP}
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
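# Example (illustrative sketch, not part of the original module): a caller
# that needs to run, say, a group query would look the implementation up by
# resource type and drive it the same way LUGroupQuery does above:
#
#   impl_cls = _GetQueryImplementation(constants.QR_GROUP)   # -> _GroupQuery
#   # unknown resource names raise OpPrereqError with errors.ECODE_INVAL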