# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module
import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 """Data container for LU results with jobs.
80 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82 contained in the C{jobs} attribute and include the job IDs in the opcode
86 def __init__(self, jobs, **kwargs):
87 """Initializes this class.
89 Additional return values can be specified as keyword arguments.
91 @type jobs: list of lists of L{opcode.OpCode}
92 @param jobs: A list of lists of opcode objects
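
    Example of returning jobs from an LU's C{Exec} (a sketch; the opcode
    used here is illustrative only)::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                            other_value=123)

    """
    self.jobs = jobs
    self.other = kwargs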


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
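
  A minimal sketch of a concrete LU (all names below are illustrative only)::

    class LUClusterExample(LogicalUnit):
      HPATH = "cluster-example"
      HTYPE = constants.HTYPE_CLUSTER

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def Exec(self, feedback_fn):
        feedback_fn("Nothing to do")

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True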

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @type level: member of ganeti.locking.LEVELS
    @param level: Locking level which is going to be locked
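
    A sketch of a typical implementation (see also L{_LockInstancesNodes})::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()

    """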

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that prefix is added by the hooks runner. The hooks
      runner will extend the environment with additional variables. If no
      environment should be defined, an empty dictionary should be returned
      (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. "No nodes" should be expressed as
      an empty list (and not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
428 """Tasklet base class.
430 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431 they can mix legacy code with tasklets. Locking needs to be done in the LU,
432 tasklets know nothing about locks.
434 Subclasses must follow these rules:
435 - Implement CheckPrereq
439 def __init__(self, lu):
446 def CheckPrereq(self):
447 """Check prerequisites for this tasklets.
449 This method should check whether the prerequisites for the execution of
450 this tasklet are fulfilled. It can do internode communication, but it
451 should be idempotent - no cluster or system changes are allowed.
453 The method should raise errors.OpPrereqError in case something is not
454 fulfilled. Its return value is ignored.
456 This method should also update all parameters to their canonical form if it
457 hasn't been done before.
462 def Exec(self, feedback_fn):
463 """Execute the tasklet.
465 This method should implement the actual work. It should raise
466 errors.OpExecError for failures that are somewhat dealt with in code, or
470 raise NotImplementedError
474 """Base for query utility classes.
477 #: Attribute holding field definitions
480 def __init__(self, filter_, fields, use_locking):
481 """Initializes this class.
484 self.use_locking = use_locking
486 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
488 self.requested_data = self.query.RequestedData()
489 self.names = self.query.RequestedNames()
491 # Sort only if no names were requested
492 self.sort_by_name = not self.names
494 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
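
# For example (a sketch; the names are illustrative): passing ["node1"]
# returns the expanded form, e.g. ["node1.example.com"], while passing None
# returns every node name in the cluster, nicely sorted.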


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
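
# A sketch of the merge semantics:
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT, "c": 3})
# returns {"a": 1, "c": 3}: "b" is dropped so it reverts to its default, and
# "c" is added. With use_none=True, a value of None behaves like VALUE_DEFAULT.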


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
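
# For example (a sketch): with candidate_pool_size = 10, three current
# candidates and a desired count of four, mc_should becomes min(4 + 1, 10) = 5
# and 3 < 5, so the new node promotes itself.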


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
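
# Variants are encoded in the OS name itself, e.g. "debootstrap+default"
# (a sketch; the OS name is illustrative): everything after the "+" is the
# variant, and an OS that declares supported variants must be given one.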


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
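
# Example use (a sketch; the certificate path is illustrative):
#   (etype, msg) = _VerifyCertificate("/var/lib/ganeti/server.pem")
# yields (None, None) for a healthy certificate, or an (ETYPE_*, message)
# pair suitable for reporting through LUClusterVerify's error machinery.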


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = vglist is None
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pvs
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
      "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
                        for (files, fn) in [(files_all, None),
                                            (files_all_opt, None),
                                            (files_mc,
                                             lambda node: (node.master_candidate or
                                                           node.name == master_node)),
                                            (files_vm, lambda node: node.vm_capable)]
                        for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
1880 def _VerifyNodeOS(self, ninfo, nimg, base):
1881 """Verifies the node OS list.
1883 @type ninfo: L{objects.Node}
1884 @param ninfo: the node to check
1885 @param nimg: the node image object
1886 @param base: the 'template' node we match against (e.g. from the master)
1890 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1892 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1894 for os_name, os_data in nimg.oslist.items():
1895 assert os_data, "Empty OS status for OS %s?!" % os_name
1896 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1897 _ErrorIf(not f_status, self.ENODEOS, node,
1898 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1899 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1900 "OS '%s' has multiple entries (first one shadows the rest): %s",
1901 os_name, utils.CommaJoin([v[0] for v in os_data]))
1902 # this will be caught in the backend too
1903 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1904 and not f_var, self.ENODEOS, node,
1905 "OS %s with API at least %d does not declare any variant",
1906 os_name, constants.OS_API_V15)
1907 # comparisons with the 'base' image
1908 test = os_name not in base.oslist
1909 _ErrorIf(test, self.ENODEOS, node,
1910 "Extra OS %s not present on reference node (%s)",
1914 assert base.oslist[os_name], "Base node has empty OS status?"
1915 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1917 # base OS is invalid, skipping
1919 for kind, a, b in [("API version", f_api, b_api),
1920 ("variants list", f_var, b_var),
1921 ("parameters", f_param, b_param)]:
1922 _ErrorIf(a != b, self.ENODEOS, node,
1923 "OS %s %s differs from reference node %s: %s vs. %s",
1924 kind, os_name, base.name,
1925 utils.CommaJoin(a), utils.CommaJoin(b))
1927 # check any missing OSes
1928 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1929 _ErrorIf(missing, self.ENODEOS, node,
1930 "OSes present on reference node %s but missing on this node: %s",
1931 base.name, utils.CommaJoin(missing))
1933 def _VerifyOob(self, ninfo, nresult):
1934 """Verifies out of band functionality of a node.
1936 @type ninfo: L{objects.Node}
1937 @param ninfo: the node to check
1938 @param nresult: the remote results for the node
1942 # We just have to verify the paths on master and/or master candidates
1943 # as the oob helper is invoked on the master
1944 if ((ninfo.master_candidate or ninfo.master_capable) and
1945 constants.NV_OOB_PATHS in nresult):
1946 for path_result in nresult[constants.NV_OOB_PATHS]:
1947 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1949 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1950 """Verifies and updates the node volume data.
1952 This function will update a L{NodeImage}'s internal structures
1953 with data from the remote call.
1955 @type ninfo: L{objects.Node}
1956 @param ninfo: the node to check
1957 @param nresult: the remote results for the node
1958 @param nimg: the node image object
1959 @param vg_name: the configured VG name
1963 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1965 nimg.lvm_fail = True
1966 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1969 elif isinstance(lvdata, basestring):
1970 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1971 utils.SafeEncode(lvdata))
1972 elif not isinstance(lvdata, dict):
1973 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1975 nimg.volumes = lvdata
1976 nimg.lvm_fail = False
1978 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1979 """Verifies and updates the node instance list.
1981 If the listing was successful, then updates this node's instance
1982 list. Otherwise, it marks the RPC call as failed for the instance
1983 list.
1985 @type ninfo: L{objects.Node}
1986 @param ninfo: the node to check
1987 @param nresult: the remote results for the node
1988 @param nimg: the node image object
1991 idata = nresult.get(constants.NV_INSTANCELIST, None)
1992 test = not isinstance(idata, list)
1993 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1994 " (instancelist): %s", utils.SafeEncode(str(idata)))
1996 nimg.hyp_fail = True
1998 nimg.instances = idata
2000 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2001 """Verifies and computes a node information map
2003 @type ninfo: L{objects.Node}
2004 @param ninfo: the node to check
2005 @param nresult: the remote results for the node
2006 @param nimg: the node image object
2007 @param vg_name: the configured VG name
2011 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2013 # try to read free memory (from the hypervisor)
2014 hv_info = nresult.get(constants.NV_HVINFO, None)
2015 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2016 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2019 nimg.mfree = int(hv_info["memory_free"])
2020 except (ValueError, TypeError):
2021 _ErrorIf(True, self.ENODERPC, node,
2022 "node returned invalid nodeinfo, check hypervisor")
2024 # FIXME: devise a free space model for file based instances as well
2025 if vg_name is not None:
2026 test = (constants.NV_VGLIST not in nresult or
2027 vg_name not in nresult[constants.NV_VGLIST])
2028 _ErrorIf(test, self.ENODELVM, node,
2029 "node didn't return data for the volume group '%s'"
2030 " - it is either missing or broken", vg_name)
2033 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2034 except (ValueError, TypeError):
2035 _ErrorIf(True, self.ENODERPC, node,
2036 "node returned invalid LVM info, check LVM status")
2038 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2039 """Gets per-disk status information for all instances.
2041 @type nodelist: list of strings
2042 @param nodelist: Node names
2043 @type node_image: dict of (name, L{objects.Node})
2044 @param node_image: Node objects
2045 @type instanceinfo: dict of (name, L{objects.Instance})
2046 @param instanceinfo: Instance objects
2047 @rtype: {instance: {node: [(success, payload)]}}
2048 @return: a dictionary of per-instance dictionaries with nodes as
2049 keys and disk information as values; the disk information is a
2050 list of tuples (success, payload)
2053 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2056 node_disks_devonly = {}
2057 diskless_instances = set()
2058 diskless = constants.DT_DISKLESS
2060 for nname in nodelist:
2061 node_instances = list(itertools.chain(node_image[nname].pinst,
2062 node_image[nname].sinst))
2063 diskless_instances.update(inst for inst in node_instances
2064 if instanceinfo[inst].disk_template == diskless)
2065 disks = [(inst, disk)
2066 for inst in node_instances
2067 for disk in instanceinfo[inst].disks]
2070 # No need to collect data
2073 node_disks[nname] = disks
2075 # Creating copies as SetDiskID below will modify the objects and that can
2076 # lead to incorrect data returned from nodes
2077 devonly = [dev.Copy() for (_, dev) in disks]
2080 self.cfg.SetDiskID(dev, nname)
2082 node_disks_devonly[nname] = devonly
2084 assert len(node_disks) == len(node_disks_devonly)
2086 # Collect data from all nodes with disks
2087 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2090 assert len(result) == len(node_disks)
2094 for (nname, nres) in result.items():
2095 disks = node_disks[nname]
2098 # No data from this node
2099 data = len(disks) * [(False, "node offline")]
2102 _ErrorIf(msg, self.ENODERPC, nname,
2103 "while getting disk information: %s", msg)
2105 # No data from this node
2106 data = len(disks) * [(False, msg)]
2109 for idx, i in enumerate(nres.payload):
2110 if isinstance(i, (tuple, list)) and len(i) == 2:
2113 logging.warning("Invalid result from node %s, entry %d: %s",
2115 data.append((False, "Invalid result from the remote node"))
2117 for ((inst, _), status) in zip(disks, data):
2118 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2120 # Add empty entries for diskless instances.
2121 for inst in diskless_instances:
2122 assert inst not in instdisk
2125 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2126 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2127 compat.all(isinstance(s, (tuple, list)) and
2128 len(s) == 2 for s in statuses)
2129 for inst, nnames in instdisk.items()
2130 for nname, statuses in nnames.items())
2131 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
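# Standalone sketch of how instdisk is assembled above: setdefault()
# creates the per-instance and per-node levels on demand while the
# statuses are appended in disk order (input shape is hypothetical).
def _SketchGroupDiskStatuses(pairs):
  """pairs: iterable of ((instance, node), (success, payload))."""
  instdisk = {}
  for ((inst, nname), status) in pairs:
    instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
  return instdisk

# _SketchGroupDiskStatuses([(("inst1", "node1"), (True, "ok"))]) gives
# {"inst1": {"node1": [(True, "ok")]}}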
2135 def _VerifyHVP(self, hvp_data):
2136 """Verifies locally the syntax of the hypervisor parameters.
2139 for item, hv_name, hv_params in hvp_data:
2140 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2143 hv_class = hypervisor.GetHypervisor(hv_name)
2144 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2145 hv_class.CheckParameterSyntax(hv_params)
2146 except errors.GenericError, err:
2147 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2149 def BuildHooksEnv(self):
2152 Cluster-Verify hooks run only in the post phase; if they fail, their
2153 output is logged in the verify output and the verification fails.
2159 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2162 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2163 for node in cfg.GetAllNodesInfo().values())
2167 def BuildHooksNodes(self):
2168 """Build hooks nodes.
2171 return ([], self.cfg.GetNodeList())
2173 def Exec(self, feedback_fn):
2174 """Verify integrity of cluster, performing various test on nodes.
2177 # This method has too many local variables. pylint: disable-msg=R0914
2179 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2180 verbose = self.op.verbose
2181 self._feedback_fn = feedback_fn
2182 feedback_fn("* Verifying global settings")
2183 for msg in self.cfg.VerifyConfig():
2184 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2186 # Check the cluster certificates
2187 for cert_filename in constants.ALL_CERT_FILES:
2188 (errcode, msg) = _VerifyCertificate(cert_filename)
2189 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2191 vg_name = self.cfg.GetVGName()
2192 drbd_helper = self.cfg.GetDRBDHelper()
2193 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2194 cluster = self.cfg.GetClusterInfo()
2195 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2196 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2197 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2198 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2199 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2200 for iname in instancelist)
2201 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2202 i_non_redundant = [] # Non redundant instances
2203 i_non_a_balanced = [] # Non auto-balanced instances
2204 n_offline = 0 # Count of offline nodes
2205 n_drained = 0 # Count of nodes being drained
2206 node_vol_should = {}
2208 # FIXME: verify OS list
2211 filemap = _ComputeAncillaryFiles(cluster, False)
2213 # do local checksums
2214 master_node = self.master_node = self.cfg.GetMasterNode()
2215 master_ip = self.cfg.GetMasterIP()
2217 # Compute the set of hypervisor parameters
2219 for hv_name in hypervisors:
2220 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2221 for os_name, os_hvp in cluster.os_hvp.items():
2222 for hv_name, hv_params in os_hvp.items():
2225 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2226 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2227 # TODO: collapse identical parameter values in a single one
2228 for instance in instanceinfo.values():
2229 if not instance.hvparams:
2231 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2232 cluster.FillHV(instance)))
2233 # and verify them locally
2234 self._VerifyHVP(hvp_data)
2236 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2237 node_verify_param = {
2238 constants.NV_FILELIST:
2239 utils.UniqueSequence(filename
2240 for files in filemap
2241 for filename in files),
2242 constants.NV_NODELIST: [node.name for node in nodeinfo
2243 if not node.offline],
2244 constants.NV_HYPERVISOR: hypervisors,
2245 constants.NV_HVPARAMS: hvp_data,
2246 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2247 node.secondary_ip) for node in nodeinfo
2248 if not node.offline],
2249 constants.NV_INSTANCELIST: hypervisors,
2250 constants.NV_VERSION: None,
2251 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2252 constants.NV_NODESETUP: None,
2253 constants.NV_TIME: None,
2254 constants.NV_MASTERIP: (master_node, master_ip),
2255 constants.NV_OSLIST: None,
2256 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2259 if vg_name is not None:
2260 node_verify_param[constants.NV_VGLIST] = None
2261 node_verify_param[constants.NV_LVLIST] = vg_name
2262 node_verify_param[constants.NV_PVLIST] = [vg_name]
2263 node_verify_param[constants.NV_DRBDLIST] = None
2266 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2268 # Build our expected cluster state
2269 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2271 vm_capable=node.vm_capable))
2272 for node in nodeinfo)
2276 for node in nodeinfo:
2277 path = _SupportsOob(self.cfg, node)
2278 if path and path not in oob_paths:
2279 oob_paths.append(path)
2282 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2284 for instance in instancelist:
2285 inst_config = instanceinfo[instance]
2287 for nname in inst_config.all_nodes:
2288 if nname not in node_image:
2290 gnode = self.NodeImage(name=nname)
2292 node_image[nname] = gnode
2294 inst_config.MapLVsByNode(node_vol_should)
2296 pnode = inst_config.primary_node
2297 node_image[pnode].pinst.append(instance)
2299 for snode in inst_config.secondary_nodes:
2300 nimg = node_image[snode]
2301 nimg.sinst.append(instance)
2302 if pnode not in nimg.sbp:
2303 nimg.sbp[pnode] = []
2304 nimg.sbp[pnode].append(instance)
2306 # At this point, we have the in-memory data structures complete,
2307 # except for the runtime information, which we'll gather next
2309 # Due to the way our RPC system works, exact response times cannot be
2310 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2311 # time before and after executing the request, we can at least have a time
2312 # window.
2313 nvinfo_starttime = time.time()
2314 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2315 self.cfg.GetClusterName())
2316 nvinfo_endtime = time.time()
2318 all_drbd_map = self.cfg.ComputeDRBDMap()
2320 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2321 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2323 feedback_fn("* Verifying configuration file consistency")
2324 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2326 feedback_fn("* Verifying node status")
2330 for node_i in nodeinfo:
2332 nimg = node_image[node]
2336 feedback_fn("* Skipping offline node %s" % (node,))
2340 if node == master_node:
2342 elif node_i.master_candidate:
2343 ntype = "master candidate"
2344 elif node_i.drained:
2350 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2352 msg = all_nvinfo[node].fail_msg
2353 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2355 nimg.rpc_fail = True
2358 nresult = all_nvinfo[node].payload
2360 nimg.call_ok = self._VerifyNode(node_i, nresult)
2361 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2362 self._VerifyNodeNetwork(node_i, nresult)
2363 self._VerifyOob(node_i, nresult)
2366 self._VerifyNodeLVM(node_i, nresult, vg_name)
2367 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2370 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2371 self._UpdateNodeInstances(node_i, nresult, nimg)
2372 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2373 self._UpdateNodeOS(node_i, nresult, nimg)
2374 if not nimg.os_fail:
2375 if refos_img is None:
2377 self._VerifyNodeOS(node_i, nimg, refos_img)
2379 feedback_fn("* Verifying instance status")
2380 for instance in instancelist:
2382 feedback_fn("* Verifying instance %s" % instance)
2383 inst_config = instanceinfo[instance]
2384 self._VerifyInstance(instance, inst_config, node_image,
2386 inst_nodes_offline = []
2388 pnode = inst_config.primary_node
2389 pnode_img = node_image[pnode]
2390 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2391 self.ENODERPC, pnode, "instance %s, connection to"
2392 " primary node failed", instance)
2394 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2395 self.EINSTANCEBADNODE, instance,
2396 "instance is marked as running and lives on offline node %s",
2397 inst_config.primary_node)
2399 # If the instance is non-redundant we cannot survive losing its primary
2400 # node, so we are not N+1 compliant. On the other hand we have no disk
2401 # templates with more than one secondary so that situation is not well
2402 # supported either.
2403 # FIXME: does not support file-backed instances
2404 if not inst_config.secondary_nodes:
2405 i_non_redundant.append(instance)
2407 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2408 instance, "instance has multiple secondary nodes: %s",
2409 utils.CommaJoin(inst_config.secondary_nodes),
2410 code=self.ETYPE_WARNING)
2412 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2413 pnode = inst_config.primary_node
2414 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2415 instance_groups = {}
2417 for node in instance_nodes:
2418 instance_groups.setdefault(nodeinfo_byname[node].group,
2422 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2423 # Sort so that we always list the primary node first.
2424 for group, nodes in sorted(instance_groups.items(),
2425 key=lambda (_, nodes): pnode in nodes,
2428 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2429 instance, "instance has primary and secondary nodes in"
2430 " different groups: %s", utils.CommaJoin(pretty_list),
2431 code=self.ETYPE_WARNING)
2433 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2434 i_non_a_balanced.append(instance)
2436 for snode in inst_config.secondary_nodes:
2437 s_img = node_image[snode]
2438 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2439 "instance %s, connection to secondary node failed", instance)
2442 inst_nodes_offline.append(snode)
2444 # warn that the instance lives on offline nodes
2445 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2446 "instance has offline secondary node(s) %s",
2447 utils.CommaJoin(inst_nodes_offline))
2448 # ... or ghost/non-vm_capable nodes
2449 for node in inst_config.all_nodes:
2450 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2451 "instance lives on ghost node %s", node)
2452 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2453 instance, "instance lives on non-vm_capable node %s", node)
2455 feedback_fn("* Verifying orphan volumes")
2456 reserved = utils.FieldSet(*cluster.reserved_lvs)
2457 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2459 feedback_fn("* Verifying orphan instances")
2460 self._VerifyOrphanInstances(instancelist, node_image)
2462 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2463 feedback_fn("* Verifying N+1 Memory redundancy")
2464 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2466 feedback_fn("* Other Notes")
2468 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2469 % len(i_non_redundant))
2471 if i_non_a_balanced:
2472 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2473 % len(i_non_a_balanced))
2476 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2479 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2483 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2484 """Analyze the post-hooks' result
2486 This method analyses the hook result, handles it, and sends some
2487 nicely-formatted feedback back to the user.
2489 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2490 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2491 @param hooks_results: the results of the multi-node hooks rpc call
2492 @param feedback_fn: function used to send feedback back to the caller
2493 @param lu_result: previous Exec result
2494 @return: the new Exec result, based on the previous result
2498 # We only really run POST phase hooks, and are only interested in
2499 # their results
2500 if phase == constants.HOOKS_PHASE_POST:
2501 # Used to change hooks' output to proper indentation
2502 feedback_fn("* Hooks Results")
2503 assert hooks_results, "invalid result from hooks"
2505 for node_name in hooks_results:
2506 res = hooks_results[node_name]
2508 test = msg and not res.offline
2509 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2510 "Communication failure in hooks execution: %s", msg)
2511 if res.offline or msg:
2512 # No need to investigate payload if node is offline or gave an error.
2513 # manually override lu_result here, as _ErrorIf only
2514 # overrides self.bad
2517 for script, hkr, output in res.payload:
2518 test = hkr == constants.HKR_FAIL
2519 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2520 "Script %s failed, output:", script)
2522 output = self._HOOKS_INDENT_RE.sub(' ', output)
2523 feedback_fn("%s" % output)
2529 class LUClusterVerifyDisks(NoHooksLU):
2530 """Verifies the cluster disks status.
2535 def ExpandNames(self):
2536 self.needed_locks = {
2537 locking.LEVEL_NODE: locking.ALL_SET,
2538 locking.LEVEL_INSTANCE: locking.ALL_SET,
2540 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2542 def Exec(self, feedback_fn):
2543 """Verify integrity of cluster disks.
2545 @rtype: tuple of three items
2546 @return: a tuple of (dict of node-to-node_error, list of instances
2547 which need activate-disks, dict of instance: (node, volume) for
2551 result = res_nodes, res_instances, res_missing = {}, [], {}
2553 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2554 instances = self.cfg.GetAllInstancesInfo().values()
2557 for inst in instances:
2559 if not inst.admin_up:
2561 inst.MapLVsByNode(inst_lvs)
2562 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2563 for node, vol_list in inst_lvs.iteritems():
2564 for vol in vol_list:
2565 nv_dict[(node, vol)] = inst
2570 node_lvs = self.rpc.call_lv_list(nodes, [])
2571 for node, node_res in node_lvs.items():
2572 if node_res.offline:
2574 msg = node_res.fail_msg
2576 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2577 res_nodes[node] = msg
2580 lvs = node_res.payload
2581 for lv_name, (_, _, lv_online) in lvs.items():
2582 inst = nv_dict.pop((node, lv_name), None)
2583 if (not lv_online and inst is not None
2584 and inst.name not in res_instances):
2585 res_instances.append(inst.name)
2587 # any leftover items in nv_dict are missing LVs, let's arrange the
2588 # data better
2589 for key, inst in nv_dict.iteritems():
2590 if inst.name not in res_missing:
2591 res_missing[inst.name] = []
2592 res_missing[inst.name].append(key)
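# Standalone sketch of the bookkeeping above: the (node, volume) ->
# instance map is consumed destructively, so whatever pop() leaves
# behind is exactly the set of LVs no node reported (shapes assumed).
def _SketchFindMissingLvs(nv_dict, reported):
  """nv_dict: {(node, vol): inst_name}; reported: {node: [vol, ...]}."""
  left = dict(nv_dict)
  for node, vols in reported.items():
    for vol in vols:
      left.pop((node, vol), None)
  missing = {}
  for (node, vol), iname in left.items():
    missing.setdefault(iname, []).append((node, vol))
  return missing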
2597 class LUClusterRepairDiskSizes(NoHooksLU):
2598 """Verifies the cluster disks sizes.
2603 def ExpandNames(self):
2604 if self.op.instances:
2605 self.wanted_names = []
2606 for name in self.op.instances:
2607 full_name = _ExpandInstanceName(self.cfg, name)
2608 self.wanted_names.append(full_name)
2609 self.needed_locks = {
2610 locking.LEVEL_NODE: [],
2611 locking.LEVEL_INSTANCE: self.wanted_names,
2613 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2615 self.wanted_names = None
2616 self.needed_locks = {
2617 locking.LEVEL_NODE: locking.ALL_SET,
2618 locking.LEVEL_INSTANCE: locking.ALL_SET,
2620 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2622 def DeclareLocks(self, level):
2623 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2624 self._LockInstancesNodes(primary_only=True)
2626 def CheckPrereq(self):
2627 """Check prerequisites.
2629 This only checks the optional instance list against the existing names.
2632 if self.wanted_names is None:
2633 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2635 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2636 in self.wanted_names]
2638 def _EnsureChildSizes(self, disk):
2639 """Ensure children of the disk have the needed disk size.
2641 This is valid mainly for DRBD8 and fixes an issue where the
2642 children have a smaller disk size.
2644 @param disk: an L{ganeti.objects.Disk} object
2647 if disk.dev_type == constants.LD_DRBD8:
2648 assert disk.children, "Empty children for DRBD8?"
2649 fchild = disk.children[0]
2650 mismatch = fchild.size < disk.size
2652 self.LogInfo("Child disk has size %d, parent %d, fixing",
2653 fchild.size, disk.size)
2654 fchild.size = disk.size
2656 # and we recurse on this child only, not on the metadev
2657 return self._EnsureChildSizes(fchild) or mismatch
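# Standalone sketch of the same grow-to-parent recursion on a plain
# dict-based disk tree (hypothetical shape, not objects.Disk): the
# return value says whether anything along the first-child chain had
# to be fixed.
def _SketchEnsureChildSizes(disk):
  """disk: {"size": int, "children": [disk, ...]}."""
  if not disk["children"]:
    return False
  fchild = disk["children"][0]
  mismatch = fchild["size"] < disk["size"]
  if mismatch:
    fchild["size"] = disk["size"]
  return _SketchEnsureChildSizes(fchild) or mismatch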
2661 def Exec(self, feedback_fn):
2662 """Verify the size of cluster disks.
2665 # TODO: check child disks too
2666 # TODO: check differences in size between primary/secondary nodes
2668 for instance in self.wanted_instances:
2669 pnode = instance.primary_node
2670 if pnode not in per_node_disks:
2671 per_node_disks[pnode] = []
2672 for idx, disk in enumerate(instance.disks):
2673 per_node_disks[pnode].append((instance, idx, disk))
2676 for node, dskl in per_node_disks.items():
2677 newl = [v[2].Copy() for v in dskl]
2679 self.cfg.SetDiskID(dsk, node)
2680 result = self.rpc.call_blockdev_getsize(node, newl)
2682 self.LogWarning("Failure in blockdev_getsize call to node"
2683 " %s, ignoring", node)
2685 if len(result.payload) != len(dskl):
2686 logging.warning("Invalid result from node %s: len(dskl)=%d,"
2687 " result.payload=%s", node, len(dskl), result.payload)
2688 self.LogWarning("Invalid result from node %s, ignoring node results",
2691 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2693 self.LogWarning("Disk %d of instance %s did not return size"
2694 " information, ignoring", idx, instance.name)
2696 if not isinstance(size, (int, long)):
2697 self.LogWarning("Disk %d of instance %s did not return valid"
2698 " size information, ignoring", idx, instance.name)
2701 if size != disk.size:
2702 self.LogInfo("Disk %d of instance %s has mismatched size,"
2703 " correcting: recorded %d, actual %d", idx,
2704 instance.name, disk.size, size)
2706 self.cfg.Update(instance, feedback_fn)
2707 changed.append((instance.name, idx, size))
2708 if self._EnsureChildSizes(disk):
2709 self.cfg.Update(instance, feedback_fn)
2710 changed.append((instance.name, idx, disk.size))
2714 class LUClusterRename(LogicalUnit):
2715 """Rename the cluster.
2718 HPATH = "cluster-rename"
2719 HTYPE = constants.HTYPE_CLUSTER
2721 def BuildHooksEnv(self):
2726 "OP_TARGET": self.cfg.GetClusterName(),
2727 "NEW_NAME": self.op.name,
2730 def BuildHooksNodes(self):
2731 """Build hooks nodes.
2734 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2736 def CheckPrereq(self):
2737 """Verify that the passed name is a valid one.
2740 hostname = netutils.GetHostname(name=self.op.name,
2741 family=self.cfg.GetPrimaryIPFamily())
2743 new_name = hostname.name
2744 self.ip = new_ip = hostname.ip
2745 old_name = self.cfg.GetClusterName()
2746 old_ip = self.cfg.GetMasterIP()
2747 if new_name == old_name and new_ip == old_ip:
2748 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2749 " cluster has changed",
2751 if new_ip != old_ip:
2752 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2753 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2754 " reachable on the network" %
2755 new_ip, errors.ECODE_NOTUNIQUE)
2757 self.op.name = new_name
2759 def Exec(self, feedback_fn):
2760 """Rename the cluster.
2763 clustername = self.op.name
2766 # shutdown the master IP
2767 master = self.cfg.GetMasterNode()
2768 result = self.rpc.call_node_stop_master(master, False)
2769 result.Raise("Could not disable the master role")
2772 cluster = self.cfg.GetClusterInfo()
2773 cluster.cluster_name = clustername
2774 cluster.master_ip = ip
2775 self.cfg.Update(cluster, feedback_fn)
2777 # update the known hosts file
2778 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2779 node_list = self.cfg.GetOnlineNodeList()
2781 node_list.remove(master)
2784 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2786 result = self.rpc.call_node_start_master(master, False, False)
2787 msg = result.fail_msg
2789 self.LogWarning("Could not re-enable the master role on"
2790 " the master, please restart manually: %s", msg)
2795 class LUClusterSetParams(LogicalUnit):
2796 """Change the parameters of the cluster.
2799 HPATH = "cluster-modify"
2800 HTYPE = constants.HTYPE_CLUSTER
2803 def CheckArguments(self):
2807 if self.op.uid_pool:
2808 uidpool.CheckUidPool(self.op.uid_pool)
2810 if self.op.add_uids:
2811 uidpool.CheckUidPool(self.op.add_uids)
2813 if self.op.remove_uids:
2814 uidpool.CheckUidPool(self.op.remove_uids)
2816 def ExpandNames(self):
2817 # FIXME: in the future maybe other cluster params won't require checking on
2818 # all nodes to be modified.
2819 self.needed_locks = {
2820 locking.LEVEL_NODE: locking.ALL_SET,
2822 self.share_locks[locking.LEVEL_NODE] = 1
2824 def BuildHooksEnv(self):
2829 "OP_TARGET": self.cfg.GetClusterName(),
2830 "NEW_VG_NAME": self.op.vg_name,
2833 def BuildHooksNodes(self):
2834 """Build hooks nodes.
2837 mn = self.cfg.GetMasterNode()
2840 def CheckPrereq(self):
2841 """Check prerequisites.
2843 This checks that the given parameters don't conflict and
2844 that the given volume group is valid.
2847 if self.op.vg_name is not None and not self.op.vg_name:
2848 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2849 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2850 " instances exist", errors.ECODE_INVAL)
2852 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2853 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2854 raise errors.OpPrereqError("Cannot disable drbd helper while"
2855 " drbd-based instances exist",
2858 node_list = self.acquired_locks[locking.LEVEL_NODE]
2860 # if vg_name not None, checks given volume group on all nodes
2862 vglist = self.rpc.call_vg_list(node_list)
2863 for node in node_list:
2864 msg = vglist[node].fail_msg
2866 # ignoring down node
2867 self.LogWarning("Error while gathering data on node %s"
2868 " (ignoring node): %s", node, msg)
2870 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2872 constants.MIN_VG_SIZE)
2874 raise errors.OpPrereqError("Error on node '%s': %s" %
2875 (node, vgstatus), errors.ECODE_ENVIRON)
2877 if self.op.drbd_helper:
2878 # checks given drbd helper on all nodes
2879 helpers = self.rpc.call_drbd_helper(node_list)
2880 for node in node_list:
2881 ninfo = self.cfg.GetNodeInfo(node)
2883 self.LogInfo("Not checking drbd helper on offline node %s", node)
2885 msg = helpers[node].fail_msg
2887 raise errors.OpPrereqError("Error checking drbd helper on node"
2888 " '%s': %s" % (node, msg),
2889 errors.ECODE_ENVIRON)
2890 node_helper = helpers[node].payload
2891 if node_helper != self.op.drbd_helper:
2892 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2893 (node, node_helper), errors.ECODE_ENVIRON)
2895 self.cluster = cluster = self.cfg.GetClusterInfo()
2896 # validate params changes
2897 if self.op.beparams:
2898 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2899 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2901 if self.op.ndparams:
2902 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2903 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2905 # TODO: we need a more general way to handle resetting
2906 # cluster-level parameters to default values
2907 if self.new_ndparams["oob_program"] == "":
2908 self.new_ndparams["oob_program"] = \
2909 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2911 if self.op.nicparams:
2912 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2913 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2914 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2917 # check all instances for consistency
2918 for instance in self.cfg.GetAllInstancesInfo().values():
2919 for nic_idx, nic in enumerate(instance.nics):
2920 params_copy = copy.deepcopy(nic.nicparams)
2921 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2923 # check parameter syntax
2925 objects.NIC.CheckParameterSyntax(params_filled)
2926 except errors.ConfigurationError, err:
2927 nic_errors.append("Instance %s, nic/%d: %s" %
2928 (instance.name, nic_idx, err))
2930 # if we're moving instances to routed, check that they have an ip
2931 target_mode = params_filled[constants.NIC_MODE]
2932 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2933 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2934 (instance.name, nic_idx))
2936 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2937 "\n".join(nic_errors))
2939 # hypervisor list/parameters
2940 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2941 if self.op.hvparams:
2942 for hv_name, hv_dict in self.op.hvparams.items():
2943 if hv_name not in self.new_hvparams:
2944 self.new_hvparams[hv_name] = hv_dict
2946 self.new_hvparams[hv_name].update(hv_dict)
2948 # os hypervisor parameters
2949 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2951 for os_name, hvs in self.op.os_hvp.items():
2952 if os_name not in self.new_os_hvp:
2953 self.new_os_hvp[os_name] = hvs
2955 for hv_name, hv_dict in hvs.items():
2956 if hv_name not in self.new_os_hvp[os_name]:
2957 self.new_os_hvp[os_name][hv_name] = hv_dict
2959 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2962 self.new_osp = objects.FillDict(cluster.osparams, {})
2963 if self.op.osparams:
2964 for os_name, osp in self.op.osparams.items():
2965 if os_name not in self.new_osp:
2966 self.new_osp[os_name] = {}
2968 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2971 if not self.new_osp[os_name]:
2972 # we removed all parameters
2973 del self.new_osp[os_name]
2975 # check the parameter validity (remote check)
2976 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2977 os_name, self.new_osp[os_name])
2979 # changes to the hypervisor list
2980 if self.op.enabled_hypervisors is not None:
2981 self.hv_list = self.op.enabled_hypervisors
2982 for hv in self.hv_list:
2983 # if the hypervisor doesn't already exist in the cluster
2984 # hvparams, we initialize it to empty, and then (in both
2985 # cases) we make sure to fill the defaults, as we might not
2986 # have a complete defaults list if the hypervisor wasn't
2987 # enabled before
2988 if hv not in new_hvp:
2990 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2991 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2993 self.hv_list = cluster.enabled_hypervisors
2995 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2996 # either the enabled list has changed, or the parameters have, validate
2997 for hv_name, hv_params in self.new_hvparams.items():
2998 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2999 (self.op.enabled_hypervisors and
3000 hv_name in self.op.enabled_hypervisors)):
3001 # either this is a new hypervisor, or its parameters have changed
3002 hv_class = hypervisor.GetHypervisor(hv_name)
3003 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3004 hv_class.CheckParameterSyntax(hv_params)
3005 _CheckHVParams(self, node_list, hv_name, hv_params)
3008 # no need to check any newly-enabled hypervisors, since the
3009 # defaults have already been checked in the above code-block
3010 for os_name, os_hvp in self.new_os_hvp.items():
3011 for hv_name, hv_params in os_hvp.items():
3012 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3013 # we need to fill in the new os_hvp on top of the actual hv_p
3014 cluster_defaults = self.new_hvparams.get(hv_name, {})
3015 new_osp = objects.FillDict(cluster_defaults, hv_params)
3016 hv_class = hypervisor.GetHypervisor(hv_name)
3017 hv_class.CheckParameterSyntax(new_osp)
3018 _CheckHVParams(self, node_list, hv_name, new_osp)
3020 if self.op.default_iallocator:
3021 alloc_script = utils.FindFile(self.op.default_iallocator,
3022 constants.IALLOCATOR_SEARCH_PATH,
3024 if alloc_script is None:
3025 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3026 " specified" % self.op.default_iallocator,
3029 def Exec(self, feedback_fn):
3030 """Change the parameters of the cluster.
3033 if self.op.vg_name is not None:
3034 new_volume = self.op.vg_name
3037 if new_volume != self.cfg.GetVGName():
3038 self.cfg.SetVGName(new_volume)
3040 feedback_fn("Cluster LVM configuration already in desired"
3041 " state, not changing")
3042 if self.op.drbd_helper is not None:
3043 new_helper = self.op.drbd_helper
3046 if new_helper != self.cfg.GetDRBDHelper():
3047 self.cfg.SetDRBDHelper(new_helper)
3049 feedback_fn("Cluster DRBD helper already in desired state,"
3051 if self.op.hvparams:
3052 self.cluster.hvparams = self.new_hvparams
3054 self.cluster.os_hvp = self.new_os_hvp
3055 if self.op.enabled_hypervisors is not None:
3056 self.cluster.hvparams = self.new_hvparams
3057 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3058 if self.op.beparams:
3059 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3060 if self.op.nicparams:
3061 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3062 if self.op.osparams:
3063 self.cluster.osparams = self.new_osp
3064 if self.op.ndparams:
3065 self.cluster.ndparams = self.new_ndparams
3067 if self.op.candidate_pool_size is not None:
3068 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3069 # we need to update the pool size here, otherwise the save will fail
3070 _AdjustCandidatePool(self, [])
3072 if self.op.maintain_node_health is not None:
3073 self.cluster.maintain_node_health = self.op.maintain_node_health
3075 if self.op.prealloc_wipe_disks is not None:
3076 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3078 if self.op.add_uids is not None:
3079 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3081 if self.op.remove_uids is not None:
3082 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3084 if self.op.uid_pool is not None:
3085 self.cluster.uid_pool = self.op.uid_pool
3087 if self.op.default_iallocator is not None:
3088 self.cluster.default_iallocator = self.op.default_iallocator
3090 if self.op.reserved_lvs is not None:
3091 self.cluster.reserved_lvs = self.op.reserved_lvs
3093 def helper_os(aname, mods, desc):
3095 lst = getattr(self.cluster, aname)
3096 for key, val in mods:
3097 if key == constants.DDM_ADD:
3099 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3102 elif key == constants.DDM_REMOVE:
3106 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3108 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3110 if self.op.hidden_os:
3111 helper_os("hidden_os", self.op.hidden_os, "hidden")
3113 if self.op.blacklisted_os:
3114 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3116 if self.op.master_netdev:
3117 master = self.cfg.GetMasterNode()
3118 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3119 self.cluster.master_netdev)
3120 result = self.rpc.call_node_stop_master(master, False)
3121 result.Raise("Could not disable the master ip")
3122 feedback_fn("Changing master_netdev from %s to %s" %
3123 (self.cluster.master_netdev, self.op.master_netdev))
3124 self.cluster.master_netdev = self.op.master_netdev
3126 self.cfg.Update(self.cluster, feedback_fn)
3128 if self.op.master_netdev:
3129 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3130 self.op.master_netdev)
3131 result = self.rpc.call_node_start_master(master, False, False)
3133 self.LogWarning("Could not re-enable the master ip on"
3134 " the master, please restart manually: %s",
3138 def _UploadHelper(lu, nodes, fname):
3139 """Helper for uploading a file and showing warnings.
3142 if os.path.exists(fname):
3143 result = lu.rpc.call_upload_file(nodes, fname)
3144 for to_node, to_result in result.items():
3145 msg = to_result.fail_msg
3147 msg = ("Copy of file %s to node %s failed: %s" %
3148 (fname, to_node, msg))
3149 lu.proc.LogWarning(msg)
3152 def _ComputeAncillaryFiles(cluster, redist):
3153 """Compute files external to Ganeti which need to be consistent.
3155 @type redist: boolean
3156 @param redist: Whether to include files which need to be redistributed
3159 # Compute files for all nodes
3161 constants.SSH_KNOWN_HOSTS_FILE,
3162 constants.CONFD_HMAC_KEY,
3163 constants.CLUSTER_DOMAIN_SECRET_FILE,
3167 files_all.update(constants.ALL_CERT_FILES)
3168 files_all.update(ssconf.SimpleStore().GetFileList())
3170 if cluster.modify_etc_hosts:
3171 files_all.add(constants.ETC_HOSTS)
3173 # Files which must either exist on all nodes or on none
3174 files_all_opt = set([
3175 constants.RAPI_USERS_FILE,
3178 # Files which should only be on master candidates
3181 files_mc.add(constants.CLUSTER_CONF_FILE)
3183 # Files which should only be on VM-capable nodes
3184 files_vm = set(filename
3185 for hv_name in cluster.enabled_hypervisors
3186 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3188 # Filenames must be unique
3189 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3190 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3191 "Found file listed in more than one file list"
3193 return (files_all, files_all_opt, files_mc, files_vm)
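# Standalone sketch of the invariant asserted above: for pairwise
# disjoint sets the size of the union equals the sum of the sizes,
# and any overlap makes the union strictly smaller.
def _SketchAreDisjoint(*sets):
  return len(set().union(*sets)) == sum(len(s) for s in sets)

# _SketchAreDisjoint(set([1]), set([2])) is True;
# _SketchAreDisjoint(set([1]), set([1])) is False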
3196 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3197 """Distribute additional files which are part of the cluster configuration.
3199 ConfigWriter takes care of distributing the config and ssconf files, but
3200 there are more files which should be distributed to all nodes. This function
3201 makes sure those are copied.
3203 @param lu: calling logical unit
3204 @param additional_nodes: list of nodes not in the config to distribute to
3205 @type additional_vm: boolean
3206 @param additional_vm: whether the additional nodes are vm-capable or not
3209 # Gather target nodes
3210 cluster = lu.cfg.GetClusterInfo()
3211 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3213 online_nodes = lu.cfg.GetOnlineNodeList()
3214 vm_nodes = lu.cfg.GetVmCapableNodeList()
3216 if additional_nodes is not None:
3217 online_nodes.extend(additional_nodes)
3219 vm_nodes.extend(additional_nodes)
3221 # Never distribute to master node
3222 for nodelist in [online_nodes, vm_nodes]:
3223 if master_info.name in nodelist:
3224 nodelist.remove(master_info.name)
3227 (files_all, files_all_opt, files_mc, files_vm) = \
3228 _ComputeAncillaryFiles(cluster, True)
3230 # Never re-distribute configuration file from here
3231 assert not (constants.CLUSTER_CONF_FILE in files_all or
3232 constants.CLUSTER_CONF_FILE in files_vm)
3233 assert not files_mc, "Master candidates not handled in this function"
3236 (online_nodes, files_all),
3237 (online_nodes, files_all_opt),
3238 (vm_nodes, files_vm),
3242 for (node_list, files) in filemap:
3244 _UploadHelper(lu, node_list, fname)
3247 class LUClusterRedistConf(NoHooksLU):
3248 """Force the redistribution of cluster configuration.
3250 This is a very simple LU.
3255 def ExpandNames(self):
3256 self.needed_locks = {
3257 locking.LEVEL_NODE: locking.ALL_SET,
3259 self.share_locks[locking.LEVEL_NODE] = 1
3261 def Exec(self, feedback_fn):
3262 """Redistribute the configuration.
3265 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3266 _RedistributeAncillaryFiles(self)
3269 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3270 """Sleep and poll for an instance's disk to sync.
3273 if not instance.disks or disks is not None and not disks:
3276 disks = _ExpandCheckDisks(instance, disks)
3279 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3281 node = instance.primary_node
3284 lu.cfg.SetDiskID(dev, node)
3286 # TODO: Convert to utils.Retry
3289 degr_retries = 10 # in seconds, as we sleep 1 second each time
3293 cumul_degraded = False
3294 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3295 msg = rstats.fail_msg
3297 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3300 raise errors.RemoteError("Can't contact node %s for mirror data,"
3301 " aborting." % node)
3304 rstats = rstats.payload
3306 for i, mstat in enumerate(rstats):
3308 lu.LogWarning("Can't compute data for node %s/%s",
3309 node, disks[i].iv_name)
3312 cumul_degraded = (cumul_degraded or
3313 (mstat.is_degraded and mstat.sync_percent is None))
3314 if mstat.sync_percent is not None:
3316 if mstat.estimated_time is not None:
3317 rem_time = ("%s remaining (estimated)" %
3318 utils.FormatSeconds(mstat.estimated_time))
3319 max_time = mstat.estimated_time
3321 rem_time = "no time estimate"
3322 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3323 (disks[i].iv_name, mstat.sync_percent, rem_time))
3325 # if we're done but degraded, let's do a few small retries, to
3326 # make sure we see a stable and not transient situation; therefore
3327 # we force restart of the loop
3328 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3329 logging.info("Degraded disks found, %d retries left", degr_retries)
3337 time.sleep(min(60, max_time))
3340 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3341 return not cumul_degraded
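# Standalone sketch of the retry policy above: a result that is done
# but still degraded is re-polled a bounded number of times, so only a
# stable state is reported (poll_fn and its return shape are assumed).
import time

def _SketchWaitStable(poll_fn, degr_retries=10, delay=1):
  """poll_fn() returns (done, degraded), both booleans."""
  while True:
    (done, degraded) = poll_fn()
    if done and degraded and degr_retries > 0:
      # transient degradation: retry before trusting the result
      degr_retries -= 1
      time.sleep(delay)
      continue
    if done:
      return not degraded
    time.sleep(delay)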
3344 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3345 """Check that mirrors are not degraded.
3347 The ldisk parameter, if True, will change the test from the
3348 is_degraded attribute (which represents overall non-ok status for
3349 the device(s)) to the ldisk (representing the local storage status).
3352 lu.cfg.SetDiskID(dev, node)
3356 if on_primary or dev.AssembleOnSecondary():
3357 rstats = lu.rpc.call_blockdev_find(node, dev)
3358 msg = rstats.fail_msg
3360 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3362 elif not rstats.payload:
3363 lu.LogWarning("Can't find disk on node %s", node)
3367 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3369 result = result and not rstats.payload.is_degraded
3372 for child in dev.children:
3373 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3378 class LUOobCommand(NoHooksLU):
3379 """Logical unit for OOB handling.
3383 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3385 def CheckPrereq(self):
3386 """Check prerequisites.
3389 - the node exists in the configuration
3392 Any errors are signaled by raising errors.OpPrereqError.
3396 self.master_node = self.cfg.GetMasterNode()
3398 assert self.op.power_delay >= 0.0
3400 if self.op.node_names:
3401 if self.op.command in self._SKIP_MASTER:
3402 if self.master_node in self.op.node_names:
3403 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3404 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3406 if master_oob_handler:
3407 additional_text = ("Run '%s %s %s' if you want to operate on the"
3408 " master regardless") % (master_oob_handler,
3412 additional_text = "The master node does not support out-of-band"
3414 raise errors.OpPrereqError(("Operating on the master node %s is not"
3415 " allowed for %s\n%s") %
3416 (self.master_node, self.op.command,
3417 additional_text), errors.ECODE_INVAL)
3419 self.op.node_names = self.cfg.GetNodeList()
3420 if self.op.command in self._SKIP_MASTER:
3421 self.op.node_names.remove(self.master_node)
3423 if self.op.command in self._SKIP_MASTER:
3424 assert self.master_node not in self.op.node_names
3426 for node_name in self.op.node_names:
3427 node = self.cfg.GetNodeInfo(node_name)
3430 raise errors.OpPrereqError("Node %s not found" % node_name,
3433 self.nodes.append(node)
3435 if (not self.op.ignore_status and
3436 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3437 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3438 " not marked offline") % node_name,
3441 def ExpandNames(self):
3442 """Gather locks we need.
3445 if self.op.node_names:
3446 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3447 for name in self.op.node_names]
3448 lock_names = self.op.node_names
3450 lock_names = locking.ALL_SET
3452 self.needed_locks = {
3453 locking.LEVEL_NODE: lock_names,
3456 def Exec(self, feedback_fn):
3457 """Execute OOB and return result if we expect any.
3460 master_node = self.master_node
3463 for idx, node in enumerate(self.nodes):
3464 node_entry = [(constants.RS_NORMAL, node.name)]
3465 ret.append(node_entry)
3467 oob_program = _SupportsOob(self.cfg, node)
3470 node_entry.append((constants.RS_UNAVAIL, None))
3473 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3474 self.op.command, oob_program, node.name)
3475 result = self.rpc.call_run_oob(master_node, oob_program,
3476 self.op.command, node.name,
3480 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3481 node.name, result.fail_msg)
3482 node_entry.append((constants.RS_NODATA, None))
3485 self._CheckPayload(result)
3486 except errors.OpExecError, err:
3487 self.LogWarning("The payload returned by '%s' is not valid: %s",
3489 node_entry.append((constants.RS_NODATA, None))
3491 if self.op.command == constants.OOB_HEALTH:
3492 # For health we should log important events
3493 for item, status in result.payload:
3494 if status in [constants.OOB_STATUS_WARNING,
3495 constants.OOB_STATUS_CRITICAL]:
3496 self.LogWarning("On node '%s' item '%s' has status '%s'",
3497 node.name, item, status)
3499 if self.op.command == constants.OOB_POWER_ON:
3501 elif self.op.command == constants.OOB_POWER_OFF:
3502 node.powered = False
3503 elif self.op.command == constants.OOB_POWER_STATUS:
3504 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3505 if powered != node.powered:
3506 logging.warning(("Recorded power state (%s) of node '%s' does not"
3507 " match actual power state (%s)"), node.powered,
3510 # For configuration changing commands we should update the node
3511 if self.op.command in (constants.OOB_POWER_ON,
3512 constants.OOB_POWER_OFF):
3513 self.cfg.Update(node, feedback_fn)
3515 node_entry.append((constants.RS_NORMAL, result.payload))
3517 if (self.op.command == constants.OOB_POWER_ON and
3518 idx < len(self.nodes) - 1):
3519 time.sleep(self.op.power_delay)
3523 def _CheckPayload(self, result):
3524 """Checks if the payload is valid.
3526 @param result: RPC result
3527 @raises errors.OpExecError: If payload is not valid
3531 if self.op.command == constants.OOB_HEALTH:
3532 if not isinstance(result.payload, list):
3533 errs.append("command 'health' is expected to return a list but got %s" %
3534 type(result.payload))
3536 for item, status in result.payload:
3537 if status not in constants.OOB_STATUSES:
3538 errs.append("health item '%s' has invalid status '%s'" %
3541 if self.op.command == constants.OOB_POWER_STATUS:
3542 if not isinstance(result.payload, dict):
3543 errs.append("power-status is expected to return a dict but got %s" %
3544 type(result.payload))
3546 if self.op.command in [
3547 constants.OOB_POWER_ON,
3548 constants.OOB_POWER_OFF,
3549 constants.OOB_POWER_CYCLE,
3551 if result.payload is not None:
3552 errs.append("%s is expected to not return payload but got '%s'" %
3553 (self.op.command, result.payload))
3556 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3557 utils.CommaJoin(errs))
3559 class _OsQuery(_QueryBase):
3560 FIELDS = query.OS_FIELDS
3562 def ExpandNames(self, lu):
3563 # Lock all nodes in shared mode
3564 # Temporary removal of locks, should be reverted later
3565 # TODO: reintroduce locks when they are lighter-weight
3566 lu.needed_locks = {}
3567 #self.share_locks[locking.LEVEL_NODE] = 1
3568 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3570 # The following variables interact with _QueryBase._GetNames
3572 self.wanted = self.names
3574 self.wanted = locking.ALL_SET
3576 self.do_locking = self.use_locking
3578 def DeclareLocks(self, lu, level):
3582 def _DiagnoseByOS(rlist):
3583 """Remaps a per-node return list into an a per-os per-node dictionary
3585 @param rlist: a map with node names as keys and OS objects as values
3588 @return: a dictionary with osnames as keys and as value another
3589 map, with nodes as keys and tuples of (path, status, diagnose,
3590 variants, parameters, api_versions) as values, eg::
3592 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3593 (/srv/..., False, "invalid api")],
3594 "node2": [(/srv/..., True, "", [], [])]}
3599 # we build here the list of nodes that didn't fail the RPC (at RPC
3600 # level), so that nodes with a non-responding node daemon don't
3601 # make all OSes invalid
3602 good_nodes = [node_name for node_name in rlist
3603 if not rlist[node_name].fail_msg]
3604 for node_name, nr in rlist.items():
3605 if nr.fail_msg or not nr.payload:
3607 for (name, path, status, diagnose, variants,
3608 params, api_versions) in nr.payload:
3609 if name not in all_os:
3610 # build a list of nodes for this os containing empty lists
3611 # for each node in node_list
3613 for nname in good_nodes:
3614 all_os[name][nname] = []
3615 # convert params from [name, help] to (name, help)
3616 params = [tuple(v) for v in params]
3617 all_os[name][node_name].append((path, status, diagnose,
3618 variants, params, api_versions))
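# Standalone sketch of the regrouping above, with a simplified row
# shape: empty per-node lists are pre-created for every node that
# answered, so "OS missing on node X" stays distinguishable from
# "node X did not answer".
def _SketchDiagnoseByOs(payloads):
  """payloads: {node: [(name, path, status), ...]} (simplified)."""
  all_os = {}
  good_nodes = list(payloads)
  for node_name, rows in payloads.items():
    for (name, path, status) in rows:
      if name not in all_os:
        all_os[name] = dict((nname, []) for nname in good_nodes)
      all_os[name][node_name].append((path, status))
  return all_os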
3621 def _GetQueryData(self, lu):
3622 """Computes the list of nodes and their attributes.
3625 # Locking is not used
3626 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3628 valid_nodes = [node.name
3629 for node in lu.cfg.GetAllNodesInfo().values()
3630 if not node.offline and node.vm_capable]
3631 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3632 cluster = lu.cfg.GetClusterInfo()
3636 for (os_name, os_data) in pol.items():
3637 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3638 hidden=(os_name in cluster.hidden_os),
3639 blacklisted=(os_name in cluster.blacklisted_os))
3643 api_versions = set()
3645 for idx, osl in enumerate(os_data.values()):
3646 info.valid = bool(info.valid and osl and osl[0][1])
3650 (node_variants, node_params, node_api) = osl[0][3:6]
3653 variants.update(node_variants)
3654 parameters.update(node_params)
3655 api_versions.update(node_api)
3657 # Filter out inconsistent values
3658 variants.intersection_update(node_variants)
3659 parameters.intersection_update(node_params)
3660 api_versions.intersection_update(node_api)
3662 info.variants = list(variants)
3663 info.parameters = list(parameters)
3664 info.api_versions = list(api_versions)
3666 data[os_name] = info
3668 # Prepare data in requested order
3669 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3673 class LUOsDiagnose(NoHooksLU):
3674 """Logical unit for OS diagnose/query.
3680 def _BuildFilter(fields, names):
3681 """Builds a filter for querying OSes.
3684 name_filter = qlang.MakeSimpleFilter("name", names)
3686 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3687 # respective field is not requested
3688 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3689 for fname in ["hidden", "blacklisted"]
3690 if fname not in fields]
3691 if "valid" not in fields:
3692 status_filter.append([qlang.OP_TRUE, "valid"])
3695 status_filter.insert(0, qlang.OP_AND)
3697 status_filter = None
3699 if name_filter and status_filter:
3700 return [qlang.OP_AND, name_filter, status_filter]
3704 return status_filter
3706 def CheckArguments(self):
3707 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3708 self.op.output_fields, False)
3710 def ExpandNames(self):
3711 self.oq.ExpandNames(self)
3713 def Exec(self, feedback_fn):
3714 return self.oq.OldStyleQuery(self)
3717 class LUNodeRemove(LogicalUnit):
3718 """Logical unit for removing a node.
3721 HPATH = "node-remove"
3722 HTYPE = constants.HTYPE_NODE
3724 def BuildHooksEnv(self):
3727 This doesn't run on the target node in the pre phase as a failed
3728 node would then be impossible to remove.
3732 "OP_TARGET": self.op.node_name,
3733 "NODE_NAME": self.op.node_name,
3736 def BuildHooksNodes(self):
3737 """Build hooks nodes.
3740 all_nodes = self.cfg.GetNodeList()
3742 all_nodes.remove(self.op.node_name)
3744 logging.warning("Node '%s', which is about to be removed, was not found"
3745 " in the list of all nodes", self.op.node_name)
3746 return (all_nodes, all_nodes)
3748 def CheckPrereq(self):
3749 """Check prerequisites.
3752 - the node exists in the configuration
3753 - it does not have primary or secondary instances
3754 - it's not the master
3756 Any errors are signaled by raising errors.OpPrereqError.
3759 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3760 node = self.cfg.GetNodeInfo(self.op.node_name)
3761 assert node is not None
3763 instance_list = self.cfg.GetInstanceList()
3765 masternode = self.cfg.GetMasterNode()
3766 if node.name == masternode:
3767 raise errors.OpPrereqError("Node is the master node,"
3768 " you need to failover first.",
3771 for instance_name in instance_list:
3772 instance = self.cfg.GetInstanceInfo(instance_name)
3773 if node.name in instance.all_nodes:
3774 raise errors.OpPrereqError("Instance %s is still running on the node,"
3775 " please remove first." % instance_name,
3777 self.op.node_name = node.name
3780 def Exec(self, feedback_fn):
3781 """Removes the node from the cluster.
3785 logging.info("Stopping the node daemon and removing configs from node %s",
3788 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3790 # Promote nodes to master candidate as needed
3791 _AdjustCandidatePool(self, exceptions=[node.name])
3792 self.context.RemoveNode(node.name)
3794 # Run post hooks on the node before it's removed
3795 _RunPostHook(self, node.name)
3797 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3798 msg = result.fail_msg
3799 if msg:
3800 self.LogWarning("Errors encountered on the remote node while leaving"
3801 " the cluster: %s", msg)
3803 # Remove node from our /etc/hosts
3804 if self.cfg.GetClusterInfo().modify_etc_hosts:
3805 master_node = self.cfg.GetMasterNode()
3806 result = self.rpc.call_etc_hosts_modify(master_node,
3807 constants.ETC_HOSTS_REMOVE,
3808 node.name)
3809 result.Raise("Can't update hosts file with new host data")
3810 _RedistributeAncillaryFiles(self)
3813 class _NodeQuery(_QueryBase):
3814 FIELDS = query.NODE_FIELDS
3816 def ExpandNames(self, lu):
3817 lu.needed_locks = {}
3818 lu.share_locks[locking.LEVEL_NODE] = 1
3820 if self.names:
3821 self.wanted = _GetWantedNodes(lu, self.names)
3822 else:
3823 self.wanted = locking.ALL_SET
3825 self.do_locking = (self.use_locking and
3826 query.NQ_LIVE in self.requested_data)
3828 if self.do_locking:
3829 # if we don't request only static fields, we need to lock the nodes
3830 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3832 def DeclareLocks(self, lu, level):
3833 pass
3835 def _GetQueryData(self, lu):
3836 """Computes the list of nodes and their attributes.
3839 all_info = lu.cfg.GetAllNodesInfo()
3841 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3843 # Gather data as requested
3844 if query.NQ_LIVE in self.requested_data:
3845 # filter out non-vm_capable nodes
3846 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3848 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3849 lu.cfg.GetHypervisorType())
3850 live_data = dict((name, nresult.payload)
3851 for (name, nresult) in node_data.items()
3852 if not nresult.fail_msg and nresult.payload)
3854 else:
3855 live_data = None
3856 if query.NQ_INST in self.requested_data:
3857 node_to_primary = dict([(name, set()) for name in nodenames])
3858 node_to_secondary = dict([(name, set()) for name in nodenames])
3860 inst_data = lu.cfg.GetAllInstancesInfo()
3862 for inst in inst_data.values():
3863 if inst.primary_node in node_to_primary:
3864 node_to_primary[inst.primary_node].add(inst.name)
3865 for secnode in inst.secondary_nodes:
3866 if secnode in node_to_secondary:
3867 node_to_secondary[secnode].add(inst.name)
3868 else:
3869 node_to_primary = None
3870 node_to_secondary = None
3872 if query.NQ_OOB in self.requested_data:
3873 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3874 for name, node in all_info.iteritems())
3875 else:
3876 oob_support = None
3878 if query.NQ_GROUP in self.requested_data:
3879 groups = lu.cfg.GetAllNodeGroupsInfo()
3880 else:
3881 groups = None
3883 return query.NodeQueryData([all_info[name] for name in nodenames],
3884 live_data, lu.cfg.GetMasterNode(),
3885 node_to_primary, node_to_secondary, groups,
3886 oob_support, lu.cfg.GetClusterInfo())
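# Illustrative note (added): which NQ_* flags end up in requested_data is
# derived from the selected fields; e.g. a field list containing "dfree"
# implies query.NQ_LIVE (triggering the call_node_info RPC above), while
# purely configuration-backed fields like "name" require neither the RPC
# nor any node locks.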
3889 class LUNodeQuery(NoHooksLU):
3890 """Logical unit for querying nodes.
3893 # pylint: disable-msg=W0142
3894 REQ_BGL = False
3896 def CheckArguments(self):
3897 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3898 self.op.output_fields, self.op.use_locking)
3900 def ExpandNames(self):
3901 self.nq.ExpandNames(self)
3903 def Exec(self, feedback_fn):
3904 return self.nq.OldStyleQuery(self)
3907 class LUNodeQueryvols(NoHooksLU):
3908 """Logical unit for getting volumes on node(s).
3912 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3913 _FIELDS_STATIC = utils.FieldSet("node")
3915 def CheckArguments(self):
3916 _CheckOutputFields(static=self._FIELDS_STATIC,
3917 dynamic=self._FIELDS_DYNAMIC,
3918 selected=self.op.output_fields)
3920 def ExpandNames(self):
3921 self.needed_locks = {}
3922 self.share_locks[locking.LEVEL_NODE] = 1
3923 if not self.op.nodes:
3924 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3925 else:
3926 self.needed_locks[locking.LEVEL_NODE] = \
3927 _GetWantedNodes(self, self.op.nodes)
3929 def Exec(self, feedback_fn):
3930 """Computes the list of volumes on the nodes and their attributes.
3932 """
3933 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3934 volumes = self.rpc.call_node_volumes(nodenames)
3936 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3937 in self.cfg.GetInstanceList()]
3939 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3941 output = []
3942 for node in nodenames:
3943 nresult = volumes[node]
3944 if nresult.offline:
3945 continue
3946 msg = nresult.fail_msg
3947 if msg:
3948 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3949 continue
3951 node_vols = nresult.payload[:]
3952 node_vols.sort(key=lambda vol: vol['dev'])
3954 for vol in node_vols:
3955 node_output = []
3956 for field in self.op.output_fields:
3957 if field == "node":
3958 val = node
3959 elif field == "phys":
3960 val = vol['dev']
3961 elif field == "vg":
3962 val = vol['vg']
3963 elif field == "name":
3964 val = vol['name']
3965 elif field == "size":
3966 val = int(float(vol['size']))
3967 elif field == "instance":
3968 for inst in ilist:
3969 if node not in lv_by_node[inst]:
3970 continue
3971 if vol['name'] in lv_by_node[inst][node]:
3972 val = inst.name
3973 break
3974 else:
3975 val = '-'
3976 else:
3977 raise errors.ParameterError(field)
3978 node_output.append(str(val))
3980 output.append(node_output)
3982 return output
3985 class LUNodeQueryStorage(NoHooksLU):
3986 """Logical unit for getting information on storage units on node(s).
3989 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3990 REQ_BGL = False
3992 def CheckArguments(self):
3993 _CheckOutputFields(static=self._FIELDS_STATIC,
3994 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3995 selected=self.op.output_fields)
3997 def ExpandNames(self):
3998 self.needed_locks = {}
3999 self.share_locks[locking.LEVEL_NODE] = 1
4001 if self.op.nodes:
4002 self.needed_locks[locking.LEVEL_NODE] = \
4003 _GetWantedNodes(self, self.op.nodes)
4004 else:
4005 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4007 def Exec(self, feedback_fn):
4008 """Computes the list of storage units on the nodes and their attributes.
4010 """
4011 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4013 # Always get name to sort by
4014 if constants.SF_NAME in self.op.output_fields:
4015 fields = self.op.output_fields[:]
4016 else:
4017 fields = [constants.SF_NAME] + self.op.output_fields
4019 # Never ask for node or type as it's only known to the LU
4020 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4021 while extra in fields:
4022 fields.remove(extra)
4024 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4025 name_idx = field_idx[constants.SF_NAME]
4027 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4028 data = self.rpc.call_storage_list(self.nodes,
4029 self.op.storage_type, st_args,
4030 self.op.name, fields)
4032 result = []
4034 for node in utils.NiceSort(self.nodes):
4035 nresult = data[node]
4036 if nresult.offline:
4037 continue
4039 msg = nresult.fail_msg
4040 if msg:
4041 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4042 continue
4044 rows = dict([(row[name_idx], row) for row in nresult.payload])
4046 for name in utils.NiceSort(rows.keys()):
4047 row = rows[name]
4048 out = []
4051 for field in self.op.output_fields:
4052 if field == constants.SF_NODE:
4053 val = node
4054 elif field == constants.SF_TYPE:
4055 val = self.op.storage_type
4056 elif field in field_idx:
4057 val = row[field_idx[field]]
4058 else:
4059 raise errors.ParameterError(field)
4061 out.append(val)
4063 result.append(out)
4065 return result
4068 class _InstanceQuery(_QueryBase):
4069 FIELDS = query.INSTANCE_FIELDS
4071 def ExpandNames(self, lu):
4072 lu.needed_locks = {}
4073 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4074 lu.share_locks[locking.LEVEL_NODE] = 1
4076 if self.names:
4077 self.wanted = _GetWantedInstances(lu, self.names)
4078 else:
4079 self.wanted = locking.ALL_SET
4081 self.do_locking = (self.use_locking and
4082 query.IQ_LIVE in self.requested_data)
4083 if self.do_locking:
4084 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4085 lu.needed_locks[locking.LEVEL_NODE] = []
4086 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4088 def DeclareLocks(self, lu, level):
4089 if level == locking.LEVEL_NODE and self.do_locking:
4090 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4092 def _GetQueryData(self, lu):
4093 """Computes the list of instances and their attributes.
4095 """
4096 cluster = lu.cfg.GetClusterInfo()
4097 all_info = lu.cfg.GetAllInstancesInfo()
4099 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4101 instance_list = [all_info[name] for name in instance_names]
4102 nodes = frozenset(itertools.chain(*(inst.all_nodes
4103 for inst in instance_list)))
4104 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4105 bad_nodes = []
4106 offline_nodes = []
4107 wrongnode_inst = set()
4109 # Gather data as requested
4110 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4111 live_data = {}
4112 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4113 for name in nodes:
4114 result = node_data[name]
4115 if result.offline:
4116 # offline nodes will be in both lists
4117 assert result.fail_msg
4118 offline_nodes.append(name)
4119 if result.fail_msg:
4120 bad_nodes.append(name)
4121 elif result.payload:
4122 for inst in result.payload:
4123 if inst in all_info:
4124 if all_info[inst].primary_node == name:
4125 live_data.update(result.payload)
4126 else:
4127 wrongnode_inst.add(inst)
4128 else:
4129 # orphan instance; we don't list it here as we don't
4130 # handle this case yet in the output of instance listing
4131 logging.warning("Orphan instance '%s' found on node %s",
4132 inst, name)
4133 # else no instance is alive
4134 else:
4135 live_data = {}
4137 if query.IQ_DISKUSAGE in self.requested_data:
4138 disk_usage = dict((inst.name,
4139 _ComputeDiskSize(inst.disk_template,
4140 [{constants.IDISK_SIZE: disk.size}
4141 for disk in inst.disks]))
4142 for inst in instance_list)
4143 else:
4144 disk_usage = None
4146 if query.IQ_CONSOLE in self.requested_data:
4147 consinfo = {}
4148 for inst in instance_list:
4149 if inst.name in live_data:
4150 # Instance is running
4151 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4152 else:
4153 consinfo[inst.name] = None
4154 assert set(consinfo.keys()) == set(instance_names)
4155 else:
4156 consinfo = None
4158 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4159 disk_usage, offline_nodes, bad_nodes,
4160 live_data, wrongnode_inst, consinfo)
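# Illustrative example (added; hypothetical disk sizes): for a
# plain-template instance with two disks of 1024 and 2048 MiB, the
# IQ_DISKUSAGE branch above computes
#   disk_usage[inst.name] = _ComputeDiskSize(constants.DT_PLAIN,
#                                            [{constants.IDISK_SIZE: 1024},
#                                             {constants.IDISK_SIZE: 2048}])
# i.e. disk usage is derived from the configuration, not measured live.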
4163 class LUQuery(NoHooksLU):
4164 """Query for resources/items of a certain kind.
4167 # pylint: disable-msg=W0142
4168 REQ_BGL = False
4170 def CheckArguments(self):
4171 qcls = _GetQueryImplementation(self.op.what)
4173 self.impl = qcls(self.op.filter, self.op.fields, False)
4175 def ExpandNames(self):
4176 self.impl.ExpandNames(self)
4178 def DeclareLocks(self, level):
4179 self.impl.DeclareLocks(self, level)
4181 def Exec(self, feedback_fn):
4182 return self.impl.NewStyleQuery(self)
4185 class LUQueryFields(NoHooksLU):
4186 """Query for resources/items of a certain kind.
4189 # pylint: disable-msg=W0142
4191 REQ_BGL = False
4192 def CheckArguments(self):
4193 self.qcls = _GetQueryImplementation(self.op.what)
4195 def ExpandNames(self):
4196 self.needed_locks = {}
4198 def Exec(self, feedback_fn):
4199 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4202 class LUNodeModifyStorage(NoHooksLU):
4203 """Logical unit for modifying a storage volume on a node.
4208 def CheckArguments(self):
4209 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4211 storage_type = self.op.storage_type
4213 try:
4214 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4215 except KeyError:
4216 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4217 " modified" % storage_type,
4218 errors.ECODE_INVAL)
4220 diff = set(self.op.changes.keys()) - modifiable
4221 if diff:
4222 raise errors.OpPrereqError("The following fields can not be modified for"
4223 " storage units of type '%s': %r" %
4224 (storage_type, list(diff)),
4225 errors.ECODE_INVAL)
4227 def ExpandNames(self):
4228 self.needed_locks = {
4229 locking.LEVEL_NODE: self.op.node_name,
4230 }
4232 def Exec(self, feedback_fn):
4233 """Computes the list of nodes and their attributes.
4236 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4237 result = self.rpc.call_storage_modify(self.op.node_name,
4238 self.op.storage_type, st_args,
4239 self.op.name, self.op.changes)
4240 result.Raise("Failed to modify storage unit '%s' on %s" %
4241 (self.op.name, self.op.node_name))
4244 class LUNodeAdd(LogicalUnit):
4245 """Logical unit for adding node to the cluster.
4249 HTYPE = constants.HTYPE_NODE
4250 _NFLAGS = ["master_capable", "vm_capable"]
4252 def CheckArguments(self):
4253 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4254 # validate/normalize the node name
4255 self.hostname = netutils.GetHostname(name=self.op.node_name,
4256 family=self.primary_ip_family)
4257 self.op.node_name = self.hostname.name
4258 if self.op.readd and self.op.group:
4259 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4260 " being readded", errors.ECODE_INVAL)
4262 def BuildHooksEnv(self):
4263 """Build hooks env.
4265 This will run on all nodes before, and on all nodes + the new node after.
4267 """
4268 return {
4269 "OP_TARGET": self.op.node_name,
4270 "NODE_NAME": self.op.node_name,
4271 "NODE_PIP": self.op.primary_ip,
4272 "NODE_SIP": self.op.secondary_ip,
4273 "MASTER_CAPABLE": str(self.op.master_capable),
4274 "VM_CAPABLE": str(self.op.vm_capable),
4277 def BuildHooksNodes(self):
4278 """Build hooks nodes.
4281 # Exclude added node
4282 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4283 post_nodes = pre_nodes + [self.op.node_name, ]
4285 return (pre_nodes, post_nodes)
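# Illustrative note (added; hypothetical node names): when adding "node3"
# to a cluster of ["node1", "node2"], the tuple above yields
#   pre:  ["node1", "node2"]           (the new node is excluded on purpose)
#   post: ["node1", "node2", "node3"]  (the new node runs the post hooks too)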
4287 def CheckPrereq(self):
4288 """Check prerequisites.
4291 - the new node is not already in the config
4293 - its parameters (single/dual homed) matches the cluster
4295 Any errors are signaled by raising errors.OpPrereqError.
4299 hostname = self.hostname
4300 node = hostname.name
4301 primary_ip = self.op.primary_ip = hostname.ip
4302 if self.op.secondary_ip is None:
4303 if self.primary_ip_family == netutils.IP6Address.family:
4304 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4305 " IPv4 address must be given as secondary",
4307 self.op.secondary_ip = primary_ip
4309 secondary_ip = self.op.secondary_ip
4310 if not netutils.IP4Address.IsValid(secondary_ip):
4311 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4312 " address" % secondary_ip, errors.ECODE_INVAL)
4314 node_list = cfg.GetNodeList()
4315 if not self.op.readd and node in node_list:
4316 raise errors.OpPrereqError("Node %s is already in the configuration" %
4317 node, errors.ECODE_EXISTS)
4318 elif self.op.readd and node not in node_list:
4319 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4322 self.changed_primary_ip = False
4324 for existing_node_name in node_list:
4325 existing_node = cfg.GetNodeInfo(existing_node_name)
4327 if self.op.readd and node == existing_node_name:
4328 if existing_node.secondary_ip != secondary_ip:
4329 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4330 " address configuration as before",
4332 if existing_node.primary_ip != primary_ip:
4333 self.changed_primary_ip = True
4335 continue
4337 if (existing_node.primary_ip == primary_ip or
4338 existing_node.secondary_ip == primary_ip or
4339 existing_node.primary_ip == secondary_ip or
4340 existing_node.secondary_ip == secondary_ip):
4341 raise errors.OpPrereqError("New node ip address(es) conflict with"
4342 " existing node %s" % existing_node.name,
4343 errors.ECODE_NOTUNIQUE)
4345 # After this 'if' block, None is no longer a valid value for the
4346 # _capable op attributes
4347 if self.op.readd:
4348 old_node = self.cfg.GetNodeInfo(node)
4349 assert old_node is not None, "Can't retrieve locked node %s" % node
4350 for attr in self._NFLAGS:
4351 if getattr(self.op, attr) is None:
4352 setattr(self.op, attr, getattr(old_node, attr))
4353 else:
4354 for attr in self._NFLAGS:
4355 if getattr(self.op, attr) is None:
4356 setattr(self.op, attr, True)
4358 if self.op.readd and not self.op.vm_capable:
4359 pri, sec = cfg.GetNodeInstances(node)
4360 if pri or sec:
4361 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4362 " flag set to false, but it already holds"
4363 " instances" % node,
4366 # check that the type of the node (single versus dual homed) is the
4367 # same as for the master
4368 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4369 master_singlehomed = myself.secondary_ip == myself.primary_ip
4370 newbie_singlehomed = secondary_ip == primary_ip
4371 if master_singlehomed != newbie_singlehomed:
4372 if master_singlehomed:
4373 raise errors.OpPrereqError("The master has no secondary ip but the"
4374 " new node has one",
4377 raise errors.OpPrereqError("The master has a secondary ip but the"
4378 " new node doesn't have one",
4381 # checks reachability
4382 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4383 raise errors.OpPrereqError("Node not reachable by ping",
4384 errors.ECODE_ENVIRON)
4386 if not newbie_singlehomed:
4387 # check reachability from my secondary ip to newbie's secondary ip
4388 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4389 source=myself.secondary_ip):
4390 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4391 " based ping to node daemon port",
4392 errors.ECODE_ENVIRON)
4394 if self.op.readd:
4395 exceptions = [node]
4396 else:
4397 exceptions = []
4399 if self.op.master_capable:
4400 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4401 else:
4402 self.master_candidate = False
4404 if self.op.readd:
4405 self.new_node = old_node
4406 else:
4407 node_group = cfg.LookupNodeGroup(self.op.group)
4408 self.new_node = objects.Node(name=node,
4409 primary_ip=primary_ip,
4410 secondary_ip=secondary_ip,
4411 master_candidate=self.master_candidate,
4412 offline=False, drained=False,
4413 group=node_group)
4415 if self.op.ndparams:
4416 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4418 def Exec(self, feedback_fn):
4419 """Adds the new node to the cluster.
4422 new_node = self.new_node
4423 node = new_node.name
4425 # We're adding a new node, so we assume it is powered
4426 new_node.powered = True
4428 # for re-adds, reset the offline/drained/master-candidate flags;
4429 # we need to reset here, otherwise offline would prevent RPC calls
4430 # later in the procedure; this also means that if the re-add
4431 # fails, we are left with a non-offlined, broken node
4432 if self.op.readd:
4433 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4434 self.LogInfo("Readding a node, the offline/drained flags were reset")
4435 # if we demote the node, we do cleanup later in the procedure
4436 new_node.master_candidate = self.master_candidate
4437 if self.changed_primary_ip:
4438 new_node.primary_ip = self.op.primary_ip
4440 # copy the master/vm_capable flags
4441 for attr in self._NFLAGS:
4442 setattr(new_node, attr, getattr(self.op, attr))
4444 # notify the user about any possible mc promotion
4445 if new_node.master_candidate:
4446 self.LogInfo("Node will be a master candidate")
4448 if self.op.ndparams:
4449 new_node.ndparams = self.op.ndparams
4450 else:
4451 new_node.ndparams = {}
4453 # check connectivity
4454 result = self.rpc.call_version([node])[node]
4455 result.Raise("Can't get version information from node %s" % node)
4456 if constants.PROTOCOL_VERSION == result.payload:
4457 logging.info("Communication to node %s fine, sw version %s match",
4458 node, result.payload)
4459 else:
4460 raise errors.OpExecError("Version mismatch master version %s,"
4461 " node version %s" %
4462 (constants.PROTOCOL_VERSION, result.payload))
4464 # Add node to our /etc/hosts, and add key to known_hosts
4465 if self.cfg.GetClusterInfo().modify_etc_hosts:
4466 master_node = self.cfg.GetMasterNode()
4467 result = self.rpc.call_etc_hosts_modify(master_node,
4468 constants.ETC_HOSTS_ADD,
4469 self.hostname.name,
4470 self.hostname.ip)
4471 result.Raise("Can't update hosts file with new host data")
4473 if new_node.secondary_ip != new_node.primary_ip:
4474 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4475 False)
4477 node_verify_list = [self.cfg.GetMasterNode()]
4478 node_verify_param = {
4479 constants.NV_NODELIST: [node],
4480 # TODO: do a node-net-test as well?
4481 }
4483 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4484 self.cfg.GetClusterName())
4485 for verifier in node_verify_list:
4486 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4487 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4488 if nl_payload:
4489 for failed in nl_payload:
4490 feedback_fn("ssh/hostname verification failed"
4491 " (checking from %s): %s" %
4492 (verifier, nl_payload[failed]))
4493 raise errors.OpExecError("ssh/hostname verification failed.")
4496 _RedistributeAncillaryFiles(self)
4497 self.context.ReaddNode(new_node)
4498 # make sure we redistribute the config
4499 self.cfg.Update(new_node, feedback_fn)
4500 # and make sure the new node will not have old files around
4501 if not new_node.master_candidate:
4502 result = self.rpc.call_node_demote_from_mc(new_node.name)
4503 msg = result.fail_msg
4504 if msg:
4505 self.LogWarning("Node failed to demote itself from master"
4506 " candidate status: %s" % msg)
4508 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4509 additional_vm=self.op.vm_capable)
4510 self.context.AddNode(new_node, self.proc.GetECId())
4513 class LUNodeSetParams(LogicalUnit):
4514 """Modifies the parameters of a node.
4516 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4517 to the node role (as _ROLE_*)
4518 @cvar _R2F: a dictionary from node role to tuples of flags
4519 @cvar _FLAGS: a list of attribute names corresponding to the flags
4520 """
4522 HPATH = "node-modify"
4523 HTYPE = constants.HTYPE_NODE
4525 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4526 _F2R = {
4527 (True, False, False): _ROLE_CANDIDATE,
4528 (False, True, False): _ROLE_DRAINED,
4529 (False, False, True): _ROLE_OFFLINE,
4530 (False, False, False): _ROLE_REGULAR,
4531 }
4532 _R2F = dict((v, k) for k, v in _F2R.items())
4533 _FLAGS = ["master_candidate", "drained", "offline"]
4535 def CheckArguments(self):
4536 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4537 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4538 self.op.master_capable, self.op.vm_capable,
4539 self.op.secondary_ip, self.op.ndparams]
4540 if all_mods.count(None) == len(all_mods):
4541 raise errors.OpPrereqError("Please pass at least one modification",
4543 if all_mods.count(True) > 1:
4544 raise errors.OpPrereqError("Can't set the node into more than one"
4545 " state at the same time",
4548 # Boolean value that tells us whether we might be demoting from MC
4549 self.might_demote = (self.op.master_candidate == False or
4550 self.op.offline == True or
4551 self.op.drained == True or
4552 self.op.master_capable == False)
4554 if self.op.secondary_ip:
4555 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4556 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4557 " address" % self.op.secondary_ip,
4560 self.lock_all = self.op.auto_promote and self.might_demote
4561 self.lock_instances = self.op.secondary_ip is not None
4563 def ExpandNames(self):
4564 if self.lock_all:
4565 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4566 else:
4567 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4569 if self.lock_instances:
4570 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4572 def DeclareLocks(self, level):
4573 # If we have locked all instances, before waiting to lock nodes, release
4574 # all the ones living on nodes unrelated to the current operation.
4575 if level == locking.LEVEL_NODE and self.lock_instances:
4576 instances_release = []
4577 instances_keep = []
4578 self.affected_instances = []
4579 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4580 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4581 instance = self.context.cfg.GetInstanceInfo(instance_name)
4582 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4583 if i_mirrored and self.op.node_name in instance.all_nodes:
4584 instances_keep.append(instance_name)
4585 self.affected_instances.append(instance)
4586 else:
4587 instances_release.append(instance_name)
4588 if instances_release:
4589 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4590 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4592 def BuildHooksEnv(self):
4593 """Build hooks env.
4595 This runs on the master node.
4597 """
4598 return {
4599 "OP_TARGET": self.op.node_name,
4600 "MASTER_CANDIDATE": str(self.op.master_candidate),
4601 "OFFLINE": str(self.op.offline),
4602 "DRAINED": str(self.op.drained),
4603 "MASTER_CAPABLE": str(self.op.master_capable),
4604 "VM_CAPABLE": str(self.op.vm_capable),
4607 def BuildHooksNodes(self):
4608 """Build hooks nodes.
4611 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4612 return (nl, nl)
4614 def CheckPrereq(self):
4615 """Check prerequisites.
4617 This only checks the instance list against the existing names.
4619 """
4620 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4622 if (self.op.master_candidate is not None or
4623 self.op.drained is not None or
4624 self.op.offline is not None):
4625 # we can't change the master's node flags
4626 if self.op.node_name == self.cfg.GetMasterNode():
4627 raise errors.OpPrereqError("The master role can be changed"
4628 " only via master-failover",
4631 if self.op.master_candidate and not node.master_capable:
4632 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4633 " it a master candidate" % node.name,
4636 if self.op.vm_capable == False:
4637 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4638 if ipri or isec:
4639 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4640 " the vm_capable flag" % node.name,
4643 if node.master_candidate and self.might_demote and not self.lock_all:
4644 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4645 # check if after removing the current node, we're missing master
4646 # candidates
4647 (mc_remaining, mc_should, _) = \
4648 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4649 if mc_remaining < mc_should:
4650 raise errors.OpPrereqError("Not enough master candidates, please"
4651 " pass auto promote option to allow"
4652 " promotion", errors.ECODE_STATE)
4654 self.old_flags = old_flags = (node.master_candidate,
4655 node.drained, node.offline)
4656 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4657 self.old_role = old_role = self._F2R[old_flags]
4659 # Check for ineffective changes
4660 for attr in self._FLAGS:
4661 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4662 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4663 setattr(self.op, attr, None)
4665 # Past this point, any flag change to False means a transition
4666 # away from the respective state, as only real changes are kept
4668 # TODO: We might query the real power state if it supports OOB
4669 if _SupportsOob(self.cfg, node):
4670 if self.op.offline is False and not (node.powered or
4671 self.op.powered == True):
4672 raise errors.OpPrereqError(("Please power on node %s first before you"
4673 " can reset offline state") %
4675 elif self.op.powered is not None:
4676 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4677 " which does not support out-of-band"
4678 " handling") % self.op.node_name)
4680 # If we're being deofflined/drained, we'll MC ourself if needed
4681 if (self.op.drained == False or self.op.offline == False or
4682 (self.op.master_capable and not node.master_capable)):
4683 if _DecideSelfPromotion(self):
4684 self.op.master_candidate = True
4685 self.LogInfo("Auto-promoting node to master candidate")
4687 # If we're no longer master capable, we'll demote ourselves from MC
4688 if self.op.master_capable == False and node.master_candidate:
4689 self.LogInfo("Demoting from master candidate")
4690 self.op.master_candidate = False
4692 # Compute new role
4693 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4694 if self.op.master_candidate:
4695 new_role = self._ROLE_CANDIDATE
4696 elif self.op.drained:
4697 new_role = self._ROLE_DRAINED
4698 elif self.op.offline:
4699 new_role = self._ROLE_OFFLINE
4700 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4701 # False is still in new flags, which means we're un-setting (the
4702 # current) flags
4703 new_role = self._ROLE_REGULAR
4704 else: # no new flags, nothing, keep old role
4705 new_role = old_role
4707 self.new_role = new_role
4709 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4710 # Trying to transition out of offline status
4711 result = self.rpc.call_version([node.name])[node.name]
4712 if result.fail_msg:
4713 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4714 " to report its version: %s" %
4715 (node.name, result.fail_msg),
4716 errors.ECODE_ENVIRON)
4717 else:
4718 self.LogWarning("Transitioning node from offline to online state"
4719 " without using re-add. Please make sure the node"
4722 if self.op.secondary_ip:
4723 # Ok even without locking, because this can't be changed by any LU
4724 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4725 master_singlehomed = master.secondary_ip == master.primary_ip
4726 if master_singlehomed and self.op.secondary_ip:
4727 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4728 " homed cluster", errors.ECODE_INVAL)
4731 if self.affected_instances:
4732 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4733 " node has instances (%s) configured"
4734 " to use it" % self.affected_instances)
4736 # On online nodes, check that no instances are running, and that
4737 # the node has the new ip and we can reach it.
4738 for instance in self.affected_instances:
4739 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4741 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4742 if master.name != node.name:
4743 # check reachability from master secondary ip to new secondary ip
4744 if not netutils.TcpPing(self.op.secondary_ip,
4745 constants.DEFAULT_NODED_PORT,
4746 source=master.secondary_ip):
4747 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4748 " based ping to node daemon port",
4749 errors.ECODE_ENVIRON)
4751 if self.op.ndparams:
4752 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4753 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4754 self.new_ndparams = new_ndparams
4756 def Exec(self, feedback_fn):
4757 """Modifies a node.
4759 """
4760 node = self.node
4761 old_role = self.old_role
4762 new_role = self.new_role
4764 result = []
4766 if self.op.ndparams:
4767 node.ndparams = self.new_ndparams
4769 if self.op.powered is not None:
4770 node.powered = self.op.powered
4772 for attr in ["master_capable", "vm_capable"]:
4773 val = getattr(self.op, attr)
4774 if val is not None:
4775 setattr(node, attr, val)
4776 result.append((attr, str(val)))
4778 if new_role != old_role:
4779 # Tell the node to demote itself, if no longer MC and not offline
4780 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4781 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4782 if msg:
4783 self.LogWarning("Node failed to demote itself: %s", msg)
4785 new_flags = self._R2F[new_role]
4786 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4787 if of != nf:
4788 result.append((desc, str(nf)))
4789 (node.master_candidate, node.drained, node.offline) = new_flags
4791 # we locked all nodes, we adjust the CP before updating this node
4792 if self.lock_all:
4793 _AdjustCandidatePool(self, [node.name])
4795 if self.op.secondary_ip:
4796 node.secondary_ip = self.op.secondary_ip
4797 result.append(("secondary_ip", self.op.secondary_ip))
4799 # this will trigger configuration file update, if needed
4800 self.cfg.Update(node, feedback_fn)
4802 # this will trigger job queue propagation or cleanup if the mc
4803 # flag changed
4804 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4805 self.context.ReaddNode(node)
4807 return result
4810 class LUNodePowercycle(NoHooksLU):
4811 """Powercycles a node.
4816 def CheckArguments(self):
4817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4818 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4819 raise errors.OpPrereqError("The node is the master and the force"
4820 " parameter was not set",
4823 def ExpandNames(self):
4824 """Locking for PowercycleNode.
4826 This is a last-resort option and shouldn't block on other
4827 jobs. Therefore, we grab no locks.
4829 """
4830 self.needed_locks = {}
4832 def Exec(self, feedback_fn):
4833 """Reboots a node.
4835 """
4836 result = self.rpc.call_node_powercycle(self.op.node_name,
4837 self.cfg.GetHypervisorType())
4838 result.Raise("Failed to schedule the reboot")
4839 return result.payload
4842 class LUClusterQuery(NoHooksLU):
4843 """Query cluster configuration.
4848 def ExpandNames(self):
4849 self.needed_locks = {}
4851 def Exec(self, feedback_fn):
4852 """Return cluster config.
4855 cluster = self.cfg.GetClusterInfo()
4856 os_hvp = {}
4858 # Filter just for enabled hypervisors
4859 for os_name, hv_dict in cluster.os_hvp.items():
4860 os_hvp[os_name] = {}
4861 for hv_name, hv_params in hv_dict.items():
4862 if hv_name in cluster.enabled_hypervisors:
4863 os_hvp[os_name][hv_name] = hv_params
4865 # Convert ip_family to ip_version
4866 primary_ip_version = constants.IP4_VERSION
4867 if cluster.primary_ip_family == netutils.IP6Address.family:
4868 primary_ip_version = constants.IP6_VERSION
4870 result = {
4871 "software_version": constants.RELEASE_VERSION,
4872 "protocol_version": constants.PROTOCOL_VERSION,
4873 "config_version": constants.CONFIG_VERSION,
4874 "os_api_version": max(constants.OS_API_VERSIONS),
4875 "export_version": constants.EXPORT_VERSION,
4876 "architecture": (platform.architecture()[0], platform.machine()),
4877 "name": cluster.cluster_name,
4878 "master": cluster.master_node,
4879 "default_hypervisor": cluster.enabled_hypervisors[0],
4880 "enabled_hypervisors": cluster.enabled_hypervisors,
4881 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4882 for hypervisor_name in cluster.enabled_hypervisors]),
4883 "os_hvp": os_hvp,
4884 "beparams": cluster.beparams,
4885 "osparams": cluster.osparams,
4886 "nicparams": cluster.nicparams,
4887 "ndparams": cluster.ndparams,
4888 "candidate_pool_size": cluster.candidate_pool_size,
4889 "master_netdev": cluster.master_netdev,
4890 "volume_group_name": cluster.volume_group_name,
4891 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4892 "file_storage_dir": cluster.file_storage_dir,
4893 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4894 "maintain_node_health": cluster.maintain_node_health,
4895 "ctime": cluster.ctime,
4896 "mtime": cluster.mtime,
4897 "uuid": cluster.uuid,
4898 "tags": list(cluster.GetTags()),
4899 "uid_pool": cluster.uid_pool,
4900 "default_iallocator": cluster.default_iallocator,
4901 "reserved_lvs": cluster.reserved_lvs,
4902 "primary_ip_version": primary_ip_version,
4903 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4904 "hidden_os": cluster.hidden_os,
4905 "blacklisted_os": cluster.blacklisted_os,
4911 class LUClusterConfigQuery(NoHooksLU):
4912 """Return configuration values.
4916 _FIELDS_DYNAMIC = utils.FieldSet()
4917 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4918 "watcher_pause", "volume_group_name")
4920 def CheckArguments(self):
4921 _CheckOutputFields(static=self._FIELDS_STATIC,
4922 dynamic=self._FIELDS_DYNAMIC,
4923 selected=self.op.output_fields)
4925 def ExpandNames(self):
4926 self.needed_locks = {}
4928 def Exec(self, feedback_fn):
4929 """Dump a representation of the cluster config to the standard output.
4933 for field in self.op.output_fields:
4934 if field == "cluster_name":
4935 entry = self.cfg.GetClusterName()
4936 elif field == "master_node":
4937 entry = self.cfg.GetMasterNode()
4938 elif field == "drain_flag":
4939 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4940 elif field == "watcher_pause":
4941 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4942 elif field == "volume_group_name":
4943 entry = self.cfg.GetVGName()
4944 else:
4945 raise errors.ParameterError(field)
4946 values.append(entry)
4948 return values
4950 class LUInstanceActivateDisks(NoHooksLU):
4951 """Bring up an instance's disks.
4956 def ExpandNames(self):
4957 self._ExpandAndLockInstance()
4958 self.needed_locks[locking.LEVEL_NODE] = []
4959 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4961 def DeclareLocks(self, level):
4962 if level == locking.LEVEL_NODE:
4963 self._LockInstancesNodes()
4965 def CheckPrereq(self):
4966 """Check prerequisites.
4968 This checks that the instance is in the cluster.
4970 """
4971 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4972 assert self.instance is not None, \
4973 "Cannot retrieve locked instance %s" % self.op.instance_name
4974 _CheckNodeOnline(self, self.instance.primary_node)
4976 def Exec(self, feedback_fn):
4977 """Activate the disks.
4980 disks_ok, disks_info = \
4981 _AssembleInstanceDisks(self, self.instance,
4982 ignore_size=self.op.ignore_size)
4983 if not disks_ok:
4984 raise errors.OpExecError("Cannot activate block devices")
4989 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4990 ignore_size=False):
4991 """Prepare the block devices for an instance.
4993 This sets up the block devices on all nodes.
4995 @type lu: L{LogicalUnit}
4996 @param lu: the logical unit on whose behalf we execute
4997 @type instance: L{objects.Instance}
4998 @param instance: the instance for whose disks we assemble
4999 @type disks: list of L{objects.Disk} or None
5000 @param disks: which disks to assemble (or all, if None)
5001 @type ignore_secondaries: boolean
5002 @param ignore_secondaries: if true, errors on secondary nodes
5003 won't result in an error return from the function
5004 @type ignore_size: boolean
5005 @param ignore_size: if true, the current known size of the disk
5006 will not be used during the disk activation, useful for cases
5007 when the size is wrong
5008 @return: False if the operation failed, otherwise a list of
5009 (host, instance_visible_name, node_visible_name)
5010 with the mapping from node devices to instance devices
5012 """
5013 device_info = []
5014 disks_ok = True
5015 iname = instance.name
5016 disks = _ExpandCheckDisks(instance, disks)
5018 # With the two passes mechanism we try to reduce the window of
5019 # opportunity for the race condition of switching DRBD to primary
5020 # before handshaking occurred, but we do not eliminate it
5022 # The proper fix would be to wait (with some limits) until the
5023 # connection has been made and drbd transitions from WFConnection
5024 # into any other network-connected state (Connected, SyncTarget,
5025 # SyncSource, etc.)
5027 # 1st pass, assemble on all nodes in secondary mode
5028 for idx, inst_disk in enumerate(disks):
5029 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5030 if ignore_size:
5031 node_disk = node_disk.Copy()
5032 node_disk.UnsetSize()
5033 lu.cfg.SetDiskID(node_disk, node)
5034 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5035 msg = result.fail_msg
5036 if msg:
5037 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5038 " (is_primary=False, pass=1): %s",
5039 inst_disk.iv_name, node, msg)
5040 if not ignore_secondaries:
5041 disks_ok = False
5043 # FIXME: race condition on drbd migration to primary
5045 # 2nd pass, do only the primary node
5046 for idx, inst_disk in enumerate(disks):
5047 dev_path = None
5049 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5050 if node != instance.primary_node:
5051 continue
5052 if ignore_size:
5053 node_disk = node_disk.Copy()
5054 node_disk.UnsetSize()
5055 lu.cfg.SetDiskID(node_disk, node)
5056 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5057 msg = result.fail_msg
5058 if msg:
5059 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5060 " (is_primary=True, pass=2): %s",
5061 inst_disk.iv_name, node, msg)
5062 disks_ok = False
5063 else:
5064 dev_path = result.payload
5066 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5068 # leave the disks configured for the primary node
5069 # this is a workaround that would be fixed better by
5070 # improving the logical/physical id handling
5071 for disk in disks:
5072 lu.cfg.SetDiskID(disk, instance.primary_node)
5074 return disks_ok, device_info
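# Illustrative summary (added; hypothetical nodes A/B): for a DRBD disk of
# an instance with primary A and secondary B, the two passes above issue
#   pass 1: call_blockdev_assemble(A, ..., False, idx)   # secondary mode
#           call_blockdev_assemble(B, ..., False, idx)
#   pass 2: call_blockdev_assemble(A, ..., True, idx)    # promote primary
# so both peers exist before promotion, narrowing (but not closing) the
# WFConnection race described in the comment block above.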
5077 def _StartInstanceDisks(lu, instance, force):
5078 """Start the disks of an instance.
5081 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5082 ignore_secondaries=force)
5083 if not disks_ok:
5084 _ShutdownInstanceDisks(lu, instance)
5085 if force is not None and not force:
5086 lu.proc.LogWarning("", hint="If the message above refers to a"
5088 " you can retry the operation using '--force'.")
5089 raise errors.OpExecError("Disk consistency error")
5092 class LUInstanceDeactivateDisks(NoHooksLU):
5093 """Shutdown an instance's disks.
5098 def ExpandNames(self):
5099 self._ExpandAndLockInstance()
5100 self.needed_locks[locking.LEVEL_NODE] = []
5101 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5103 def DeclareLocks(self, level):
5104 if level == locking.LEVEL_NODE:
5105 self._LockInstancesNodes()
5107 def CheckPrereq(self):
5108 """Check prerequisites.
5110 This checks that the instance is in the cluster.
5112 """
5113 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5114 assert self.instance is not None, \
5115 "Cannot retrieve locked instance %s" % self.op.instance_name
5117 def Exec(self, feedback_fn):
5118 """Deactivate the disks
5121 instance = self.instance
5122 if self.op.force:
5123 _ShutdownInstanceDisks(self, instance)
5124 else:
5125 _SafeShutdownInstanceDisks(self, instance)
5128 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5129 """Shutdown block devices of an instance.
5131 This function checks if an instance is running, before calling
5132 _ShutdownInstanceDisks.
5134 """
5135 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5136 _ShutdownInstanceDisks(lu, instance, disks=disks)
5139 def _ExpandCheckDisks(instance, disks):
5140 """Return the instance disks selected by the disks list
5142 @type disks: list of L{objects.Disk} or None
5143 @param disks: selected disks
5144 @rtype: list of L{objects.Disk}
5145 @return: selected instance disks to act on
5147 """
5148 if disks is None:
5149 return instance.disks
5150 else:
5151 if not set(disks).issubset(instance.disks):
5152 raise errors.ProgrammerError("Can only act on disks belonging to the"
5157 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5158 """Shutdown block devices of an instance.
5160 This does the shutdown on all nodes of the instance.
5162 If ignore_primary is false, errors on the primary node are not
5163 ignored and will make this function return False.
5165 """
5166 all_result = True
5167 disks = _ExpandCheckDisks(instance, disks)
5169 for disk in disks:
5170 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5171 lu.cfg.SetDiskID(top_disk, node)
5172 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5173 msg = result.fail_msg
5174 if msg:
5175 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5176 disk.iv_name, node, msg)
5177 if ((node == instance.primary_node and not ignore_primary) or
5178 (node != instance.primary_node and not result.offline)):
5179 all_result = False
5181 return all_result
5183 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5184 """Checks if a node has enough free memory.
5186 This function checks if a given node has the needed amount of free
5187 memory. In case the node has less memory or we cannot get the
5188 information from the node, this function raises an OpPrereqError
5189 exception.
5191 @type lu: C{LogicalUnit}
5192 @param lu: a logical unit from which we get configuration data
5193 @type node: C{str}
5194 @param node: the node to check
5195 @type reason: C{str}
5196 @param reason: string to use in the error message
5197 @type requested: C{int}
5198 @param requested: the amount of memory in MiB to check for
5199 @type hypervisor_name: C{str}
5200 @param hypervisor_name: the hypervisor to ask for memory stats
5201 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5202 we cannot check the node
5204 """
5205 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5206 nodeinfo[node].Raise("Can't get data from node %s" % node,
5207 prereq=True, ecode=errors.ECODE_ENVIRON)
5208 free_mem = nodeinfo[node].payload.get('memory_free', None)
5209 if not isinstance(free_mem, int):
5210 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5211 " was '%s'" % (node, free_mem),
5212 errors.ECODE_ENVIRON)
5213 if requested > free_mem:
5214 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5215 " needed %s MiB, available %s MiB" %
5216 (node, reason, requested, free_mem),
5217 errors.ECODE_NORES)
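# Illustrative usage (added): mirrors LUInstanceStartup.CheckPrereq below:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)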
5220 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5221 """Checks if nodes have enough free disk space in the all VGs.
5223 This function check if all given nodes have the needed amount of
5224 free disk. In case any node has less disk or we cannot get the
5225 information from the node, this function raise an OpPrereqError
5228 @type lu: C{LogicalUnit}
5229 @param lu: a logical unit from which we get configuration data
5230 @type nodenames: C{list}
5231 @param nodenames: the list of node names to check
5232 @type req_sizes: C{dict}
5233 @param req_sizes: the hash of vg and corresponding amount of disk in
5234 MiB to check for
5235 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5236 or we cannot check the node
5238 """
5239 for vg, req_size in req_sizes.items():
5240 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
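# Illustrative example (added; hypothetical VG names and sizes): requiring
# 10 GiB on "xenvg" and 2 GiB on "datavg" for two nodes would be
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "datavg": 2048})
# which simply invokes _CheckNodesFreeDiskOnVG once per (vg, size) pair.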
5243 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5244 """Checks if nodes have enough free disk space in the specified VG.
5246 This function checks if all given nodes have the needed amount of
5247 free disk. In case any node has less disk or we cannot get the
5248 information from the node, this function raises an OpPrereqError
5249 exception.
5251 @type lu: C{LogicalUnit}
5252 @param lu: a logical unit from which we get configuration data
5253 @type nodenames: C{list}
5254 @param nodenames: the list of node names to check
5255 @type vg: C{str}
5256 @param vg: the volume group to check
5257 @type requested: C{int}
5258 @param requested: the amount of disk in MiB to check for
5259 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5260 or we cannot check the node
5262 """
5263 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5264 for node in nodenames:
5265 info = nodeinfo[node]
5266 info.Raise("Cannot get current information from node %s" % node,
5267 prereq=True, ecode=errors.ECODE_ENVIRON)
5268 vg_free = info.payload.get("vg_free", None)
5269 if not isinstance(vg_free, int):
5270 raise errors.OpPrereqError("Can't compute free disk space on node"
5271 " %s for vg %s, result was '%s'" %
5272 (node, vg, vg_free), errors.ECODE_ENVIRON)
5273 if requested > vg_free:
5274 raise errors.OpPrereqError("Not enough disk space on target node %s"
5275 " vg %s: required %d MiB, available %d MiB" %
5276 (node, vg, requested, vg_free),
5277 errors.ECODE_NORES)
5280 class LUInstanceStartup(LogicalUnit):
5281 """Starts an instance.
5284 HPATH = "instance-start"
5285 HTYPE = constants.HTYPE_INSTANCE
5286 REQ_BGL = False
5288 def CheckArguments(self):
5289 # extra beparams
5290 if self.op.beparams:
5291 # fill the beparams dict
5292 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5294 def ExpandNames(self):
5295 self._ExpandAndLockInstance()
5297 def BuildHooksEnv(self):
5298 """Build hooks env.
5300 This runs on master, primary and secondary nodes of the instance.
5302 """
5303 env = {
5304 "FORCE": self.op.force,
5307 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5309 return env
5311 def BuildHooksNodes(self):
5312 """Build hooks nodes.
5315 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5316 return (nl, nl)
5318 def CheckPrereq(self):
5319 """Check prerequisites.
5321 This checks that the instance is in the cluster.
5323 """
5324 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5325 assert self.instance is not None, \
5326 "Cannot retrieve locked instance %s" % self.op.instance_name
5329 if self.op.hvparams:
5330 # check hypervisor parameter syntax (locally)
5331 cluster = self.cfg.GetClusterInfo()
5332 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5333 filled_hvp = cluster.FillHV(instance)
5334 filled_hvp.update(self.op.hvparams)
5335 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5336 hv_type.CheckParameterSyntax(filled_hvp)
5337 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5339 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5341 if self.primary_offline and self.op.ignore_offline_nodes:
5342 self.proc.LogWarning("Ignoring offline primary node")
5344 if self.op.hvparams or self.op.beparams:
5345 self.proc.LogWarning("Overridden parameters are ignored")
5347 _CheckNodeOnline(self, instance.primary_node)
5349 bep = self.cfg.GetClusterInfo().FillBE(instance)
5351 # check bridges existence
5352 _CheckInstanceBridgesExist(self, instance)
5354 remote_info = self.rpc.call_instance_info(instance.primary_node,
5355 instance.name,
5356 instance.hypervisor)
5357 remote_info.Raise("Error checking node %s" % instance.primary_node,
5358 prereq=True, ecode=errors.ECODE_ENVIRON)
5359 if not remote_info.payload: # not running already
5360 _CheckNodeFreeMemory(self, instance.primary_node,
5361 "starting instance %s" % instance.name,
5362 bep[constants.BE_MEMORY], instance.hypervisor)
5364 def Exec(self, feedback_fn):
5365 """Start the instance.
5368 instance = self.instance
5369 force = self.op.force
5371 self.cfg.MarkInstanceUp(instance.name)
5373 if self.primary_offline:
5374 assert self.op.ignore_offline_nodes
5375 self.proc.LogInfo("Primary node offline, marked instance as started")
5377 node_current = instance.primary_node
5379 _StartInstanceDisks(self, instance, force)
5381 result = self.rpc.call_instance_start(node_current, instance,
5382 self.op.hvparams, self.op.beparams)
5383 msg = result.fail_msg
5384 if msg:
5385 _ShutdownInstanceDisks(self, instance)
5386 raise errors.OpExecError("Could not start instance: %s" % msg)
5389 class LUInstanceReboot(LogicalUnit):
5390 """Reboot an instance.
5393 HPATH = "instance-reboot"
5394 HTYPE = constants.HTYPE_INSTANCE
5395 REQ_BGL = False
5397 def ExpandNames(self):
5398 self._ExpandAndLockInstance()
5400 def BuildHooksEnv(self):
5401 """Build hooks env.
5403 This runs on master, primary and secondary nodes of the instance.
5405 """
5406 env = {
5407 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5408 "REBOOT_TYPE": self.op.reboot_type,
5409 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5412 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5414 return env
5416 def BuildHooksNodes(self):
5417 """Build hooks nodes.
5420 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5421 return (nl, nl)
5423 def CheckPrereq(self):
5424 """Check prerequisites.
5426 This checks that the instance is in the cluster.
5428 """
5429 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5430 assert self.instance is not None, \
5431 "Cannot retrieve locked instance %s" % self.op.instance_name
5433 _CheckNodeOnline(self, instance.primary_node)
5435 # check bridges existence
5436 _CheckInstanceBridgesExist(self, instance)
5438 def Exec(self, feedback_fn):
5439 """Reboot the instance.
5442 instance = self.instance
5443 ignore_secondaries = self.op.ignore_secondaries
5444 reboot_type = self.op.reboot_type
5446 remote_info = self.rpc.call_instance_info(instance.primary_node,
5447 instance.name,
5448 instance.hypervisor)
5449 remote_info.Raise("Error checking node %s" % instance.primary_node)
5450 instance_running = bool(remote_info.payload)
5452 node_current = instance.primary_node
5454 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5455 constants.INSTANCE_REBOOT_HARD]:
5456 for disk in instance.disks:
5457 self.cfg.SetDiskID(disk, node_current)
5458 result = self.rpc.call_instance_reboot(node_current, instance,
5459 reboot_type,
5460 self.op.shutdown_timeout)
5461 result.Raise("Could not reboot instance")
5463 if instance_running:
5464 result = self.rpc.call_instance_shutdown(node_current, instance,
5465 self.op.shutdown_timeout)
5466 result.Raise("Could not shutdown instance for full reboot")
5467 _ShutdownInstanceDisks(self, instance)
5468 else:
5469 self.LogInfo("Instance %s was already stopped, starting now",
5471 _StartInstanceDisks(self, instance, ignore_secondaries)
5472 result = self.rpc.call_instance_start(node_current, instance, None, None)
5473 msg = result.fail_msg
5474 if msg:
5475 _ShutdownInstanceDisks(self, instance)
5476 raise errors.OpExecError("Could not start instance for"
5477 " full reboot: %s" % msg)
5479 self.cfg.MarkInstanceUp(instance.name)
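# Illustrative note (added): INSTANCE_REBOOT_SOFT and INSTANCE_REBOOT_HARD
# are delegated to a single call_instance_reboot RPC on the primary node,
# whereas INSTANCE_REBOOT_FULL is implemented above as an explicit
# shutdown, disk deactivation, disk activation and fresh start.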
5482 class LUInstanceShutdown(LogicalUnit):
5483 """Shutdown an instance.
5486 HPATH = "instance-stop"
5487 HTYPE = constants.HTYPE_INSTANCE
5488 REQ_BGL = False
5490 def ExpandNames(self):
5491 self._ExpandAndLockInstance()
5493 def BuildHooksEnv(self):
5494 """Build hooks env.
5496 This runs on master, primary and secondary nodes of the instance.
5498 """
5499 env = _BuildInstanceHookEnvByObject(self, self.instance)
5500 env["TIMEOUT"] = self.op.timeout
5503 def BuildHooksNodes(self):
5504 """Build hooks nodes.
5507 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5508 return (nl, nl)
5510 def CheckPrereq(self):
5511 """Check prerequisites.
5513 This checks that the instance is in the cluster.
5515 """
5516 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5517 assert self.instance is not None, \
5518 "Cannot retrieve locked instance %s" % self.op.instance_name
5520 self.primary_offline = \
5521 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5523 if self.primary_offline and self.op.ignore_offline_nodes:
5524 self.proc.LogWarning("Ignoring offline primary node")
5526 _CheckNodeOnline(self, self.instance.primary_node)
5528 def Exec(self, feedback_fn):
5529 """Shutdown the instance.
5532 instance = self.instance
5533 node_current = instance.primary_node
5534 timeout = self.op.timeout
5536 self.cfg.MarkInstanceDown(instance.name)
5538 if self.primary_offline:
5539 assert self.op.ignore_offline_nodes
5540 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5542 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5543 msg = result.fail_msg
5544 if msg:
5545 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5547 _ShutdownInstanceDisks(self, instance)
5550 class LUInstanceReinstall(LogicalUnit):
5551 """Reinstall an instance.
5554 HPATH = "instance-reinstall"
5555 HTYPE = constants.HTYPE_INSTANCE
5556 REQ_BGL = False
5558 def ExpandNames(self):
5559 self._ExpandAndLockInstance()
5561 def BuildHooksEnv(self):
5562 """Build hooks env.
5564 This runs on master, primary and secondary nodes of the instance.
5566 """
5567 return _BuildInstanceHookEnvByObject(self, self.instance)
5569 def BuildHooksNodes(self):
5570 """Build hooks nodes.
5573 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5574 return (nl, nl)
5576 def CheckPrereq(self):
5577 """Check prerequisites.
5579 This checks that the instance is in the cluster and is not running.
5581 """
5582 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5583 assert instance is not None, \
5584 "Cannot retrieve locked instance %s" % self.op.instance_name
5585 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5586 " offline, cannot reinstall")
5587 for node in instance.secondary_nodes:
5588 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5589 " cannot reinstall")
5591 if instance.disk_template == constants.DT_DISKLESS:
5592 raise errors.OpPrereqError("Instance '%s' has no disks" %
5593 self.op.instance_name,
5594 errors.ECODE_INVAL)
5595 _CheckInstanceDown(self, instance, "cannot reinstall")
5597 if self.op.os_type is not None:
5598 # OS verification
5599 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5600 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5601 instance_os = self.op.os_type
5602 else:
5603 instance_os = instance.os
5605 nodelist = list(instance.all_nodes)
5607 if self.op.osparams:
5608 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5609 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5610 self.os_inst = i_osdict # the new dict (without defaults)
5610 else:
5611 self.os_inst = {}
5614 self.instance = instance
5616 def Exec(self, feedback_fn):
5617 """Reinstall the instance.
5620 inst = self.instance
5622 if self.op.os_type is not None:
5623 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5624 inst.os = self.op.os_type
5625 # Write to configuration
5626 self.cfg.Update(inst, feedback_fn)
5628 _StartInstanceDisks(self, inst, None)
5629 try:
5630 feedback_fn("Running the instance OS create scripts...")
5631 # FIXME: pass debug option from opcode to backend
5632 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5633 self.op.debug_level,
5634 osparams=self.os_inst)
5635 result.Raise("Could not install OS for instance %s on node %s" %
5636 (inst.name, inst.primary_node))
5637 finally:
5638 _ShutdownInstanceDisks(self, inst)
5641 class LUInstanceRecreateDisks(LogicalUnit):
5642 """Recreate an instance's missing disks.
5645 HPATH = "instance-recreate-disks"
5646 HTYPE = constants.HTYPE_INSTANCE
5647 REQ_BGL = False
5649 def ExpandNames(self):
5650 self._ExpandAndLockInstance()
5652 def BuildHooksEnv(self):
5653 """Build hooks env.
5655 This runs on master, primary and secondary nodes of the instance.
5657 """
5658 return _BuildInstanceHookEnvByObject(self, self.instance)
5660 def BuildHooksNodes(self):
5661 """Build hooks nodes.
5664 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5665 return (nl, nl)
5667 def CheckPrereq(self):
5668 """Check prerequisites.
5670 This checks that the instance is in the cluster and is not running.
5672 """
5673 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674 assert instance is not None, \
5675 "Cannot retrieve locked instance %s" % self.op.instance_name
5676 _CheckNodeOnline(self, instance.primary_node)
5678 if instance.disk_template == constants.DT_DISKLESS:
5679 raise errors.OpPrereqError("Instance '%s' has no disks" %
5680 self.op.instance_name, errors.ECODE_INVAL)
5681 _CheckInstanceDown(self, instance, "cannot recreate disks")
5683 if not self.op.disks:
5684 self.op.disks = range(len(instance.disks))
5686 for idx in self.op.disks:
5687 if idx >= len(instance.disks):
5688 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5691 self.instance = instance
5693 def Exec(self, feedback_fn):
5694 """Recreate the disks.
5698 for idx, _ in enumerate(self.instance.disks):
5699 if idx not in self.op.disks: # disk idx has not been passed in
5700 to_skip.append(idx)
5703 _CreateDisks(self, self.instance, to_skip=to_skip)
5706 class LUInstanceRename(LogicalUnit):
5707 """Rename an instance.
5710 HPATH = "instance-rename"
5711 HTYPE = constants.HTYPE_INSTANCE
5713 def CheckArguments(self):
5714 """Check arguments.
5716 """
5717 if self.op.ip_check and not self.op.name_check:
5718 # TODO: make the ip check more flexible and not depend on the name check
5719 raise errors.OpPrereqError("Cannot do ip check without a name check",
5722 def BuildHooksEnv(self):
5725 This runs on master, primary and secondary nodes of the instance.
5728 env = _BuildInstanceHookEnvByObject(self, self.instance)
5729 env["INSTANCE_NEW_NAME"] = self.op.new_name
5732 def BuildHooksNodes(self):
5733 """Build hooks nodes.
5736 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5739 def CheckPrereq(self):
5740 """Check prerequisites.
5742 This checks that the instance is in the cluster and is not running.
5744 """
5745 self.op.instance_name = _ExpandInstanceName(self.cfg,
5746 self.op.instance_name)
5747 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5748 assert instance is not None
5749 _CheckNodeOnline(self, instance.primary_node)
5750 _CheckInstanceDown(self, instance, "cannot rename")
5751 self.instance = instance
5753 new_name = self.op.new_name
5754 if self.op.name_check:
5755 hostname = netutils.GetHostname(name=new_name)
5756 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5757 hostname.name)
5758 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5759 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5760 " same as given hostname '%s'") %
5761 (hostname.name, self.op.new_name),
5762 errors.ECODE_INVAL)
5763 new_name = self.op.new_name = hostname.name
5764 if (self.op.ip_check and
5765 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5766 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5767 (hostname.ip, new_name),
5768 errors.ECODE_NOTUNIQUE)
5770 instance_list = self.cfg.GetInstanceList()
5771 if new_name in instance_list and new_name != instance.name:
5772 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5773 new_name, errors.ECODE_EXISTS)
5775 def Exec(self, feedback_fn):
5776 """Rename the instance.
5778 """
5779 inst = self.instance
5780 old_name = inst.name
5782 rename_file_storage = False
5783 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5784 self.op.new_name != inst.name):
5785 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5786 rename_file_storage = True
5788 self.cfg.RenameInstance(inst.name, self.op.new_name)
5789 # Change the instance lock. This is definitely safe while we hold the BGL.
5790 # Otherwise the new lock would have to be added in acquired mode.
5791 assert self.REQ_BGL
5792 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5793 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5795 # re-read the instance from the configuration after rename
5796 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5798 if rename_file_storage:
5799 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5800 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5801 old_file_storage_dir,
5802 new_file_storage_dir)
5803 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5804 " (but the instance has been renamed in Ganeti)" %
5805 (inst.primary_node, old_file_storage_dir,
5806 new_file_storage_dir))
5808 _StartInstanceDisks(self, inst, None)
5809 try:
5810 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5811 old_name, self.op.debug_level)
5812 msg = result.fail_msg
5813 if msg:
5814 msg = ("Could not run OS rename script for instance %s on node %s"
5815 " (but the instance has been renamed in Ganeti): %s" %
5816 (inst.name, inst.primary_node, msg))
5817 self.proc.LogWarning(msg)
5818 finally:
5819 _ShutdownInstanceDisks(self, inst)
5821 return inst.name
5824 class LUInstanceRemove(LogicalUnit):
5825 """Remove an instance.
5827 """
5828 HPATH = "instance-remove"
5829 HTYPE = constants.HTYPE_INSTANCE
5830 REQ_BGL = False
5832 def ExpandNames(self):
5833 self._ExpandAndLockInstance()
5834 self.needed_locks[locking.LEVEL_NODE] = []
5835 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5837 def DeclareLocks(self, level):
5838 if level == locking.LEVEL_NODE:
5839 self._LockInstancesNodes()
5841 def BuildHooksEnv(self):
5842 """Build hooks env.
5844 This runs on master, primary and secondary nodes of the instance.
5846 """
5847 env = _BuildInstanceHookEnvByObject(self, self.instance)
5848 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5849 return env
5851 def BuildHooksNodes(self):
5852 """Build hooks nodes.
5854 """
5855 nl = [self.cfg.GetMasterNode()]
5856 nl_post = list(self.instance.all_nodes) + nl
5857 return (nl, nl_post)
5859 def CheckPrereq(self):
5860 """Check prerequisites.
5862 This checks that the instance is in the cluster.
5865 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5866 assert self.instance is not None, \
5867 "Cannot retrieve locked instance %s" % self.op.instance_name
5869 def Exec(self, feedback_fn):
5870 """Remove the instance.
5873 instance = self.instance
5874 logging.info("Shutting down instance %s on node %s",
5875 instance.name, instance.primary_node)
5877 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5878 self.op.shutdown_timeout)
5879 msg = result.fail_msg
5880 if msg:
5881 if self.op.ignore_failures:
5882 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5883 else:
5884 raise errors.OpExecError("Could not shutdown instance %s on"
5885 " node %s: %s" %
5886 (instance.name, instance.primary_node, msg))
5888 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5891 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5892 """Utility function to remove an instance.
5894 """
5895 logging.info("Removing block devices for instance %s", instance.name)
5897 if not _RemoveDisks(lu, instance):
5898 if not ignore_failures:
5899 raise errors.OpExecError("Can't remove instance's disks")
5900 feedback_fn("Warning: can't remove instance's disks")
5902 logging.info("Removing instance %s out of cluster config", instance.name)
5904 lu.cfg.RemoveInstance(instance.name)
5906 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5907 "Instance lock removal conflict"
5909 # Remove lock for the instance
5910 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5913 class LUInstanceQuery(NoHooksLU):
5914 """Logical unit for querying instances.
5916 """
5917 # pylint: disable-msg=W0142
5918 REQ_BGL = False
5920 def CheckArguments(self):
5921 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5922 self.op.output_fields, self.op.use_locking)
5924 def ExpandNames(self):
5925 self.iq.ExpandNames(self)
5927 def DeclareLocks(self, level):
5928 self.iq.DeclareLocks(self, level)
5930 def Exec(self, feedback_fn):
5931 return self.iq.OldStyleQuery(self)
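# Illustrative note (not in the original source): qlang.MakeSimpleFilter
# builds the query-language filter used in CheckArguments above. Assuming
# the standard qlang semantics, a name list becomes an OR of equality
# checks, e.g.:
#
#   qlang.MakeSimpleFilter("name", ["inst1.example.com", "inst2.example.com"])
#   # -> ["|", ["=", "name", "inst1.example.com"],
#   #          ["=", "name", "inst2.example.com"]]
#
# while an empty/None name list yields None, i.e. "match everything".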
5934 class LUInstanceFailover(LogicalUnit):
5935 """Failover an instance.
5937 """
5938 HPATH = "instance-failover"
5939 HTYPE = constants.HTYPE_INSTANCE
5940 REQ_BGL = False
5942 def CheckArguments(self):
5943 """Check the arguments.
5945 """
5946 self.iallocator = getattr(self.op, "iallocator", None)
5947 self.target_node = getattr(self.op, "target_node", None)
5949 def ExpandNames(self):
5950 self._ExpandAndLockInstance()
5952 if self.op.target_node is not None:
5953 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5955 self.needed_locks[locking.LEVEL_NODE] = []
5956 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5958 ignore_consistency = self.op.ignore_consistency
5959 shutdown_timeout = self.op.shutdown_timeout
5960 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5961 cleanup=False,
5962 iallocator=self.op.iallocator,
5963 target_node=self.op.target_node,
5964 failover=True,
5965 ignore_consistency=ignore_consistency,
5966 shutdown_timeout=shutdown_timeout)
5967 self.tasklets = [self._migrater]
5969 def DeclareLocks(self, level):
5970 if level == locking.LEVEL_NODE:
5971 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5972 if instance.disk_template in constants.DTS_EXT_MIRROR:
5973 if self.op.target_node is None:
5974 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5975 else:
5976 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5977 self.op.target_node]
5978 del self.recalculate_locks[locking.LEVEL_NODE]
5979 else:
5980 self._LockInstancesNodes()
5982 def BuildHooksEnv(self):
5983 """Build hooks env.
5985 This runs on master, primary and secondary nodes of the instance.
5987 """
5988 instance = self._migrater.instance
5989 source_node = instance.primary_node
5990 target_node = self._migrater.target_node
5991 env = {
5992 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5993 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5994 "OLD_PRIMARY": source_node,
5995 "NEW_PRIMARY": target_node,
5996 }
5998 if instance.disk_template in constants.DTS_INT_MIRROR:
5999 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6000 env["NEW_SECONDARY"] = source_node
6001 else:
6002 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6004 env.update(_BuildInstanceHookEnvByObject(self, instance))
6006 return env
6008 def BuildHooksNodes(self):
6009 """Build hooks nodes.
6011 """
6012 instance = self._migrater.instance
6013 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6014 return (nl, nl + [instance.primary_node])
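# Illustrative example (not in the original source): for a DRBD-backed
# instance with primary node1 and secondary node2 failing over to node2,
# BuildHooksEnv above yields, among the usual instance variables:
#
#   OLD_PRIMARY=node1    NEW_PRIMARY=node2
#   OLD_SECONDARY=node2  NEW_SECONDARY=node1
#
# For externally mirrored disk templates both *_SECONDARY values are empty.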
6017 class LUInstanceMigrate(LogicalUnit):
6018 """Migrate an instance.
6020 This is migration without shutting down, compared to the failover,
6021 which is done with shutdown.
6023 """
6024 HPATH = "instance-migrate"
6025 HTYPE = constants.HTYPE_INSTANCE
6026 REQ_BGL = False
6028 def ExpandNames(self):
6029 self._ExpandAndLockInstance()
6031 if self.op.target_node is not None:
6032 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6034 self.needed_locks[locking.LEVEL_NODE] = []
6035 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6037 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6038 cleanup=self.op.cleanup,
6039 iallocator=self.op.iallocator,
6040 target_node=self.op.target_node,
6041 failover=False,
6042 fallback=self.op.allow_failover)
6043 self.tasklets = [self._migrater]
6045 def DeclareLocks(self, level):
6046 if level == locking.LEVEL_NODE:
6047 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6048 if instance.disk_template in constants.DTS_EXT_MIRROR:
6049 if self.op.target_node is None:
6050 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6051 else:
6052 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6053 self.op.target_node]
6054 del self.recalculate_locks[locking.LEVEL_NODE]
6055 else:
6056 self._LockInstancesNodes()
6058 def BuildHooksEnv(self):
6059 """Build hooks env.
6061 This runs on master, primary and secondary nodes of the instance.
6063 """
6064 instance = self._migrater.instance
6065 source_node = instance.primary_node
6066 target_node = self._migrater.target_node
6067 env = _BuildInstanceHookEnvByObject(self, instance)
6068 env.update({
6069 "MIGRATE_LIVE": self._migrater.live,
6070 "MIGRATE_CLEANUP": self.op.cleanup,
6071 "OLD_PRIMARY": source_node,
6072 "NEW_PRIMARY": target_node,
6073 })
6075 if instance.disk_template in constants.DTS_INT_MIRROR:
6076 env["OLD_SECONDARY"] = target_node
6077 env["NEW_SECONDARY"] = source_node
6078 else:
6079 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6081 return env
6083 def BuildHooksNodes(self):
6084 """Build hooks nodes.
6086 """
6087 instance = self._migrater.instance
6088 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6089 return (nl, nl + [instance.primary_node])
6092 class LUInstanceMove(LogicalUnit):
6093 """Move an instance by data-copying.
6095 """
6096 HPATH = "instance-move"
6097 HTYPE = constants.HTYPE_INSTANCE
6098 REQ_BGL = False
6100 def ExpandNames(self):
6101 self._ExpandAndLockInstance()
6102 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6103 self.op.target_node = target_node
6104 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6107 def DeclareLocks(self, level):
6108 if level == locking.LEVEL_NODE:
6109 self._LockInstancesNodes(primary_only=True)
6111 def BuildHooksEnv(self):
6112 """Build hooks env.
6114 This runs on master, primary and secondary nodes of the instance.
6116 """
6117 env = {
6118 "TARGET_NODE": self.op.target_node,
6119 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6120 }
6121 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6122 return env
6124 def BuildHooksNodes(self):
6125 """Build hooks nodes.
6127 """
6128 nl = [
6129 self.cfg.GetMasterNode(),
6130 self.instance.primary_node,
6131 self.op.target_node,
6132 ]
6133 return (nl, nl)
6135 def CheckPrereq(self):
6136 """Check prerequisites.
6138 This checks that the instance is in the cluster.
6141 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6142 assert self.instance is not None, \
6143 "Cannot retrieve locked instance %s" % self.op.instance_name
6145 node = self.cfg.GetNodeInfo(self.op.target_node)
6146 assert node is not None, \
6147 "Cannot retrieve locked node %s" % self.op.target_node
6149 self.target_node = target_node = node.name
6151 if target_node == instance.primary_node:
6152 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6153 (instance.name, target_node),
6154 errors.ECODE_STATE)
6156 bep = self.cfg.GetClusterInfo().FillBE(instance)
6158 for idx, dsk in enumerate(instance.disks):
6159 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6160 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6161 " cannot copy" % idx, errors.ECODE_STATE)
6163 _CheckNodeOnline(self, target_node)
6164 _CheckNodeNotDrained(self, target_node)
6165 _CheckNodeVmCapable(self, target_node)
6167 if instance.admin_up:
6168 # check memory requirements on the secondary node
6169 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6170 instance.name, bep[constants.BE_MEMORY],
6171 instance.hypervisor)
6172 else:
6173 self.LogInfo("Not checking memory on the secondary node as"
6174 " instance will not be started")
6176 # check bridge existence
6177 _CheckInstanceBridgesExist(self, instance, node=target_node)
6179 def Exec(self, feedback_fn):
6180 """Move an instance.
6182 The move is done by shutting it down on its present node, copying
6183 the data over (slow) and starting it on the new node.
6185 """
6186 instance = self.instance
6188 source_node = instance.primary_node
6189 target_node = self.target_node
6191 self.LogInfo("Shutting down instance %s on source node %s",
6192 instance.name, source_node)
6194 result = self.rpc.call_instance_shutdown(source_node, instance,
6195 self.op.shutdown_timeout)
6196 msg = result.fail_msg
6197 if msg:
6198 if self.op.ignore_consistency:
6199 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6200 " Proceeding anyway. Please make sure node"
6201 " %s is down. Error details: %s",
6202 instance.name, source_node, source_node, msg)
6203 else:
6204 raise errors.OpExecError("Could not shutdown instance %s on"
6205 " node %s: %s" %
6206 (instance.name, source_node, msg))
6208 # create the target disks
6209 try:
6210 _CreateDisks(self, instance, target_node=target_node)
6211 except errors.OpExecError:
6212 self.LogWarning("Device creation failed, reverting...")
6213 try:
6214 _RemoveDisks(self, instance, target_node=target_node)
6215 finally:
6216 self.cfg.ReleaseDRBDMinors(instance.name)
6217 raise
6219 cluster_name = self.cfg.GetClusterInfo().cluster_name
6221 errs = []
6222 # activate, get path, copy the data over
6223 for idx, disk in enumerate(instance.disks):
6224 self.LogInfo("Copying data for disk %d", idx)
6225 result = self.rpc.call_blockdev_assemble(target_node, disk,
6226 instance.name, True, idx)
6227 if result.fail_msg:
6228 self.LogWarning("Can't assemble newly created disk %d: %s",
6229 idx, result.fail_msg)
6230 errs.append(result.fail_msg)
6231 break
6232 dev_path = result.payload
6233 result = self.rpc.call_blockdev_export(source_node, disk,
6234 target_node, dev_path,
6235 cluster_name)
6236 if result.fail_msg:
6237 self.LogWarning("Can't copy data over for disk %d: %s",
6238 idx, result.fail_msg)
6239 errs.append(result.fail_msg)
6240 break
6242 if errs:
6243 self.LogWarning("Some disks failed to copy, aborting")
6244 try:
6245 _RemoveDisks(self, instance, target_node=target_node)
6246 finally:
6247 self.cfg.ReleaseDRBDMinors(instance.name)
6248 raise errors.OpExecError("Errors during disk copy: %s" %
6249 (",".join(errs),))
6251 instance.primary_node = target_node
6252 self.cfg.Update(instance, feedback_fn)
6254 self.LogInfo("Removing the disks on the original node")
6255 _RemoveDisks(self, instance, target_node=source_node)
6257 # Only start the instance if it's marked as up
6258 if instance.admin_up:
6259 self.LogInfo("Starting instance %s on node %s",
6260 instance.name, target_node)
6262 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6263 ignore_secondaries=True)
6264 if not disks_ok:
6265 _ShutdownInstanceDisks(self, instance)
6266 raise errors.OpExecError("Can't activate the instance's disks")
6268 result = self.rpc.call_instance_start(target_node, instance, None, None)
6269 msg = result.fail_msg
6270 if msg:
6271 _ShutdownInstanceDisks(self, instance)
6272 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6273 (instance.name, target_node, msg))
6276 class LUNodeMigrate(LogicalUnit):
6277 """Migrate all instances from a node.
6279 """
6280 HPATH = "node-migrate"
6281 HTYPE = constants.HTYPE_NODE
6282 REQ_BGL = False
6284 def CheckArguments(self):
6285 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6287 def ExpandNames(self):
6288 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6290 self.needed_locks = {}
6292 # Create tasklets for migrating instances for all instances on this node
6293 names = []
6294 tasklets = []
6296 self.lock_all_nodes = False
6298 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6299 logging.debug("Migrating instance %s", inst.name)
6300 names.append(inst.name)
6302 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6303 iallocator=self.op.iallocator,
6304 target_node=self.op.remote_node))
6306 if inst.disk_template in constants.DTS_EXT_MIRROR:
6307 # We need to lock all nodes, as the iallocator will choose the
6308 # destination nodes afterwards
6309 self.lock_all_nodes = True
6311 self.tasklets = tasklets
6313 # Declare node locks
6314 if self.lock_all_nodes:
6315 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6316 else:
6317 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6318 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6320 # Declare instance locks
6321 self.needed_locks[locking.LEVEL_INSTANCE] = names
6323 def DeclareLocks(self, level):
6324 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6325 self._LockInstancesNodes()
6327 def BuildHooksEnv(self):
6328 """Build hooks env.
6330 This runs on the master, the primary and all the secondaries.
6332 """
6333 return {
6334 "NODE_NAME": self.op.node_name,
6335 }
6337 def BuildHooksNodes(self):
6338 """Build hooks nodes.
6340 """
6341 nl = [self.cfg.GetMasterNode()]
6343 return (nl, nl)
6345 class TLMigrateInstance(Tasklet):
6346 """Tasklet class for instance migration.
6348 @type live: boolean
6349 @ivar live: whether the migration will be done live or non-live;
6350 this variable is initialized only after CheckPrereq has run
6351 @type cleanup: boolean
6352 @ivar cleanup: Whether we cleanup from a failed migration
6353 @type iallocator: string
6354 @ivar iallocator: The iallocator used to determine target_node
6355 @type target_node: string
6356 @ivar target_node: If given, the target_node to reallocate the instance to
6357 @type failover: boolean
6358 @ivar failover: Whether operation results in failover or migration
6359 @type fallback: boolean
6360 @ivar fallback: Whether fallback to failover is allowed if migration not
6361 possible
6362 @type ignore_consistency: boolean
6363 @ivar ignore_consistency: Whether we should ignore consistency between source
6364 and target node
6365 @type shutdown_timeout: int
6366 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
6368 """
6369 def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6370 target_node=None, failover=False, fallback=False,
6371 ignore_consistency=False,
6372 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6373 """Initializes this class.
6376 Tasklet.__init__(self, lu)
6379 self.instance_name = instance_name
6380 self.cleanup = cleanup
6381 self.live = False # will be overridden later
6382 self.iallocator = iallocator
6383 self.target_node = target_node
6384 self.failover = failover
6385 self.fallback = fallback
6386 self.ignore_consistency = ignore_consistency
6387 self.shutdown_timeout = shutdown_timeout
6389 def CheckPrereq(self):
6390 """Check prerequisites.
6392 This checks that the instance is in the cluster.
6395 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6396 instance = self.cfg.GetInstanceInfo(instance_name)
6397 assert instance is not None
6398 self.instance = instance
6400 if (not self.cleanup and not instance.admin_up and not self.failover and
6401 self.fallback):
6402 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6403 " to failover")
6404 self.failover = True
6406 if instance.disk_template not in constants.DTS_MIRRORED:
6407 if self.failover:
6408 text = "failover"
6409 else:
6410 text = "migration"
6411 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6412 " %s" % (instance.disk_template, text),
6413 errors.ECODE_STATE)
6415 if instance.disk_template in constants.DTS_EXT_MIRROR:
6416 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6418 if self.iallocator:
6419 self._RunAllocator()
6421 # self.target_node is already populated, either directly or by the
6422 # iallocator run
6423 target_node = self.target_node
6425 if len(self.lu.tasklets) == 1:
6426 # It is safe to remove locks only when we're the only tasklet in the LU
6427 nodes_keep = [instance.primary_node, self.target_node]
6428 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6429 if node not in nodes_keep]
6430 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6431 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6433 else:
6434 secondary_nodes = instance.secondary_nodes
6435 if not secondary_nodes:
6436 raise errors.ConfigurationError("No secondary node but using"
6437 " %s disk template" %
6438 instance.disk_template)
6439 target_node = secondary_nodes[0]
6440 if self.iallocator or (self.target_node and
6441 self.target_node != target_node):
6442 if self.failover:
6443 text = "failed over"
6444 else:
6445 text = "migrated"
6446 raise errors.OpPrereqError("Instances with disk template %s cannot"
6447 " be %s to arbitrary nodes"
6448 " (neither an iallocator nor a target"
6449 " node can be passed)" %
6450 (instance.disk_template, text),
6451 errors.ECODE_INVAL)
6453 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6455 # check memory requirements on the secondary node
6456 if not self.failover or instance.admin_up:
6457 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6458 instance.name, i_be[constants.BE_MEMORY],
6459 instance.hypervisor)
6460 else:
6461 self.lu.LogInfo("Not checking memory on the secondary node as"
6462 " instance will not be started")
6464 # check bridge existence
6465 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6467 if not self.cleanup:
6468 _CheckNodeNotDrained(self.lu, target_node)
6469 if not self.failover:
6470 result = self.rpc.call_instance_migratable(instance.primary_node,
6471 instance)
6472 if result.fail_msg and self.fallback:
6473 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6474 " failover")
6475 self.failover = True
6476 else:
6477 result.Raise("Can't migrate, please use failover",
6478 prereq=True, ecode=errors.ECODE_STATE)
6480 assert not (self.failover and self.cleanup)
6482 def _RunAllocator(self):
6483 """Run the allocator based on input opcode.
6485 """
6486 ial = IAllocator(self.cfg, self.rpc,
6487 mode=constants.IALLOCATOR_MODE_RELOC,
6488 name=self.instance_name,
6489 # TODO See why hail breaks with a single node below
6490 relocate_from=[self.instance.primary_node,
6491 self.instance.primary_node],
6492 )
6494 ial.Run(self.iallocator)
6496 if not ial.success:
6497 raise errors.OpPrereqError("Can't compute nodes using"
6498 " iallocator '%s': %s" %
6499 (self.iallocator, ial.info),
6500 errors.ECODE_NORES)
6501 if len(ial.result) != ial.required_nodes:
6502 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6503 " of nodes (%s), required %s" %
6504 (self.iallocator, len(ial.result),
6505 ial.required_nodes), errors.ECODE_FAULT)
6506 self.target_node = ial.result[0]
6507 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6508 self.instance_name, self.iallocator,
6509 utils.CommaJoin(ial.result))
6511 if not self.failover:
6512 if self.lu.op.live is not None and self.lu.op.mode is not None:
6513 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6514 " parameters are accepted",
6515 errors.ECODE_INVAL)
6516 if self.lu.op.live is not None:
6517 if self.lu.op.live:
6518 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6519 else:
6520 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6521 # reset the 'live' parameter to None so that repeated
6522 # invocations of CheckPrereq do not raise an exception
6523 self.lu.op.live = None
6524 elif self.lu.op.mode is None:
6525 # read the default value from the hypervisor
6526 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6527 skip_globals=False)
6528 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6530 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6531 else:
6532 # Failover is never live
6533 self.live = False
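# Illustrative summary (not in the original source) of how the block above
# resolves the migration mode:
#
#   op.live    op.mode    resulting op.mode        self.live
#   --------   --------   ---------------------    ---------
#   True       None       HT_MIGRATION_LIVE        True
#   False      None       HT_MIGRATION_NONLIVE     False
#   None       given      unchanged                mode == HT_MIGRATION_LIVE
#   None       None       hypervisor default       depends on that default
#   not None   not None   OpPrereqError            -
#
# During failover self.live is always False.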
6535 def _WaitUntilSync(self):
6536 """Poll with custom rpc for disk sync.
6538 This uses our own step-based rpc call.
6540 """
6541 self.feedback_fn("* wait until resync is done")
6542 all_done = False
6543 while not all_done:
6544 all_done = True
6545 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6546 self.nodes_ip,
6547 self.instance.disks)
6548 min_percent = 100
6549 for node, nres in result.items():
6550 nres.Raise("Cannot resync disks on node %s" % node)
6551 node_done, node_percent = nres.payload
6552 all_done = all_done and node_done
6553 if node_percent is not None:
6554 min_percent = min(min_percent, node_percent)
6555 if not all_done:
6556 if min_percent < 100:
6557 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6558 time.sleep(2)
6560 def _EnsureSecondary(self, node):
6561 """Demote a node to secondary.
6563 """
6564 self.feedback_fn("* switching node %s to secondary mode" % node)
6566 for dev in self.instance.disks:
6567 self.cfg.SetDiskID(dev, node)
6569 result = self.rpc.call_blockdev_close(node, self.instance.name,
6570 self.instance.disks)
6571 result.Raise("Cannot change disk to secondary on node %s" % node)
6573 def _GoStandalone(self):
6574 """Disconnect from the network.
6576 """
6577 self.feedback_fn("* changing into standalone mode")
6578 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6579 self.instance.disks)
6580 for node, nres in result.items():
6581 nres.Raise("Cannot disconnect disks node %s" % node)
6583 def _GoReconnect(self, multimaster):
6584 """Reconnect to the network.
6586 """
6587 if multimaster:
6588 msg = "dual-master"
6589 else:
6590 msg = "single-master"
6591 self.feedback_fn("* changing disks into %s mode" % msg)
6592 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6593 self.instance.disks,
6594 self.instance.name, multimaster)
6595 for node, nres in result.items():
6596 nres.Raise("Cannot change disks config on node %s" % node)
6598 def _ExecCleanup(self):
6599 """Try to cleanup after a failed migration.
6601 The cleanup is done by:
6602 - check that the instance is running only on one node
6603 (and update the config if needed)
6604 - change disks on its secondary node to secondary
6605 - wait until disks are fully synchronized
6606 - disconnect from the network
6607 - change disks into single-master mode
6608 - wait again until disks are fully synchronized
6610 """
6611 instance = self.instance
6612 target_node = self.target_node
6613 source_node = self.source_node
6615 # check running on only one node
6616 self.feedback_fn("* checking where the instance actually runs"
6617 " (if this hangs, the hypervisor might be in"
6618 " a bad state)")
6619 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6620 for node, result in ins_l.items():
6621 result.Raise("Can't contact node %s" % node)
6623 runningon_source = instance.name in ins_l[source_node].payload
6624 runningon_target = instance.name in ins_l[target_node].payload
6626 if runningon_source and runningon_target:
6627 raise errors.OpExecError("Instance seems to be running on two nodes,"
6628 " or the hypervisor is confused. You will have"
6629 " to ensure manually that it runs only on one"
6630 " and restart this operation.")
6632 if not (runningon_source or runningon_target):
6633 raise errors.OpExecError("Instance does not seem to be running at all."
6634 " In this case, it's safer to repair by"
6635 " running 'gnt-instance stop' to ensure disk"
6636 " shutdown, and then restarting it.")
6638 if runningon_target:
6639 # the migration has actually succeeded, we need to update the config
6640 self.feedback_fn("* instance running on secondary node (%s),"
6641 " updating config" % target_node)
6642 instance.primary_node = target_node
6643 self.cfg.Update(instance, self.feedback_fn)
6644 demoted_node = source_node
6645 else:
6646 self.feedback_fn("* instance confirmed to be running on its"
6647 " primary node (%s)" % source_node)
6648 demoted_node = target_node
6650 if instance.disk_template in constants.DTS_INT_MIRROR:
6651 self._EnsureSecondary(demoted_node)
6652 try:
6653 self._WaitUntilSync()
6654 except errors.OpExecError:
6655 # we ignore here errors, since if the device is standalone, it
6656 # won't be able to sync
6657 pass
6658 self._GoStandalone()
6659 self._GoReconnect(False)
6660 self._WaitUntilSync()
6662 self.feedback_fn("* done")
6664 def _RevertDiskStatus(self):
6665 """Try to revert the disk status after a failed migration.
6667 """
6668 target_node = self.target_node
6669 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6670 return
6672 try:
6673 self._EnsureSecondary(target_node)
6674 self._GoStandalone()
6675 self._GoReconnect(False)
6676 self._WaitUntilSync()
6677 except errors.OpExecError, err:
6678 self.lu.LogWarning("Migration failed and I can't reconnect the"
6679 " drives: error '%s'\n"
6680 "Please look and recover the instance status" %
6681 str(err))
6683 def _AbortMigration(self):
6684 """Call the hypervisor code to abort a started migration.
6686 """
6687 instance = self.instance
6688 target_node = self.target_node
6689 migration_info = self.migration_info
6691 abort_result = self.rpc.call_finalize_migration(target_node,
6692 instance,
6693 migration_info,
6694 False)
6695 abort_msg = abort_result.fail_msg
6696 if abort_msg:
6697 logging.error("Aborting migration failed on target node %s: %s",
6698 target_node, abort_msg)
6699 # Don't raise an exception here, as we still have to try to revert the
6700 # disk status, even if this step failed.
6702 def _ExecMigration(self):
6703 """Migrate an instance.
6705 The migrate is done by:
6706 - change the disks into dual-master mode
6707 - wait until disks are fully synchronized again
6708 - migrate the instance
6709 - change disks on the new secondary node (the old primary) to secondary
6710 - wait until disks are fully synchronized
6711 - change disks into single-master mode
6713 """
6714 instance = self.instance
6715 target_node = self.target_node
6716 source_node = self.source_node
6718 self.feedback_fn("* checking disk consistency between source and target")
6719 for dev in instance.disks:
6720 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6721 raise errors.OpExecError("Disk %s is degraded or not fully"
6722 " synchronized on target node,"
6723 " aborting migrate." % dev.iv_name)
6725 # First get the migration information from the remote node
6726 result = self.rpc.call_migration_info(source_node, instance)
6727 msg = result.fail_msg
6728 if msg:
6729 log_err = ("Failed fetching source migration information from %s: %s" %
6730 (source_node, msg))
6731 logging.error(log_err)
6732 raise errors.OpExecError(log_err)
6734 self.migration_info = migration_info = result.payload
6736 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6737 # Then switch the disks to master/master mode
6738 self._EnsureSecondary(target_node)
6739 self._GoStandalone()
6740 self._GoReconnect(True)
6741 self._WaitUntilSync()
6743 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6744 result = self.rpc.call_accept_instance(target_node,
6745 instance,
6746 migration_info,
6747 self.nodes_ip[target_node])
6749 msg = result.fail_msg
6750 if msg:
6751 logging.error("Instance pre-migration failed, trying to revert"
6752 " disk status: %s", msg)
6753 self.feedback_fn("Pre-migration failed, aborting")
6754 self._AbortMigration()
6755 self._RevertDiskStatus()
6756 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6757 (instance.name, msg))
6759 self.feedback_fn("* migrating instance to %s" % target_node)
6760 result = self.rpc.call_instance_migrate(source_node, instance,
6761 self.nodes_ip[target_node],
6762 self.live)
6763 msg = result.fail_msg
6764 if msg:
6765 logging.error("Instance migration failed, trying to revert"
6766 " disk status: %s", msg)
6767 self.feedback_fn("Migration failed, aborting")
6768 self._AbortMigration()
6769 self._RevertDiskStatus()
6770 raise errors.OpExecError("Could not migrate instance %s: %s" %
6771 (instance.name, msg))
6773 instance.primary_node = target_node
6774 # distribute new instance config to the other nodes
6775 self.cfg.Update(instance, self.feedback_fn)
6777 result = self.rpc.call_finalize_migration(target_node,
6778 instance,
6779 migration_info,
6780 True)
6781 msg = result.fail_msg
6782 if msg:
6783 logging.error("Instance migration succeeded, but finalization failed:"
6784 " %s", msg)
6785 raise errors.OpExecError("Could not finalize instance migration: %s" %
6786 msg)
6788 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6789 self._EnsureSecondary(source_node)
6790 self._WaitUntilSync()
6791 self._GoStandalone()
6792 self._GoReconnect(False)
6793 self._WaitUntilSync()
6795 self.feedback_fn("* done")
6797 def _ExecFailover(self):
6798 """Failover an instance.
6800 The failover is done by shutting it down on its present node and
6801 starting it on the secondary.
6803 """
6804 instance = self.instance
6805 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6807 source_node = instance.primary_node
6808 target_node = self.target_node
6810 if instance.admin_up:
6811 self.feedback_fn("* checking disk consistency between source and target")
6812 for dev in instance.disks:
6813 # for drbd, these are drbd over lvm
6814 if not _CheckDiskConsistency(self, dev, target_node, False):
6815 if not self.ignore_consistency:
6816 raise errors.OpExecError("Disk %s is degraded on target node,"
6817 " aborting failover." % dev.iv_name)
6818 else:
6819 self.feedback_fn("* not checking disk consistency as instance is not"
6820 " running")
6822 self.feedback_fn("* shutting down instance on source node")
6823 logging.info("Shutting down instance %s on node %s",
6824 instance.name, source_node)
6826 result = self.rpc.call_instance_shutdown(source_node, instance,
6827 self.shutdown_timeout)
6828 msg = result.fail_msg
6829 if msg:
6830 if self.ignore_consistency or primary_node.offline:
6831 self.lu.LogWarning("Could not shutdown instance %s on node %s."
6832 " Proceeding anyway. Please make sure node"
6833 " %s is down. Error details: %s",
6834 instance.name, source_node, source_node, msg)
6835 else:
6836 raise errors.OpExecError("Could not shutdown instance %s on"
6837 " node %s: %s" %
6838 (instance.name, source_node, msg))
6840 self.feedback_fn("* deactivating the instance's disks on source node")
6841 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6842 raise errors.OpExecError("Can't shut down the instance's disks.")
6844 instance.primary_node = target_node
6845 # distribute new instance config to the other nodes
6846 self.cfg.Update(instance, self.feedback_fn)
6848 # Only start the instance if it's marked as up
6849 if instance.admin_up:
6850 self.feedback_fn("* activating the instance's disks on target node")
6851 logging.info("Starting instance %s on node %s",
6852 instance.name, target_node)
6854 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6855 ignore_secondaries=True)
6856 if not disks_ok:
6857 _ShutdownInstanceDisks(self, instance)
6858 raise errors.OpExecError("Can't activate the instance's disks")
6860 self.feedback_fn("* starting the instance on the target node")
6861 result = self.rpc.call_instance_start(target_node, instance, None, None)
6862 msg = result.fail_msg
6863 if msg:
6864 _ShutdownInstanceDisks(self, instance)
6865 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6866 (instance.name, target_node, msg))
6868 def Exec(self, feedback_fn):
6869 """Perform the migration.
6871 """
6872 self.feedback_fn = feedback_fn
6873 self.source_node = self.instance.primary_node
6875 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6876 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6877 self.target_node = self.instance.secondary_nodes[0]
6878 # Otherwise self.target_node has been populated either
6879 # directly, or through an iallocator.
6881 self.all_nodes = [self.source_node, self.target_node]
6882 self.nodes_ip = {
6883 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6884 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6885 }
6887 if self.failover:
6888 feedback_fn("Failover instance %s" % self.instance.name)
6889 self._ExecFailover()
6890 else:
6891 feedback_fn("Migrating instance %s" % self.instance.name)
6893 if self.cleanup:
6894 return self._ExecCleanup()
6895 else:
6896 return self._ExecMigration()
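# Illustrative dispatch summary (not in the original source): a single
# TLMigrateInstance tasklet serves several opcodes. Roughly:
#
#   failover=True   -> _ExecFailover()   (shutdown, then restart elsewhere)
#   cleanup=True    -> _ExecCleanup()    (repair state after a failed migration)
#   otherwise       -> _ExecMigration()  (live or non-live migration)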
6899 def _CreateBlockDev(lu, node, instance, device, force_create,
6900 info, force_open):
6901 """Create a tree of block devices on a given node.
6903 If this device type has to be created on secondaries, create it and
6904 all its children.
6906 If not, just recurse to children keeping the same 'force' value.
6908 @param lu: the lu on whose behalf we execute
6909 @param node: the node on which to create the device
6910 @type instance: L{objects.Instance}
6911 @param instance: the instance which owns the device
6912 @type device: L{objects.Disk}
6913 @param device: the device to create
6914 @type force_create: boolean
6915 @param force_create: whether to force creation of this device; this
6916 will be changed to True whenever we find a device which has
6917 CreateOnSecondary() attribute
6918 @param info: the extra 'metadata' we should attach to the device
6919 (this will be represented as a LVM tag)
6920 @type force_open: boolean
6921 @param force_open: this parameter will be passed to the
6922 L{backend.BlockdevCreate} function where it specifies
6923 whether we run on primary or not, and it affects both
6924 the child assembly and the device's own Open() execution
6926 """
6927 if device.CreateOnSecondary():
6928 force_create = True
6930 if device.children:
6931 for child in device.children:
6932 _CreateBlockDev(lu, node, instance, child, force_create,
6933 info, force_open)
6935 if not force_create:
6936 return
6938 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6941 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6942 """Create a single block device on a given node.
6944 This will not recurse over children of the device, so they must be
6945 created in advance.
6947 @param lu: the lu on whose behalf we execute
6948 @param node: the node on which to create the device
6949 @type instance: L{objects.Instance}
6950 @param instance: the instance which owns the device
6951 @type device: L{objects.Disk}
6952 @param device: the device to create
6953 @param info: the extra 'metadata' we should attach to the device
6954 (this will be represented as a LVM tag)
6955 @type force_open: boolean
6956 @param force_open: this parameter will be passed to the
6957 L{backend.BlockdevCreate} function where it specifies
6958 whether we run on primary or not, and it affects both
6959 the child assembly and the device's own Open() execution
6961 """
6962 lu.cfg.SetDiskID(device, node)
6963 result = lu.rpc.call_blockdev_create(node, device, device.size,
6964 instance.name, force_open, info)
6965 result.Raise("Can't create block device %s on"
6966 " node %s for instance %s" % (device, node, instance.name))
6967 if device.physical_id is None:
6968 device.physical_id = result.payload
6971 def _GenerateUniqueNames(lu, exts):
6972 """Generate a suitable LV name.
6974 This will generate a logical volume name for the given instance.
6976 """
6977 results = []
6978 for val in exts:
6979 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6980 results.append("%s%s" % (new_id, val))
6982 return results
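# Illustrative example (not in the original source): with exts of
# [".disk0", ".disk1"] the helper above returns something like
# ["d2bd9ec4-....disk0", "7f3a01c8-....disk1"]; note that each element
# gets its own freshly generated unique ID.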
6984 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6985 p_minor, s_minor):
6986 """Generate a drbd8 device complete with its children.
6988 """
6989 port = lu.cfg.AllocatePort()
6990 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6991 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6992 logical_id=(vgname, names[0]))
6993 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6994 logical_id=(vgname, names[1]))
6995 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6996 logical_id=(primary, secondary, port,
6997 p_minor, s_minor,
6998 shared_secret),
6999 children=[dev_data, dev_meta],
7000 iv_name=iv_name)
7002 return drbd_dev
7004 def _GenerateDiskTemplate(lu, template_name,
7005 instance_name, primary_node,
7006 secondary_nodes, disk_info,
7007 file_storage_dir, file_driver,
7008 base_index, feedback_fn):
7009 """Generate the entire disk layout for a given template type.
7011 """
7012 #TODO: compute space requirements
7014 vgname = lu.cfg.GetVGName()
7015 disk_count = len(disk_info)
7016 disks = []
7017 if template_name == constants.DT_DISKLESS:
7018 pass
7019 elif template_name == constants.DT_PLAIN:
7020 if len(secondary_nodes) != 0:
7021 raise errors.ProgrammerError("Wrong template configuration")
7023 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7024 for i in range(disk_count)])
7025 for idx, disk in enumerate(disk_info):
7026 disk_index = idx + base_index
7027 vg = disk.get(constants.IDISK_VG, vgname)
7028 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7029 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7030 size=disk[constants.IDISK_SIZE],
7031 logical_id=(vg, names[idx]),
7032 iv_name="disk/%d" % disk_index,
7033 mode=disk[constants.IDISK_MODE])
7034 disks.append(disk_dev)
7035 elif template_name == constants.DT_DRBD8:
7036 if len(secondary_nodes) != 1:
7037 raise errors.ProgrammerError("Wrong template configuration")
7038 remote_node = secondary_nodes[0]
7039 minors = lu.cfg.AllocateDRBDMinor(
7040 [primary_node, remote_node] * len(disk_info), instance_name)
7042 names = []
7043 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7044 for i in range(disk_count)]):
7045 names.append(lv_prefix + "_data")
7046 names.append(lv_prefix + "_meta")
7047 for idx, disk in enumerate(disk_info):
7048 disk_index = idx + base_index
7049 vg = disk.get(constants.IDISK_VG, vgname)
7050 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7051 disk[constants.IDISK_SIZE], vg,
7052 names[idx * 2:idx * 2 + 2],
7053 "disk/%d" % disk_index,
7054 minors[idx * 2], minors[idx * 2 + 1])
7055 disk_dev.mode = disk[constants.IDISK_MODE]
7056 disks.append(disk_dev)
7057 elif template_name == constants.DT_FILE:
7058 if len(secondary_nodes) != 0:
7059 raise errors.ProgrammerError("Wrong template configuration")
7061 opcodes.RequireFileStorage()
7063 for idx, disk in enumerate(disk_info):
7064 disk_index = idx + base_index
7065 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7066 size=disk[constants.IDISK_SIZE],
7067 iv_name="disk/%d" % disk_index,
7068 logical_id=(file_driver,
7069 "%s/disk%d" % (file_storage_dir,
7071 mode=disk[constants.IDISK_MODE])
7072 disks.append(disk_dev)
7073 elif template_name == constants.DT_SHARED_FILE:
7074 if len(secondary_nodes) != 0:
7075 raise errors.ProgrammerError("Wrong template configuration")
7077 opcodes.RequireSharedFileStorage()
7079 for idx, disk in enumerate(disk_info):
7080 disk_index = idx + base_index
7081 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7082 size=disk[constants.IDISK_SIZE],
7083 iv_name="disk/%d" % disk_index,
7084 logical_id=(file_driver,
7085 "%s/disk%d" % (file_storage_dir,
7087 mode=disk[constants.IDISK_MODE])
7088 disks.append(disk_dev)
7089 elif template_name == constants.DT_BLOCK:
7090 if len(secondary_nodes) != 0:
7091 raise errors.ProgrammerError("Wrong template configuration")
7093 for idx, disk in enumerate(disk_info):
7094 disk_index = idx + base_index
7095 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7096 size=disk[constants.IDISK_SIZE],
7097 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7098 disk[constants.IDISK_ADOPT]),
7099 iv_name="disk/%d" % disk_index,
7100 mode=disk[constants.IDISK_MODE])
7101 disks.append(disk_dev)
7103 else:
7104 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7106 return disks
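# Illustrative sketch (not in the original source): generating the layout
# for a plain (LVM-only) instance with two disks. Argument values below are
# hypothetical; feedback_fn can be any callable accepting a string.
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN,
#                                 "inst1.example.com", "node1", [],
#                                 [{constants.IDISK_SIZE: 1024,
#                                   constants.IDISK_MODE: "rw"},
#                                  {constants.IDISK_SIZE: 2048,
#                                   constants.IDISK_MODE: "rw"}],
#                                 None, None, 0, lambda msg: None)
#   # -> two LD_LV Disk objects with iv_names "disk/0" and "disk/1"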
7108 def _GetInstanceInfoText(instance):
7109 """Compute the text that should be added to the disk's metadata.
7111 """
7112 return "originstname+%s" % instance.name
7115 def _CalcEta(time_taken, written, total_size):
7116 """Calculates the ETA based on size written and total size.
7118 @param time_taken: The time taken so far
7119 @param written: amount written so far
7120 @param total_size: The total size of data to be written
7121 @return: The remaining time in seconds
7123 """
7124 avg_time = time_taken / float(written)
7125 return (total_size - written) * avg_time
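# Worked example (not in the original source): if 512 MiB out of 2048 MiB
# were written in 30 seconds, the average is 30/512 ~= 0.0586 s/MiB, so
#
#   _CalcEta(30.0, 512, 2048) == (2048 - 512) * (30.0 / 512) == 90.0
#
# i.e. roughly 90 seconds remain.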
7128 def _WipeDisks(lu, instance):
7129 """Wipes instance disks.
7131 @type lu: L{LogicalUnit}
7132 @param lu: the logical unit on whose behalf we execute
7133 @type instance: L{objects.Instance}
7134 @param instance: the instance whose disks we should wipe
7135 @return: the success of the wipe
7137 """
7138 node = instance.primary_node
7140 for device in instance.disks:
7141 lu.cfg.SetDiskID(device, node)
7143 logging.info("Pause sync of instance %s disks", instance.name)
7144 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7146 for idx, success in enumerate(result.payload):
7147 if not success:
7148 logging.warn("pause-sync of instance %s for disks %d failed",
7149 instance.name, idx)
7151 try:
7152 for idx, device in enumerate(instance.disks):
7153 lu.LogInfo("* Wiping disk %d", idx)
7154 logging.info("Wiping disk %d for instance %s, node %s",
7155 idx, instance.name, node)
7157 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7158 # MAX_WIPE_CHUNK at max
7159 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7160 constants.MIN_WIPE_CHUNK_PERCENT)
7162 offset = 0
7163 size = device.size
7164 last_output = 0
7165 start_time = time.time()
7167 while offset < size:
7168 wipe_size = min(wipe_chunk_size, size - offset)
7169 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7170 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7171 (idx, offset, wipe_size))
7172 now = time.time()
7173 offset += wipe_size
7174 if now - last_output >= 60:
7175 eta = _CalcEta(now - start_time, offset, size)
7176 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7177 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7178 last_output = now
7179 finally:
7180 logging.info("Resume sync of instance %s disks", instance.name)
7182 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7184 for idx, success in enumerate(result.payload):
7185 if not success:
7186 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7187 " look at the status and troubleshoot the issue.", idx)
7188 logging.warn("resume-sync of instance %s for disks %d failed",
7189 instance.name, idx)
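# Worked example (not in the original source; assumes the usual constant
# values MAX_WIPE_CHUNK = 128 MiB and MIN_WIPE_CHUNK_PERCENT = 10): for a
# 5120 MiB disk, 10% is 512 MiB, so the chunk size is capped at 128 MiB;
# for an 800 MiB disk, 10% is 80 MiB, below the cap, and used as-is.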
7192 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7193 """Create all disks for an instance.
7195 This abstracts away some work from AddInstance.
7197 @type lu: L{LogicalUnit}
7198 @param lu: the logical unit on whose behalf we execute
7199 @type instance: L{objects.Instance}
7200 @param instance: the instance whose disks we should create
7201 @type to_skip: list
7202 @param to_skip: list of indices to skip
7203 @type target_node: string
7204 @param target_node: if passed, overrides the target node for creation
7206 @return: the success of the creation
7208 """
7209 info = _GetInstanceInfoText(instance)
7210 if target_node is None:
7211 pnode = instance.primary_node
7212 all_nodes = instance.all_nodes
7213 else:
7214 pnode = target_node
7215 all_nodes = [pnode]
7217 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7218 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7219 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7221 result.Raise("Failed to create directory '%s' on"
7222 " node %s" % (file_storage_dir, pnode))
7224 # Note: this needs to be kept in sync with adding of disks in
7225 # LUInstanceSetParams
7226 for idx, device in enumerate(instance.disks):
7227 if to_skip and idx in to_skip:
7228 continue
7229 logging.info("Creating volume %s for instance %s",
7230 device.iv_name, instance.name)
7231 #HARDCODE
7232 for node in all_nodes:
7233 f_create = node == pnode
7234 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7237 def _RemoveDisks(lu, instance, target_node=None):
7238 """Remove all disks for an instance.
7240 This abstracts away some work from `AddInstance()` and
7241 `RemoveInstance()`. Note that in case some of the devices couldn't
7242 be removed, the removal will continue with the other ones (compare
7243 with `_CreateDisks()`).
7245 @type lu: L{LogicalUnit}
7246 @param lu: the logical unit on whose behalf we execute
7247 @type instance: L{objects.Instance}
7248 @param instance: the instance whose disks we should remove
7249 @type target_node: string
7250 @param target_node: used to override the node on which to remove the disks
7252 @return: the success of the removal
7254 """
7255 logging.info("Removing block devices for instance %s", instance.name)
7257 all_result = True
7258 for device in instance.disks:
7259 if target_node:
7260 edata = [(target_node, device)]
7261 else:
7262 edata = device.ComputeNodeTree(instance.primary_node)
7263 for node, disk in edata:
7264 lu.cfg.SetDiskID(disk, node)
7265 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7266 if msg:
7267 lu.LogWarning("Could not remove block device %s on node %s,"
7268 " continuing anyway: %s", device.iv_name, node, msg)
7269 all_result = False
7271 if instance.disk_template == constants.DT_FILE:
7272 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7273 if target_node:
7274 tgt = target_node
7275 else:
7276 tgt = instance.primary_node
7277 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7278 if result.fail_msg:
7279 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7280 file_storage_dir, instance.primary_node, result.fail_msg)
7281 all_result = False
7283 return all_result
7286 def _ComputeDiskSizePerVG(disk_template, disks):
7287 """Compute disk size requirements in the volume group
7289 """
7290 def _compute(disks, payload):
7291 """Universal algorithm.
7293 """
7294 vgs = {}
7295 for disk in disks:
7296 vgs[disk[constants.IDISK_VG]] = \
7297 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7299 return vgs
7301 # Required free disk space as a function of disk and swap space
7302 req_size_dict = {
7303 constants.DT_DISKLESS: {},
7304 constants.DT_PLAIN: _compute(disks, 0),
7305 # 128 MB are added for drbd metadata for each disk
7306 constants.DT_DRBD8: _compute(disks, 128),
7307 constants.DT_FILE: {},
7308 constants.DT_SHARED_FILE: {},
7309 }
7311 if disk_template not in req_size_dict:
7312 raise errors.ProgrammerError("Disk template '%s' size requirement"
7313 " is unknown" % disk_template)
7315 return req_size_dict[disk_template]
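# Worked example (not in the original source): two DRBD8 disks, 1024 MiB in
# vg "xenvg" and 2048 MiB in vg "fastvg", would yield
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "fastvg",
#                           constants.IDISK_SIZE: 2048}])
#   # -> {"xenvg": 1152, "fastvg": 2176}   (128 MiB of metadata each)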
7318 def _ComputeDiskSize(disk_template, disks):
7319 """Compute disk size requirements in the volume group
7321 """
7322 # Required free disk space as a function of disk and swap space
7323 req_size_dict = {
7324 constants.DT_DISKLESS: None,
7325 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7326 # 128 MB are added for drbd metadata for each disk
7327 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7328 constants.DT_FILE: None,
7329 constants.DT_SHARED_FILE: 0,
7330 constants.DT_BLOCK: 0,
7331 }
7333 if disk_template not in req_size_dict:
7334 raise errors.ProgrammerError("Disk template '%s' size requirement"
7335 " is unknown" % disk_template)
7337 return req_size_dict[disk_template]
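# Worked example (not in the original source): for two disks of 1024 and
# 2048 MiB, _ComputeDiskSize(constants.DT_DRBD8, ...) returns
# (1024 + 128) + (2048 + 128) == 3328, while the plain template needs only
# 1024 + 2048 == 3072.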
7340 def _FilterVmNodes(lu, nodenames):
7341 """Filters out non-vm_capable nodes from a list.
7343 @type lu: L{LogicalUnit}
7344 @param lu: the logical unit for which we check
7345 @type nodenames: list
7346 @param nodenames: the list of nodes on which we should check
7348 @return: the list of vm-capable nodes
7350 """
7351 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7352 return [name for name in nodenames if name not in vm_nodes]
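# Illustrative example (not in the original source): if the cluster's
# non-vm_capable node set is {"node2"}, then
# _FilterVmNodes(lu, ["node1", "node2", "node3"]) -> ["node1", "node3"].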
7355 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7356 """Hypervisor parameter validation.
7358 This function abstracts the hypervisor parameter validation to be
7359 used in both instance create and instance modify.
7361 @type lu: L{LogicalUnit}
7362 @param lu: the logical unit for which we check
7363 @type nodenames: list
7364 @param nodenames: the list of nodes on which we should check
7365 @type hvname: string
7366 @param hvname: the name of the hypervisor we should use
7367 @type hvparams: dict
7368 @param hvparams: the parameters which we need to check
7369 @raise errors.OpPrereqError: if the parameters are not valid
7371 """
7372 nodenames = _FilterVmNodes(lu, nodenames)
7373 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7374 hvname,
7375 hvparams)
7376 for node in nodenames:
7377 info = hvinfo[node]
7378 if info.offline:
7379 continue
7380 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7383 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7384 """OS parameters validation.
7386 @type lu: L{LogicalUnit}
7387 @param lu: the logical unit for which we check
7388 @type required: boolean
7389 @param required: whether the validation should fail if the OS is not
7390 found
7391 @type nodenames: list
7392 @param nodenames: the list of nodes on which we should check
7393 @type osname: string
7394 @param osname: the name of the OS we should use
7395 @type osparams: dict
7396 @param osparams: the parameters which we need to check
7397 @raise errors.OpPrereqError: if the parameters are not valid
7399 """
7400 nodenames = _FilterVmNodes(lu, nodenames)
7401 result = lu.rpc.call_os_validate(required, nodenames, osname,
7402 [constants.OS_VALIDATE_PARAMETERS],
7403 osparams)
7404 for node, nres in result.items():
7405 # we don't check for offline cases since this should be run only
7406 # against the master node and/or an instance's nodes
7407 nres.Raise("OS Parameters validation failed on node %s" % node)
7408 if not nres.payload:
7409 lu.LogInfo("OS %s not found on node %s, validation skipped",
7410 osname, node)
7413 class LUInstanceCreate(LogicalUnit):
7414 """Create an instance.
7416 """
7417 HPATH = "instance-add"
7418 HTYPE = constants.HTYPE_INSTANCE
7419 REQ_BGL = False
7421 def CheckArguments(self):
7422 """Check arguments.
7424 """
7425 # do not require name_check to ease forward/backward compatibility
7427 if self.op.no_install and self.op.start:
7428 self.LogInfo("No-installation mode selected, disabling startup")
7429 self.op.start = False
7430 # validate/normalize the instance name
7431 self.op.instance_name = \
7432 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7434 if self.op.ip_check and not self.op.name_check:
7435 # TODO: make the ip check more flexible and not depend on the name check
7436 raise errors.OpPrereqError("Cannot do ip check without a name check",
7437 errors.ECODE_INVAL)
7439 # check nics' parameter names
7440 for nic in self.op.nics:
7441 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7443 # check disks. parameter names and consistent adopt/no-adopt strategy
7444 has_adopt = has_no_adopt = False
7445 for disk in self.op.disks:
7446 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7447 if constants.IDISK_ADOPT in disk:
7448 has_adopt = True
7449 else:
7450 has_no_adopt = True
7451 if has_adopt and has_no_adopt:
7452 raise errors.OpPrereqError("Either all disks are adopted or none is",
7453 errors.ECODE_INVAL)
7454 if has_adopt:
7455 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7456 raise errors.OpPrereqError("Disk adoption is not supported for the"
7457 " '%s' disk template" %
7458 self.op.disk_template,
7459 errors.ECODE_INVAL)
7460 if self.op.iallocator is not None:
7461 raise errors.OpPrereqError("Disk adoption not allowed with an"
7462 " iallocator script", errors.ECODE_INVAL)
7463 if self.op.mode == constants.INSTANCE_IMPORT:
7464 raise errors.OpPrereqError("Disk adoption not allowed for"
7465 " instance import", errors.ECODE_INVAL)
7466 else:
7467 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7468 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7469 " but no 'adopt' parameter given" %
7470 self.op.disk_template,
7471 errors.ECODE_INVAL)
7473 self.adopt_disks = has_adopt
7475 # instance name verification
7476 if self.op.name_check:
7477 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7478 self.op.instance_name = self.hostname1.name
7479 # used in CheckPrereq for ip ping check
7480 self.check_ip = self.hostname1.ip
7481 else:
7482 self.check_ip = None
7484 # file storage checks
7485 if (self.op.file_driver and
7486 not self.op.file_driver in constants.FILE_DRIVER):
7487 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7488 self.op.file_driver, errors.ECODE_INVAL)
7490 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7491 raise errors.OpPrereqError("File storage directory path not absolute",
7492 errors.ECODE_INVAL)
7494 ### Node/iallocator related checks
7495 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7497 if self.op.pnode is not None:
7498 if self.op.disk_template in constants.DTS_INT_MIRROR:
7499 if self.op.snode is None:
7500 raise errors.OpPrereqError("The networked disk templates need"
7501 " a mirror node", errors.ECODE_INVAL)
7503 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7505 self.op.snode = None
7507 self._cds = _GetClusterDomainSecret()
7509 if self.op.mode == constants.INSTANCE_IMPORT:
7510 # On import force_variant must be True, because if we forced it at
7511 # initial install, our only chance when importing it back is that it
7512 # works again!
7513 self.op.force_variant = True
7515 if self.op.no_install:
7516 self.LogInfo("No-installation mode has no effect during import")
7518 elif self.op.mode == constants.INSTANCE_CREATE:
7519 if self.op.os_type is None:
7520 raise errors.OpPrereqError("No guest OS specified",
7521 errors.ECODE_INVAL)
7522 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7523 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7524 " installation" % self.op.os_type,
7525 errors.ECODE_INVAL)
7526 if self.op.disk_template is None:
7527 raise errors.OpPrereqError("No disk template specified",
7528 errors.ECODE_INVAL)
7530 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7531 # Check handshake to ensure both clusters have the same domain secret
7532 src_handshake = self.op.source_handshake
7533 if not src_handshake:
7534 raise errors.OpPrereqError("Missing source handshake",
7535 errors.ECODE_INVAL)
7537 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7538 src_handshake)
7539 if errmsg:
7540 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7541 errors.ECODE_INVAL)
7543 # Load and check source CA
7544 self.source_x509_ca_pem = self.op.source_x509_ca
7545 if not self.source_x509_ca_pem:
7546 raise errors.OpPrereqError("Missing source X509 CA",
7547 errors.ECODE_INVAL)
7549 try:
7550 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7551 self._cds)
7552 except OpenSSL.crypto.Error, err:
7553 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7554 (err, ), errors.ECODE_INVAL)
7556 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7557 if errcode is not None:
7558 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7559 errors.ECODE_INVAL)
7561 self.source_x509_ca = cert
7563 src_instance_name = self.op.source_instance_name
7564 if not src_instance_name:
7565 raise errors.OpPrereqError("Missing source instance name",
7566 errors.ECODE_INVAL)
7568 self.source_instance_name = \
7569 netutils.GetHostname(name=src_instance_name).name
7571 else:
7572 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7573 self.op.mode, errors.ECODE_INVAL)
7575 def ExpandNames(self):
7576 """ExpandNames for CreateInstance.
7578 Figure out the right locks for instance creation.
7580 """
7581 self.needed_locks = {}
7583 instance_name = self.op.instance_name
7584 # this is just a preventive check, but someone might still add this
7585 # instance in the meantime, and creation will fail at lock-add time
7586 if instance_name in self.cfg.GetInstanceList():
7587 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7588 instance_name, errors.ECODE_EXISTS)
7590 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7592 if self.op.iallocator:
7593 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7594 else:
7595 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7596 nodelist = [self.op.pnode]
7597 if self.op.snode is not None:
7598 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7599 nodelist.append(self.op.snode)
7600 self.needed_locks[locking.LEVEL_NODE] = nodelist
7602 # in case of import lock the source node too
7603 if self.op.mode == constants.INSTANCE_IMPORT:
7604 src_node = self.op.src_node
7605 src_path = self.op.src_path
7607 if src_path is None:
7608 self.op.src_path = src_path = self.op.instance_name
7610 if src_node is None:
7611 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7612 self.op.src_node = None
7613 if os.path.isabs(src_path):
7614 raise errors.OpPrereqError("Importing an instance from an absolute"
7615 " path requires a source node option.",
7616 errors.ECODE_INVAL)
7617 else:
7618 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7619 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7620 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7621 if not os.path.isabs(src_path):
7622 self.op.src_path = src_path = \
7623 utils.PathJoin(constants.EXPORT_DIR, src_path)
7625 def _RunAllocator(self):
7626 """Run the allocator based on input opcode.
7628 """
7629 nics = [n.ToDict() for n in self.nics]
7630 ial = IAllocator(self.cfg, self.rpc,
7631 mode=constants.IALLOCATOR_MODE_ALLOC,
7632 name=self.op.instance_name,
7633 disk_template=self.op.disk_template,
7634 tags=self.op.tags,
7635 os=self.op.os_type,
7636 vcpus=self.be_full[constants.BE_VCPUS],
7637 mem_size=self.be_full[constants.BE_MEMORY],
7638 disks=self.disks,
7639 nics=nics,
7640 hypervisor=self.op.hypervisor,
7641 )
7643 ial.Run(self.op.iallocator)
7645 if not ial.success:
7646 raise errors.OpPrereqError("Can't compute nodes using"
7647 " iallocator '%s': %s" %
7648 (self.op.iallocator, ial.info),
7649 errors.ECODE_NORES)
7650 if len(ial.result) != ial.required_nodes:
7651 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7652 " of nodes (%s), required %s" %
7653 (self.op.iallocator, len(ial.result),
7654 ial.required_nodes), errors.ECODE_FAULT)
7655 self.op.pnode = ial.result[0]
7656 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7657 self.op.instance_name, self.op.iallocator,
7658 utils.CommaJoin(ial.result))
7659 if ial.required_nodes == 2:
7660 self.op.snode = ial.result[1]
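# Editor's sketch (hypothetical node names): for a mirrored template the
# allocator reply consumed above would look roughly like
#   ial.success == True, ial.required_nodes == 2,
#   ial.result == ["node1.example.com", "node2.example.com"]
# so the primary and secondary nodes become the first and second entries;
# single-node templates require (and return) exactly one name.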
7662 def BuildHooksEnv(self):
7663 """Build hooks env.
7665 This runs on master, primary and secondary nodes of the instance.
7667 """
7668 env = {
7669 "ADD_MODE": self.op.mode,
7670 }
7671 if self.op.mode == constants.INSTANCE_IMPORT:
7672 env["SRC_NODE"] = self.op.src_node
7673 env["SRC_PATH"] = self.op.src_path
7674 env["SRC_IMAGES"] = self.src_images
7676 env.update(_BuildInstanceHookEnv(
7677 name=self.op.instance_name,
7678 primary_node=self.op.pnode,
7679 secondary_nodes=self.secondaries,
7680 status=self.op.start,
7681 os_type=self.op.os_type,
7682 memory=self.be_full[constants.BE_MEMORY],
7683 vcpus=self.be_full[constants.BE_VCPUS],
7684 nics=_NICListToTuple(self, self.nics),
7685 disk_template=self.op.disk_template,
7686 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7687 for d in self.disks],
7690 hypervisor_name=self.op.hypervisor,
7691 ))
7693 return env
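# Editor's sketch (hypothetical values): the hooks environment built above is
# a flat dict of strings along the lines of
#   {"ADD_MODE": "create", "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com", ...}
# with the full key set supplied by _BuildInstanceHookEnv.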
7695 def BuildHooksNodes(self):
7696 """Build hooks nodes.
7699 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7700 return (nl, nl)
7702 def _ReadExportInfo(self):
7703 """Reads the export information from disk.
7705 It will override the opcode source node and path with the actual
7706 information, if these two were not specified before.
7708 @return: the export information
7710 """
7711 assert self.op.mode == constants.INSTANCE_IMPORT
7713 src_node = self.op.src_node
7714 src_path = self.op.src_path
7716 if src_node is None:
7717 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7718 exp_list = self.rpc.call_export_list(locked_nodes)
7719 found = False
7720 for node in exp_list:
7721 if exp_list[node].fail_msg:
7722 continue
7723 if src_path in exp_list[node].payload:
7724 found = True
7725 self.op.src_node = src_node = node
7726 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7727 src_path)
7728 break
7729 if not found:
7730 raise errors.OpPrereqError("No export found for relative path %s" %
7731 src_path, errors.ECODE_INVAL)
7733 _CheckNodeOnline(self, src_node)
7734 result = self.rpc.call_export_info(src_node, src_path)
7735 result.Raise("No export or invalid export found in dir %s" % src_path)
7737 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7738 if not export_info.has_section(constants.INISECT_EXP):
7739 raise errors.ProgrammerError("Corrupted export config",
7740 errors.ECODE_ENVIRON)
7742 ei_version = export_info.get(constants.INISECT_EXP, "version")
7743 if int(ei_version) != constants.EXPORT_VERSION:
7744 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7745 (ei_version, constants.EXPORT_VERSION),
7746 errors.ECODE_ENVIRON)
7748 return export_info
7749 def _ReadExportParams(self, einfo):
7750 """Use export parameters as defaults.
7752 In case the opcode doesn't specify (as in override) some instance
7753 parameters, then try to use them from the export information, if
7754 that declares them.
7756 """
7757 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7759 if self.op.disk_template is None:
7760 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7761 self.op.disk_template = einfo.get(constants.INISECT_INS,
7762 "disk_template")
7763 else:
7764 raise errors.OpPrereqError("No disk template specified and the export"
7765 " is missing the disk_template information",
7766 errors.ECODE_INVAL)
7768 if not self.op.disks:
7769 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7770 disks = []
7771 # TODO: import the disk iv_name too
7772 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7773 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7774 disks.append({constants.IDISK_SIZE: disk_sz})
7775 self.op.disks = disks
7776 else:
7777 raise errors.OpPrereqError("No disk info specified and the export"
7778 " is missing the disk information",
7779 errors.ECODE_INVAL)
7781 if (not self.op.nics and
7782 einfo.has_option(constants.INISECT_INS, "nic_count")):
7783 nics = []
7784 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7785 ndict = {}
7786 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7787 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7788 ndict[name] = v
7789 nics.append(ndict)
7790 self.op.nics = nics
7792 if (self.op.hypervisor is None and
7793 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7794 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7795 if einfo.has_section(constants.INISECT_HYP):
7796 # use the export parameters but do not override the ones
7797 # specified by the user
7798 for name, value in einfo.items(constants.INISECT_HYP):
7799 if name not in self.op.hvparams:
7800 self.op.hvparams[name] = value
7802 if einfo.has_section(constants.INISECT_BEP):
7803 # use the parameters, without overriding
7804 for name, value in einfo.items(constants.INISECT_BEP):
7805 if name not in self.op.beparams:
7806 self.op.beparams[name] = value
7807 else:
7808 # try to read the parameters old style, from the main section
7809 for name in constants.BES_PARAMETERS:
7810 if (name not in self.op.beparams and
7811 einfo.has_option(constants.INISECT_INS, name)):
7812 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7814 if einfo.has_section(constants.INISECT_OSP):
7815 # use the parameters, without overriding
7816 for name, value in einfo.items(constants.INISECT_OSP):
7817 if name not in self.op.osparams:
7818 self.op.osparams[name] = value
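# Editor's note: a minimal sketch of the export file sections consumed above
# (hypothetical content, ConfigParser syntax):
#   [export]
#   version = 0
#   os = debootstrap
#   [instance]
#   disk_count = 1
#   disk0_size = 10240
#   nic_count = 1
#   nic0_mac = aa:00:00:11:22:33
# Anything read here only fills in opcode parameters left unspecified.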
7820 def _RevertToDefaults(self, cluster):
7821 """Revert the instance parameters to the default values.
7823 """
7825 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7826 for name in self.op.hvparams.keys():
7827 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7828 del self.op.hvparams[name]
7830 be_defs = cluster.SimpleFillBE({})
7831 for name in self.op.beparams.keys():
7832 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7833 del self.op.beparams[name]
7835 nic_defs = cluster.SimpleFillNIC({})
7836 for nic in self.op.nics:
7837 for name in constants.NICS_PARAMETERS:
7838 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7839 del nic[name]
7841 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7842 for name in self.op.osparams.keys():
7843 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7844 del self.op.osparams[name]
7846 def CheckPrereq(self):
7847 """Check prerequisites.
7849 """
7850 if self.op.mode == constants.INSTANCE_IMPORT:
7851 export_info = self._ReadExportInfo()
7852 self._ReadExportParams(export_info)
7854 if (not self.cfg.GetVGName() and
7855 self.op.disk_template not in constants.DTS_NOT_LVM):
7856 raise errors.OpPrereqError("Cluster does not support lvm-based"
7857 " instances", errors.ECODE_STATE)
7859 if self.op.hypervisor is None:
7860 self.op.hypervisor = self.cfg.GetHypervisorType()
7862 cluster = self.cfg.GetClusterInfo()
7863 enabled_hvs = cluster.enabled_hypervisors
7864 if self.op.hypervisor not in enabled_hvs:
7865 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7866 " cluster (%s)" % (self.op.hypervisor,
7867 ",".join(enabled_hvs)),
7868 errors.ECODE_STATE)
7870 # check hypervisor parameter syntax (locally)
7871 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7872 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7873 self.op.hvparams)
7874 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7875 hv_type.CheckParameterSyntax(filled_hvp)
7876 self.hv_full = filled_hvp
7877 # check that we don't specify global parameters on an instance
7878 _CheckGlobalHvParams(self.op.hvparams)
7880 # fill and remember the beparams dict
7881 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7882 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7884 # build os parameters
7885 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7887 # now that hvp/bep are in final format, let's reset to defaults,
7888 # if told to do so
7889 if self.op.identify_defaults:
7890 self._RevertToDefaults(cluster)
7892 # NIC buildup
7893 self.nics = []
7894 for idx, nic in enumerate(self.op.nics):
7895 nic_mode_req = nic.get(constants.INIC_MODE, None)
7896 nic_mode = nic_mode_req
7897 if nic_mode is None:
7898 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7900 # in routed mode, for the first nic, the default ip is 'auto'
7901 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7902 default_ip_mode = constants.VALUE_AUTO
7903 else:
7904 default_ip_mode = constants.VALUE_NONE
7906 # ip validity checks
7907 ip = nic.get(constants.INIC_IP, default_ip_mode)
7908 if ip is None or ip.lower() == constants.VALUE_NONE:
7909 nic_ip = None
7910 elif ip.lower() == constants.VALUE_AUTO:
7911 if not self.op.name_check:
7912 raise errors.OpPrereqError("IP address set to auto but name checks"
7913 " have been skipped",
7914 errors.ECODE_INVAL)
7915 nic_ip = self.hostname1.ip
7916 else:
7917 if not netutils.IPAddress.IsValid(ip):
7918 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7919 errors.ECODE_INVAL)
7920 nic_ip = ip
7922 # TODO: check the ip address for uniqueness
7923 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7924 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7925 errors.ECODE_INVAL)
7927 # MAC address verification
7928 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7929 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7930 mac = utils.NormalizeAndValidateMac(mac)
7932 try:
7933 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7934 except errors.ReservationError:
7935 raise errors.OpPrereqError("MAC address %s already in use"
7936 " in cluster" % mac,
7937 errors.ECODE_NOTUNIQUE)
7939 # Build nic parameters
7940 link = nic.get(constants.INIC_LINK, None)
7941 nicparams = {}
7942 if nic_mode_req:
7943 nicparams[constants.NIC_MODE] = nic_mode_req
7944 if link:
7945 nicparams[constants.NIC_LINK] = link
7947 check_params = cluster.SimpleFillNIC(nicparams)
7948 objects.NIC.CheckParameterSyntax(check_params)
7949 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
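# Editor's sketch (hypothetical values): each entry appended above is an
# objects.NIC such as
#   objects.NIC(mac=constants.VALUE_AUTO, ip=None,
#               nicparams={constants.NIC_MODE: constants.NIC_MODE_BRIDGED})
# where nicparams only holds explicitly requested values; cluster defaults
# were validated via SimpleFillNIC but are not stored per NIC.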
7951 # disk checks/pre-build
7952 default_vg = self.cfg.GetVGName()
7953 self.disks = []
7954 for disk in self.op.disks:
7955 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7956 if mode not in constants.DISK_ACCESS_SET:
7957 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7958 mode, errors.ECODE_INVAL)
7959 size = disk.get(constants.IDISK_SIZE, None)
7960 if size is None:
7961 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7962 try:
7963 size = int(size)
7964 except (TypeError, ValueError):
7965 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7966 errors.ECODE_INVAL)
7967 new_disk = {
7968 constants.IDISK_SIZE: size,
7969 constants.IDISK_MODE: mode,
7970 constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7971 }
7972 if constants.IDISK_ADOPT in disk:
7973 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7974 self.disks.append(new_disk)
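# Editor's sketch (hypothetical values): a validated entry of self.disks is
#   {constants.IDISK_SIZE: 10240,  # MiB
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg",
#    constants.IDISK_ADOPT: "lv_inst1"}  # present only when adopting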
7976 if self.op.mode == constants.INSTANCE_IMPORT:
7978 # Check that the new instance doesn't have fewer disks than the export
7979 instance_disks = len(self.disks)
7980 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7981 if instance_disks < export_disks:
7982 raise errors.OpPrereqError("Not enough disks to import."
7983 " (instance: %d, export: %d)" %
7984 (instance_disks, export_disks),
7985 errors.ECODE_INVAL)
7987 disk_images = []
7988 for idx in range(export_disks):
7989 option = 'disk%d_dump' % idx
7990 if export_info.has_option(constants.INISECT_INS, option):
7991 # FIXME: are the old os-es, disk sizes, etc. useful?
7992 export_name = export_info.get(constants.INISECT_INS, option)
7993 image = utils.PathJoin(self.op.src_path, export_name)
7994 disk_images.append(image)
7995 else:
7996 disk_images.append(False)
7998 self.src_images = disk_images
8000 old_name = export_info.get(constants.INISECT_INS, 'name')
8001 try:
8002 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8003 except (TypeError, ValueError), err:
8004 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8005 " an integer: %s" % str(err),
8006 errors.ECODE_INVAL)
8007 if self.op.instance_name == old_name:
8008 for idx, nic in enumerate(self.nics):
8009 if nic.mac == constants.VALUE_AUTO and exp_nic_count > idx:
8010 nic_mac_ini = 'nic%d_mac' % idx
8011 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8013 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8015 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8016 if self.op.ip_check:
8017 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8018 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8019 (self.check_ip, self.op.instance_name),
8020 errors.ECODE_NOTUNIQUE)
8022 #### mac address generation
8023 # By generating the mac address here, both the allocator and the hooks get
8024 # the real final mac address rather than the 'auto' or 'generate' value.
8025 # There is a race condition between the generation and the instance object
8026 # creation, which means that we know the mac is valid now, but we're not
8027 # sure it will be when we actually add the instance. If things go bad
8028 # adding the instance will abort because of a duplicate mac, and the
8029 # creation job will fail.
8030 for nic in self.nics:
8031 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8032 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8036 if self.op.iallocator is not None:
8037 self._RunAllocator()
8039 #### node related checks
8041 # check primary node
8042 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8043 assert self.pnode is not None, \
8044 "Cannot retrieve locked node %s" % self.op.pnode
8045 if pnode.offline:
8046 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8047 pnode.name, errors.ECODE_STATE)
8048 if pnode.drained:
8049 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8050 pnode.name, errors.ECODE_STATE)
8051 if not pnode.vm_capable:
8052 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8053 " '%s'" % pnode.name, errors.ECODE_STATE)
8055 self.secondaries = []
8057 # mirror node verification
8058 if self.op.disk_template in constants.DTS_INT_MIRROR:
8059 if self.op.snode == pnode.name:
8060 raise errors.OpPrereqError("The secondary node cannot be the"
8061 " primary node.", errors.ECODE_INVAL)
8062 _CheckNodeOnline(self, self.op.snode)
8063 _CheckNodeNotDrained(self, self.op.snode)
8064 _CheckNodeVmCapable(self, self.op.snode)
8065 self.secondaries.append(self.op.snode)
8067 nodenames = [pnode.name] + self.secondaries
8069 if not self.adopt_disks:
8070 # Check lv size requirements, if not adopting
8071 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8072 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8074 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8075 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8076 disk[constants.IDISK_ADOPT])
8077 for disk in self.disks])
8078 if len(all_lvs) != len(self.disks):
8079 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8080 errors.ECODE_INVAL)
8081 for lv_name in all_lvs:
8082 try:
8083 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8084 # to ReserveLV use the same syntax
8085 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8086 except errors.ReservationError:
8087 raise errors.OpPrereqError("LV named %s used by another instance" %
8088 lv_name, errors.ECODE_NOTUNIQUE)
8090 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8091 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8093 node_lvs = self.rpc.call_lv_list([pnode.name],
8094 vg_names.payload.keys())[pnode.name]
8095 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8096 node_lvs = node_lvs.payload
8098 delta = all_lvs.difference(node_lvs.keys())
8099 if delta:
8100 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8101 utils.CommaJoin(delta),
8102 errors.ECODE_INVAL)
8103 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8104 if online_lvs:
8105 raise errors.OpPrereqError("Online logical volumes found, cannot"
8106 " adopt: %s" % utils.CommaJoin(online_lvs),
8107 errors.ECODE_STATE)
8108 # update the size of disk based on what is found
8109 for dsk in self.disks:
8110 dsk[constants.IDISK_SIZE] = \
8111 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8112 dsk[constants.IDISK_ADOPT])][0]))
8114 elif self.op.disk_template == constants.DT_BLOCK:
8115 # Normalize and de-duplicate device paths
8116 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8117 for disk in self.disks])
8118 if len(all_disks) != len(self.disks):
8119 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8120 errors.ECODE_INVAL)
8121 baddisks = [d for d in all_disks
8122 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8123 if baddisks:
8124 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8125 " cannot be adopted" %
8126 (", ".join(baddisks),
8127 constants.ADOPTABLE_BLOCKDEV_ROOT),
8128 errors.ECODE_INVAL)
8130 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8131 list(all_disks))[pnode.name]
8132 node_disks.Raise("Cannot get block device information from node %s" %
8133 pnode.name)
8134 node_disks = node_disks.payload
8135 delta = all_disks.difference(node_disks.keys())
8136 if delta:
8137 raise errors.OpPrereqError("Missing block device(s): %s" %
8138 utils.CommaJoin(delta),
8139 errors.ECODE_INVAL)
8140 for dsk in self.disks:
8141 dsk[constants.IDISK_SIZE] = \
8142 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8144 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8146 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8147 # check OS parameters (remotely)
8148 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8150 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8152 # memory check on primary node
8153 if self.op.start:
8154 _CheckNodeFreeMemory(self, self.pnode.name,
8155 "creating instance %s" % self.op.instance_name,
8156 self.be_full[constants.BE_MEMORY],
8157 self.op.hypervisor)
8159 self.dry_run_result = list(nodenames)
8161 def Exec(self, feedback_fn):
8162 """Create and add the instance to the cluster.
8164 """
8165 instance = self.op.instance_name
8166 pnode_name = self.pnode.name
8168 ht_kind = self.op.hypervisor
8169 if ht_kind in constants.HTS_REQ_PORT:
8170 network_port = self.cfg.AllocatePort()
8171 else:
8172 network_port = None
8174 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8175 # this is needed because os.path.join does not accept None arguments
8176 if self.op.file_storage_dir is None:
8177 string_file_storage_dir = ""
8178 else:
8179 string_file_storage_dir = self.op.file_storage_dir
8181 # build the full file storage dir path
8182 if self.op.disk_template == constants.DT_SHARED_FILE:
8183 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8185 get_fsd_fn = self.cfg.GetFileStorageDir
8187 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8188 string_file_storage_dir, instance)
8189 else:
8190 file_storage_dir = ""
8192 disks = _GenerateDiskTemplate(self,
8193 self.op.disk_template,
8194 instance, pnode_name,
8195 self.secondaries,
8196 self.disks,
8197 file_storage_dir,
8198 self.op.file_driver,
8199 0,
8200 feedback_fn)
8202 iobj = objects.Instance(name=instance, os=self.op.os_type,
8203 primary_node=pnode_name,
8204 nics=self.nics, disks=disks,
8205 disk_template=self.op.disk_template,
8206 admin_up=False,
8207 network_port=network_port,
8208 beparams=self.op.beparams,
8209 hvparams=self.op.hvparams,
8210 hypervisor=self.op.hypervisor,
8211 osparams=self.op.osparams,
8212 )
8214 if self.adopt_disks:
8215 if self.op.disk_template == constants.DT_PLAIN:
8216 # rename LVs to the newly-generated names; we need to construct
8217 # 'fake' LV disks with the old data, plus the new unique_id
8218 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8219 rename_to = []
8220 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8221 rename_to.append(t_dsk.logical_id)
8222 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8223 self.cfg.SetDiskID(t_dsk, pnode_name)
8224 result = self.rpc.call_blockdev_rename(pnode_name,
8225 zip(tmp_disks, rename_to))
8226 result.Raise("Failed to rename adopted LVs")
8228 feedback_fn("* creating instance disks...")
8229 try:
8230 _CreateDisks(self, iobj)
8231 except errors.OpExecError:
8232 self.LogWarning("Device creation failed, reverting...")
8233 try:
8234 _RemoveDisks(self, iobj)
8235 finally:
8236 self.cfg.ReleaseDRBDMinors(instance)
8237 raise
8239 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8240 feedback_fn("* wiping instance disks...")
8241 try:
8242 _WipeDisks(self, iobj)
8243 except errors.OpExecError:
8244 self.LogWarning("Device wiping failed, reverting...")
8245 try:
8246 _RemoveDisks(self, iobj)
8247 finally:
8248 self.cfg.ReleaseDRBDMinors(instance)
8249 raise
8251 feedback_fn("adding instance %s to cluster config" % instance)
8253 self.cfg.AddInstance(iobj, self.proc.GetECId())
8255 # Declare that we don't want to remove the instance lock anymore, as we've
8256 # added the instance to the config
8257 del self.remove_locks[locking.LEVEL_INSTANCE]
8258 # Unlock all the nodes
8259 if self.op.mode == constants.INSTANCE_IMPORT:
8260 nodes_keep = [self.op.src_node]
8261 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8262 if node != self.op.src_node]
8263 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8264 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8265 else:
8266 self.context.glm.release(locking.LEVEL_NODE)
8267 del self.acquired_locks[locking.LEVEL_NODE]
8269 if self.op.wait_for_sync:
8270 disk_abort = not _WaitForSync(self, iobj)
8271 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8272 # make sure the disks are not degraded (still sync-ing is ok)
8273 time.sleep(15)
8274 feedback_fn("* checking mirrors status")
8275 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8276 else:
8277 disk_abort = False
8279 if disk_abort:
8280 _RemoveDisks(self, iobj)
8281 self.cfg.RemoveInstance(iobj.name)
8282 # Make sure the instance lock gets removed
8283 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8284 raise errors.OpExecError("There are some degraded disks for"
8285 " this instance")
8287 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8288 if self.op.mode == constants.INSTANCE_CREATE:
8289 if not self.op.no_install:
8290 feedback_fn("* running the instance OS create scripts...")
8291 # FIXME: pass debug option from opcode to backend
8292 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8293 self.op.debug_level)
8294 result.Raise("Could not add os for instance %s"
8295 " on node %s" % (instance, pnode_name))
8297 elif self.op.mode == constants.INSTANCE_IMPORT:
8298 feedback_fn("* running the instance OS import scripts...")
8300 transfers = []
8302 for idx, image in enumerate(self.src_images):
8303 if not image:
8304 continue
8306 # FIXME: pass debug option from opcode to backend
8307 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8308 constants.IEIO_FILE, (image, ),
8309 constants.IEIO_SCRIPT,
8310 (iobj.disks[idx], idx),
8311 None)
8312 transfers.append(dt)
8314 import_result = \
8315 masterd.instance.TransferInstanceData(self, feedback_fn,
8316 self.op.src_node, pnode_name,
8317 self.pnode.secondary_ip,
8318 iobj, transfers)
8319 if not compat.all(import_result):
8320 self.LogWarning("Some disks for instance %s on node %s were not"
8321 " imported successfully" % (instance, pnode_name))
8323 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8324 feedback_fn("* preparing remote import...")
8325 # The source cluster will stop the instance before attempting to make a
8326 # connection. In some cases stopping an instance can take a long time,
8327 # hence the shutdown timeout is added to the connection timeout.
8328 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8329 self.op.source_shutdown_timeout)
8330 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8332 assert iobj.primary_node == self.pnode.name
8333 disk_results = \
8334 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8335 self.source_x509_ca,
8336 self._cds, timeouts)
8337 if not compat.all(disk_results):
8338 # TODO: Should the instance still be started, even if some disks
8339 # failed to import (valid for local imports, too)?
8340 self.LogWarning("Some disks for instance %s on node %s were not"
8341 " imported successfully" % (instance, pnode_name))
8343 # Run rename script on newly imported instance
8344 assert iobj.name == instance
8345 feedback_fn("Running rename script for %s" % instance)
8346 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8347 self.source_instance_name,
8348 self.op.debug_level)
8349 if result.fail_msg:
8350 self.LogWarning("Failed to run rename script for %s on node"
8351 " %s: %s" % (instance, pnode_name, result.fail_msg))
8353 else:
8354 # also checked in the prereq part
8355 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8356 % self.op.mode)
8358 if self.op.start:
8359 iobj.admin_up = True
8360 self.cfg.Update(iobj, feedback_fn)
8361 logging.info("Starting instance %s on node %s", instance, pnode_name)
8362 feedback_fn("* starting instance...")
8363 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8364 result.Raise("Could not start instance")
8366 return list(iobj.all_nodes)
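# Editor's sketch (hypothetical, not used by this module): how a client would
# typically drive LUInstanceCreate via an opcode; names and sizes are made up.
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}], os_type="debootstrap",
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com")
#
# Submitting this through luxi/cli returns Exec()'s result above, i.e. the
# list of all nodes of the new instance.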
8369 class LUInstanceConsole(NoHooksLU):
8370 """Connect to an instance's console.
8372 This is somewhat special in that it returns the command line that
8373 you need to run on the master node in order to connect to the
8374 console.
8376 """
8377 REQ_BGL = False
8379 def ExpandNames(self):
8380 self._ExpandAndLockInstance()
8382 def CheckPrereq(self):
8383 """Check prerequisites.
8385 This checks that the instance is in the cluster.
8387 """
8388 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8389 assert self.instance is not None, \
8390 "Cannot retrieve locked instance %s" % self.op.instance_name
8391 _CheckNodeOnline(self, self.instance.primary_node)
8393 def Exec(self, feedback_fn):
8394 """Connect to the console of an instance.
8396 """
8397 instance = self.instance
8398 node = instance.primary_node
8400 node_insts = self.rpc.call_instance_list([node],
8401 [instance.hypervisor])[node]
8402 node_insts.Raise("Can't get node information from %s" % node)
8404 if instance.name not in node_insts.payload:
8405 if instance.admin_up:
8406 state = constants.INSTST_ERRORDOWN
8407 else:
8408 state = constants.INSTST_ADMINDOWN
8409 raise errors.OpExecError("Instance %s is not running (state %s)" %
8410 (instance.name, state))
8412 logging.debug("Connecting to console of %s on %s", instance.name, node)
8414 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8417 def _GetInstanceConsole(cluster, instance):
8418 """Returns console information for an instance.
8420 @type cluster: L{objects.Cluster}
8421 @type instance: L{objects.Instance}
8423 """
8425 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8426 # beparams and hvparams are passed separately, to avoid editing the
8427 # instance and then saving the defaults in the instance itself.
8428 hvparams = cluster.FillHV(instance)
8429 beparams = cluster.FillBE(instance)
8430 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8432 assert console.instance == instance.name
8433 assert console.Validate()
8435 return console.ToDict()
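# Editor's sketch (hypothetical values): the serialized console returned above
# is a plain dict, e.g. for a Xen instance roughly
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}
# with the exact fields depending on the hypervisor's console kind.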
8438 class LUInstanceReplaceDisks(LogicalUnit):
8439 """Replace the disks of an instance.
8441 """
8442 HPATH = "mirrors-replace"
8443 HTYPE = constants.HTYPE_INSTANCE
8444 REQ_BGL = False
8446 def CheckArguments(self):
8447 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8448 self.op.iallocator)
8450 def ExpandNames(self):
8451 self._ExpandAndLockInstance()
8453 if self.op.iallocator is not None:
8454 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8456 elif self.op.remote_node is not None:
8457 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8458 self.op.remote_node = remote_node
8460 # Warning: do not remove the locking of the new secondary here
8461 # unless DRBD8.AddChildren is changed to work in parallel;
8462 # currently it doesn't since parallel invocations of
8463 # FindUnusedMinor will conflict
8464 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8465 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8467 else:
8468 self.needed_locks[locking.LEVEL_NODE] = []
8469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8471 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8472 self.op.iallocator, self.op.remote_node,
8473 self.op.disks, False, self.op.early_release)
8475 self.tasklets = [self.replacer]
8477 def DeclareLocks(self, level):
8478 # If we're not already locking all nodes in the set we have to declare the
8479 # instance's primary/secondary nodes.
8480 if (level == locking.LEVEL_NODE and
8481 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8482 self._LockInstancesNodes()
8484 def BuildHooksEnv(self):
8485 """Build hooks env.
8487 This runs on the master, the primary and all the secondaries.
8489 """
8490 instance = self.replacer.instance
8491 env = {
8492 "MODE": self.op.mode,
8493 "NEW_SECONDARY": self.op.remote_node,
8494 "OLD_SECONDARY": instance.secondary_nodes[0],
8495 }
8496 env.update(_BuildInstanceHookEnvByObject(self, instance))
8498 return env
8499 def BuildHooksNodes(self):
8500 """Build hooks nodes.
8502 """
8503 instance = self.replacer.instance
8504 nl = [
8505 self.cfg.GetMasterNode(),
8506 instance.primary_node,
8507 ]
8508 if self.op.remote_node is not None:
8509 nl.append(self.op.remote_node)
8511 return nl, nl
8513 class TLReplaceDisks(Tasklet):
8514 """Replaces disks for an instance.
8516 Note: Locking is not within the scope of this class.
8518 """
8519 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8520 disks, delay_iallocator, early_release):
8521 """Initializes this class.
8523 """
8524 Tasklet.__init__(self, lu)
8526 # Parameters
8527 self.instance_name = instance_name
8528 self.mode = mode
8529 self.iallocator_name = iallocator_name
8530 self.remote_node = remote_node
8531 self.disks = disks
8532 self.delay_iallocator = delay_iallocator
8533 self.early_release = early_release
8535 # Runtime data
8536 self.instance = None
8537 self.new_node = None
8538 self.target_node = None
8539 self.other_node = None
8540 self.remote_node_info = None
8541 self.node_secondary_ip = None
8543 @staticmethod
8544 def CheckArguments(mode, remote_node, iallocator):
8545 """Helper function for users of this class.
8547 """
8548 # check for valid parameter combination
8549 if mode == constants.REPLACE_DISK_CHG:
8550 if remote_node is None and iallocator is None:
8551 raise errors.OpPrereqError("When changing the secondary either an"
8552 " iallocator script must be used or the"
8553 " new node given", errors.ECODE_INVAL)
8555 if remote_node is not None and iallocator is not None:
8556 raise errors.OpPrereqError("Give either the iallocator or the new"
8557 " secondary, not both", errors.ECODE_INVAL)
8559 elif remote_node is not None or iallocator is not None:
8560 # Not replacing the secondary
8561 raise errors.OpPrereqError("The iallocator and new node options can"
8562 " only be used when changing the"
8563 " secondary node", errors.ECODE_INVAL)
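# Editor's sketch of the combinations accepted above (hypothetical names):
#   CheckArguments(constants.REPLACE_DISK_CHG, "node3.example.com", None) # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")              # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, None)                # raises
#   CheckArguments(constants.REPLACE_DISK_PRI, "node3.example.com", None) # raises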
8565 @staticmethod
8566 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8567 """Compute a new secondary node using an IAllocator.
8569 """
8570 ial = IAllocator(lu.cfg, lu.rpc,
8571 mode=constants.IALLOCATOR_MODE_RELOC,
8572 name=instance_name,
8573 relocate_from=relocate_from)
8575 ial.Run(iallocator_name)
8577 if not ial.success:
8578 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8579 " %s" % (iallocator_name, ial.info),
8580 errors.ECODE_NORES)
8582 if len(ial.result) != ial.required_nodes:
8583 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8584 " of nodes (%s), required %s" %
8585 (iallocator_name,
8586 len(ial.result), ial.required_nodes),
8587 errors.ECODE_FAULT)
8589 remote_node_name = ial.result[0]
8591 lu.LogInfo("Selected new secondary for instance '%s': %s",
8592 instance_name, remote_node_name)
8594 return remote_node_name
8596 def _FindFaultyDisks(self, node_name):
8597 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8598 node_name, True)
8600 def _CheckDisksActivated(self, instance):
8601 """Checks if the instance disks are activated.
8603 @param instance: The instance to check disks
8604 @return: True if they are activated, False otherwise
8606 """
8607 nodes = instance.all_nodes
8609 for idx, dev in enumerate(instance.disks):
8610 for node in nodes:
8611 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8612 self.cfg.SetDiskID(dev, node)
8614 result = self.rpc.call_blockdev_find(node, dev)
8616 if result.offline:
8617 continue
8618 elif result.fail_msg or not result.payload:
8619 return False
8621 return True
8624 def CheckPrereq(self):
8625 """Check prerequisites.
8627 This checks that the instance is in the cluster.
8629 """
8630 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8631 assert instance is not None, \
8632 "Cannot retrieve locked instance %s" % self.instance_name
8634 if instance.disk_template != constants.DT_DRBD8:
8635 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8636 " instances", errors.ECODE_INVAL)
8638 if len(instance.secondary_nodes) != 1:
8639 raise errors.OpPrereqError("The instance has a strange layout,"
8640 " expected one secondary but found %d" %
8641 len(instance.secondary_nodes),
8642 errors.ECODE_FAULT)
8644 if not self.delay_iallocator:
8645 self._CheckPrereq2()
8647 def _CheckPrereq2(self):
8648 """Check prerequisites, second part.
8650 This function should always be part of CheckPrereq. It was separated and is
8651 now called from Exec because during node evacuation iallocator was only
8652 called with an unmodified cluster model, not taking planned changes into
8653 account.
8655 """
8656 instance = self.instance
8657 secondary_node = instance.secondary_nodes[0]
8659 if self.iallocator_name is None:
8660 remote_node = self.remote_node
8661 else:
8662 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8663 instance.name, instance.secondary_nodes)
8665 if remote_node is not None:
8666 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8667 assert self.remote_node_info is not None, \
8668 "Cannot retrieve locked node %s" % remote_node
8670 self.remote_node_info = None
8672 if remote_node == self.instance.primary_node:
8673 raise errors.OpPrereqError("The specified node is the primary node of"
8674 " the instance.", errors.ECODE_INVAL)
8676 if remote_node == secondary_node:
8677 raise errors.OpPrereqError("The specified node is already the"
8678 " secondary node of the instance.",
8679 errors.ECODE_INVAL)
8681 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8682 constants.REPLACE_DISK_CHG):
8683 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8684 errors.ECODE_INVAL)
8686 if self.mode == constants.REPLACE_DISK_AUTO:
8687 if not self._CheckDisksActivated(instance):
8688 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8689 " first" % self.instance_name,
8690 errors.ECODE_STATE)
8691 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8692 faulty_secondary = self._FindFaultyDisks(secondary_node)
8694 if faulty_primary and faulty_secondary:
8695 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8696 " one node and can not be repaired"
8697 " automatically" % self.instance_name,
8698 errors.ECODE_STATE)
8700 if faulty_primary:
8701 self.disks = faulty_primary
8702 self.target_node = instance.primary_node
8703 self.other_node = secondary_node
8704 check_nodes = [self.target_node, self.other_node]
8705 elif faulty_secondary:
8706 self.disks = faulty_secondary
8707 self.target_node = secondary_node
8708 self.other_node = instance.primary_node
8709 check_nodes = [self.target_node, self.other_node]
8710 else:
8711 self.disks = []
8712 check_nodes = []
8714 else:
8715 # Non-automatic modes
8716 if self.mode == constants.REPLACE_DISK_PRI:
8717 self.target_node = instance.primary_node
8718 self.other_node = secondary_node
8719 check_nodes = [self.target_node, self.other_node]
8721 elif self.mode == constants.REPLACE_DISK_SEC:
8722 self.target_node = secondary_node
8723 self.other_node = instance.primary_node
8724 check_nodes = [self.target_node, self.other_node]
8726 elif self.mode == constants.REPLACE_DISK_CHG:
8727 self.new_node = remote_node
8728 self.other_node = instance.primary_node
8729 self.target_node = secondary_node
8730 check_nodes = [self.new_node, self.other_node]
8732 _CheckNodeNotDrained(self.lu, remote_node)
8733 _CheckNodeVmCapable(self.lu, remote_node)
8735 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8736 assert old_node_info is not None
8737 if old_node_info.offline and not self.early_release:
8738 # doesn't make sense to delay the release
8739 self.early_release = True
8740 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8741 " early-release mode", secondary_node)
8743 else:
8744 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8745 self.mode)
8747 # If not specified, all disks should be replaced
8748 if not self.disks:
8749 self.disks = range(len(self.instance.disks))
8751 for node in check_nodes:
8752 _CheckNodeOnline(self.lu, node)
8754 # Check whether disks are valid
8755 for disk_idx in self.disks:
8756 instance.FindDisk(disk_idx)
8758 # Get secondary node IP addresses
8759 node_2nd_ip = {}
8761 for node_name in [self.target_node, self.other_node, self.new_node]:
8762 if node_name is not None:
8763 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8765 self.node_secondary_ip = node_2nd_ip
8767 def Exec(self, feedback_fn):
8768 """Execute disk replacement.
8770 This dispatches the disk replacement to the appropriate handler.
8772 """
8773 if self.delay_iallocator:
8774 self._CheckPrereq2()
8776 if not self.disks:
8777 feedback_fn("No disks need replacement")
8778 return
8780 feedback_fn("Replacing disk(s) %s for %s" %
8781 (utils.CommaJoin(self.disks), self.instance.name))
8783 activate_disks = (not self.instance.admin_up)
8785 # Activate the instance disks if we're replacing them on a down instance
8786 if activate_disks:
8787 _StartInstanceDisks(self.lu, self.instance, True)
8789 try:
8790 # Should we replace the secondary node?
8791 if self.new_node is not None:
8792 fn = self._ExecDrbd8Secondary
8794 fn = self._ExecDrbd8DiskOnly
8796 return fn(feedback_fn)
8797 finally:
8799 # Deactivate the instance disks if we're replacing them on a
8800 # down instance
8801 if activate_disks:
8802 _SafeShutdownInstanceDisks(self.lu, self.instance)
8804 def _CheckVolumeGroup(self, nodes):
8805 self.lu.LogInfo("Checking volume groups")
8807 vgname = self.cfg.GetVGName()
8809 # Make sure volume group exists on all involved nodes
8810 results = self.rpc.call_vg_list(nodes)
8811 if not results:
8812 raise errors.OpExecError("Can't list volume groups on the nodes")
8814 for node in nodes:
8815 res = results[node]
8816 res.Raise("Error checking node %s" % node)
8817 if vgname not in res.payload:
8818 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8819 (vgname, node))
8821 def _CheckDisksExistence(self, nodes):
8822 # Check disk existence
8823 for idx, dev in enumerate(self.instance.disks):
8824 if idx not in self.disks:
8825 continue
8827 for node in nodes:
8828 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8829 self.cfg.SetDiskID(dev, node)
8831 result = self.rpc.call_blockdev_find(node, dev)
8833 msg = result.fail_msg
8834 if msg or not result.payload:
8835 if not msg:
8836 msg = "disk not found"
8837 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8838 (idx, node, msg))
8840 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8841 for idx, dev in enumerate(self.instance.disks):
8842 if idx not in self.disks:
8843 continue
8845 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8846 (idx, node_name))
8848 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8849 ldisk=ldisk):
8850 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8851 " replace disks for instance %s" %
8852 (node_name, self.instance.name))
8854 def _CreateNewStorage(self, node_name):
8855 vgname = self.cfg.GetVGName()
8856 iv_names = {}
8858 for idx, dev in enumerate(self.instance.disks):
8859 if idx not in self.disks:
8860 continue
8862 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8864 self.cfg.SetDiskID(dev, node_name)
8866 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8867 names = _GenerateUniqueNames(self.lu, lv_names)
8869 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8870 logical_id=(vgname, names[0]))
8871 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8872 logical_id=(vgname, names[1]))
8874 new_lvs = [lv_data, lv_meta]
8875 old_lvs = dev.children
8876 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8878 # we pass force_create=True to force the LVM creation
8879 for new_lv in new_lvs:
8880 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8881 _GetInstanceInfoText(self.instance), False)
8883 return iv_names
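# Editor's sketch (hypothetical names): the mapping returned above looks like
#   iv_names["disk/0"] = (drbd_dev, [old_data_lv, old_meta_lv],
#                         [new_data_lv, new_meta_lv])
# and drives the detach/rename/attach and cleanup steps below.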
8885 def _CheckDevices(self, node_name, iv_names):
8886 for name, (dev, _, _) in iv_names.iteritems():
8887 self.cfg.SetDiskID(dev, node_name)
8889 result = self.rpc.call_blockdev_find(node_name, dev)
8891 msg = result.fail_msg
8892 if msg or not result.payload:
8893 if not msg:
8894 msg = "disk not found"
8895 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8896 (name, msg))
8898 if result.payload.is_degraded:
8899 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8901 def _RemoveOldStorage(self, node_name, iv_names):
8902 for name, (_, old_lvs, _) in iv_names.iteritems():
8903 self.lu.LogInfo("Remove logical volumes for %s" % name)
8905 for lv in old_lvs:
8906 self.cfg.SetDiskID(lv, node_name)
8908 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8909 if msg:
8910 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8911 hint="remove unused LVs manually")
8913 def _ReleaseNodeLock(self, node_name):
8914 """Releases the lock for a given node."""
8915 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8917 def _ExecDrbd8DiskOnly(self, feedback_fn):
8918 """Replace a disk on the primary or secondary for DRBD 8.
8920 The algorithm for replace is quite complicated:
8922 1. for each disk to be replaced:
8924 1. create new LVs on the target node with unique names
8925 1. detach old LVs from the drbd device
8926 1. rename old LVs to name_replaced.<time_t>
8927 1. rename new LVs to old LVs
8928 1. attach the new LVs (with the old names now) to the drbd device
8930 1. wait for sync across all devices
8932 1. for each modified disk:
8934 1. remove old LVs (which have the name name_replaced.<time_t>)
8936 Failures are not very well handled.
8938 """
8939 steps_total = 6
8941 # Step: check device activation
8942 self.lu.LogStep(1, steps_total, "Check device existence")
8943 self._CheckDisksExistence([self.other_node, self.target_node])
8944 self._CheckVolumeGroup([self.target_node, self.other_node])
8946 # Step: check other node consistency
8947 self.lu.LogStep(2, steps_total, "Check peer consistency")
8948 self._CheckDisksConsistency(self.other_node,
8949 self.other_node == self.instance.primary_node,
8950 False)
8952 # Step: create new storage
8953 self.lu.LogStep(3, steps_total, "Allocate new storage")
8954 iv_names = self._CreateNewStorage(self.target_node)
8956 # Step: for each lv, detach+rename*2+attach
8957 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8958 for dev, old_lvs, new_lvs in iv_names.itervalues():
8959 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8961 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8962 old_lvs)
8963 result.Raise("Can't detach drbd from local storage on node"
8964 " %s for device %s" % (self.target_node, dev.iv_name))
8966 #cfg.Update(instance)
8968 # ok, we created the new LVs, so now we know we have the needed
8969 # storage; as such, we proceed on the target node to rename
8970 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8971 # using the assumption that logical_id == physical_id (which in
8972 # turn is the unique_id on that node)
8974 # FIXME(iustin): use a better name for the replaced LVs
8975 temp_suffix = int(time.time())
8976 ren_fn = lambda d, suff: (d.physical_id[0],
8977 d.physical_id[1] + "_replaced-%s" % suff)
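# Editor's note (illustrative only): with temp_suffix == 1400000000, ren_fn
# maps ("xenvg", "abc.disk0_data") to
# ("xenvg", "abc.disk0_data_replaced-1400000000"); the VG part is kept.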
8979 # Build the rename list based on what LVs exist on the node
8980 rename_old_to_new = []
8981 for to_ren in old_lvs:
8982 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8983 if not result.fail_msg and result.payload:
8985 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8987 self.lu.LogInfo("Renaming the old LVs on the target node")
8988 result = self.rpc.call_blockdev_rename(self.target_node,
8989 rename_old_to_new)
8990 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8992 # Now we rename the new LVs to the old LVs
8993 self.lu.LogInfo("Renaming the new LVs on the target node")
8994 rename_new_to_old = [(new, old.physical_id)
8995 for old, new in zip(old_lvs, new_lvs)]
8996 result = self.rpc.call_blockdev_rename(self.target_node,
8997 rename_new_to_old)
8998 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9000 for old, new in zip(old_lvs, new_lvs):
9001 new.logical_id = old.logical_id
9002 self.cfg.SetDiskID(new, self.target_node)
9004 for disk in old_lvs:
9005 disk.logical_id = ren_fn(disk, temp_suffix)
9006 self.cfg.SetDiskID(disk, self.target_node)
9008 # Now that the new lvs have the old name, we can add them to the device
9009 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9010 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9011 new_lvs)
9012 msg = result.fail_msg
9013 if msg:
9014 for new_lv in new_lvs:
9015 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9016 new_lv).fail_msg
9017 if msg2:
9018 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9019 hint=("cleanup manually the unused logical"
9020 " volumes"))
9021 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9023 dev.children = new_lvs
9025 self.cfg.Update(self.instance, feedback_fn)
9027 cstep = 5
9028 if self.early_release:
9029 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9030 cstep += 1
9031 self._RemoveOldStorage(self.target_node, iv_names)
9032 # WARNING: we release both node locks here, do not do other RPCs
9033 # than WaitForSync to the primary node
9034 self._ReleaseNodeLock([self.target_node, self.other_node])
9037 # This can fail as the old devices are degraded and _WaitForSync
9038 # does a combined result over all disks, so we don't check its return value
9039 self.lu.LogStep(cstep, steps_total, "Sync devices")
9040 cstep += 1
9041 _WaitForSync(self.lu, self.instance)
9043 # Check all devices manually
9044 self._CheckDevices(self.instance.primary_node, iv_names)
9046 # Step: remove old storage
9047 if not self.early_release:
9048 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9049 cstep += 1
9050 self._RemoveOldStorage(self.target_node, iv_names)
9052 def _ExecDrbd8Secondary(self, feedback_fn):
9053 """Replace the secondary node for DRBD 8.
9055 The algorithm for replace is quite complicated:
9056 - for all disks of the instance:
9057 - create new LVs on the new node with same names
9058 - shutdown the drbd device on the old secondary
9059 - disconnect the drbd network on the primary
9060 - create the drbd device on the new secondary
9061 - network attach the drbd on the primary, using an artifice:
9062 the drbd code for Attach() will connect to the network if it
9063 finds a device which is connected to the good local disks but
9064 not network enabled
9065 - wait for sync across all devices
9066 - remove all disks from the old secondary
9068 Failures are not very well handled.
9070 """
9071 steps_total = 6
9073 # Step: check device activation
9074 self.lu.LogStep(1, steps_total, "Check device existence")
9075 self._CheckDisksExistence([self.instance.primary_node])
9076 self._CheckVolumeGroup([self.instance.primary_node])
9078 # Step: check other node consistency
9079 self.lu.LogStep(2, steps_total, "Check peer consistency")
9080 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9082 # Step: create new storage
9083 self.lu.LogStep(3, steps_total, "Allocate new storage")
9084 for idx, dev in enumerate(self.instance.disks):
9085 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9086 (self.new_node, idx))
9087 # we pass force_create=True to force LVM creation
9088 for new_lv in dev.children:
9089 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9090 _GetInstanceInfoText(self.instance), False)
9092 # Step 4: drbd minors and drbd setup changes
9093 # after this, we must manually remove the drbd minors on both the
9094 # error and the success paths
9095 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9096 minors = self.cfg.AllocateDRBDMinor([self.new_node
9097 for dev in self.instance.disks],
9098 self.instance.name)
9099 logging.debug("Allocated minors %r", minors)
9101 iv_names = {}
9102 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9103 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9104 (self.new_node, idx))
9105 # create new devices on new_node; note that we create two IDs:
9106 # one without port, so the drbd will be activated without
9107 # networking information on the new node at this stage, and one
9108 # with network, for the latter activation in step 4
9109 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9110 if self.instance.primary_node == o_node1:
9111 p_minor = o_minor1
9112 else:
9113 assert self.instance.primary_node == o_node2, "Three-node instance?"
9114 p_minor = o_minor2
9116 new_alone_id = (self.instance.primary_node, self.new_node, None,
9117 p_minor, new_minor, o_secret)
9118 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9119 p_minor, new_minor, o_secret)
9121 iv_names[idx] = (dev, dev.children, new_net_id)
9122 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9123 new_net_id)
9124 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9125 logical_id=new_alone_id,
9126 children=dev.children,
9127 size=dev.size)
9128 try:
9129 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9130 _GetInstanceInfoText(self.instance), False)
9131 except errors.GenericError:
9132 self.cfg.ReleaseDRBDMinors(self.instance.name)
9133 raise
9135 # We have new devices, shutdown the drbd on the old secondary
9136 for idx, dev in enumerate(self.instance.disks):
9137 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9138 self.cfg.SetDiskID(dev, self.target_node)
9139 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9140 if msg:
9141 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9142 " node: %s" % (idx, msg),
9143 hint=("Please cleanup this device manually as"
9144 " soon as possible"))
9146 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9147 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9148 self.node_secondary_ip,
9149 self.instance.disks)\
9150 [self.instance.primary_node]
9152 msg = result.fail_msg
9153 if msg:
9154 # detaches didn't succeed (unlikely)
9155 self.cfg.ReleaseDRBDMinors(self.instance.name)
9156 raise errors.OpExecError("Can't detach the disks from the network on"
9157 " old node: %s" % (msg,))
9159 # if we managed to detach at least one, we update all the disks of
9160 # the instance to point to the new secondary
9161 self.lu.LogInfo("Updating instance configuration")
9162 for dev, _, new_logical_id in iv_names.itervalues():
9163 dev.logical_id = new_logical_id
9164 self.cfg.SetDiskID(dev, self.instance.primary_node)
9166 self.cfg.Update(self.instance, feedback_fn)
9168 # and now perform the drbd attach
9169 self.lu.LogInfo("Attaching primary drbds to new secondary"
9170 " (standalone => connected)")
9171 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9172 self.new_node],
9173 self.node_secondary_ip,
9174 self.instance.disks,
9175 self.instance.name,
9176 False)
9177 for to_node, to_result in result.items():
9178 msg = to_result.fail_msg
9179 if msg:
9180 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9181 to_node, msg,
9182 hint=("please do a gnt-instance info to see the"
9183 " status of disks"))
9184 cstep = 5
9185 if self.early_release:
9186 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9187 cstep += 1
9188 self._RemoveOldStorage(self.target_node, iv_names)
9189 # WARNING: we release all node locks here, do not do other RPCs
9190 # than WaitForSync to the primary node
9191 self._ReleaseNodeLock([self.instance.primary_node,
9192 self.target_node,
9193 self.new_node])
9196 # This can fail as the old devices are degraded and _WaitForSync
9197 # does a combined result over all disks, so we don't check its return value
9198 self.lu.LogStep(cstep, steps_total, "Sync devices")
9199 cstep += 1
9200 _WaitForSync(self.lu, self.instance)
9202 # Check all devices manually
9203 self._CheckDevices(self.instance.primary_node, iv_names)
9205 # Step: remove old storage
9206 if not self.early_release:
9207 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9208 self._RemoveOldStorage(self.target_node, iv_names)
9211 class LURepairNodeStorage(NoHooksLU):
9212 """Repairs the volume group on a node.
9214 """
9215 REQ_BGL = False
9217 def CheckArguments(self):
9218 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9220 storage_type = self.op.storage_type
9222 if (constants.SO_FIX_CONSISTENCY not in
9223 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9224 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9225 " repaired" % storage_type,
9226 errors.ECODE_INVAL)
9228 def ExpandNames(self):
9229 self.needed_locks = {
9230 locking.LEVEL_NODE: [self.op.node_name],
9231 }
9233 def _CheckFaultyDisks(self, instance, node_name):
9234 """Ensure faulty disks abort the opcode or at least warn."""
9235 try:
9236 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9237 node_name, True):
9238 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9239 " node '%s'" % (instance.name, node_name),
9240 errors.ECODE_STATE)
9241 except errors.OpPrereqError, err:
9242 if self.op.ignore_consistency:
9243 self.proc.LogWarning(str(err.args[0]))
9244 else:
9245 raise
9247 def CheckPrereq(self):
9248 """Check prerequisites.
9250 """
9251 # Check whether any instance on this node has faulty disks
9252 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9253 if not inst.admin_up:
9254 continue
9255 check_nodes = set(inst.all_nodes)
9256 check_nodes.discard(self.op.node_name)
9257 for inst_node_name in check_nodes:
9258 self._CheckFaultyDisks(inst, inst_node_name)
9260 def Exec(self, feedback_fn):
9261 feedback_fn("Repairing storage unit '%s' on %s ..." %
9262 (self.op.name, self.op.node_name))
9264 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9265 result = self.rpc.call_storage_execute(self.op.node_name,
9266 self.op.storage_type, st_args,
9267 self.op.name,
9268 constants.SO_FIX_CONSISTENCY)
9269 result.Raise("Failed to repair storage unit '%s' on %s" %
9270 (self.op.name, self.op.node_name))
9273 class LUNodeEvacStrategy(NoHooksLU):
9274 """Computes the node evacuation strategy.
9276 """
9277 REQ_BGL = False
9279 def CheckArguments(self):
9280 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9282 def ExpandNames(self):
9283 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9284 self.needed_locks = locks = {}
9285 if self.op.remote_node is None:
9286 locks[locking.LEVEL_NODE] = locking.ALL_SET
9287 else:
9288 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9289 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9291 def Exec(self, feedback_fn):
9292 if self.op.remote_node is not None:
9293 instances = []
9294 for node in self.op.nodes:
9295 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9296 result = []
9297 for i in instances:
9298 if i.primary_node == self.op.remote_node:
9299 raise errors.OpPrereqError("Node %s is the primary node of"
9300 " instance %s, cannot use it as"
9301 " secondary" %
9302 (self.op.remote_node, i.name),
9303 errors.ECODE_INVAL)
9304 result.append([i.name, self.op.remote_node])
9305 else:
9306 ial = IAllocator(self.cfg, self.rpc,
9307 mode=constants.IALLOCATOR_MODE_MEVAC,
9308 evac_nodes=self.op.nodes)
9309 ial.Run(self.op.iallocator, validate=True)
9310 if not ial.success:
9311 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9312 errors.ECODE_NORES)
9313 result = ial.result
9315 return result
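# Editor's sketch (hypothetical names): both branches above yield a list of
# [instance_name, new_secondary_node] pairs, e.g.
#   [["inst1.example.com", "node4.example.com"],
#    ["inst2.example.com", "node5.example.com"]]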
9317 class LUInstanceGrowDisk(LogicalUnit):
9318 """Grow a disk of an instance.
9320 """
9321 HPATH = "disk-grow"
9322 HTYPE = constants.HTYPE_INSTANCE
9323 REQ_BGL = False
9325 def ExpandNames(self):
9326 self._ExpandAndLockInstance()
9327 self.needed_locks[locking.LEVEL_NODE] = []
9328 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9330 def DeclareLocks(self, level):
9331 if level == locking.LEVEL_NODE:
9332 self._LockInstancesNodes()
9334 def BuildHooksEnv(self):
9335 """Build hooks env.
9337 This runs on the master, the primary and all the secondaries.
9339 """
9340 env = {
9341 "DISK": self.op.disk,
9342 "AMOUNT": self.op.amount,
9344 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9347 def BuildHooksNodes(self):
9348 """Build hooks nodes.
9351 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9354 def CheckPrereq(self):
9355 """Check prerequisites.
9357 This checks that the instance is in the cluster.
9360 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9361 assert instance is not None, \
9362 "Cannot retrieve locked instance %s" % self.op.instance_name
9363 nodenames = list(instance.all_nodes)
9364 for node in nodenames:
9365 _CheckNodeOnline(self, node)
9367 self.instance = instance
9369 if instance.disk_template not in constants.DTS_GROWABLE:
9370 raise errors.OpPrereqError("Instance's disk layout does not support"
9371 " growing.", errors.ECODE_INVAL)
9373 self.disk = instance.FindDisk(self.op.disk)
9375 if instance.disk_template not in (constants.DT_FILE,
9376 constants.DT_SHARED_FILE):
9377 # TODO: check the free disk space for file, when that feature will be implemented
9379 _CheckNodesFreeDiskPerVG(self, nodenames,
9380 self.disk.ComputeGrowth(self.op.amount))
9382 def Exec(self, feedback_fn):
9383 """Execute disk grow.
9386 instance = self.instance
9389 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9391 raise errors.OpExecError("Cannot activate block device to grow")
9393 for node in instance.all_nodes:
9394 self.cfg.SetDiskID(disk, node)
9395 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9396 result.Raise("Grow request failed to node %s" % node)
9398 # TODO: Rewrite code to work properly
9399 # DRBD goes into sync mode for a short amount of time after executing the
9400 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9401 # calling "resize" in sync mode fails. Sleeping for a short amount of
9402 # time is a work-around.
9405 disk.RecordGrow(self.op.amount)
9406 self.cfg.Update(instance, feedback_fn)
9407 if self.op.wait_for_sync:
9408 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9410 self.proc.LogWarning("Warning: disk syncing has not returned a good"
9411 " status.\nPlease check the instance.")
9412 if not instance.admin_up:
9413 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9414 elif not instance.admin_up:
9415 self.proc.LogWarning("Not shutting down the disk even though the"
9416 " instance is not supposed to be running, because"
9417 " no wait-for-sync mode was requested.")
9420 class LUInstanceQueryData(NoHooksLU):
9421 """Query runtime instance data.
9426 def ExpandNames(self):
9427 self.needed_locks = {}
9429 # Use locking if requested or when non-static information is wanted
9430 if not (self.op.static or self.op.use_locking):
9431 self.LogWarning("Non-static data requested, locks need to be acquired")
9432 self.op.use_locking = True
9434 if self.op.instances or not self.op.use_locking:
9435 # Expand instance names right here
9436 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9438 # Will use acquired locks
9439 self.wanted_names = None
9441 if self.op.use_locking:
9442 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9444 if self.wanted_names is None:
9445 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9447 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9449 self.needed_locks[locking.LEVEL_NODE] = []
9450 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9451 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9453 def DeclareLocks(self, level):
9454 if self.op.use_locking and level == locking.LEVEL_NODE:
9455 self._LockInstancesNodes()
9457 def CheckPrereq(self):
9458 """Check prerequisites.
9460 This only checks the optional instance list against the existing names.
9463 if self.wanted_names is None:
9464 assert self.op.use_locking, "Locking was not used"
9465 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9467 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9468 for name in self.wanted_names]
9470 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9471 """Returns the status of a block device
9474 if self.op.static or not node:
9477 self.cfg.SetDiskID(dev, node)
9479 result = self.rpc.call_blockdev_find(node, dev)
9483 result.Raise("Can't compute disk status for %s" % instance_name)
9485 status = result.payload
9489 return (status.dev_path, status.major, status.minor,
9490 status.sync_percent, status.estimated_time,
9491 status.is_degraded, status.ldisk_status)
9493 def _ComputeDiskStatus(self, instance, snode, dev):
9494 """Compute block device status.
9497 if dev.dev_type in constants.LDS_DRBD:
9498 # we change the snode then (otherwise we use the one passed in)
9499 if dev.logical_id[0] == instance.primary_node:
9500 snode = dev.logical_id[1]
9502 snode = dev.logical_id[0]
9504 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9506 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9509 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9510 for child in dev.children]
9515 "iv_name": dev.iv_name,
9516 "dev_type": dev.dev_type,
9517 "logical_id": dev.logical_id,
9518 "physical_id": dev.physical_id,
9519 "pstatus": dev_pstatus,
9520 "sstatus": dev_sstatus,
9521 "children": dev_children,
9526 def Exec(self, feedback_fn):
9527 """Gather and return data"""
9530 cluster = self.cfg.GetClusterInfo()
9532 for instance in self.wanted_instances:
9533 if not self.op.static:
9534 remote_info = self.rpc.call_instance_info(instance.primary_node,
9536 instance.hypervisor)
9537 remote_info.Raise("Error checking node %s" % instance.primary_node)
9538 remote_info = remote_info.payload
9539 if remote_info and "state" in remote_info:
9542 remote_state = "down"
9545 if instance.admin_up:
9548 config_state = "down"
9550 disks = [self._ComputeDiskStatus(instance, None, device)
9551 for device in instance.disks]
9553 result[instance.name] = {
9554 "name": instance.name,
9555 "config_state": config_state,
9556 "run_state": remote_state,
9557 "pnode": instance.primary_node,
9558 "snodes": instance.secondary_nodes,
9560 # this happens to be the same format used for hooks
9561 "nics": _NICListToTuple(self, instance.nics),
9562 "disk_template": instance.disk_template,
9564 "hypervisor": instance.hypervisor,
9565 "network_port": instance.network_port,
9566 "hv_instance": instance.hvparams,
9567 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9568 "be_instance": instance.beparams,
9569 "be_actual": cluster.FillBE(instance),
9570 "os_instance": instance.osparams,
9571 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9572 "serial_no": instance.serial_no,
9573 "mtime": instance.mtime,
9574 "ctime": instance.ctime,
9575 "uuid": instance.uuid,
9581 class LUInstanceSetParams(LogicalUnit):
9582 """Modifies an instances's parameters.
9585 HPATH = "instance-modify"
9586 HTYPE = constants.HTYPE_INSTANCE
9589 def CheckArguments(self):
9590 if not (self.op.nics or self.op.disks or self.op.disk_template or
9591 self.op.hvparams or self.op.beparams or self.op.os_name):
9592 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9594 if self.op.hvparams:
9595 _CheckGlobalHvParams(self.op.hvparams)
9599 for disk_op, disk_dict in self.op.disks:
9600 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9601 if disk_op == constants.DDM_REMOVE:
9604 elif disk_op == constants.DDM_ADD:
9607 if not isinstance(disk_op, int):
9608 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9609 if not isinstance(disk_dict, dict):
9610 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9611 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9613 if disk_op == constants.DDM_ADD:
9614 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9615 if mode not in constants.DISK_ACCESS_SET:
9616 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9618 size = disk_dict.get(constants.IDISK_SIZE, None)
9620 raise errors.OpPrereqError("Required disk parameter size missing",
9624 except (TypeError, ValueError), err:
9625 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9626 str(err), errors.ECODE_INVAL)
9627 disk_dict[constants.IDISK_SIZE] = size
9629 # modification of disk
9630 if constants.IDISK_SIZE in disk_dict:
9631 raise errors.OpPrereqError("Disk size change not possible, use"
9632 " grow-disk", errors.ECODE_INVAL)
9634 if disk_addremove > 1:
9635 raise errors.OpPrereqError("Only one disk add or remove operation"
9636 " supported at a time", errors.ECODE_INVAL)
9638 if self.op.disks and self.op.disk_template is not None:
9639 raise errors.OpPrereqError("Disk template conversion and other disk"
9640 " changes not supported at the same time",
9643 if (self.op.disk_template and
9644 self.op.disk_template in constants.DTS_INT_MIRROR and
9645 self.op.remote_node is None):
9646 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9647 " one requires specifying a secondary node",
9652 for nic_op, nic_dict in self.op.nics:
9653 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9654 if nic_op == constants.DDM_REMOVE:
9657 elif nic_op == constants.DDM_ADD:
9660 if not isinstance(nic_op, int):
9661 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9662 if not isinstance(nic_dict, dict):
9663 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9664 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9666 # nic_dict should be a dict
9667 nic_ip = nic_dict.get(constants.INIC_IP, None)
9668 if nic_ip is not None:
9669 if nic_ip.lower() == constants.VALUE_NONE:
9670 nic_dict[constants.INIC_IP] = None
9672 if not netutils.IPAddress.IsValid(nic_ip):
9673 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9676 nic_bridge = nic_dict.get('bridge', None)
9677 nic_link = nic_dict.get(constants.INIC_LINK, None)
9678 if nic_bridge and nic_link:
9679 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9680 " at the same time", errors.ECODE_INVAL)
9681 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9682 nic_dict['bridge'] = None
9683 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9684 nic_dict[constants.INIC_LINK] = None
9686 if nic_op == constants.DDM_ADD:
9687 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9689 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9691 if constants.INIC_MAC in nic_dict:
9692 nic_mac = nic_dict[constants.INIC_MAC]
9693 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9694 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9696 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9697 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9698 " modifying an existing nic",
9701 if nic_addremove > 1:
9702 raise errors.OpPrereqError("Only one NIC add or remove operation"
9703 " supported at a time", errors.ECODE_INVAL)
9705 def ExpandNames(self):
9706 self._ExpandAndLockInstance()
9707 self.needed_locks[locking.LEVEL_NODE] = []
9708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9710 def DeclareLocks(self, level):
9711 if level == locking.LEVEL_NODE:
9712 self._LockInstancesNodes()
9713 if self.op.disk_template and self.op.remote_node:
9714 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9715 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9717 def BuildHooksEnv(self):
9720 This runs on the master, primary and secondaries.
9724 if constants.BE_MEMORY in self.be_new:
9725 args['memory'] = self.be_new[constants.BE_MEMORY]
9726 if constants.BE_VCPUS in self.be_new:
9727 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9728 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9729 # information at all.
9732 nic_override = dict(self.op.nics)
9733 for idx, nic in enumerate(self.instance.nics):
9734 if idx in nic_override:
9735 this_nic_override = nic_override[idx]
9737 this_nic_override = {}
9738 if constants.INIC_IP in this_nic_override:
9739 ip = this_nic_override[constants.INIC_IP]
9742 if constants.INIC_MAC in this_nic_override:
9743 mac = this_nic_override[constants.INIC_MAC]
9746 if idx in self.nic_pnew:
9747 nicparams = self.nic_pnew[idx]
9749 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9750 mode = nicparams[constants.NIC_MODE]
9751 link = nicparams[constants.NIC_LINK]
9752 args['nics'].append((ip, mac, mode, link))
9753 if constants.DDM_ADD in nic_override:
9754 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9755 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9756 nicparams = self.nic_pnew[constants.DDM_ADD]
9757 mode = nicparams[constants.NIC_MODE]
9758 link = nicparams[constants.NIC_LINK]
9759 args['nics'].append((ip, mac, mode, link))
9760 elif constants.DDM_REMOVE in nic_override:
9761 del args['nics'][-1]
9763 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9764 if self.op.disk_template:
9765 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9769 def BuildHooksNodes(self):
9770 """Build hooks nodes.
9773 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9776 def CheckPrereq(self):
9777 """Check prerequisites.
9779 This only checks the instance list against the existing names.
9782 # checking the new params on the primary/secondary nodes
9784 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9785 cluster = self.cluster = self.cfg.GetClusterInfo()
9786 assert self.instance is not None, \
9787 "Cannot retrieve locked instance %s" % self.op.instance_name
9788 pnode = instance.primary_node
9789 nodelist = list(instance.all_nodes)
9792 if self.op.os_name and not self.op.force:
9793 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9794 self.op.force_variant)
9795 instance_os = self.op.os_name
9797 instance_os = instance.os
9799 if self.op.disk_template:
9800 if instance.disk_template == self.op.disk_template:
9801 raise errors.OpPrereqError("Instance already has disk template %s" %
9802 instance.disk_template, errors.ECODE_INVAL)
9804 if (instance.disk_template,
9805 self.op.disk_template) not in self._DISK_CONVERSIONS:
9806 raise errors.OpPrereqError("Unsupported disk template conversion from"
9807 " %s to %s" % (instance.disk_template,
9808 self.op.disk_template),
9810 _CheckInstanceDown(self, instance, "cannot change disk template")
9811 if self.op.disk_template in constants.DTS_INT_MIRROR:
9812 if self.op.remote_node == pnode:
9813 raise errors.OpPrereqError("Given new secondary node %s is the same"
9814 " as the primary node of the instance" %
9815 self.op.remote_node, errors.ECODE_STATE)
9816 _CheckNodeOnline(self, self.op.remote_node)
9817 _CheckNodeNotDrained(self, self.op.remote_node)
9818 # FIXME: here we assume that the old instance type is DT_PLAIN
9819 assert instance.disk_template == constants.DT_PLAIN
9820 disks = [{constants.IDISK_SIZE: d.size,
9821 constants.IDISK_VG: d.logical_id[0]}
9822 for d in instance.disks]
9823 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9824 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9826 # hvparams processing
9827 if self.op.hvparams:
9828 hv_type = instance.hypervisor
9829 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9830 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9831 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9834 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9835 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9836 self.hv_new = hv_new # the new actual values
9837 self.hv_inst = i_hvdict # the new dict (without defaults)
9839 self.hv_new = self.hv_inst = {}
9841 # beparams processing
9842 if self.op.beparams:
9843 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9845 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9846 be_new = cluster.SimpleFillBE(i_bedict)
9847 self.be_new = be_new # the new actual values
9848 self.be_inst = i_bedict # the new dict (without defaults)
9850 self.be_new = self.be_inst = {}
9852 # osparams processing
9853 if self.op.osparams:
9854 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9855 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9856 self.os_inst = i_osdict # the new dict (without defaults)
9862 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9863 mem_check_list = [pnode]
9864 if be_new[constants.BE_AUTO_BALANCE]:
9865 # either we changed auto_balance to yes or it was from before
9866 mem_check_list.extend(instance.secondary_nodes)
9867 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9868 instance.hypervisor)
9869 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9870 instance.hypervisor)
9871 pninfo = nodeinfo[pnode]
9872 msg = pninfo.fail_msg
9874 # Assume the primary node is unreachable and go ahead
9875 self.warn.append("Can't get info from primary node %s: %s" %
9877 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9878 self.warn.append("Node data from primary node %s doesn't contain"
9879 " free memory information" % pnode)
9880 elif instance_info.fail_msg:
9881 self.warn.append("Can't get instance runtime information: %s" %
9882 instance_info.fail_msg)
9884 if instance_info.payload:
9885 current_mem = int(instance_info.payload['memory'])
9887 # Assume instance not running
9888 # (there is a slight race condition here, but it's not very probable,
9889 # and we have no other way to check)
9891 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9892 pninfo.payload['memory_free'])
9894 raise errors.OpPrereqError("This change will prevent the instance"
9895 " from starting, due to %d MB of memory"
9896 " missing on its primary node" % miss_mem,
9899 if be_new[constants.BE_AUTO_BALANCE]:
9900 for node, nres in nodeinfo.items():
9901 if node not in instance.secondary_nodes:
9905 self.warn.append("Can't get info from secondary node %s: %s" %
9907 elif not isinstance(nres.payload.get('memory_free', None), int):
9908 self.warn.append("Secondary node %s didn't return free"
9909 " memory information" % node)
9910 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9911 self.warn.append("Not enough memory to failover instance to"
9912 " secondary node %s" % node)
9917 for nic_op, nic_dict in self.op.nics:
9918 if nic_op == constants.DDM_REMOVE:
9919 if not instance.nics:
9920 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9923 if nic_op != constants.DDM_ADD:
9925 if not instance.nics:
9926 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9927 " no NICs" % nic_op,
9929 if nic_op < 0 or nic_op >= len(instance.nics):
9930 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9932 (nic_op, len(instance.nics) - 1),
9934 old_nic_params = instance.nics[nic_op].nicparams
9935 old_nic_ip = instance.nics[nic_op].ip
9940 update_params_dict = dict([(key, nic_dict[key])
9941 for key in constants.NICS_PARAMETERS
9942 if key in nic_dict])
9944 if 'bridge' in nic_dict:
9945 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9947 new_nic_params = _GetUpdatedParams(old_nic_params,
9949 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9950 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9951 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9952 self.nic_pinst[nic_op] = new_nic_params
9953 self.nic_pnew[nic_op] = new_filled_nic_params
9954 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9956 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9957 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9958 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9960 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9962 self.warn.append(msg)
9964 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9965 if new_nic_mode == constants.NIC_MODE_ROUTED:
9966 if constants.INIC_IP in nic_dict:
9967 nic_ip = nic_dict[constants.INIC_IP]
9971 raise errors.OpPrereqError('Cannot set the nic ip to None'
9972 ' on a routed nic', errors.ECODE_INVAL)
9973 if constants.INIC_MAC in nic_dict:
9974 nic_mac = nic_dict[constants.INIC_MAC]
9976 raise errors.OpPrereqError('Cannot set the nic mac to None',
9978 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9979 # otherwise generate the mac
9980 nic_dict[constants.INIC_MAC] = \
9981 self.cfg.GenerateMAC(self.proc.GetECId())
9983 # or validate/reserve the current one
9985 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9986 except errors.ReservationError:
9987 raise errors.OpPrereqError("MAC address %s already in use"
9988 " in cluster" % nic_mac,
9989 errors.ECODE_NOTUNIQUE)
9992 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9993 raise errors.OpPrereqError("Disk operations not supported for"
9994 " diskless instances",
9996 for disk_op, _ in self.op.disks:
9997 if disk_op == constants.DDM_REMOVE:
9998 if len(instance.disks) == 1:
9999 raise errors.OpPrereqError("Cannot remove the last disk of"
10000 " an instance", errors.ECODE_INVAL)
10001 _CheckInstanceDown(self, instance, "cannot remove disks")
10003 if (disk_op == constants.DDM_ADD and
10004 len(instance.disks) >= constants.MAX_DISKS):
10005 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10006 " add more" % constants.MAX_DISKS,
10007 errors.ECODE_STATE)
10008 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10010 if disk_op < 0 or disk_op >= len(instance.disks):
10011 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10013 (disk_op, len(instance.disks)),
10014 errors.ECODE_INVAL)
10018 def _ConvertPlainToDrbd(self, feedback_fn):
10019 """Converts an instance from plain to drbd.
10022 feedback_fn("Converting template to drbd")
10023 instance = self.instance
10024 pnode = instance.primary_node
10025 snode = self.op.remote_node
10027 # create a fake disk info for _GenerateDiskTemplate
10028 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10029 for d in instance.disks]
10030 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10031 instance.name, pnode, [snode],
10032 disk_info, None, None, 0, feedback_fn)
10033 info = _GetInstanceInfoText(instance)
10034 feedback_fn("Creating aditional volumes...")
10035 # first, create the missing data and meta devices
10036 for disk in new_disks:
10037 # unfortunately this is... not too nice
10038 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10040 for child in disk.children:
10041 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10042 # at this stage, all new LVs have been created, we can rename the old ones
10044 feedback_fn("Renaming original volumes...")
10045 rename_list = [(o, n.children[0].logical_id)
10046 for (o, n) in zip(instance.disks, new_disks)]
10047 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10048 result.Raise("Failed to rename original LVs")
10050 feedback_fn("Initializing DRBD devices...")
10051 # all child devices are in place, we can now create the DRBD devices
10052 for disk in new_disks:
10053 for node in [pnode, snode]:
10054 f_create = node == pnode
10055 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10057 # at this point, the instance has been modified
10058 instance.disk_template = constants.DT_DRBD8
10059 instance.disks = new_disks
10060 self.cfg.Update(instance, feedback_fn)
10062 # disks are created, waiting for sync
10063 disk_abort = not _WaitForSync(self, instance)
10065 raise errors.OpExecError("There are some degraded disks for"
10066 " this instance, please cleanup manually")
10068 def _ConvertDrbdToPlain(self, feedback_fn):
10069 """Converts an instance from drbd to plain.
10072 instance = self.instance
10073 assert len(instance.secondary_nodes) == 1
10074 pnode = instance.primary_node
10075 snode = instance.secondary_nodes[0]
10076 feedback_fn("Converting template to plain")
10078 old_disks = instance.disks
10079 new_disks = [d.children[0] for d in old_disks]
10081 # copy over size and mode
10082 for parent, child in zip(old_disks, new_disks):
10083 child.size = parent.size
10084 child.mode = parent.mode
10086 # update instance structure
10087 instance.disks = new_disks
10088 instance.disk_template = constants.DT_PLAIN
10089 self.cfg.Update(instance, feedback_fn)
10091 feedback_fn("Removing volumes on the secondary node...")
10092 for disk in old_disks:
10093 self.cfg.SetDiskID(disk, snode)
10094 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10096 self.LogWarning("Could not remove block device %s on node %s,"
10097 " continuing anyway: %s", disk.iv_name, snode, msg)
10099 feedback_fn("Removing unneeded volumes on the primary node...")
10100 for idx, disk in enumerate(old_disks):
10101 meta = disk.children[1]
10102 self.cfg.SetDiskID(meta, pnode)
10103 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10105 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10106 " continuing anyway: %s", idx, pnode, msg)
10108 def Exec(self, feedback_fn):
10109 """Modifies an instance.
10111 All parameters take effect only at the next restart of the instance.
10114 # Process here the warnings from CheckPrereq, as we don't have a
10115 # feedback_fn there.
10116 for warn in self.warn:
10117 feedback_fn("WARNING: %s" % warn)
10120 instance = self.instance
10122 for disk_op, disk_dict in self.op.disks:
10123 if disk_op == constants.DDM_REMOVE:
10124 # remove the last disk
10125 device = instance.disks.pop()
10126 device_idx = len(instance.disks)
10127 for node, disk in device.ComputeNodeTree(instance.primary_node):
10128 self.cfg.SetDiskID(disk, node)
10129 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10131 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10132 " continuing anyway", device_idx, node, msg)
10133 result.append(("disk/%d" % device_idx, "remove"))
10134 elif disk_op == constants.DDM_ADD:
10136 if instance.disk_template in (constants.DT_FILE,
10137 constants.DT_SHARED_FILE):
10138 file_driver, file_path = instance.disks[0].logical_id
10139 file_path = os.path.dirname(file_path)
10141 file_driver = file_path = None
10142 disk_idx_base = len(instance.disks)
10143 new_disk = _GenerateDiskTemplate(self,
10144 instance.disk_template,
10145 instance.name, instance.primary_node,
10146 instance.secondary_nodes,
10150 disk_idx_base, feedback_fn)[0]
10151 instance.disks.append(new_disk)
10152 info = _GetInstanceInfoText(instance)
10154 logging.info("Creating volume %s for instance %s",
10155 new_disk.iv_name, instance.name)
10156 # Note: this needs to be kept in sync with _CreateDisks
10158 for node in instance.all_nodes:
10159 f_create = node == instance.primary_node
10161 _CreateBlockDev(self, node, instance, new_disk,
10162 f_create, info, f_create)
10163 except errors.OpExecError, err:
10164 self.LogWarning("Failed to create volume %s (%s) on"
10166 new_disk.iv_name, new_disk, node, err)
10167 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10168 (new_disk.size, new_disk.mode)))
10170 # change a given disk
10171 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10172 result.append(("disk.mode/%d" % disk_op,
10173 disk_dict[constants.IDISK_MODE]))
10175 if self.op.disk_template:
10176 r_shut = _ShutdownInstanceDisks(self, instance)
10178 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10179 " proceed with disk template conversion")
10180 mode = (instance.disk_template, self.op.disk_template)
10182 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10184 self.cfg.ReleaseDRBDMinors(instance.name)
10186 result.append(("disk_template", self.op.disk_template))
10189 for nic_op, nic_dict in self.op.nics:
10190 if nic_op == constants.DDM_REMOVE:
10191 # remove the last nic
10192 del instance.nics[-1]
10193 result.append(("nic.%d" % len(instance.nics), "remove"))
10194 elif nic_op == constants.DDM_ADD:
10195 # mac and bridge should be set by now
10196 mac = nic_dict[constants.INIC_MAC]
10197 ip = nic_dict.get(constants.INIC_IP, None)
10198 nicparams = self.nic_pinst[constants.DDM_ADD]
10199 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10200 instance.nics.append(new_nic)
10201 result.append(("nic.%d" % (len(instance.nics) - 1),
10202 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10203 (new_nic.mac, new_nic.ip,
10204 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10205 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10208 for key in (constants.INIC_MAC, constants.INIC_IP):
10209 if key in nic_dict:
10210 setattr(instance.nics[nic_op], key, nic_dict[key])
10211 if nic_op in self.nic_pinst:
10212 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10213 for key, val in nic_dict.iteritems():
10214 result.append(("nic.%s/%d" % (key, nic_op), val))
10217 if self.op.hvparams:
10218 instance.hvparams = self.hv_inst
10219 for key, val in self.op.hvparams.iteritems():
10220 result.append(("hv/%s" % key, val))
10223 if self.op.beparams:
10224 instance.beparams = self.be_inst
10225 for key, val in self.op.beparams.iteritems():
10226 result.append(("be/%s" % key, val))
10229 if self.op.os_name:
10230 instance.os = self.op.os_name
10233 if self.op.osparams:
10234 instance.osparams = self.os_inst
10235 for key, val in self.op.osparams.iteritems():
10236 result.append(("os/%s" % key, val))
10238 self.cfg.Update(instance, feedback_fn)
10242 _DISK_CONVERSIONS = {
10243 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10244 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
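# Illustrative sketch (not part of the original module): the free-memory check
# in CheckPrereq above reduces to simple arithmetic: the new be/memory value
# must fit into the primary node's free memory plus whatever the running
# instance already occupies there. As a standalone helper (amounts in MB):
def _ExampleMissingMemory(new_mem, current_mem, node_free_mem):
  """Sketch: MB of memory that would be missing after the change.

  A result <= 0 means the change is safe; a positive result is the
  shortfall that makes the modification refusable.

  """
  return new_mem - current_mem - node_free_mem

# e.g. growing an instance from 1024 to 4096 MB on a node with 2048 MB free
# leaves _ExampleMissingMemory(4096, 1024, 2048) == 1024 MB missing.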
10248 class LUBackupQuery(NoHooksLU):
10249 """Query the exports list
10254 def ExpandNames(self):
10255 self.needed_locks = {}
10256 self.share_locks[locking.LEVEL_NODE] = 1
10257 if not self.op.nodes:
10258 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10260 self.needed_locks[locking.LEVEL_NODE] = \
10261 _GetWantedNodes(self, self.op.nodes)
10263 def Exec(self, feedback_fn):
10264 """Compute the list of all the exported system images.
10267 @return: a dictionary with the structure node->(export-list)
10268 where export-list is a list of the instances exported on that node
10272 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10273 rpcresult = self.rpc.call_export_list(self.nodes)
10275 for node in rpcresult:
10276 if rpcresult[node].fail_msg:
10277 result[node] = False
10279 result[node] = rpcresult[node].payload
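# Illustrative sketch (not part of the original module): Exec above degrades a
# failed per-node RPC to a False entry instead of aborting, so one unreachable
# node does not hide the exports of the others. The same aggregation pattern
# over hypothetical result objects carrying fail_msg/payload:
def _ExampleCollectExports(rpcresult):
  """Sketch: maps each node to its export list, or False on failure."""
  result = {}
  for node, res in rpcresult.items():
    if res.fail_msg:
      result[node] = False
    else:
      result[node] = res.payload
  return result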
10284 class LUBackupPrepare(NoHooksLU):
10285 """Prepares an instance for an export and returns useful information.
10290 def ExpandNames(self):
10291 self._ExpandAndLockInstance()
10293 def CheckPrereq(self):
10294 """Check prerequisites.
10297 instance_name = self.op.instance_name
10299 self.instance = self.cfg.GetInstanceInfo(instance_name)
10300 assert self.instance is not None, \
10301 "Cannot retrieve locked instance %s" % self.op.instance_name
10302 _CheckNodeOnline(self, self.instance.primary_node)
10304 self._cds = _GetClusterDomainSecret()
10306 def Exec(self, feedback_fn):
10307 """Prepares an instance for an export.
10310 instance = self.instance
10312 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10313 salt = utils.GenerateSecret(8)
10315 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10316 result = self.rpc.call_x509_cert_create(instance.primary_node,
10317 constants.RIE_CERT_VALIDITY)
10318 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10320 (name, cert_pem) = result.payload
10322 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10326 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10327 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10329 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10335 class LUBackupExport(LogicalUnit):
10336 """Export an instance to an image in the cluster.
10339 HPATH = "instance-export"
10340 HTYPE = constants.HTYPE_INSTANCE
10343 def CheckArguments(self):
10344 """Check the arguments.
10347 self.x509_key_name = self.op.x509_key_name
10348 self.dest_x509_ca_pem = self.op.destination_x509_ca
10350 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10351 if not self.x509_key_name:
10352 raise errors.OpPrereqError("Missing X509 key name for encryption",
10353 errors.ECODE_INVAL)
10355 if not self.dest_x509_ca_pem:
10356 raise errors.OpPrereqError("Missing destination X509 CA",
10357 errors.ECODE_INVAL)
10359 def ExpandNames(self):
10360 self._ExpandAndLockInstance()
10362 # Lock all nodes for local exports
10363 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10364 # FIXME: lock only instance primary and destination node
10366 # Sad but true, for now we have to lock all nodes, as we don't know where
10367 # the previous export might be, and in this LU we search for it and
10368 # remove it from its current node. In the future we could fix this by:
10369 # - making a tasklet to search (share-lock all), then create the
10370 # new one, then one to remove, after
10371 # - removing the removal operation altogether
10372 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10374 def DeclareLocks(self, level):
10375 """Last minute lock declaration."""
10376 # All nodes are locked anyway, so nothing to do here.
10378 def BuildHooksEnv(self):
10379 """Build hooks env.
10381 This will run on the master, primary node and target node.
10385 "EXPORT_MODE": self.op.mode,
10386 "EXPORT_NODE": self.op.target_node,
10387 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10388 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10389 # TODO: Generic function for boolean env variables
10390 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10393 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10397 def BuildHooksNodes(self):
10398 """Build hooks nodes.
10401 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10403 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10404 nl.append(self.op.target_node)
10408 def CheckPrereq(self):
10409 """Check prerequisites.
10411 This checks that the instance and node names are valid.
10414 instance_name = self.op.instance_name
10416 self.instance = self.cfg.GetInstanceInfo(instance_name)
10417 assert self.instance is not None, \
10418 "Cannot retrieve locked instance %s" % self.op.instance_name
10419 _CheckNodeOnline(self, self.instance.primary_node)
10421 if (self.op.remove_instance and self.instance.admin_up and
10422 not self.op.shutdown):
10423 raise errors.OpPrereqError("Can not remove instance without shutting it"
10426 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10427 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10428 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10429 assert self.dst_node is not None
10431 _CheckNodeOnline(self, self.dst_node.name)
10432 _CheckNodeNotDrained(self, self.dst_node.name)
10435 self.dest_disk_info = None
10436 self.dest_x509_ca = None
10438 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10439 self.dst_node = None
10441 if len(self.op.target_node) != len(self.instance.disks):
10442 raise errors.OpPrereqError(("Received destination information for %s"
10443 " disks, but instance %s has %s disks") %
10444 (len(self.op.target_node), instance_name,
10445 len(self.instance.disks)),
10446 errors.ECODE_INVAL)
10448 cds = _GetClusterDomainSecret()
10450 # Check X509 key name
10452 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10453 except (TypeError, ValueError), err:
10454 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10456 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10457 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10458 errors.ECODE_INVAL)
10460 # Load and verify CA
10462 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10463 except OpenSSL.crypto.Error, err:
10464 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10465 (err, ), errors.ECODE_INVAL)
10467 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10468 if errcode is not None:
10469 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10470 (msg, ), errors.ECODE_INVAL)
10472 self.dest_x509_ca = cert
10474 # Verify target information
10476 for idx, disk_data in enumerate(self.op.target_node):
10478 (host, port, magic) = \
10479 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10480 except errors.GenericError, err:
10481 raise errors.OpPrereqError("Target info for disk %s: %s" %
10482 (idx, err), errors.ECODE_INVAL)
10484 disk_info.append((host, port, magic))
10486 assert len(disk_info) == len(self.op.target_node)
10487 self.dest_disk_info = disk_info
10490 raise errors.ProgrammerError("Unhandled export mode %r" %
10493 # instance disk type verification
10494 # TODO: Implement export support for file-based disks
10495 for disk in self.instance.disks:
10496 if disk.dev_type == constants.LD_FILE:
10497 raise errors.OpPrereqError("Export not supported for instances with"
10498 " file-based disks", errors.ECODE_INVAL)
10500 def _CleanupExports(self, feedback_fn):
10501 """Removes exports of current instance from all other nodes.
10503 If an instance in a cluster with nodes A..D was exported to node C, its
10504 exports will be removed from the nodes A, B and D.
10507 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10509 nodelist = self.cfg.GetNodeList()
10510 nodelist.remove(self.dst_node.name)
10512 # on one-node clusters nodelist will be empty after the removal;
10513 # if we proceeded, the backup would be removed because OpBackupQuery
10514 # substitutes an empty list with the full cluster node list.
10515 iname = self.instance.name
10517 feedback_fn("Removing old exports for instance %s" % iname)
10518 exportlist = self.rpc.call_export_list(nodelist)
10519 for node in exportlist:
10520 if exportlist[node].fail_msg:
10522 if iname in exportlist[node].payload:
10523 msg = self.rpc.call_export_remove(node, iname).fail_msg
10525 self.LogWarning("Could not remove older export for instance %s"
10526 " on node %s: %s", iname, node, msg)
10528 def Exec(self, feedback_fn):
10529 """Export an instance to an image in the cluster.
10532 assert self.op.mode in constants.EXPORT_MODES
10534 instance = self.instance
10535 src_node = instance.primary_node
10537 if self.op.shutdown:
10538 # shutdown the instance, but not the disks
10539 feedback_fn("Shutting down instance %s" % instance.name)
10540 result = self.rpc.call_instance_shutdown(src_node, instance,
10541 self.op.shutdown_timeout)
10542 # TODO: Maybe ignore failures if ignore_remove_failures is set
10543 result.Raise("Could not shutdown instance %s on"
10544 " node %s" % (instance.name, src_node))
10546 # set the disks ID correctly since call_instance_start needs the
10547 # correct drbd minor to create the symlinks
10548 for disk in instance.disks:
10549 self.cfg.SetDiskID(disk, src_node)
10551 activate_disks = (not instance.admin_up)
10554 # Activate the instance disks if we're exporting a stopped instance
10555 feedback_fn("Activating disks for %s" % instance.name)
10556 _StartInstanceDisks(self, instance, None)
10559 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10562 helper.CreateSnapshots()
10564 if (self.op.shutdown and instance.admin_up and
10565 not self.op.remove_instance):
10566 assert not activate_disks
10567 feedback_fn("Starting instance %s" % instance.name)
10568 result = self.rpc.call_instance_start(src_node, instance, None, None)
10569 msg = result.fail_msg
10571 feedback_fn("Failed to start instance: %s" % msg)
10572 _ShutdownInstanceDisks(self, instance)
10573 raise errors.OpExecError("Could not start instance: %s" % msg)
10575 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10576 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10577 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10578 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10579 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10581 (key_name, _, _) = self.x509_key_name
10584 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10587 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10588 key_name, dest_ca_pem,
10593 # Check for backwards compatibility
10594 assert len(dresults) == len(instance.disks)
10595 assert compat.all(isinstance(i, bool) for i in dresults), \
10596 "Not all results are boolean: %r" % dresults
10600 feedback_fn("Deactivating disks for %s" % instance.name)
10601 _ShutdownInstanceDisks(self, instance)
10603 if not (compat.all(dresults) and fin_resu):
10606 failures.append("export finalization")
10607 if not compat.all(dresults):
10608 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10610 failures.append("disk export: disk(s) %s" % fdsk)
10612 raise errors.OpExecError("Export failed, errors in %s" %
10613 utils.CommaJoin(failures))
10615 # At this point, the export was successful, we can cleanup/finish
10617 # Remove instance if requested
10618 if self.op.remove_instance:
10619 feedback_fn("Removing instance %s" % instance.name)
10620 _RemoveInstance(self, feedback_fn, instance,
10621 self.op.ignore_remove_failures)
10623 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10624 self._CleanupExports(feedback_fn)
10626 return fin_resu, dresults
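# Illustrative sketch (not part of the original module): _CleanupExports above
# targets "every node except the destination" and merely warns on per-node
# removal failures. Its node-selection core, including the one-node-cluster
# caveat from the comment in that method:
def _ExampleCleanupTargets(all_nodes, dst_node):
  """Sketch: nodes whose stale exports should be removed.

  May be empty on one-node clusters; callers must then skip the removal
  RPC instead of passing the empty list on (which would be substituted
  with the full node list).

  """
  return [node for node in all_nodes if node != dst_node]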
10629 class LUBackupRemove(NoHooksLU):
10630 """Remove exports related to the named instance.
10635 def ExpandNames(self):
10636 self.needed_locks = {}
10637 # We need all nodes to be locked in order for RemoveExport to work, but we
10638 # don't need to lock the instance itself, as nothing will happen to it (and
10639 # we can remove exports also for a removed instance)
10640 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10642 def Exec(self, feedback_fn):
10643 """Remove any export.
10646 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10647 # If the instance was not found we'll try with the name that was passed in.
10648 # This will only work if it was an FQDN, though.
10650 if not instance_name:
10652 instance_name = self.op.instance_name
10654 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10655 exportlist = self.rpc.call_export_list(locked_nodes)
10657 for node in exportlist:
10658 msg = exportlist[node].fail_msg
10660 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10662 if instance_name in exportlist[node].payload:
10664 result = self.rpc.call_export_remove(node, instance_name)
10665 msg = result.fail_msg
10667 logging.error("Could not remove export for instance %s"
10668 " on node %s: %s", instance_name, node, msg)
10670 if fqdn_warn and not found:
10671 feedback_fn("Export not found. If trying to remove an export belonging"
10672 " to a deleted instance please use its Fully Qualified"
10676 class LUGroupAdd(LogicalUnit):
10677 """Logical unit for creating node groups.
10680 HPATH = "group-add"
10681 HTYPE = constants.HTYPE_GROUP
10684 def ExpandNames(self):
10685 # We need the new group's UUID here so that we can create and acquire the
10686 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10687 # that it should not check whether the UUID exists in the configuration.
10688 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10689 self.needed_locks = {}
10690 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10692 def CheckPrereq(self):
10693 """Check prerequisites.
10695 This checks that the given group name is not an existing node group
10700 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10701 except errors.OpPrereqError:
10704 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10705 " node group (UUID: %s)" %
10706 (self.op.group_name, existing_uuid),
10707 errors.ECODE_EXISTS)
10709 if self.op.ndparams:
10710 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10712 def BuildHooksEnv(self):
10713 """Build hooks env.
10717 "GROUP_NAME": self.op.group_name,
10720 def BuildHooksNodes(self):
10721 """Build hooks nodes.
10724 mn = self.cfg.GetMasterNode()
10725 return ([mn], [mn])
10727 def Exec(self, feedback_fn):
10728 """Add the node group to the cluster.
10731 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10732 uuid=self.group_uuid,
10733 alloc_policy=self.op.alloc_policy,
10734 ndparams=self.op.ndparams)
10736 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10737 del self.remove_locks[locking.LEVEL_NODEGROUP]
10740 class LUGroupAssignNodes(NoHooksLU):
10741 """Logical unit for assigning nodes to groups.
10746 def ExpandNames(self):
10747 # These raise errors.OpPrereqError on their own:
10748 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10749 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10751 # We want to lock all the affected nodes and groups. We have readily
10752 # available the list of nodes, and the *destination* group. To gather the
10753 # list of "source" groups, we need to fetch node information.
10754 self.node_data = self.cfg.GetAllNodesInfo()
10755 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10756 affected_groups.add(self.group_uuid)
10758 self.needed_locks = {
10759 locking.LEVEL_NODEGROUP: list(affected_groups),
10760 locking.LEVEL_NODE: self.op.nodes,
10763 def CheckPrereq(self):
10764 """Check prerequisites.
10767 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10768 instance_data = self.cfg.GetAllInstancesInfo()
10770 if self.group is None:
10771 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10772 (self.op.group_name, self.group_uuid))
10774 (new_splits, previous_splits) = \
10775 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10776 for node in self.op.nodes],
10777 self.node_data, instance_data)
10780 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10782 if not self.op.force:
10783 raise errors.OpExecError("The following instances get split by this"
10784 " change and --force was not given: %s" %
10787 self.LogWarning("This operation will split the following instances: %s",
10790 if previous_splits:
10791 self.LogWarning("In addition, these already-split instances continue"
10792 " to be spit across groups: %s",
10793 utils.CommaJoin(utils.NiceSort(previous_splits)))
10795 def Exec(self, feedback_fn):
10796 """Assign nodes to a new group.
10799 for node in self.op.nodes:
10800 self.node_data[node].group = self.group_uuid
10802 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10805 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10806 """Check for split instances after a node assignment.
10808 This method considers a series of node assignments as an atomic operation,
10809 and returns information about split instances after applying the set of changes.
10812 In particular, it returns information about newly split instances, and
10813 instances that were already split, and remain so after the change.
10815 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
10818 @type changes: list of (node_name, new_group_uuid) pairs.
10819 @param changes: list of node assignments to consider.
10820 @param node_data: a dict with data for all nodes
10821 @param instance_data: a dict with all instances to consider
10822 @rtype: a two-tuple
10823 @return: a list of instances that were previously okay and result split as a
10824 consequence of this change, and a list of instances that were previously
10825 split and this change does not fix.
10828 changed_nodes = dict((node, group) for node, group in changes
10829 if node_data[node].group != group)
10831 all_split_instances = set()
10832 previously_split_instances = set()
10834 def InstanceNodes(instance):
10835 return [instance.primary_node] + list(instance.secondary_nodes)
10837 for inst in instance_data.values():
10838 if inst.disk_template not in constants.DTS_INT_MIRROR:
10841 instance_nodes = InstanceNodes(inst)
10843 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10844 previously_split_instances.add(inst.name)
10846 if len(set(changed_nodes.get(node, node_data[node].group)
10847 for node in instance_nodes)) > 1:
10848 all_split_instances.add(inst.name)
10850 return (list(all_split_instances - previously_split_instances),
10851 list(previously_split_instances & all_split_instances))
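# Illustrative sketch (not part of the original module): the split computation
# above can be exercised on plain dicts, with nodes mapped to group names and
# instances mapped to their node lists (hypothetical data layout):
def _ExampleSplitSets(changes, node_group, instance_nodes):
  """Sketch: returns (newly_split, still_split) instance name lists."""
  changed = dict((node, group) for node, group in changes
                 if node_group[node] != group)
  new_split = set()
  old_split = set()
  for inst, nodes in instance_nodes.items():
    if len(set(node_group[n] for n in nodes)) > 1:
      old_split.add(inst)
    if len(set(changed.get(n, node_group[n]) for n in nodes)) > 1:
      new_split.add(inst)
  return (sorted(new_split - old_split), sorted(new_split & old_split))

# e.g. moving only the primary node of a DRBD instance into another group
# makes that instance appear in the first ("newly split") list.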
10854 class _GroupQuery(_QueryBase):
10855 FIELDS = query.GROUP_FIELDS
10857 def ExpandNames(self, lu):
10858 lu.needed_locks = {}
10860 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10861 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10864 self.wanted = [name_to_uuid[name]
10865 for name in utils.NiceSort(name_to_uuid.keys())]
10867 # Accept the names given as either group names or UUIDs.
10870 all_uuid = frozenset(self._all_groups.keys())
10872 for name in self.names:
10873 if name in all_uuid:
10874 self.wanted.append(name)
10875 elif name in name_to_uuid:
10876 self.wanted.append(name_to_uuid[name])
10878 missing.append(name)
10881 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10882 errors.ECODE_NOENT)
10884 def DeclareLocks(self, lu, level):
10887 def _GetQueryData(self, lu):
10888 """Computes the list of node groups and their attributes.
10891 do_nodes = query.GQ_NODE in self.requested_data
10892 do_instances = query.GQ_INST in self.requested_data
10894 group_to_nodes = None
10895 group_to_instances = None
10897 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10898 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10899 # latter GetAllInstancesInfo() is not enough, for we have to go through
10900 # instance->node. Hence, we will need to process nodes even if we only need
10901 # instance information.
10902 if do_nodes or do_instances:
10903 all_nodes = lu.cfg.GetAllNodesInfo()
10904 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10907 for node in all_nodes.values():
10908 if node.group in group_to_nodes:
10909 group_to_nodes[node.group].append(node.name)
10910 node_to_group[node.name] = node.group
10913 all_instances = lu.cfg.GetAllInstancesInfo()
10914 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10916 for instance in all_instances.values():
10917 node = instance.primary_node
10918 if node in node_to_group:
10919 group_to_instances[node_to_group[node]].append(instance.name)
10922 # Do not pass on node information if it was not requested.
10923 group_to_nodes = None
10925 return query.GroupQueryData([self._all_groups[uuid]
10926 for uuid in self.wanted],
10927 group_to_nodes, group_to_instances)
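# Illustrative sketch (not part of the original module): as the comment in
# _GetQueryData explains, instances only know their nodes, so grouping
# instances needs an intermediate node->group map. Over toy dicts:
def _ExampleGroupMaps(wanted_groups, node_group, instance_pnode):
  """Sketch: returns (group->nodes, group->instances) for wanted UUIDs."""
  group_to_nodes = dict((group, []) for group in wanted_groups)
  node_to_group = {}
  for node, group in node_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
      node_to_group[node] = group
  group_to_instances = dict((group, []) for group in wanted_groups)
  for inst, pnode in instance_pnode.items():
    if pnode in node_to_group:
      group_to_instances[node_to_group[pnode]].append(inst)
  return (group_to_nodes, group_to_instances)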
10930 class LUGroupQuery(NoHooksLU):
10931 """Logical unit for querying node groups.
10936 def CheckArguments(self):
10937 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10938 self.op.output_fields, False)
10940 def ExpandNames(self):
10941 self.gq.ExpandNames(self)
10943 def Exec(self, feedback_fn):
10944 return self.gq.OldStyleQuery(self)
10947 class LUGroupSetParams(LogicalUnit):
10948 """Modifies the parameters of a node group.
10951 HPATH = "group-modify"
10952 HTYPE = constants.HTYPE_GROUP
10955 def CheckArguments(self):
10958 self.op.alloc_policy,
10961 if all_changes.count(None) == len(all_changes):
10962 raise errors.OpPrereqError("Please pass at least one modification",
10963 errors.ECODE_INVAL)
10965 def ExpandNames(self):
10966 # This raises errors.OpPrereqError on its own:
10967 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10969 self.needed_locks = {
10970 locking.LEVEL_NODEGROUP: [self.group_uuid],
10973 def CheckPrereq(self):
10974 """Check prerequisites.
10977 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10979 if self.group is None:
10980 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10981 (self.op.group_name, self.group_uuid))
10983 if self.op.ndparams:
10984 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10985 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10986 self.new_ndparams = new_ndparams
10988 def BuildHooksEnv(self):
10989 """Build hooks env.
10993 "GROUP_NAME": self.op.group_name,
10994 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10997 def BuildHooksNodes(self):
10998 """Build hooks nodes.
11001 mn = self.cfg.GetMasterNode()
11002 return ([mn], [mn])
11004 def Exec(self, feedback_fn):
11005 """Modifies the node group.
11010 if self.op.ndparams:
11011 self.group.ndparams = self.new_ndparams
11012 result.append(("ndparams", str(self.group.ndparams)))
11014 if self.op.alloc_policy:
11015 self.group.alloc_policy = self.op.alloc_policy
11017 self.cfg.Update(self.group, feedback_fn)
11022 class LUGroupRemove(LogicalUnit):
11023 HPATH = "group-remove"
11024 HTYPE = constants.HTYPE_GROUP
11027 def ExpandNames(self):
11028 # This raises errors.OpPrereqError on its own:
11029 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11030 self.needed_locks = {
11031 locking.LEVEL_NODEGROUP: [self.group_uuid],
11034 def CheckPrereq(self):
11035 """Check prerequisites.
11037 This checks that the given group name exists as a node group, that it is
11038 empty (i.e., contains no nodes), and that it is not the last group in the cluster.
11042 # Verify that the group is empty.
11043 group_nodes = [node.name
11044 for node in self.cfg.GetAllNodesInfo().values()
11045 if node.group == self.group_uuid]
11048 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11050 (self.op.group_name,
11051 utils.CommaJoin(utils.NiceSort(group_nodes))),
11052 errors.ECODE_STATE)
11054 # Verify the cluster would not be left group-less.
11055 if len(self.cfg.GetNodeGroupList()) == 1:
11056 raise errors.OpPrereqError("Group '%s' is the only group,"
11057 " cannot be removed" %
11058 self.op.group_name,
11059 errors.ECODE_STATE)
11061 def BuildHooksEnv(self):
11062 """Build hooks env.
11066 "GROUP_NAME": self.op.group_name,
11069 def BuildHooksNodes(self):
11070 """Build hooks nodes.
11073 mn = self.cfg.GetMasterNode()
11074 return ([mn], [mn])
11076 def Exec(self, feedback_fn):
11077 """Remove the node group.
11081 self.cfg.RemoveNodeGroup(self.group_uuid)
11082 except errors.ConfigurationError:
11083 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11084 (self.op.group_name, self.group_uuid))
11086 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11089 class LUGroupRename(LogicalUnit):
11090 HPATH = "group-rename"
11091 HTYPE = constants.HTYPE_GROUP
11094 def ExpandNames(self):
11095 # This raises errors.OpPrereqError on its own:
11096 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11098 self.needed_locks = {
11099 locking.LEVEL_NODEGROUP: [self.group_uuid],
11102 def CheckPrereq(self):
11103 """Check prerequisites.
11105 Ensures the requested new name is not yet in use.
11109 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11110 except errors.OpPrereqError:
11113 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11114 " node group (UUID: %s)" %
11115 (self.op.new_name, new_name_uuid),
11116 errors.ECODE_EXISTS)
11118 def BuildHooksEnv(self):
11119 """Build hooks env.
11123 "OLD_NAME": self.op.group_name,
11124 "NEW_NAME": self.op.new_name,
11127 def BuildHooksNodes(self):
11128 """Build hooks nodes.
11131 mn = self.cfg.GetMasterNode()
11133 all_nodes = self.cfg.GetAllNodesInfo()
11134 all_nodes.pop(mn, None)
11137 run_nodes.extend(node.name for node in all_nodes.values()
11138 if node.group == self.group_uuid)
11140 return (run_nodes, run_nodes)
11142 def Exec(self, feedback_fn):
11143 """Rename the node group.
11146 group = self.cfg.GetNodeGroup(self.group_uuid)
11149 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11150 (self.op.group_name, self.group_uuid))
11152 group.name = self.op.new_name
11153 self.cfg.Update(group, feedback_fn)
11155 return self.op.new_name
11158 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11159 """Generic tags LU.
11161 This is an abstract class which is the parent of all the other tags LUs.
11164 def ExpandNames(self):
11165 self.group_uuid = None
11166 self.needed_locks = {}
11167 if self.op.kind == constants.TAG_NODE:
11168 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11169 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11170 elif self.op.kind == constants.TAG_INSTANCE:
11171 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11172 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11173 elif self.op.kind == constants.TAG_NODEGROUP:
11174 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11176 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11177 # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
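

# Illustrative sketch (not part of the original module): the core of the tag
# search implemented by LUTagsSearch.Exec above, reduced to plain data. The
# (path, tags) pairs and the pattern are made up; the real code builds the
# pairs from the cluster configuration.
def _ExampleTagSearch():
  """Hypothetical demo; returns [("/nodes/node1", "production")].

  """
  rx = re.compile("^prod")
  tgts = [("/nodes/node1", ["production", "rack4"]),
          ("/instances/inst1", ["testing"])]
  results = []
  for path, tags in tgts:
    for tag in tags:
      if rx.search(tag):
        results.append((path, tag))
  return results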


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
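

# Illustrative sketch (not part of the original module): the set difference
# used by LUTagsDel.CheckPrereq above, with made-up tags. Asking to remove a
# tag that is not present yields a non-empty difference, which aborts the
# operation before anything is modified.
def _ExampleMissingTags():
  """Hypothetical demo; returns frozenset(["bar"]).

  """
  del_tags = frozenset(["foo", "bar"])
  cur_tags = set(["foo", "baz"])
  return del_tags - cur_tags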


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
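

# Illustrative sketch (not part of the original module): the iteration
# numbering used by LUTestDelay.Exec above. With repeat=0 the delay still
# runs exactly once, but without any logged iterations; otherwise each of
# the repeat runs is logged as "i/top_value".
def _ExampleDelayIterationLabels(repeat):
  """Hypothetical demo; for repeat=3 returns ["0/2", "1/2", "2/2"].

  """
  if repeat == 0:
    return []
  top_value = repeat - 1
  return ["%d/%d" % (i, top_value) for i in range(repeat)]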


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
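

# Illustrative sketch (not part of the original module): what the peer of
# LUTestJqueue._NotifyUsingSocket has to do. The socket path reaches the
# client via an ELOG_JQUEUE_TEST log entry; connecting unblocks the server's
# accept(), and closing the connection (EOF on the server's recv) confirms
# the notification.
def _ExampleJqueueTestClient(sockname):
  """Hypothetical client for the notification socket used above.

  """
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(sockname)
  finally:
    # Closing the connection acts as the confirmation
    sock.close()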


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text) that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
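
  @staticmethod
  def _ExampleFreeMemoryCorrection(be_memory, i_used_mem, memory_free):
    """Hypothetical helper (not part of the original module).

    Shows the arithmetic used by _ComputeDynamicNodeData above: if a primary
    instance currently uses less memory than its configured maximum (e.g. it
    is ballooned down or stopped), the difference is subtracted from the
    node's reported free memory so the allocator cannot over-commit it.
    E.g. be_memory=1024, i_used_mem=512, memory_free=4096 gives 3584.

    """
    return memory_free - max(0, be_memory - i_used_mem)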

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
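
  @staticmethod
  def _ExampleInputData():
    """Hypothetical sketch (not part of the original module).

    Shows the overall shape of C{in_data} as assembled by
    _ComputeClusterData and _BuildInputData above; all values here are made
    up, only the key names follow the code.

    """
    return {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": "cluster.example.com",
      "cluster_tags": [],
      "enabled_hypervisors": ["xen-pvm"],
      "nodegroups": {}, # group UUID -> {"name", "alloc_policy"}
      "nodes": {},      # node name -> static + dynamic node data
      "instances": {},  # instance name -> instance data
      "request": {
        "type": constants.IALLOCATOR_MODE_ALLOC,
        "name": "inst1.example.com",
        "required_nodes": 2,
        # remaining keys as produced by _AddNewInstance
        },
      }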

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
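

# Illustrative sketch (not part of the original module): how
# IAllocator._NodesToGroups resolves node names to a sorted list of unique
# group names, with made-up node and group data.
def _ExampleNodesToGroups():
  """Hypothetical demo; returns ["default"].

  """
  node2group = {"node1": "uuid-1", "node2": "uuid-1"}
  groups = {"uuid-1": {"name": "default"}}
  # "node3" is unknown and therefore ignored; the two known nodes map to a
  # single group
  return IAllocator._NodesToGroups(node2group, groups,
                                   ["node1", "node2", "node3"])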


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
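

# Illustrative usage sketch (not part of the original module): resolving a
# query resource to its implementation class; any name outside
# constants.QR_VIA_OP raises OpPrereqError instead.
def _ExampleQueryLookup():
  """Hypothetical demo; returns the _NodeQuery class.

  """
  return _GetQueryImplementation(constants.QR_NODE)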