# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""
# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module
import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
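
  As an illustration only (the opcode used here is arbitrary and the keyword
  argument is just a made-up extra return value), an LU's C{Exec} could end
  with::

    return ResultWithJobs([[opcodes.OpTestDelay(duration=10)]],
                          submitted="delay job")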

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None
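      # Acquire all node locks in shared mode (a sketch of the
      # self.share_locks mechanism described above)
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
      self.share_locks[locking.LEVEL_NODE] = 1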

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    # self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.
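
    A typical override, shown here only as a sketch (it assumes the node
    locks depend on the instance locks acquired at the previous level),
    would look like::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()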

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
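
    A minimal sketch of the intended usage from an instance LU::

      def ExpandNames(self):
        self._ExpandAndLockInstance()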

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to lock only some instances' nodes,
    or to lock only primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Constructor for Tasklet.

    """
    self.lu = lu

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
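
  For illustration only (the parameter names below are made up)::

    # Yields {'vcpus': 4, 'memory': 512}: 'memory' is kept, 'vcpus' is
    # updated, and 'acpi' is removed because of VALUE_DEFAULT.
    _GetUpdatedParams({"vcpus": 1, "memory": 512, "acpi": True},
                      {"vcpus": 4, "acpi": constants.VALUE_DEFAULT})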

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val

  return params_copy


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance
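
  The resulting dictionary, sketched here with made-up values, contains
  entries such as::

    {
      "INSTANCE_NAME": "instance1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MAC": "aa:00:00:35:6e:01",
    }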

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
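
  A typical call from an LU's CheckArguments, as a sketch (the slot names
  shown are only examples)::

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")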

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
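
    As an illustration (the node name and message are made up), the
    machine-parseable form built below looks like::

      ERROR:ENODELVM:node:node1.example.com:Can't get PV list from node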

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks PV list
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # known node object
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
      "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
                        for (files, fn) in [(files_all, None),
                                            (files_all_opt, None),
                                            (files_mc,
                                             lambda node: (node.master_candidate or
                                                           node.name == master_node)),
                                            (files_vm, lambda node: node.vm_capable)]
                        for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # Optional files must exist on all nodes or on none
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
1880 def _VerifyNodeOS(self, ninfo, nimg, base):
1881 """Verifies the node OS list.
1883 @type ninfo: L{objects.Node}
1884 @param ninfo: the node to check
1885 @param nimg: the node image object
1886 @param base: the 'template' node we match against (e.g. from the master)
1890 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1892 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1894 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", beautify_params(f_param),
1923 beautify_params(b_param))]:
1924 _ErrorIf(a != b, self.ENODEOS, node,
1925 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1926 kind, os_name, base.name,
1927 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1929 # check any missing OSes
1930 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1931 _ErrorIf(missing, self.ENODEOS, node,
1932 "OSes present on reference node %s but missing on this node: %s",
1933 base.name, utils.CommaJoin(missing))
1935 def _VerifyOob(self, ninfo, nresult):
1936 """Verifies out of band functionality of a node.
1938 @type ninfo: L{objects.Node}
1939 @param ninfo: the node to check
1940 @param nresult: the remote results for the node
1944 # We just have to verify the paths on master and/or master candidates
1945 # as the oob helper is invoked on the master
1946 if ((ninfo.master_candidate or ninfo.master_capable) and
1947 constants.NV_OOB_PATHS in nresult):
1948 for path_result in nresult[constants.NV_OOB_PATHS]:
1949 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1951 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1952 """Verifies and updates the node volume data.
1954 This function will update a L{NodeImage}'s internal structures
1955 with data from the remote call.
1957 @type ninfo: L{objects.Node}
1958 @param ninfo: the node to check
1959 @param nresult: the remote results for the node
1960 @param nimg: the node image object
1961 @param vg_name: the configured VG name
1965 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
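# start out pessimistic: lvm_fail stays True unless the node returned a
# valid LV dictionary below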
1967 nimg.lvm_fail = True
1968 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1971 elif isinstance(lvdata, basestring):
1972 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1973 utils.SafeEncode(lvdata))
1974 elif not isinstance(lvdata, dict):
1975 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1977 nimg.volumes = lvdata
1978 nimg.lvm_fail = False
1980 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1981 """Verifies and updates the node instance list.
1983 If the listing was successful, then updates this node's instance
1984 list. Otherwise, it marks the RPC call as failed for the instance list.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1990 @param nimg: the node image object
1993 idata = nresult.get(constants.NV_INSTANCELIST, None)
1994 test = not isinstance(idata, list)
1995 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1996 " (instancelist): %s", utils.SafeEncode(str(idata)))
1998 nimg.hyp_fail = True
2000 nimg.instances = idata
2002 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2003 """Verifies and computes a node information map
2005 @type ninfo: L{objects.Node}
2006 @param ninfo: the node to check
2007 @param nresult: the remote results for the node
2008 @param nimg: the node image object
2009 @param vg_name: the configured VG name
2013 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2015 # try to read free memory (from the hypervisor)
2016 hv_info = nresult.get(constants.NV_HVINFO, None)
2017 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2018 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2021 nimg.mfree = int(hv_info["memory_free"])
2022 except (ValueError, TypeError):
2023 _ErrorIf(True, self.ENODERPC, node,
2024 "node returned invalid nodeinfo, check hypervisor")
2026 # FIXME: devise a free space model for file based instances as well
2027 if vg_name is not None:
2028 test = (constants.NV_VGLIST not in nresult or
2029 vg_name not in nresult[constants.NV_VGLIST])
2030 _ErrorIf(test, self.ENODELVM, node,
2031 "node didn't return data for the volume group '%s'"
2032 " - it is either missing or broken", vg_name)
2035 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2036 except (ValueError, TypeError):
2037 _ErrorIf(True, self.ENODERPC, node,
2038 "node returned invalid LVM info, check LVM status")
2040 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2041 """Gets per-disk status information for all instances.
2043 @type nodelist: list of strings
2044 @param nodelist: Node names
2045 @type node_image: dict of (name, L{objects.Node})
2046 @param node_image: Node objects
2047 @type instanceinfo: dict of (name, L{objects.Instance})
2048 @param instanceinfo: Instance objects
2049 @rtype: {instance: {node: [(success, payload)]}}
2050 @return: a dictionary of per-instance dictionaries with nodes as
2051 keys and disk information as values; the disk information is a
2052 list of tuples (success, payload)
2055 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2058 node_disks_devonly = {}
2059 diskless_instances = set()
2060 diskless = constants.DT_DISKLESS
2062 for nname in nodelist:
2063 node_instances = list(itertools.chain(node_image[nname].pinst,
2064 node_image[nname].sinst))
2065 diskless_instances.update(inst for inst in node_instances
2066 if instanceinfo[inst].disk_template == diskless)
2067 disks = [(inst, disk)
2068 for inst in node_instances
2069 for disk in instanceinfo[inst].disks]
2072 # No need to collect data
2075 node_disks[nname] = disks
2077 # Creating copies as SetDiskID below will modify the objects and that can
2078 # lead to incorrect data returned from nodes
2079 devonly = [dev.Copy() for (_, dev) in disks]
2082 self.cfg.SetDiskID(dev, nname)
2084 node_disks_devonly[nname] = devonly
2086 assert len(node_disks) == len(node_disks_devonly)
2088 # Collect data from all nodes with disks
2089 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2092 assert len(result) == len(node_disks)
2096 for (nname, nres) in result.items():
2097 disks = node_disks[nname]
2100 # No data from this node
2101 data = len(disks) * [(False, "node offline")]
2104 _ErrorIf(msg, self.ENODERPC, nname,
2105 "while getting disk information: %s", msg)
2107 # No data from this node
2108 data = len(disks) * [(False, msg)]
2111 for idx, i in enumerate(nres.payload):
2112 if isinstance(i, (tuple, list)) and len(i) == 2:
2115 logging.warning("Invalid result from node %s, entry %d: %s",
2117 data.append((False, "Invalid result from the remote node"))
2119 for ((inst, _), status) in zip(disks, data):
2120 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2122 # Add empty entries for diskless instances.
2123 for inst in diskless_instances:
2124 assert inst not in instdisk
2127 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2128 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2129 compat.all(isinstance(s, (tuple, list)) and
2130 len(s) == 2 for s in statuses)
2131 for inst, nnames in instdisk.items()
2132 for nname, statuses in nnames.items())
2133 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2137 def _VerifyHVP(self, hvp_data):
2138 """Verifies locally the syntax of the hypervisor parameters.
2141 for item, hv_name, hv_params in hvp_data:
2142 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2145 hv_class = hypervisor.GetHypervisor(hv_name)
2146 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2147 hv_class.CheckParameterSyntax(hv_params)
2148 except errors.GenericError, err:
2149 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2151 def BuildHooksEnv(self):
2154 Cluster-Verify hooks are run only in the post phase; if they fail, their
2155 output is logged in the verify output and the verification fails.
2161 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2164 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2165 for node in cfg.GetAllNodesInfo().values())
2169 def BuildHooksNodes(self):
2170 """Build hooks nodes.
2173 return ([], self.cfg.GetNodeList())
2175 def Exec(self, feedback_fn):
2176 """Verify integrity of cluster, performing various test on nodes.
2179 # This method has too many local variables. pylint: disable-msg=R0914
2181 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2182 verbose = self.op.verbose
2183 self._feedback_fn = feedback_fn
2184 feedback_fn("* Verifying global settings")
2185 for msg in self.cfg.VerifyConfig():
2186 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2188 # Check the cluster certificates
2189 for cert_filename in constants.ALL_CERT_FILES:
2190 (errcode, msg) = _VerifyCertificate(cert_filename)
2191 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2193 vg_name = self.cfg.GetVGName()
2194 drbd_helper = self.cfg.GetDRBDHelper()
2195 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2196 cluster = self.cfg.GetClusterInfo()
2197 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2198 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2199 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2200 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2201 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2202 for iname in instancelist)
2203 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2204 i_non_redundant = [] # Non redundant instances
2205 i_non_a_balanced = [] # Non auto-balanced instances
2206 n_offline = 0 # Count of offline nodes
2207 n_drained = 0 # Count of nodes being drained
2208 node_vol_should = {}
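# node_vol_should maps node names to the LVs expected on them; it is filled
# via MapLVsByNode() for every instance and later used to detect orphan
# volumes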
2210 # FIXME: verify OS list
2213 filemap = _ComputeAncillaryFiles(cluster, False)
2215 # do local checksums
2216 master_node = self.master_node = self.cfg.GetMasterNode()
2217 master_ip = self.cfg.GetMasterIP()
2219 # Compute the set of hypervisor parameters
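# every hvp_data entry is a (source description, hypervisor name, parameters)
# tuple; the sources are the cluster defaults, the per-OS overrides and the
# per-instance parameters gathered below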
2221 for hv_name in hypervisors:
2222 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2223 for os_name, os_hvp in cluster.os_hvp.items():
2224 for hv_name, hv_params in os_hvp.items():
2227 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2228 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2229 # TODO: collapse identical parameter values in a single one
2230 for instance in instanceinfo.values():
2231 if not instance.hvparams:
2233 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2234 cluster.FillHV(instance)))
2235 # and verify them locally
2236 self._VerifyHVP(hvp_data)
2238 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2239 node_verify_param = {
2240 constants.NV_FILELIST:
2241 utils.UniqueSequence(filename
2242 for files in filemap
2243 for filename in files),
2244 constants.NV_NODELIST: [node.name for node in nodeinfo
2245 if not node.offline],
2246 constants.NV_HYPERVISOR: hypervisors,
2247 constants.NV_HVPARAMS: hvp_data,
2248 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2249 node.secondary_ip) for node in nodeinfo
2250 if not node.offline],
2251 constants.NV_INSTANCELIST: hypervisors,
2252 constants.NV_VERSION: None,
2253 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2254 constants.NV_NODESETUP: None,
2255 constants.NV_TIME: None,
2256 constants.NV_MASTERIP: (master_node, master_ip),
2257 constants.NV_OSLIST: None,
2258 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2261 if vg_name is not None:
2262 node_verify_param[constants.NV_VGLIST] = None
2263 node_verify_param[constants.NV_LVLIST] = vg_name
2264 node_verify_param[constants.NV_PVLIST] = [vg_name]
2265 node_verify_param[constants.NV_DRBDLIST] = None
2268 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2270 # Build our expected cluster state
2271 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2273 vm_capable=node.vm_capable))
2274 for node in nodeinfo)
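# collect the distinct OOB helper programs configured for the nodes; their
# presence is verified via NV_OOB_PATHS (the helper itself is invoked on the
# master)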
2278 for node in nodeinfo:
2279 path = _SupportsOob(self.cfg, node)
2280 if path and path not in oob_paths:
2281 oob_paths.append(path)
2284 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2286 for instance in instancelist:
2287 inst_config = instanceinfo[instance]
2289 for nname in inst_config.all_nodes:
2290 if nname not in node_image:
2292 gnode = self.NodeImage(name=nname)
2294 node_image[nname] = gnode
2296 inst_config.MapLVsByNode(node_vol_should)
2298 pnode = inst_config.primary_node
2299 node_image[pnode].pinst.append(instance)
2301 for snode in inst_config.secondary_nodes:
2302 nimg = node_image[snode]
2303 nimg.sinst.append(instance)
2304 if pnode not in nimg.sbp:
2305 nimg.sbp[pnode] = []
2306 nimg.sbp[pnode].append(instance)
2308 # At this point, we have the in-memory data structures complete,
2309 # except for the runtime information, which we'll gather next
2311 # Due to the way our RPC system works, exact response times cannot be
2312 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2313 # time before and after executing the request, we can at least have a time window.
2315 nvinfo_starttime = time.time()
2316 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2317 self.cfg.GetClusterName())
2318 nvinfo_endtime = time.time()
2320 all_drbd_map = self.cfg.ComputeDRBDMap()
2322 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2323 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2325 feedback_fn("* Verifying configuration file consistency")
2326 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2328 feedback_fn("* Verifying node status")
2332 for node_i in nodeinfo:
2334 nimg = node_image[node]
2338 feedback_fn("* Skipping offline node %s" % (node,))
2342 if node == master_node:
2344 elif node_i.master_candidate:
2345 ntype = "master candidate"
2346 elif node_i.drained:
2352 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2354 msg = all_nvinfo[node].fail_msg
2355 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2357 nimg.rpc_fail = True
2360 nresult = all_nvinfo[node].payload
2362 nimg.call_ok = self._VerifyNode(node_i, nresult)
2363 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2364 self._VerifyNodeNetwork(node_i, nresult)
2365 self._VerifyOob(node_i, nresult)
2368 self._VerifyNodeLVM(node_i, nresult, vg_name)
2369 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2372 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2373 self._UpdateNodeInstances(node_i, nresult, nimg)
2374 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2375 self._UpdateNodeOS(node_i, nresult, nimg)
2376 if not nimg.os_fail:
2377 if refos_img is None:
2379 self._VerifyNodeOS(node_i, nimg, refos_img)
2381 feedback_fn("* Verifying instance status")
2382 for instance in instancelist:
2384 feedback_fn("* Verifying instance %s" % instance)
2385 inst_config = instanceinfo[instance]
2386 self._VerifyInstance(instance, inst_config, node_image,
2388 inst_nodes_offline = []
2390 pnode = inst_config.primary_node
2391 pnode_img = node_image[pnode]
2392 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2393 self.ENODERPC, pnode, "instance %s, connection to"
2394 " primary node failed", instance)
2396 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2397 self.EINSTANCEBADNODE, instance,
2398 "instance is marked as running and lives on offline node %s",
2399 inst_config.primary_node)
2401 # If the instance is non-redundant we cannot survive losing its primary
2402 # node, so we are not N+1 compliant. On the other hand we have no disk
2403 # templates with more than one secondary, so that situation is not well supported either.
2405 # FIXME: does not support file-backed instances
2406 if not inst_config.secondary_nodes:
2407 i_non_redundant.append(instance)
2409 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2410 instance, "instance has multiple secondary nodes: %s",
2411 utils.CommaJoin(inst_config.secondary_nodes),
2412 code=self.ETYPE_WARNING)
2414 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2415 pnode = inst_config.primary_node
2416 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2417 instance_groups = {}
2419 for node in instance_nodes:
2420 instance_groups.setdefault(nodeinfo_byname[node].group,
2424 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2425 # Sort so that we always list the primary node first.
2426 for group, nodes in sorted(instance_groups.items(),
2427 key=lambda (_, nodes): pnode in nodes,
2430 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2431 instance, "instance has primary and secondary nodes in"
2432 " different groups: %s", utils.CommaJoin(pretty_list),
2433 code=self.ETYPE_WARNING)
2435 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2436 i_non_a_balanced.append(instance)
2438 for snode in inst_config.secondary_nodes:
2439 s_img = node_image[snode]
2440 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2441 "instance %s, connection to secondary node failed", instance)
2444 inst_nodes_offline.append(snode)
2446 # warn that the instance lives on offline nodes
2447 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2448 "instance has offline secondary node(s) %s",
2449 utils.CommaJoin(inst_nodes_offline))
2450 # ... or ghost/non-vm_capable nodes
2451 for node in inst_config.all_nodes:
2452 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2453 "instance lives on ghost node %s", node)
2454 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2455 instance, "instance lives on non-vm_capable node %s", node)
2457 feedback_fn("* Verifying orphan volumes")
2458 reserved = utils.FieldSet(*cluster.reserved_lvs)
2459 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2461 feedback_fn("* Verifying orphan instances")
2462 self._VerifyOrphanInstances(instancelist, node_image)
2464 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2465 feedback_fn("* Verifying N+1 Memory redundancy")
2466 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2468 feedback_fn("* Other Notes")
2470 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2471 % len(i_non_redundant))
2473 if i_non_a_balanced:
2474 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2475 % len(i_non_a_balanced))
2478 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2481 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2485 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2486 """Analyze the post-hooks' result
2488 This method analyses the hook result, handles it, and sends some
2489 nicely-formatted feedback back to the user.
2491 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2492 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2493 @param hooks_results: the results of the multi-node hooks rpc call
2494 @param feedback_fn: function used to send feedback back to the caller
2495 @param lu_result: previous Exec result
2496 @return: the new Exec result, based on the previous result
2500 # We only really run POST phase hooks, and are only interested in their results
2502 if phase == constants.HOOKS_PHASE_POST:
2503 # Used to change hooks' output to proper indentation
2504 feedback_fn("* Hooks Results")
2505 assert hooks_results, "invalid result from hooks"
2507 for node_name in hooks_results:
2508 res = hooks_results[node_name]
2510 test = msg and not res.offline
2511 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2512 "Communication failure in hooks execution: %s", msg)
2513 if res.offline or msg:
2514 # No need to investigate payload if node is offline or gave an error.
2515 # override manually lu_result here as _ErrorIf only
2516 # overrides self.bad
2519 for script, hkr, output in res.payload:
2520 test = hkr == constants.HKR_FAIL
2521 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2522 "Script %s failed, output:", script)
2524 output = self._HOOKS_INDENT_RE.sub(' ', output)
2525 feedback_fn("%s" % output)
2531 class LUClusterVerifyDisks(NoHooksLU):
2532 """Verifies the cluster disks status.
2537 def ExpandNames(self):
2538 self.needed_locks = {
2539 locking.LEVEL_NODE: locking.ALL_SET,
2540 locking.LEVEL_INSTANCE: locking.ALL_SET,
2542 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2544 def Exec(self, feedback_fn):
2545 """Verify integrity of cluster disks.
2547 @rtype: tuple of three items
2548 @return: a tuple of (dict of node-to-node_error, list of instances
2549 which need activate-disks, dict of instance: (node, volume) for
2553 result = res_nodes, res_instances, res_missing = {}, [], {}
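# res_nodes: node name -> error message for nodes that failed the LV query
# res_instances: names of instances that need their disks activated
# res_missing: instance name -> list of (node, volume) pairs that are missing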
2555 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2556 instances = self.cfg.GetAllInstancesInfo().values()
2559 for inst in instances:
2561 if not inst.admin_up:
2563 inst.MapLVsByNode(inst_lvs)
2564 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2565 for node, vol_list in inst_lvs.iteritems():
2566 for vol in vol_list:
2567 nv_dict[(node, vol)] = inst
2572 node_lvs = self.rpc.call_lv_list(nodes, [])
2573 for node, node_res in node_lvs.items():
2574 if node_res.offline:
2576 msg = node_res.fail_msg
2578 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2579 res_nodes[node] = msg
2582 lvs = node_res.payload
2583 for lv_name, (_, _, lv_online) in lvs.items():
2584 inst = nv_dict.pop((node, lv_name), None)
2585 if (not lv_online and inst is not None
2586 and inst.name not in res_instances):
2587 res_instances.append(inst.name)
2589 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2591 for key, inst in nv_dict.iteritems():
2592 if inst.name not in res_missing:
2593 res_missing[inst.name] = []
2594 res_missing[inst.name].append(key)
2599 class LUClusterRepairDiskSizes(NoHooksLU):
2600 """Verifies the cluster disks sizes.
2605 def ExpandNames(self):
2606 if self.op.instances:
2607 self.wanted_names = []
2608 for name in self.op.instances:
2609 full_name = _ExpandInstanceName(self.cfg, name)
2610 self.wanted_names.append(full_name)
2611 self.needed_locks = {
2612 locking.LEVEL_NODE: [],
2613 locking.LEVEL_INSTANCE: self.wanted_names,
2615 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2617 self.wanted_names = None
2618 self.needed_locks = {
2619 locking.LEVEL_NODE: locking.ALL_SET,
2620 locking.LEVEL_INSTANCE: locking.ALL_SET,
2622 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2624 def DeclareLocks(self, level):
2625 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2626 self._LockInstancesNodes(primary_only=True)
2628 def CheckPrereq(self):
2629 """Check prerequisites.
2631 This only checks the optional instance list against the existing names.
2634 if self.wanted_names is None:
2635 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2637 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2638 in self.wanted_names]
2640 def _EnsureChildSizes(self, disk):
2641 """Ensure children of the disk have the needed disk size.
2643 This is valid mainly for DRBD8 and fixes an issue where the
2644 children have smaller disk size.
2646 @param disk: an L{ganeti.objects.Disk} object
2649 if disk.dev_type == constants.LD_DRBD8:
2650 assert disk.children, "Empty children for DRBD8?"
2651 fchild = disk.children[0]
2652 mismatch = fchild.size < disk.size
2654 self.LogInfo("Child disk has size %d, parent %d, fixing",
2655 fchild.size, disk.size)
2656 fchild.size = disk.size
2658 # and we recurse on this child only, not on the metadev
2659 return self._EnsureChildSizes(fchild) or mismatch
2663 def Exec(self, feedback_fn):
2664 """Verify the size of cluster disks.
2667 # TODO: check child disks too
2668 # TODO: check differences in size between primary/secondary nodes
2670 for instance in self.wanted_instances:
2671 pnode = instance.primary_node
2672 if pnode not in per_node_disks:
2673 per_node_disks[pnode] = []
2674 for idx, disk in enumerate(instance.disks):
2675 per_node_disks[pnode].append((instance, idx, disk))
2678 for node, dskl in per_node_disks.items():
2679 newl = [v[2].Copy() for v in dskl]
2681 self.cfg.SetDiskID(dsk, node)
2682 result = self.rpc.call_blockdev_getsize(node, newl)
2684 self.LogWarning("Failure in blockdev_getsize call to node"
2685 " %s, ignoring", node)
2687 if len(result.payload) != len(dskl):
2688 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2689 " result.payload=%s", node, len(dskl), result.payload)
2690 self.LogWarning("Invalid result from node %s, ignoring node results",
2693 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2695 self.LogWarning("Disk %d of instance %s did not return size"
2696 " information, ignoring", idx, instance.name)
2698 if not isinstance(size, (int, long)):
2699 self.LogWarning("Disk %d of instance %s did not return valid"
2700 " size information, ignoring", idx, instance.name)
2703 if size != disk.size:
2704 self.LogInfo("Disk %d of instance %s has mismatched size,"
2705 " correcting: recorded %d, actual %d", idx,
2706 instance.name, disk.size, size)
2708 self.cfg.Update(instance, feedback_fn)
2709 changed.append((instance.name, idx, size))
2710 if self._EnsureChildSizes(disk):
2711 self.cfg.Update(instance, feedback_fn)
2712 changed.append((instance.name, idx, disk.size))
2716 class LUClusterRename(LogicalUnit):
2717 """Rename the cluster.
2720 HPATH = "cluster-rename"
2721 HTYPE = constants.HTYPE_CLUSTER
2723 def BuildHooksEnv(self):
2728 "OP_TARGET": self.cfg.GetClusterName(),
2729 "NEW_NAME": self.op.name,
2732 def BuildHooksNodes(self):
2733 """Build hooks nodes.
2736 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2738 def CheckPrereq(self):
2739 """Verify that the passed name is a valid one.
2742 hostname = netutils.GetHostname(name=self.op.name,
2743 family=self.cfg.GetPrimaryIPFamily())
2745 new_name = hostname.name
2746 self.ip = new_ip = hostname.ip
2747 old_name = self.cfg.GetClusterName()
2748 old_ip = self.cfg.GetMasterIP()
2749 if new_name == old_name and new_ip == old_ip:
2750 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2751 " cluster has changed",
2753 if new_ip != old_ip:
2754 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2755 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2756 " reachable on the network" %
2757 new_ip, errors.ECODE_NOTUNIQUE)
2759 self.op.name = new_name
2761 def Exec(self, feedback_fn):
2762 """Rename the cluster.
2765 clustername = self.op.name
2768 # shutdown the master IP
2769 master = self.cfg.GetMasterNode()
2770 result = self.rpc.call_node_stop_master(master, False)
2771 result.Raise("Could not disable the master role")
2774 cluster = self.cfg.GetClusterInfo()
2775 cluster.cluster_name = clustername
2776 cluster.master_ip = ip
2777 self.cfg.Update(cluster, feedback_fn)
2779 # update the known hosts file
2780 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2781 node_list = self.cfg.GetOnlineNodeList()
2783 node_list.remove(master)
2786 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2788 result = self.rpc.call_node_start_master(master, False, False)
2789 msg = result.fail_msg
2791 self.LogWarning("Could not re-enable the master role on"
2792 " the master, please restart manually: %s", msg)
2797 class LUClusterSetParams(LogicalUnit):
2798 """Change the parameters of the cluster.
2801 HPATH = "cluster-modify"
2802 HTYPE = constants.HTYPE_CLUSTER
2805 def CheckArguments(self):
2809 if self.op.uid_pool:
2810 uidpool.CheckUidPool(self.op.uid_pool)
2812 if self.op.add_uids:
2813 uidpool.CheckUidPool(self.op.add_uids)
2815 if self.op.remove_uids:
2816 uidpool.CheckUidPool(self.op.remove_uids)
2818 def ExpandNames(self):
2819 # FIXME: in the future maybe other cluster params won't require checking on
2820 # all nodes to be modified.
2821 self.needed_locks = {
2822 locking.LEVEL_NODE: locking.ALL_SET,
2824 self.share_locks[locking.LEVEL_NODE] = 1
2826 def BuildHooksEnv(self):
2831 "OP_TARGET": self.cfg.GetClusterName(),
2832 "NEW_VG_NAME": self.op.vg_name,
2835 def BuildHooksNodes(self):
2836 """Build hooks nodes.
2839 mn = self.cfg.GetMasterNode()
2842 def CheckPrereq(self):
2843 """Check prerequisites.
2845 This checks whether the given params don't conflict and
2846 if the given volume group is valid.
2849 if self.op.vg_name is not None and not self.op.vg_name:
2850 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2851 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2852 " instances exist", errors.ECODE_INVAL)
2854 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2855 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2856 raise errors.OpPrereqError("Cannot disable drbd helper while"
2857 " drbd-based instances exist",
2860 node_list = self.acquired_locks[locking.LEVEL_NODE]
2862 # if vg_name not None, checks given volume group on all nodes
2864 vglist = self.rpc.call_vg_list(node_list)
2865 for node in node_list:
2866 msg = vglist[node].fail_msg
2868 # ignoring down node
2869 self.LogWarning("Error while gathering data on node %s"
2870 " (ignoring node): %s", node, msg)
2872 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2874 constants.MIN_VG_SIZE)
2876 raise errors.OpPrereqError("Error on node '%s': %s" %
2877 (node, vgstatus), errors.ECODE_ENVIRON)
2879 if self.op.drbd_helper:
2880 # checks given drbd helper on all nodes
2881 helpers = self.rpc.call_drbd_helper(node_list)
2882 for node in node_list:
2883 ninfo = self.cfg.GetNodeInfo(node)
2885 self.LogInfo("Not checking drbd helper on offline node %s", node)
2887 msg = helpers[node].fail_msg
2889 raise errors.OpPrereqError("Error checking drbd helper on node"
2890 " '%s': %s" % (node, msg),
2891 errors.ECODE_ENVIRON)
2892 node_helper = helpers[node].payload
2893 if node_helper != self.op.drbd_helper:
2894 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2895 (node, node_helper), errors.ECODE_ENVIRON)
2897 self.cluster = cluster = self.cfg.GetClusterInfo()
2898 # validate params changes
2899 if self.op.beparams:
2900 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2901 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2903 if self.op.ndparams:
2904 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2905 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2907 # TODO: we need a more general way to handle resetting
2908 # cluster-level parameters to default values
2909 if self.new_ndparams["oob_program"] == "":
2910 self.new_ndparams["oob_program"] = \
2911 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2913 if self.op.nicparams:
2914 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2915 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2916 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2919 # check all instances for consistency
2920 for instance in self.cfg.GetAllInstancesInfo().values():
2921 for nic_idx, nic in enumerate(instance.nics):
2922 params_copy = copy.deepcopy(nic.nicparams)
2923 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2925 # check parameter syntax
2927 objects.NIC.CheckParameterSyntax(params_filled)
2928 except errors.ConfigurationError, err:
2929 nic_errors.append("Instance %s, nic/%d: %s" %
2930 (instance.name, nic_idx, err))
2932 # if we're moving instances to routed, check that they have an ip
2933 target_mode = params_filled[constants.NIC_MODE]
2934 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2935 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2936 (instance.name, nic_idx))
2938 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2939 "\n".join(nic_errors))
2941 # hypervisor list/parameters
2942 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2943 if self.op.hvparams:
2944 for hv_name, hv_dict in self.op.hvparams.items():
2945 if hv_name not in self.new_hvparams:
2946 self.new_hvparams[hv_name] = hv_dict
2948 self.new_hvparams[hv_name].update(hv_dict)
2950 # os hypervisor parameters
2951 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2953 for os_name, hvs in self.op.os_hvp.items():
2954 if os_name not in self.new_os_hvp:
2955 self.new_os_hvp[os_name] = hvs
2957 for hv_name, hv_dict in hvs.items():
2958 if hv_name not in self.new_os_hvp[os_name]:
2959 self.new_os_hvp[os_name][hv_name] = hv_dict
2961 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2964 self.new_osp = objects.FillDict(cluster.osparams, {})
2965 if self.op.osparams:
2966 for os_name, osp in self.op.osparams.items():
2967 if os_name not in self.new_osp:
2968 self.new_osp[os_name] = {}
2970 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2973 if not self.new_osp[os_name]:
2974 # we removed all parameters
2975 del self.new_osp[os_name]
2977 # check the parameter validity (remote check)
2978 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2979 os_name, self.new_osp[os_name])
2981 # changes to the hypervisor list
2982 if self.op.enabled_hypervisors is not None:
2983 self.hv_list = self.op.enabled_hypervisors
2984 for hv in self.hv_list:
2985 # if the hypervisor doesn't already exist in the cluster
2986 # hvparams, we initialize it to empty, and then (in both
2987 # cases) we make sure to fill the defaults, as we might not
2988 # have a complete defaults list if the hypervisor wasn't enabled before
2990 if hv not in new_hvp:
2992 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2993 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2995 self.hv_list = cluster.enabled_hypervisors
2997 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2998 # either the enabled list has changed, or the parameters have, validate
2999 for hv_name, hv_params in self.new_hvparams.items():
3000 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3001 (self.op.enabled_hypervisors and
3002 hv_name in self.op.enabled_hypervisors)):
3003 # either this is a new hypervisor, or its parameters have changed
3004 hv_class = hypervisor.GetHypervisor(hv_name)
3005 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3006 hv_class.CheckParameterSyntax(hv_params)
3007 _CheckHVParams(self, node_list, hv_name, hv_params)
3010 # no need to check any newly-enabled hypervisors, since the
3011 # defaults have already been checked in the above code-block
3012 for os_name, os_hvp in self.new_os_hvp.items():
3013 for hv_name, hv_params in os_hvp.items():
3014 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3015 # we need to fill in the new os_hvp on top of the actual hv_p
3016 cluster_defaults = self.new_hvparams.get(hv_name, {})
3017 new_osp = objects.FillDict(cluster_defaults, hv_params)
3018 hv_class = hypervisor.GetHypervisor(hv_name)
3019 hv_class.CheckParameterSyntax(new_osp)
3020 _CheckHVParams(self, node_list, hv_name, new_osp)
3022 if self.op.default_iallocator:
3023 alloc_script = utils.FindFile(self.op.default_iallocator,
3024 constants.IALLOCATOR_SEARCH_PATH,
3026 if alloc_script is None:
3027 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3028 " specified" % self.op.default_iallocator,
3031 def Exec(self, feedback_fn):
3032 """Change the parameters of the cluster.
3035 if self.op.vg_name is not None:
3036 new_volume = self.op.vg_name
3039 if new_volume != self.cfg.GetVGName():
3040 self.cfg.SetVGName(new_volume)
3042 feedback_fn("Cluster LVM configuration already in desired"
3043 " state, not changing")
3044 if self.op.drbd_helper is not None:
3045 new_helper = self.op.drbd_helper
3048 if new_helper != self.cfg.GetDRBDHelper():
3049 self.cfg.SetDRBDHelper(new_helper)
3051 feedback_fn("Cluster DRBD helper already in desired state,"
3053 if self.op.hvparams:
3054 self.cluster.hvparams = self.new_hvparams
3056 self.cluster.os_hvp = self.new_os_hvp
3057 if self.op.enabled_hypervisors is not None:
3058 self.cluster.hvparams = self.new_hvparams
3059 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3060 if self.op.beparams:
3061 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3062 if self.op.nicparams:
3063 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3064 if self.op.osparams:
3065 self.cluster.osparams = self.new_osp
3066 if self.op.ndparams:
3067 self.cluster.ndparams = self.new_ndparams
3069 if self.op.candidate_pool_size is not None:
3070 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3071 # we need to update the pool size here, otherwise the save will fail
3072 _AdjustCandidatePool(self, [])
3074 if self.op.maintain_node_health is not None:
3075 self.cluster.maintain_node_health = self.op.maintain_node_health
3077 if self.op.prealloc_wipe_disks is not None:
3078 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3080 if self.op.add_uids is not None:
3081 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3083 if self.op.remove_uids is not None:
3084 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3086 if self.op.uid_pool is not None:
3087 self.cluster.uid_pool = self.op.uid_pool
3089 if self.op.default_iallocator is not None:
3090 self.cluster.default_iallocator = self.op.default_iallocator
3092 if self.op.reserved_lvs is not None:
3093 self.cluster.reserved_lvs = self.op.reserved_lvs
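# helper_os applies the DDM_ADD/DDM_REMOVE modifications from the opcode to
# a cluster-level OS name list; it is used below for the hidden and
# blacklisted OS lists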
3095 def helper_os(aname, mods, desc):
3097 lst = getattr(self.cluster, aname)
3098 for key, val in mods:
3099 if key == constants.DDM_ADD:
3101 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3104 elif key == constants.DDM_REMOVE:
3108 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3110 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3112 if self.op.hidden_os:
3113 helper_os("hidden_os", self.op.hidden_os, "hidden")
3115 if self.op.blacklisted_os:
3116 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3118 if self.op.master_netdev:
3119 master = self.cfg.GetMasterNode()
3120 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3121 self.cluster.master_netdev)
3122 result = self.rpc.call_node_stop_master(master, False)
3123 result.Raise("Could not disable the master ip")
3124 feedback_fn("Changing master_netdev from %s to %s" %
3125 (self.cluster.master_netdev, self.op.master_netdev))
3126 self.cluster.master_netdev = self.op.master_netdev
3128 self.cfg.Update(self.cluster, feedback_fn)
3130 if self.op.master_netdev:
3131 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3132 self.op.master_netdev)
3133 result = self.rpc.call_node_start_master(master, False, False)
3135 self.LogWarning("Could not re-enable the master ip on"
3136 " the master, please restart manually: %s",
3140 def _UploadHelper(lu, nodes, fname):
3141 """Helper for uploading a file and showing warnings.
3144 if os.path.exists(fname):
3145 result = lu.rpc.call_upload_file(nodes, fname)
3146 for to_node, to_result in result.items():
3147 msg = to_result.fail_msg
3149 msg = ("Copy of file %s to node %s failed: %s" %
3150 (fname, to_node, msg))
3151 lu.proc.LogWarning(msg)
3154 def _ComputeAncillaryFiles(cluster, redist):
3155 """Compute files external to Ganeti which need to be consistent.
3157 @type redist: boolean
3158 @param redist: Whether to include files which need to be redistributed
3161 # Compute files for all nodes
3163 constants.SSH_KNOWN_HOSTS_FILE,
3164 constants.CONFD_HMAC_KEY,
3165 constants.CLUSTER_DOMAIN_SECRET_FILE,
3169 files_all.update(constants.ALL_CERT_FILES)
3170 files_all.update(ssconf.SimpleStore().GetFileList())
3172 if cluster.modify_etc_hosts:
3173 files_all.add(constants.ETC_HOSTS)
3175 # Files which must either exist on all nodes or on none
3176 files_all_opt = set([
3177 constants.RAPI_USERS_FILE,
3180 # Files which should only be on master candidates
3183 files_mc.add(constants.CLUSTER_CONF_FILE)
3185 # Files which should only be on VM-capable nodes
3186 files_vm = set(filename
3187 for hv_name in cluster.enabled_hypervisors
3188 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3190 # Filenames must be unique
3191 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3192 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3193 "Found file listed in more than one file list"
3195 return (files_all, files_all_opt, files_mc, files_vm)
3198 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3199 """Distribute additional files which are part of the cluster configuration.
3201 ConfigWriter takes care of distributing the config and ssconf files, but
3202 there are more files which should be distributed to all nodes. This function
3203 makes sure those are copied.
3205 @param lu: calling logical unit
3206 @param additional_nodes: list of nodes not in the config to distribute to
3207 @type additional_vm: boolean
3208 @param additional_vm: whether the additional nodes are vm-capable or not
3211 # Gather target nodes
3212 cluster = lu.cfg.GetClusterInfo()
3213 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3215 online_nodes = lu.cfg.GetOnlineNodeList()
3216 vm_nodes = lu.cfg.GetVmCapableNodeList()
3218 if additional_nodes is not None:
3219 online_nodes.extend(additional_nodes)
3221 vm_nodes.extend(additional_nodes)
3223 # Never distribute to master node
3224 for nodelist in [online_nodes, vm_nodes]:
3225 if master_info.name in nodelist:
3226 nodelist.remove(master_info.name)
3229 (files_all, files_all_opt, files_mc, files_vm) = \
3230 _ComputeAncillaryFiles(cluster, True)
3232 # Never re-distribute configuration file from here
3233 assert not (constants.CLUSTER_CONF_FILE in files_all or
3234 constants.CLUSTER_CONF_FILE in files_vm)
3235 assert not files_mc, "Master candidates not handled in this function"
3238 (online_nodes, files_all),
3239 (online_nodes, files_all_opt),
3240 (vm_nodes, files_vm),
3244 for (node_list, files) in filemap:
3246 _UploadHelper(lu, node_list, fname)
3249 class LUClusterRedistConf(NoHooksLU):
3250 """Force the redistribution of cluster configuration.
3252 This is a very simple LU.
3257 def ExpandNames(self):
3258 self.needed_locks = {
3259 locking.LEVEL_NODE: locking.ALL_SET,
3261 self.share_locks[locking.LEVEL_NODE] = 1
3263 def Exec(self, feedback_fn):
3264 """Redistribute the configuration.
3267 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3268 _RedistributeAncillaryFiles(self)
3271 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3272 """Sleep and poll for an instance's disk to sync.
3275 if not instance.disks or disks is not None and not disks:
3278 disks = _ExpandCheckDisks(instance, disks)
3281 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3283 node = instance.primary_node
3286 lu.cfg.SetDiskID(dev, node)
3288 # TODO: Convert to utils.Retry
3291 degr_retries = 10 # in seconds, as we sleep 1 second each time
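# main polling loop: query the mirror status on the primary node and report
# progress until every disk is done; a degraded state right at the end only
# triggers a few extra retries to rule out transient glitches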
3295 cumul_degraded = False
3296 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3297 msg = rstats.fail_msg
3299 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3302 raise errors.RemoteError("Can't contact node %s for mirror data,"
3303 " aborting." % node)
3306 rstats = rstats.payload
3308 for i, mstat in enumerate(rstats):
3310 lu.LogWarning("Can't compute data for node %s/%s",
3311 node, disks[i].iv_name)
3314 cumul_degraded = (cumul_degraded or
3315 (mstat.is_degraded and mstat.sync_percent is None))
3316 if mstat.sync_percent is not None:
3318 if mstat.estimated_time is not None:
3319 rem_time = ("%s remaining (estimated)" %
3320 utils.FormatSeconds(mstat.estimated_time))
3321 max_time = mstat.estimated_time
3323 rem_time = "no time estimate"
3324 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3325 (disks[i].iv_name, mstat.sync_percent, rem_time))
3327 # if we're done but degraded, let's do a few small retries, to
3328 # make sure we see a stable and not transient situation; therefore
3329 # we force restart of the loop
3330 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3331 logging.info("Degraded disks found, %d retries left", degr_retries)
3339 time.sleep(min(60, max_time))
3342 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3343 return not cumul_degraded
3346 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3347 """Check that mirrors are not degraded.
3349 The ldisk parameter, if True, will change the test from the
3350 is_degraded attribute (which represents overall non-ok status for
3351 the device(s)) to the ldisk (representing the local storage status).
3354 lu.cfg.SetDiskID(dev, node)
3358 if on_primary or dev.AssembleOnSecondary():
3359 rstats = lu.rpc.call_blockdev_find(node, dev)
3360 msg = rstats.fail_msg
3362 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3364 elif not rstats.payload:
3365 lu.LogWarning("Can't find disk on node %s", node)
3369 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3371 result = result and not rstats.payload.is_degraded
3374 for child in dev.children:
3375 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3380 class LUOobCommand(NoHooksLU):
3381 """Logical unit for OOB handling.
3385 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3387 def CheckPrereq(self):
3388 """Check prerequisites.
3391 - the node exists in the configuration
3394 Any errors are signaled by raising errors.OpPrereqError.
3398 self.master_node = self.cfg.GetMasterNode()
3400 assert self.op.power_delay >= 0.0
3402 if self.op.node_names:
3403 if self.op.command in self._SKIP_MASTER:
3404 if self.master_node in self.op.node_names:
3405 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3406 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3408 if master_oob_handler:
3409 additional_text = ("Run '%s %s %s' if you want to operate on the"
3410 " master regardless") % (master_oob_handler,
3414 additional_text = "The master node does not support out-of-band"
3416 raise errors.OpPrereqError(("Operating on the master node %s is not"
3417 " allowed for %s\n%s") %
3418 (self.master_node, self.op.command,
3419 additional_text), errors.ECODE_INVAL)
3421 self.op.node_names = self.cfg.GetNodeList()
3422 if self.op.command in self._SKIP_MASTER:
3423 self.op.node_names.remove(self.master_node)
3425 if self.op.command in self._SKIP_MASTER:
3426 assert self.master_node not in self.op.node_names
3428 for node_name in self.op.node_names:
3429 node = self.cfg.GetNodeInfo(node_name)
3432 raise errors.OpPrereqError("Node %s not found" % node_name,
3435 self.nodes.append(node)
3437 if (not self.op.ignore_status and
3438 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3439 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3440 " not marked offline") % node_name,
3443 def ExpandNames(self):
3444 """Gather locks we need.
3447 if self.op.node_names:
3448 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3449 for name in self.op.node_names]
3450 lock_names = self.op.node_names
3452 lock_names = locking.ALL_SET
3454 self.needed_locks = {
3455 locking.LEVEL_NODE: lock_names,
3458 def Exec(self, feedback_fn):
3459 """Execute OOB and return result if we expect any.
3462 master_node = self.master_node
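# each node contributes one list of (status, data) tuples to the overall
# result; the first entry always carries the node's own name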
3465 for idx, node in enumerate(self.nodes):
3466 node_entry = [(constants.RS_NORMAL, node.name)]
3467 ret.append(node_entry)
3469 oob_program = _SupportsOob(self.cfg, node)
3472 node_entry.append((constants.RS_UNAVAIL, None))
3475 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3476 self.op.command, oob_program, node.name)
3477 result = self.rpc.call_run_oob(master_node, oob_program,
3478 self.op.command, node.name,
3482 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3483 node.name, result.fail_msg)
3484 node_entry.append((constants.RS_NODATA, None))
3487 self._CheckPayload(result)
3488 except errors.OpExecError, err:
3489 self.LogWarning("The payload returned by '%s' is not valid: %s",
3491 node_entry.append((constants.RS_NODATA, None))
3493 if self.op.command == constants.OOB_HEALTH:
3494 # For health we should log important events
3495 for item, status in result.payload:
3496 if status in [constants.OOB_STATUS_WARNING,
3497 constants.OOB_STATUS_CRITICAL]:
3498 self.LogWarning("On node '%s' item '%s' has status '%s'",
3499 node.name, item, status)
3501 if self.op.command == constants.OOB_POWER_ON:
3503 elif self.op.command == constants.OOB_POWER_OFF:
3504 node.powered = False
3505 elif self.op.command == constants.OOB_POWER_STATUS:
3506 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3507 if powered != node.powered:
3508 logging.warning(("Recorded power state (%s) of node '%s' does not"
3509 " match actual power state (%s)"), node.powered,
3512 # For configuration changing commands we should update the node
3513 if self.op.command in (constants.OOB_POWER_ON,
3514 constants.OOB_POWER_OFF):
3515 self.cfg.Update(node, feedback_fn)
3517 node_entry.append((constants.RS_NORMAL, result.payload))
3519 if (self.op.command == constants.OOB_POWER_ON and
3520 idx < len(self.nodes) - 1):
3521 time.sleep(self.op.power_delay)
3525 def _CheckPayload(self, result):
3526 """Checks if the payload is valid.
3528 @param result: RPC result
3529 @raises errors.OpExecError: If payload is not valid
3533 if self.op.command == constants.OOB_HEALTH:
3534 if not isinstance(result.payload, list):
3535 errs.append("command 'health' is expected to return a list but got %s" %
3536 type(result.payload))
3538 for item, status in result.payload:
3539 if status not in constants.OOB_STATUSES:
3540 errs.append("health item '%s' has invalid status '%s'" %
3543 if self.op.command == constants.OOB_POWER_STATUS:
3544 if not isinstance(result.payload, dict):
3545 errs.append("power-status is expected to return a dict but got %s" %
3546 type(result.payload))
3548 if self.op.command in [
3549 constants.OOB_POWER_ON,
3550 constants.OOB_POWER_OFF,
3551 constants.OOB_POWER_CYCLE,
3553 if result.payload is not None:
3554 errs.append("%s is expected to not return payload but got '%s'" %
3555 (self.op.command, result.payload))
3558 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3559 utils.CommaJoin(errs))
3561 class _OsQuery(_QueryBase):
3562 FIELDS = query.OS_FIELDS
3564 def ExpandNames(self, lu):
3565 # Lock all nodes in shared mode
3566 # Temporary removal of locks, should be reverted later
3567 # TODO: reintroduce locks when they are lighter-weight
3568 lu.needed_locks = {}
3569 #self.share_locks[locking.LEVEL_NODE] = 1
3570 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3572 # The following variables interact with _QueryBase._GetNames
3574 self.wanted = self.names
3576 self.wanted = locking.ALL_SET
3578 self.do_locking = self.use_locking
3580 def DeclareLocks(self, lu, level):
3584 def _DiagnoseByOS(rlist):
3585 """Remaps a per-node return list into an a per-os per-node dictionary
3587 @param rlist: a map with node names as keys and OS objects as values
3590 @return: a dictionary with osnames as keys and as value another
3591 map, with nodes as keys and tuples of (path, status, diagnose,
3592 variants, parameters, api_versions) as values, eg::
3594 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3595 (/srv/..., False, "invalid api")],
3596 "node2": [(/srv/..., True, "", [], [])]}
3601 # we build here the list of nodes that didn't fail the RPC (at RPC
3602 # level), so that nodes with a non-responding node daemon don't
3603 # make all OSes invalid
3604 good_nodes = [node_name for node_name in rlist
3605 if not rlist[node_name].fail_msg]
3606 for node_name, nr in rlist.items():
3607 if nr.fail_msg or not nr.payload:
3609 for (name, path, status, diagnose, variants,
3610 params, api_versions) in nr.payload:
3611 if name not in all_os:
3612 # build a list of nodes for this os containing empty lists
3613 # for each node in node_list
3615 for nname in good_nodes:
3616 all_os[name][nname] = []
3617 # convert params from [name, help] to (name, help)
3618 params = [tuple(v) for v in params]
3619 all_os[name][node_name].append((path, status, diagnose,
3620 variants, params, api_versions))
3623 def _GetQueryData(self, lu):
3624 """Computes the list of nodes and their attributes.
3627 # Locking is not used
3628 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3630 valid_nodes = [node.name
3631 for node in lu.cfg.GetAllNodesInfo().values()
3632 if not node.offline and node.vm_capable]
3633 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3634 cluster = lu.cfg.GetClusterInfo()
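# an OS is reported as valid only if its first entry is valid on every node;
# variants, parameters and API versions are intersected across the nodes
# that report the OS, so only values common to all of them remain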
3638 for (os_name, os_data) in pol.items():
3639 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3640 hidden=(os_name in cluster.hidden_os),
3641 blacklisted=(os_name in cluster.blacklisted_os))
3645 api_versions = set()
3647 for idx, osl in enumerate(os_data.values()):
3648 info.valid = bool(info.valid and osl and osl[0][1])
3652 (node_variants, node_params, node_api) = osl[0][3:6]
3655 variants.update(node_variants)
3656 parameters.update(node_params)
3657 api_versions.update(node_api)
3659 # Filter out inconsistent values
3660 variants.intersection_update(node_variants)
3661 parameters.intersection_update(node_params)
3662 api_versions.intersection_update(node_api)
3664 info.variants = list(variants)
3665 info.parameters = list(parameters)
3666 info.api_versions = list(api_versions)
3668 data[os_name] = info
3670 # Prepare data in requested order
3671 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3675 class LUOsDiagnose(NoHooksLU):
3676 """Logical unit for OS diagnose/query.
3682 def _BuildFilter(fields, names):
3683 """Builds a filter for querying OSes.
3686 name_filter = qlang.MakeSimpleFilter("name", names)
3688 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3689 # respective field is not requested
3690 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3691 for fname in ["hidden", "blacklisted"]
3692 if fname not in fields]
3693 if "valid" not in fields:
3694 status_filter.append([qlang.OP_TRUE, "valid"])
3697 status_filter.insert(0, qlang.OP_AND)
3699 status_filter = None
3701 if name_filter and status_filter:
3702 return [qlang.OP_AND, name_filter, status_filter]
3706 return status_filter
3708 def CheckArguments(self):
3709 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3710 self.op.output_fields, False)
3712 def ExpandNames(self):
3713 self.oq.ExpandNames(self)
3715 def Exec(self, feedback_fn):
3716 return self.oq.OldStyleQuery(self)
3719 class LUNodeRemove(LogicalUnit):
3720 """Logical unit for removing a node.
3723 HPATH = "node-remove"
3724 HTYPE = constants.HTYPE_NODE
3726 def BuildHooksEnv(self):
3729 This doesn't run on the target node in the pre phase as a failed
3730 node would then be impossible to remove.
3734 "OP_TARGET": self.op.node_name,
3735 "NODE_NAME": self.op.node_name,
3738 def BuildHooksNodes(self):
3739 """Build hooks nodes.
3742 all_nodes = self.cfg.GetNodeList()
3744 all_nodes.remove(self.op.node_name)
3746 logging.warning("Node '%s', which is about to be removed, was not found"
3747 " in the list of all nodes", self.op.node_name)
3748 return (all_nodes, all_nodes)
3750 def CheckPrereq(self):
3751 """Check prerequisites.
3754 - the node exists in the configuration
3755 - it does not have primary or secondary instances
3756 - it's not the master
3758 Any errors are signaled by raising errors.OpPrereqError.
3761 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3762 node = self.cfg.GetNodeInfo(self.op.node_name)
3763 assert node is not None
3765 instance_list = self.cfg.GetInstanceList()
3767 masternode = self.cfg.GetMasterNode()
3768 if node.name == masternode:
3769 raise errors.OpPrereqError("Node is the master node,"
3770 " you need to failover first.",
3773 for instance_name in instance_list:
3774 instance = self.cfg.GetInstanceInfo(instance_name)
3775 if node.name in instance.all_nodes:
3776 raise errors.OpPrereqError("Instance %s is still running on the node,"
3777 " please remove first." % instance_name,
3779 self.op.node_name = node.name
3782 def Exec(self, feedback_fn):
3783 """Removes the node from the cluster.
3787 logging.info("Stopping the node daemon and removing configs from node %s",
3790 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3792 # Promote nodes to master candidate as needed
3793 _AdjustCandidatePool(self, exceptions=[node.name])
3794 self.context.RemoveNode(node.name)
3796 # Run post hooks on the node before it's removed
3797 _RunPostHook(self, node.name)
3799 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3800 msg = result.fail_msg
3802 self.LogWarning("Errors encountered on the remote node while leaving"
3803 " the cluster: %s", msg)
3805 # Remove node from our /etc/hosts
3806 if self.cfg.GetClusterInfo().modify_etc_hosts:
3807 master_node = self.cfg.GetMasterNode()
3808 result = self.rpc.call_etc_hosts_modify(master_node,
3809 constants.ETC_HOSTS_REMOVE,
3811 result.Raise("Can't update hosts file with new host data")
3812 _RedistributeAncillaryFiles(self)
3815 class _NodeQuery(_QueryBase):
3816 FIELDS = query.NODE_FIELDS
3818 def ExpandNames(self, lu):
3819 lu.needed_locks = {}
3820 lu.share_locks[locking.LEVEL_NODE] = 1
3823 self.wanted = _GetWantedNodes(lu, self.names)
3825 self.wanted = locking.ALL_SET
3827 self.do_locking = (self.use_locking and
3828 query.NQ_LIVE in self.requested_data)
3831 # if we don't request only static fields, we need to lock the nodes
3832 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3834 def DeclareLocks(self, lu, level):
3837 def _GetQueryData(self, lu):
3838 """Computes the list of nodes and their attributes.
3841 all_info = lu.cfg.GetAllNodesInfo()
3843 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3845 # Gather data as requested
3846 if query.NQ_LIVE in self.requested_data:
3847 # filter out non-vm_capable nodes
3848 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3850 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3851 lu.cfg.GetHypervisorType())
3852 live_data = dict((name, nresult.payload)
3853 for (name, nresult) in node_data.items()
3854 if not nresult.fail_msg and nresult.payload)
3858 if query.NQ_INST in self.requested_data:
3859 node_to_primary = dict([(name, set()) for name in nodenames])
3860 node_to_secondary = dict([(name, set()) for name in nodenames])
3862 inst_data = lu.cfg.GetAllInstancesInfo()
3864 for inst in inst_data.values():
3865 if inst.primary_node in node_to_primary:
3866 node_to_primary[inst.primary_node].add(inst.name)
3867 for secnode in inst.secondary_nodes:
3868 if secnode in node_to_secondary:
3869 node_to_secondary[secnode].add(inst.name)
3871 node_to_primary = None
3872 node_to_secondary = None
3874 if query.NQ_OOB in self.requested_data:
3875 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3876 for name, node in all_info.iteritems())
3880 if query.NQ_GROUP in self.requested_data:
3881 groups = lu.cfg.GetAllNodeGroupsInfo()
3885 return query.NodeQueryData([all_info[name] for name in nodenames],
3886 live_data, lu.cfg.GetMasterNode(),
3887 node_to_primary, node_to_secondary, groups,
3888 oob_support, lu.cfg.GetClusterInfo())
3891 class LUNodeQuery(NoHooksLU):
3892 """Logical unit for querying nodes.
3895 # pylint: disable-msg=W0142
3898 def CheckArguments(self):
3899 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3900 self.op.output_fields, self.op.use_locking)
3902 def ExpandNames(self):
3903 self.nq.ExpandNames(self)
3905 def Exec(self, feedback_fn):
3906 return self.nq.OldStyleQuery(self)
3909 class LUNodeQueryvols(NoHooksLU):
3910 """Logical unit for getting volumes on node(s).
3914 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3915 _FIELDS_STATIC = utils.FieldSet("node")
3917 def CheckArguments(self):
3918 _CheckOutputFields(static=self._FIELDS_STATIC,
3919 dynamic=self._FIELDS_DYNAMIC,
3920 selected=self.op.output_fields)
3922 def ExpandNames(self):
3923 self.needed_locks = {}
3924 self.share_locks[locking.LEVEL_NODE] = 1
3925 if not self.op.nodes:
3926 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3928 self.needed_locks[locking.LEVEL_NODE] = \
3929 _GetWantedNodes(self, self.op.nodes)
3931 def Exec(self, feedback_fn):
3932 """Computes the list of nodes and their attributes.
3935 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3936 volumes = self.rpc.call_node_volumes(nodenames)
3938 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3939 in self.cfg.GetInstanceList()]
3941 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3944 for node in nodenames:
3945 nresult = volumes[node]
3948 msg = nresult.fail_msg
3950 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3953 node_vols = nresult.payload[:]
3954 node_vols.sort(key=lambda vol: vol['dev'])
3956 for vol in node_vols:
3958 for field in self.op.output_fields:
3961 elif field == "phys":
3965 elif field == "name":
3967 elif field == "size":
3968 val = int(float(vol['size']))
3969 elif field == "instance":
3971 if node not in lv_by_node[inst]:
3973 if vol['name'] in lv_by_node[inst][node]:
3979 raise errors.ParameterError(field)
3980 node_output.append(str(val))
3982 output.append(node_output)
3987 class LUNodeQueryStorage(NoHooksLU):
3988 """Logical unit for getting information on storage units on node(s).
3991 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3994 def CheckArguments(self):
3995 _CheckOutputFields(static=self._FIELDS_STATIC,
3996 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3997 selected=self.op.output_fields)
3999 def ExpandNames(self):
4000 self.needed_locks = {}
4001 self.share_locks[locking.LEVEL_NODE] = 1
4004 self.needed_locks[locking.LEVEL_NODE] = \
4005 _GetWantedNodes(self, self.op.nodes)
4007 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4009 def Exec(self, feedback_fn):
4010 """Computes the list of nodes and their attributes.
4013 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4015 # Always get name to sort by
4016 if constants.SF_NAME in self.op.output_fields:
4017 fields = self.op.output_fields[:]
4019 fields = [constants.SF_NAME] + self.op.output_fields
4021 # Never ask for node or type as it's only known to the LU
4022 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4023 while extra in fields:
4024 fields.remove(extra)
4026 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4027 name_idx = field_idx[constants.SF_NAME]
4029 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4030 data = self.rpc.call_storage_list(self.nodes,
4031 self.op.storage_type, st_args,
4032 self.op.name, fields)
4036 for node in utils.NiceSort(self.nodes):
4037 nresult = data[node]
4041 msg = nresult.fail_msg
4043 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4046 rows = dict([(row[name_idx], row) for row in nresult.payload])
4048 for name in utils.NiceSort(rows.keys()):
4053 for field in self.op.output_fields:
4054 if field == constants.SF_NODE:
4056 elif field == constants.SF_TYPE:
4057 val = self.op.storage_type
4058 elif field in field_idx:
4059 val = row[field_idx[field]]
4061 raise errors.ParameterError(field)
4070 class _InstanceQuery(_QueryBase):
4071 FIELDS = query.INSTANCE_FIELDS
4073 def ExpandNames(self, lu):
4074 lu.needed_locks = {}
4075 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4076 lu.share_locks[locking.LEVEL_NODE] = 1
4079 self.wanted = _GetWantedInstances(lu, self.names)
4081 self.wanted = locking.ALL_SET
4083 self.do_locking = (self.use_locking and
4084 query.IQ_LIVE in self.requested_data)
4086 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4087 lu.needed_locks[locking.LEVEL_NODE] = []
4088 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4090 def DeclareLocks(self, lu, level):
4091 if level == locking.LEVEL_NODE and self.do_locking:
4092 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4094 def _GetQueryData(self, lu):
4095 """Computes the list of instances and their attributes.
4098 cluster = lu.cfg.GetClusterInfo()
4099 all_info = lu.cfg.GetAllInstancesInfo()
4101 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4103 instance_list = [all_info[name] for name in instance_names]
4104 nodes = frozenset(itertools.chain(*(inst.all_nodes
4105 for inst in instance_list)))
4106 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4109 wrongnode_inst = set()
4111 # Gather data as requested
4112 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4114 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4116 result = node_data[name]
4118 # offline nodes will be in both lists
4119 assert result.fail_msg
4120 offline_nodes.append(name)
4122 bad_nodes.append(name)
4123 elif result.payload:
4124 for inst in result.payload:
4125 if inst in all_info:
4126 if all_info[inst].primary_node == name:
4127 live_data.update(result.payload)
4129 wrongnode_inst.add(inst)
4131 # orphan instance; we don't list it here as we don't
4132 # handle this case yet in the output of instance listing
4133 logging.warning("Orphan instance '%s' found on node %s",
4135 # else no instance is alive
4139 if query.IQ_DISKUSAGE in self.requested_data:
4140 disk_usage = dict((inst.name,
4141 _ComputeDiskSize(inst.disk_template,
4142 [{constants.IDISK_SIZE: disk.size}
4143 for disk in inst.disks]))
4144 for inst in instance_list)
4148 if query.IQ_CONSOLE in self.requested_data:
4150 for inst in instance_list:
4151 if inst.name in live_data:
4152 # Instance is running
4153 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4155 consinfo[inst.name] = None
4156 assert set(consinfo.keys()) == set(instance_names)
4160 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4161 disk_usage, offline_nodes, bad_nodes,
4162 live_data, wrongnode_inst, consinfo)
4165 class LUQuery(NoHooksLU):
4166 """Query for resources/items of a certain kind.
4169 # pylint: disable-msg=W0142
4172 def CheckArguments(self):
4173 qcls = _GetQueryImplementation(self.op.what)
4175 self.impl = qcls(self.op.filter, self.op.fields, False)
4177 def ExpandNames(self):
4178 self.impl.ExpandNames(self)
4180 def DeclareLocks(self, level):
4181 self.impl.DeclareLocks(self, level)
4183 def Exec(self, feedback_fn):
4184 return self.impl.NewStyleQuery(self)
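# Illustrative sketch (an assumption, not taken from this module): a caller
# would typically reach LUQuery through an opcodes.OpQuery opcode, e.g.
#
#   op = opcodes.OpQuery(what=constants.QR_NODE,
#                        fields=["name", "pinst_cnt"],
#                        filter=[qlang.OP_EQUAL, "master_candidate", True])
#
# The OpQuery field names are assumed to mirror the self.op.what,
# self.op.fields and self.op.filter attributes used above.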
4187 class LUQueryFields(NoHooksLU):
4188 """Query for resources/items of a certain kind.
4191 # pylint: disable-msg=W0142
4194 def CheckArguments(self):
4195 self.qcls = _GetQueryImplementation(self.op.what)
4197 def ExpandNames(self):
4198 self.needed_locks = {}
4200 def Exec(self, feedback_fn):
4201 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4204 class LUNodeModifyStorage(NoHooksLU):
4205 """Logical unit for modifying a storage volume on a node.
4210 def CheckArguments(self):
4211 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4213 storage_type = self.op.storage_type
4216 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4218 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4219 " modified" % storage_type,
4222 diff = set(self.op.changes.keys()) - modifiable
4224 raise errors.OpPrereqError("The following fields can not be modified for"
4225 " storage units of type '%s': %r" %
4226 (storage_type, list(diff)),
4229 def ExpandNames(self):
4230 self.needed_locks = {
4231 locking.LEVEL_NODE: self.op.node_name,
4234 def Exec(self, feedback_fn):
4235 """Computes the list of nodes and their attributes.
4238 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4239 result = self.rpc.call_storage_modify(self.op.node_name,
4240 self.op.storage_type, st_args,
4241 self.op.name, self.op.changes)
4242 result.Raise("Failed to modify storage unit '%s' on %s" %
4243 (self.op.name, self.op.node_name))
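# Illustrative sketch (node and device names are made up; the opcode name and
# the assumption that "allocatable" is the modifiable field for LVM physical
# volumes follow the usual Ganeti constants): marking a PV as non-allocatable
# would be requested with a changes dict keyed by the modifiable field, e.g.
#
#   op = opcodes.OpNodeModifyStorage(node_name="node2.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sdb1",
#                                    changes={constants.SF_ALLOCATABLE: False})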
4246 class LUNodeAdd(LogicalUnit):
4247 """Logical unit for adding node to the cluster.
4251 HTYPE = constants.HTYPE_NODE
4252 _NFLAGS = ["master_capable", "vm_capable"]
4254 def CheckArguments(self):
4255 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4256 # validate/normalize the node name
4257 self.hostname = netutils.GetHostname(name=self.op.node_name,
4258 family=self.primary_ip_family)
4259 self.op.node_name = self.hostname.name
4261 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4262 raise errors.OpPrereqError("Cannot readd the master node",
4265 if self.op.readd and self.op.group:
4266 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4267 " being readded", errors.ECODE_INVAL)
4269 def BuildHooksEnv(self):
4272 This will run on all nodes before, and on all nodes + the new node after.
4276 "OP_TARGET": self.op.node_name,
4277 "NODE_NAME": self.op.node_name,
4278 "NODE_PIP": self.op.primary_ip,
4279 "NODE_SIP": self.op.secondary_ip,
4280 "MASTER_CAPABLE": str(self.op.master_capable),
4281 "VM_CAPABLE": str(self.op.vm_capable),
4284 def BuildHooksNodes(self):
4285 """Build hooks nodes.
4288 # Exclude added node
4289 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4290 post_nodes = pre_nodes + [self.op.node_name, ]
4292 return (pre_nodes, post_nodes)
4294 def CheckPrereq(self):
4295 """Check prerequisites.
4298 - the new node is not already in the config
4300 - its parameters (single/dual homed) matches the cluster
4302 Any errors are signaled by raising errors.OpPrereqError.
4306 hostname = self.hostname
4307 node = hostname.name
4308 primary_ip = self.op.primary_ip = hostname.ip
4309 if self.op.secondary_ip is None:
4310 if self.primary_ip_family == netutils.IP6Address.family:
4311 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4312 " IPv4 address must be given as secondary",
4314 self.op.secondary_ip = primary_ip
4316 secondary_ip = self.op.secondary_ip
4317 if not netutils.IP4Address.IsValid(secondary_ip):
4318 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4319 " address" % secondary_ip, errors.ECODE_INVAL)
4321 node_list = cfg.GetNodeList()
4322 if not self.op.readd and node in node_list:
4323 raise errors.OpPrereqError("Node %s is already in the configuration" %
4324 node, errors.ECODE_EXISTS)
4325 elif self.op.readd and node not in node_list:
4326 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4329 self.changed_primary_ip = False
4331 for existing_node_name in node_list:
4332 existing_node = cfg.GetNodeInfo(existing_node_name)
4334 if self.op.readd and node == existing_node_name:
4335 if existing_node.secondary_ip != secondary_ip:
4336 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4337 " address configuration as before",
4339 if existing_node.primary_ip != primary_ip:
4340 self.changed_primary_ip = True
4344 if (existing_node.primary_ip == primary_ip or
4345 existing_node.secondary_ip == primary_ip or
4346 existing_node.primary_ip == secondary_ip or
4347 existing_node.secondary_ip == secondary_ip):
4348 raise errors.OpPrereqError("New node ip address(es) conflict with"
4349 " existing node %s" % existing_node.name,
4350 errors.ECODE_NOTUNIQUE)
4352 # After this 'if' block, None is no longer a valid value for the
4353 # _capable op attributes
4355 old_node = self.cfg.GetNodeInfo(node)
4356 assert old_node is not None, "Can't retrieve locked node %s" % node
4357 for attr in self._NFLAGS:
4358 if getattr(self.op, attr) is None:
4359 setattr(self.op, attr, getattr(old_node, attr))
4361 for attr in self._NFLAGS:
4362 if getattr(self.op, attr) is None:
4363 setattr(self.op, attr, True)
4365 if self.op.readd and not self.op.vm_capable:
4366 pri, sec = cfg.GetNodeInstances(node)
4368 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4369 " flag set to false, but it already holds"
4370 " instances" % node,
4373 # check that the type of the node (single versus dual homed) is the
4374 # same as for the master
4375 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4376 master_singlehomed = myself.secondary_ip == myself.primary_ip
4377 newbie_singlehomed = secondary_ip == primary_ip
4378 if master_singlehomed != newbie_singlehomed:
4379 if master_singlehomed:
4380 raise errors.OpPrereqError("The master has no secondary ip but the"
4381 " new node has one",
4384 raise errors.OpPrereqError("The master has a secondary ip but the"
4385 " new node doesn't have one",
4388 # checks reachability
4389 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4390 raise errors.OpPrereqError("Node not reachable by ping",
4391 errors.ECODE_ENVIRON)
4393 if not newbie_singlehomed:
4394 # check reachability from my secondary ip to newbie's secondary ip
4395 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4396 source=myself.secondary_ip):
4397 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4398 " based ping to node daemon port",
4399 errors.ECODE_ENVIRON)
4406 if self.op.master_capable:
4407 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4409 self.master_candidate = False
4412 self.new_node = old_node
4414 node_group = cfg.LookupNodeGroup(self.op.group)
4415 self.new_node = objects.Node(name=node,
4416 primary_ip=primary_ip,
4417 secondary_ip=secondary_ip,
4418 master_candidate=self.master_candidate,
4419 offline=False, drained=False,
4422 if self.op.ndparams:
4423 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4425 def Exec(self, feedback_fn):
4426 """Adds the new node to the cluster.
4429 new_node = self.new_node
4430 node = new_node.name
4432 # We are adding a new node, so we assume it's powered
4433 new_node.powered = True
4435 # for re-adds, reset the offline/drained/master-candidate flags;
4436 # we need to reset here, otherwise offline would prevent RPC calls
4437 # later in the procedure; this also means that if the re-add
4438 # fails, we are left with a non-offlined, broken node
4440 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4441 self.LogInfo("Readding a node, the offline/drained flags were reset")
4442 # if we demote the node, we do cleanup later in the procedure
4443 new_node.master_candidate = self.master_candidate
4444 if self.changed_primary_ip:
4445 new_node.primary_ip = self.op.primary_ip
4447 # copy the master/vm_capable flags
4448 for attr in self._NFLAGS:
4449 setattr(new_node, attr, getattr(self.op, attr))
4451 # notify the user about any possible mc promotion
4452 if new_node.master_candidate:
4453 self.LogInfo("Node will be a master candidate")
4455 if self.op.ndparams:
4456 new_node.ndparams = self.op.ndparams
4458 new_node.ndparams = {}
4460 # check connectivity
4461 result = self.rpc.call_version([node])[node]
4462 result.Raise("Can't get version information from node %s" % node)
4463 if constants.PROTOCOL_VERSION == result.payload:
4464 logging.info("Communication to node %s fine, sw version %s match",
4465 node, result.payload)
4467 raise errors.OpExecError("Version mismatch master version %s,"
4468 " node version %s" %
4469 (constants.PROTOCOL_VERSION, result.payload))
4471 # Add node to our /etc/hosts, and add key to known_hosts
4472 if self.cfg.GetClusterInfo().modify_etc_hosts:
4473 master_node = self.cfg.GetMasterNode()
4474 result = self.rpc.call_etc_hosts_modify(master_node,
4475 constants.ETC_HOSTS_ADD,
4478 result.Raise("Can't update hosts file with new host data")
4480 if new_node.secondary_ip != new_node.primary_ip:
4481 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4484 node_verify_list = [self.cfg.GetMasterNode()]
4485 node_verify_param = {
4486 constants.NV_NODELIST: [node],
4487 # TODO: do a node-net-test as well?
4490 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4491 self.cfg.GetClusterName())
4492 for verifier in node_verify_list:
4493 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4494 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4496 for failed in nl_payload:
4497 feedback_fn("ssh/hostname verification failed"
4498 " (checking from %s): %s" %
4499 (verifier, nl_payload[failed]))
4500 raise errors.OpExecError("ssh/hostname verification failed")
4503 _RedistributeAncillaryFiles(self)
4504 self.context.ReaddNode(new_node)
4505 # make sure we redistribute the config
4506 self.cfg.Update(new_node, feedback_fn)
4507 # and make sure the new node will not have old files around
4508 if not new_node.master_candidate:
4509 result = self.rpc.call_node_demote_from_mc(new_node.name)
4510 msg = result.fail_msg
4512 self.LogWarning("Node failed to demote itself from master"
4513 " candidate status: %s" % msg)
4515 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4516 additional_vm=self.op.vm_capable)
4517 self.context.AddNode(new_node, self.proc.GetECId())
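# Illustrative sketch (host name and IP are made up): adding or re-adding a
# node is normally driven by an opcodes.OpNodeAdd opcode such as
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com",
#                          secondary_ip="192.0.2.44",
#                          readd=False)
#
# On a single-homed cluster the secondary_ip argument would simply be
# omitted, in which case CheckPrereq above falls back to the primary IP.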
4520 class LUNodeSetParams(LogicalUnit):
4521 """Modifies the parameters of a node.
4523 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4524 to the node role (as _ROLE_*)
4525 @cvar _R2F: a dictionary from node role to tuples of flags
4526 @cvar _FLAGS: a list of attribute names corresponding to the flags
4529 HPATH = "node-modify"
4530 HTYPE = constants.HTYPE_NODE
4532 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4534 (True, False, False): _ROLE_CANDIDATE,
4535 (False, True, False): _ROLE_DRAINED,
4536 (False, False, True): _ROLE_OFFLINE,
4537 (False, False, False): _ROLE_REGULAR,
4539 _R2F = dict((v, k) for k, v in _F2R.items())
4540 _FLAGS = ["master_candidate", "drained", "offline"]
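# For clarity (derived from the mappings above, not new behaviour): the flag
# tuples follow the order of _FLAGS, so for example
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_OFFLINE] == (False, False, True)   # i.e. only "offline" set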
4542 def CheckArguments(self):
4543 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4544 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4545 self.op.master_capable, self.op.vm_capable,
4546 self.op.secondary_ip, self.op.ndparams]
4547 if all_mods.count(None) == len(all_mods):
4548 raise errors.OpPrereqError("Please pass at least one modification",
4550 if all_mods.count(True) > 1:
4551 raise errors.OpPrereqError("Can't set the node into more than one"
4552 " state at the same time",
4555 # Boolean value that tells us whether we might be demoting from MC
4556 self.might_demote = (self.op.master_candidate == False or
4557 self.op.offline == True or
4558 self.op.drained == True or
4559 self.op.master_capable == False)
4561 if self.op.secondary_ip:
4562 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4563 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4564 " address" % self.op.secondary_ip,
4567 self.lock_all = self.op.auto_promote and self.might_demote
4568 self.lock_instances = self.op.secondary_ip is not None
4570 def ExpandNames(self):
4572 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4574 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4576 if self.lock_instances:
4577 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4579 def DeclareLocks(self, level):
4580 # If we have locked all instances, before waiting to lock nodes, release
4581 # all the ones living on nodes unrelated to the current operation.
4582 if level == locking.LEVEL_NODE and self.lock_instances:
4583 instances_release = []
4585 self.affected_instances = []
4586 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4587 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4588 instance = self.context.cfg.GetInstanceInfo(instance_name)
4589 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4590 if i_mirrored and self.op.node_name in instance.all_nodes:
4591 instances_keep.append(instance_name)
4592 self.affected_instances.append(instance)
4594 instances_release.append(instance_name)
4595 if instances_release:
4596 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4597 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4599 def BuildHooksEnv(self):
4602 This runs on the master node.
4606 "OP_TARGET": self.op.node_name,
4607 "MASTER_CANDIDATE": str(self.op.master_candidate),
4608 "OFFLINE": str(self.op.offline),
4609 "DRAINED": str(self.op.drained),
4610 "MASTER_CAPABLE": str(self.op.master_capable),
4611 "VM_CAPABLE": str(self.op.vm_capable),
4614 def BuildHooksNodes(self):
4615 """Build hooks nodes.
4618 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4621 def CheckPrereq(self):
4622 """Check prerequisites.
4624 This checks the requested flag changes against the node's current state
and the cluster configuration.
4627 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4629 if (self.op.master_candidate is not None or
4630 self.op.drained is not None or
4631 self.op.offline is not None):
4632 # we can't change the master's node flags
4633 if self.op.node_name == self.cfg.GetMasterNode():
4634 raise errors.OpPrereqError("The master role can be changed"
4635 " only via master-failover",
4638 if self.op.master_candidate and not node.master_capable:
4639 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4640 " it a master candidate" % node.name,
4643 if self.op.vm_capable == False:
4644 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4646 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4647 " the vm_capable flag" % node.name,
4650 if node.master_candidate and self.might_demote and not self.lock_all:
4651 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4652 # check if after removing the current node, we're missing master
4654 (mc_remaining, mc_should, _) = \
4655 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4656 if mc_remaining < mc_should:
4657 raise errors.OpPrereqError("Not enough master candidates, please"
4658 " pass auto promote option to allow"
4659 " promotion", errors.ECODE_STATE)
4661 self.old_flags = old_flags = (node.master_candidate,
4662 node.drained, node.offline)
4663 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4664 self.old_role = old_role = self._F2R[old_flags]
4666 # Check for ineffective changes
4667 for attr in self._FLAGS:
4668 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4669 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4670 setattr(self.op, attr, None)
4672 # Past this point, any flag change to False means a transition
4673 # away from the respective state, as only real changes are kept
4675 # TODO: We might query the real power state if it supports OOB
4676 if _SupportsOob(self.cfg, node):
4677 if self.op.offline is False and not (node.powered or
4678 self.op.powered == True):
4679 raise errors.OpPrereqError(("Please power on node %s first before you"
4680 " can reset offline state") %
4682 elif self.op.powered is not None:
4683 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4684 " which does not support out-of-band"
4685 " handling") % self.op.node_name)
4687 # If we're being de-offlined/un-drained, we'll promote ourselves to MC if needed
4688 if (self.op.drained == False or self.op.offline == False or
4689 (self.op.master_capable and not node.master_capable)):
4690 if _DecideSelfPromotion(self):
4691 self.op.master_candidate = True
4692 self.LogInfo("Auto-promoting node to master candidate")
4694 # If we're no longer master capable, we'll demote ourselves from MC
4695 if self.op.master_capable == False and node.master_candidate:
4696 self.LogInfo("Demoting from master candidate")
4697 self.op.master_candidate = False
4700 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4701 if self.op.master_candidate:
4702 new_role = self._ROLE_CANDIDATE
4703 elif self.op.drained:
4704 new_role = self._ROLE_DRAINED
4705 elif self.op.offline:
4706 new_role = self._ROLE_OFFLINE
4707 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4708 # False is still in new flags, which means we're un-setting (the
4710 new_role = self._ROLE_REGULAR
4711 else: # no new flags, nothing, keep old role
4714 self.new_role = new_role
4716 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4717 # Trying to transition out of offline status
4718 result = self.rpc.call_version([node.name])[node.name]
4720 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4721 " to report its version: %s" %
4722 (node.name, result.fail_msg),
4725 self.LogWarning("Transitioning node from offline to online state"
4726 " without using re-add. Please make sure the node"
4729 if self.op.secondary_ip:
4730 # Ok even without locking, because this can't be changed by any LU
4731 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4732 master_singlehomed = master.secondary_ip == master.primary_ip
4733 if master_singlehomed and self.op.secondary_ip:
4734 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4735 " homed cluster", errors.ECODE_INVAL)
4738 if self.affected_instances:
4739 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4740 " node has instances (%s) configured"
4741 " to use it" % self.affected_instances)
4743 # On online nodes, check that no instances are running, and that
4744 # the node has the new ip and we can reach it.
4745 for instance in self.affected_instances:
4746 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4748 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4749 if master.name != node.name:
4750 # check reachability from master secondary ip to new secondary ip
4751 if not netutils.TcpPing(self.op.secondary_ip,
4752 constants.DEFAULT_NODED_PORT,
4753 source=master.secondary_ip):
4754 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4755 " based ping to node daemon port",
4756 errors.ECODE_ENVIRON)
4758 if self.op.ndparams:
4759 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4760 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4761 self.new_ndparams = new_ndparams
4763 def Exec(self, feedback_fn):
4768 old_role = self.old_role
4769 new_role = self.new_role
4773 if self.op.ndparams:
4774 node.ndparams = self.new_ndparams
4776 if self.op.powered is not None:
4777 node.powered = self.op.powered
4779 for attr in ["master_capable", "vm_capable"]:
4780 val = getattr(self.op, attr)
4782 setattr(node, attr, val)
4783 result.append((attr, str(val)))
4785 if new_role != old_role:
4786 # Tell the node to demote itself, if no longer MC and not offline
4787 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4788 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4790 self.LogWarning("Node failed to demote itself: %s", msg)
4792 new_flags = self._R2F[new_role]
4793 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4795 result.append((desc, str(nf)))
4796 (node.master_candidate, node.drained, node.offline) = new_flags
4798 # we locked all nodes, we adjust the CP before updating this node
4800 _AdjustCandidatePool(self, [node.name])
4802 if self.op.secondary_ip:
4803 node.secondary_ip = self.op.secondary_ip
4804 result.append(("secondary_ip", self.op.secondary_ip))
4806 # this will trigger configuration file update, if needed
4807 self.cfg.Update(node, feedback_fn)
4809 # this will trigger job queue propagation or cleanup if the mc
4811 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4812 self.context.ReaddNode(node)
4817 class LUNodePowercycle(NoHooksLU):
4818 """Powercycles a node.
4823 def CheckArguments(self):
4824 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4825 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4826 raise errors.OpPrereqError("The node is the master and the force"
4827 " parameter was not set",
4830 def ExpandNames(self):
4831 """Locking for PowercycleNode.
4833 This is a last-resort option and shouldn't block on other
4834 jobs. Therefore, we grab no locks.
4837 self.needed_locks = {}
4839 def Exec(self, feedback_fn):
4843 result = self.rpc.call_node_powercycle(self.op.node_name,
4844 self.cfg.GetHypervisorType())
4845 result.Raise("Failed to schedule the reboot")
4846 return result.payload
4849 class LUClusterQuery(NoHooksLU):
4850 """Query cluster configuration.
4855 def ExpandNames(self):
4856 self.needed_locks = {}
4858 def Exec(self, feedback_fn):
4859 """Return cluster config.
4862 cluster = self.cfg.GetClusterInfo()
4865 # Filter just for enabled hypervisors
4866 for os_name, hv_dict in cluster.os_hvp.items():
4867 os_hvp[os_name] = {}
4868 for hv_name, hv_params in hv_dict.items():
4869 if hv_name in cluster.enabled_hypervisors:
4870 os_hvp[os_name][hv_name] = hv_params
4872 # Convert ip_family to ip_version
4873 primary_ip_version = constants.IP4_VERSION
4874 if cluster.primary_ip_family == netutils.IP6Address.family:
4875 primary_ip_version = constants.IP6_VERSION
4878 "software_version": constants.RELEASE_VERSION,
4879 "protocol_version": constants.PROTOCOL_VERSION,
4880 "config_version": constants.CONFIG_VERSION,
4881 "os_api_version": max(constants.OS_API_VERSIONS),
4882 "export_version": constants.EXPORT_VERSION,
4883 "architecture": (platform.architecture()[0], platform.machine()),
4884 "name": cluster.cluster_name,
4885 "master": cluster.master_node,
4886 "default_hypervisor": cluster.enabled_hypervisors[0],
4887 "enabled_hypervisors": cluster.enabled_hypervisors,
4888 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4889 for hypervisor_name in cluster.enabled_hypervisors]),
4891 "beparams": cluster.beparams,
4892 "osparams": cluster.osparams,
4893 "nicparams": cluster.nicparams,
4894 "ndparams": cluster.ndparams,
4895 "candidate_pool_size": cluster.candidate_pool_size,
4896 "master_netdev": cluster.master_netdev,
4897 "volume_group_name": cluster.volume_group_name,
4898 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4899 "file_storage_dir": cluster.file_storage_dir,
4900 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4901 "maintain_node_health": cluster.maintain_node_health,
4902 "ctime": cluster.ctime,
4903 "mtime": cluster.mtime,
4904 "uuid": cluster.uuid,
4905 "tags": list(cluster.GetTags()),
4906 "uid_pool": cluster.uid_pool,
4907 "default_iallocator": cluster.default_iallocator,
4908 "reserved_lvs": cluster.reserved_lvs,
4909 "primary_ip_version": primary_ip_version,
4910 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4911 "hidden_os": cluster.hidden_os,
4912 "blacklisted_os": cluster.blacklisted_os,
4918 class LUClusterConfigQuery(NoHooksLU):
4919 """Return configuration values.
4923 _FIELDS_DYNAMIC = utils.FieldSet()
4924 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4925 "watcher_pause", "volume_group_name")
4927 def CheckArguments(self):
4928 _CheckOutputFields(static=self._FIELDS_STATIC,
4929 dynamic=self._FIELDS_DYNAMIC,
4930 selected=self.op.output_fields)
4932 def ExpandNames(self):
4933 self.needed_locks = {}
4935 def Exec(self, feedback_fn):
4936 """Dump a representation of the cluster config to the standard output.
4940 for field in self.op.output_fields:
4941 if field == "cluster_name":
4942 entry = self.cfg.GetClusterName()
4943 elif field == "master_node":
4944 entry = self.cfg.GetMasterNode()
4945 elif field == "drain_flag":
4946 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4947 elif field == "watcher_pause":
4948 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4949 elif field == "volume_group_name":
4950 entry = self.cfg.GetVGName()
4952 raise errors.ParameterError(field)
4953 values.append(entry)
4957 class LUInstanceActivateDisks(NoHooksLU):
4958 """Bring up an instance's disks.
4963 def ExpandNames(self):
4964 self._ExpandAndLockInstance()
4965 self.needed_locks[locking.LEVEL_NODE] = []
4966 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4968 def DeclareLocks(self, level):
4969 if level == locking.LEVEL_NODE:
4970 self._LockInstancesNodes()
4972 def CheckPrereq(self):
4973 """Check prerequisites.
4975 This checks that the instance is in the cluster.
4978 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4979 assert self.instance is not None, \
4980 "Cannot retrieve locked instance %s" % self.op.instance_name
4981 _CheckNodeOnline(self, self.instance.primary_node)
4983 def Exec(self, feedback_fn):
4984 """Activate the disks.
4987 disks_ok, disks_info = \
4988 _AssembleInstanceDisks(self, self.instance,
4989 ignore_size=self.op.ignore_size)
4991 raise errors.OpExecError("Cannot activate block devices")
4996 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4998 """Prepare the block devices for an instance.
5000 This sets up the block devices on all nodes.
5002 @type lu: L{LogicalUnit}
5003 @param lu: the logical unit on whose behalf we execute
5004 @type instance: L{objects.Instance}
5005 @param instance: the instance for whose disks we assemble
5006 @type disks: list of L{objects.Disk} or None
5007 @param disks: which disks to assemble (or all, if None)
5008 @type ignore_secondaries: boolean
5009 @param ignore_secondaries: if true, errors on secondary nodes
5010 won't result in an error return from the function
5011 @type ignore_size: boolean
5012 @param ignore_size: if true, the current known size of the disk
5013 will not be used during the disk activation, useful for cases
5014 when the size is wrong
5015 @return: a tuple (status, device_info), where status is False if the
5016 operation failed, and device_info is a list of
5017 (host, instance_visible_name, node_visible_name) tuples with the mapping from node devices to instance devices
5022 iname = instance.name
5023 disks = _ExpandCheckDisks(instance, disks)
5025 # With the two-pass mechanism we try to reduce the window of
5026 # opportunity for the race condition of switching DRBD to primary
5027 # before handshaking has occurred, but we do not eliminate it
5029 # The proper fix would be to wait (with some limits) until the
5030 # connection has been made and drbd transitions from WFConnection
5031 # into any other network-connected state (Connected, SyncTarget,
5034 # 1st pass, assemble on all nodes in secondary mode
5035 for idx, inst_disk in enumerate(disks):
5036 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5038 node_disk = node_disk.Copy()
5039 node_disk.UnsetSize()
5040 lu.cfg.SetDiskID(node_disk, node)
5041 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5042 msg = result.fail_msg
5044 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5045 " (is_primary=False, pass=1): %s",
5046 inst_disk.iv_name, node, msg)
5047 if not ignore_secondaries:
5050 # FIXME: race condition on drbd migration to primary
5052 # 2nd pass, do only the primary node
5053 for idx, inst_disk in enumerate(disks):
5056 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5057 if node != instance.primary_node:
5060 node_disk = node_disk.Copy()
5061 node_disk.UnsetSize()
5062 lu.cfg.SetDiskID(node_disk, node)
5063 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5064 msg = result.fail_msg
5066 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5067 " (is_primary=True, pass=2): %s",
5068 inst_disk.iv_name, node, msg)
5071 dev_path = result.payload
5073 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5075 # leave the disks configured for the primary node
5076 # this is a workaround that would be fixed better by
5077 # improving the logical/physical id handling
5079 lu.cfg.SetDiskID(disk, instance.primary_node)
5081 return disks_ok, device_info
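# Illustrative usage sketch (mirrors how LUInstanceActivateDisks above calls
# this helper; the feedback message is made up):
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s is visible as %s" % (node, iv_name, dev_path))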
5084 def _StartInstanceDisks(lu, instance, force):
5085 """Start the disks of an instance.
5088 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5089 ignore_secondaries=force)
5091 _ShutdownInstanceDisks(lu, instance)
5092 if force is not None and not force:
5093 lu.proc.LogWarning("", hint="If the message above refers to a"
5095 " you can retry the operation using '--force'.")
5096 raise errors.OpExecError("Disk consistency error")
5099 class LUInstanceDeactivateDisks(NoHooksLU):
5100 """Shutdown an instance's disks.
5105 def ExpandNames(self):
5106 self._ExpandAndLockInstance()
5107 self.needed_locks[locking.LEVEL_NODE] = []
5108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5110 def DeclareLocks(self, level):
5111 if level == locking.LEVEL_NODE:
5112 self._LockInstancesNodes()
5114 def CheckPrereq(self):
5115 """Check prerequisites.
5117 This checks that the instance is in the cluster.
5120 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5121 assert self.instance is not None, \
5122 "Cannot retrieve locked instance %s" % self.op.instance_name
5124 def Exec(self, feedback_fn):
5125 """Deactivate the disks
5128 instance = self.instance
5130 _ShutdownInstanceDisks(self, instance)
5132 _SafeShutdownInstanceDisks(self, instance)
5135 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5136 """Shutdown block devices of an instance.
5138 This function checks if an instance is running, before calling
5139 _ShutdownInstanceDisks.
5142 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5143 _ShutdownInstanceDisks(lu, instance, disks=disks)
5146 def _ExpandCheckDisks(instance, disks):
5147 """Return the instance disks selected by the disks list
5149 @type disks: list of L{objects.Disk} or None
5150 @param disks: selected disks
5151 @rtype: list of L{objects.Disk}
5152 @return: selected instance disks to act on
5156 return instance.disks
5158 if not set(disks).issubset(instance.disks):
5159 raise errors.ProgrammerError("Can only act on disks belonging to the"
5164 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5165 """Shutdown block devices of an instance.
5167 This does the shutdown on all nodes of the instance.
5169 If the ignore_primary is false, errors on the primary node are
5174 disks = _ExpandCheckDisks(instance, disks)
5177 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5178 lu.cfg.SetDiskID(top_disk, node)
5179 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5180 msg = result.fail_msg
5182 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5183 disk.iv_name, node, msg)
5184 if ((node == instance.primary_node and not ignore_primary) or
5185 (node != instance.primary_node and not result.offline)):
5190 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5191 """Checks if a node has enough free memory.
5193 This function checks if a given node has the needed amount of free
5194 memory. In case the node has less memory or we cannot get the
5195 information from the node, this function raises an OpPrereqError
5198 @type lu: C{LogicalUnit}
5199 @param lu: a logical unit from which we get configuration data
5201 @param node: the node to check
5202 @type reason: C{str}
5203 @param reason: string to use in the error message
5204 @type requested: C{int}
5205 @param requested: the amount of memory in MiB to check for
5206 @type hypervisor_name: C{str}
5207 @param hypervisor_name: the hypervisor to ask for memory stats
5208 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5209 we cannot check the node
5212 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5213 nodeinfo[node].Raise("Can't get data from node %s" % node,
5214 prereq=True, ecode=errors.ECODE_ENVIRON)
5215 free_mem = nodeinfo[node].payload.get('memory_free', None)
5216 if not isinstance(free_mem, int):
5217 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5218 " was '%s'" % (node, free_mem),
5219 errors.ECODE_ENVIRON)
5220 if requested > free_mem:
5221 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5222 " needed %s MiB, available %s MiB" %
5223 (node, reason, requested, free_mem),
5227 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5228 """Checks if nodes have enough free disk space in the all VGs.
5230 This function check if all given nodes have the needed amount of
5231 free disk. In case any node has less disk or we cannot get the
5232 information from the node, this function raise an OpPrereqError
5235 @type lu: C{LogicalUnit}
5236 @param lu: a logical unit from which we get configuration data
5237 @type nodenames: C{list}
5238 @param nodenames: the list of node names to check
5239 @type req_sizes: C{dict}
5240 @param req_sizes: the hash of vg and corresponding amount of disk in
5242 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5243 or we cannot check the node
5246 for vg, req_size in req_sizes.items():
5247 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
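# Illustrative sketch (node variables, volume group names and sizes are
# placeholders): req_sizes is simply a dict mapping each volume group to the
# space required on it, in MiB:
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode],
#                            {"xenvg": 10240, "backupvg": 2048})
#
# i.e. 10 GiB must be free in "xenvg" and 2 GiB in "backupvg" on both nodes.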
5250 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5251 """Checks if nodes have enough free disk space in the specified VG.
5253 This function checks if all given nodes have the needed amount of
5254 free disk. In case any node has less disk or we cannot get the
5255 information from the node, this function raises an OpPrereqError
5258 @type lu: C{LogicalUnit}
5259 @param lu: a logical unit from which we get configuration data
5260 @type nodenames: C{list}
5261 @param nodenames: the list of node names to check
5263 @param vg: the volume group to check
5264 @type requested: C{int}
5265 @param requested: the amount of disk in MiB to check for
5266 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5267 or we cannot check the node
5270 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5271 for node in nodenames:
5272 info = nodeinfo[node]
5273 info.Raise("Cannot get current information from node %s" % node,
5274 prereq=True, ecode=errors.ECODE_ENVIRON)
5275 vg_free = info.payload.get("vg_free", None)
5276 if not isinstance(vg_free, int):
5277 raise errors.OpPrereqError("Can't compute free disk space on node"
5278 " %s for vg %s, result was '%s'" %
5279 (node, vg, vg_free), errors.ECODE_ENVIRON)
5280 if requested > vg_free:
5281 raise errors.OpPrereqError("Not enough disk space on target node %s"
5282 " vg %s: required %d MiB, available %d MiB" %
5283 (node, vg, requested, vg_free),
5287 class LUInstanceStartup(LogicalUnit):
5288 """Starts an instance.
5291 HPATH = "instance-start"
5292 HTYPE = constants.HTYPE_INSTANCE
5295 def CheckArguments(self):
5297 if self.op.beparams:
5298 # fill the beparams dict
5299 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5301 def ExpandNames(self):
5302 self._ExpandAndLockInstance()
5304 def BuildHooksEnv(self):
5307 This runs on master, primary and secondary nodes of the instance.
5311 "FORCE": self.op.force,
5314 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5318 def BuildHooksNodes(self):
5319 """Build hooks nodes.
5322 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5325 def CheckPrereq(self):
5326 """Check prerequisites.
5328 This checks that the instance is in the cluster.
5331 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5332 assert self.instance is not None, \
5333 "Cannot retrieve locked instance %s" % self.op.instance_name
5336 if self.op.hvparams:
5337 # check hypervisor parameter syntax (locally)
5338 cluster = self.cfg.GetClusterInfo()
5339 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5340 filled_hvp = cluster.FillHV(instance)
5341 filled_hvp.update(self.op.hvparams)
5342 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5343 hv_type.CheckParameterSyntax(filled_hvp)
5344 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5346 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5348 if self.primary_offline and self.op.ignore_offline_nodes:
5349 self.proc.LogWarning("Ignoring offline primary node")
5351 if self.op.hvparams or self.op.beparams:
5352 self.proc.LogWarning("Overridden parameters are ignored")
5354 _CheckNodeOnline(self, instance.primary_node)
5356 bep = self.cfg.GetClusterInfo().FillBE(instance)
5358 # check that the instance's bridges exist
5359 _CheckInstanceBridgesExist(self, instance)
5361 remote_info = self.rpc.call_instance_info(instance.primary_node,
5363 instance.hypervisor)
5364 remote_info.Raise("Error checking node %s" % instance.primary_node,
5365 prereq=True, ecode=errors.ECODE_ENVIRON)
5366 if not remote_info.payload: # not running already
5367 _CheckNodeFreeMemory(self, instance.primary_node,
5368 "starting instance %s" % instance.name,
5369 bep[constants.BE_MEMORY], instance.hypervisor)
5371 def Exec(self, feedback_fn):
5372 """Start the instance.
5375 instance = self.instance
5376 force = self.op.force
5378 self.cfg.MarkInstanceUp(instance.name)
5380 if self.primary_offline:
5381 assert self.op.ignore_offline_nodes
5382 self.proc.LogInfo("Primary node offline, marked instance as started")
5384 node_current = instance.primary_node
5386 _StartInstanceDisks(self, instance, force)
5388 result = self.rpc.call_instance_start(node_current, instance,
5389 self.op.hvparams, self.op.beparams)
5390 msg = result.fail_msg
5392 _ShutdownInstanceDisks(self, instance)
5393 raise errors.OpExecError("Could not start instance: %s" % msg)
5396 class LUInstanceReboot(LogicalUnit):
5397 """Reboot an instance.
5400 HPATH = "instance-reboot"
5401 HTYPE = constants.HTYPE_INSTANCE
5404 def ExpandNames(self):
5405 self._ExpandAndLockInstance()
5407 def BuildHooksEnv(self):
5410 This runs on master, primary and secondary nodes of the instance.
5414 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5415 "REBOOT_TYPE": self.op.reboot_type,
5416 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5419 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5423 def BuildHooksNodes(self):
5424 """Build hooks nodes.
5427 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5430 def CheckPrereq(self):
5431 """Check prerequisites.
5433 This checks that the instance is in the cluster.
5436 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5437 assert self.instance is not None, \
5438 "Cannot retrieve locked instance %s" % self.op.instance_name
5440 _CheckNodeOnline(self, instance.primary_node)
5442 # check that the instance's bridges exist
5443 _CheckInstanceBridgesExist(self, instance)
5445 def Exec(self, feedback_fn):
5446 """Reboot the instance.
5449 instance = self.instance
5450 ignore_secondaries = self.op.ignore_secondaries
5451 reboot_type = self.op.reboot_type
5453 remote_info = self.rpc.call_instance_info(instance.primary_node,
5455 instance.hypervisor)
5456 remote_info.Raise("Error checking node %s" % instance.primary_node)
5457 instance_running = bool(remote_info.payload)
5459 node_current = instance.primary_node
5461 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5462 constants.INSTANCE_REBOOT_HARD]:
5463 for disk in instance.disks:
5464 self.cfg.SetDiskID(disk, node_current)
5465 result = self.rpc.call_instance_reboot(node_current, instance,
5467 self.op.shutdown_timeout)
5468 result.Raise("Could not reboot instance")
5470 if instance_running:
5471 result = self.rpc.call_instance_shutdown(node_current, instance,
5472 self.op.shutdown_timeout)
5473 result.Raise("Could not shutdown instance for full reboot")
5474 _ShutdownInstanceDisks(self, instance)
5476 self.LogInfo("Instance %s was already stopped, starting now",
5478 _StartInstanceDisks(self, instance, ignore_secondaries)
5479 result = self.rpc.call_instance_start(node_current, instance, None, None)
5480 msg = result.fail_msg
5482 _ShutdownInstanceDisks(self, instance)
5483 raise errors.OpExecError("Could not start instance for"
5484 " full reboot: %s" % msg)
5486 self.cfg.MarkInstanceUp(instance.name)
5489 class LUInstanceShutdown(LogicalUnit):
5490 """Shutdown an instance.
5493 HPATH = "instance-stop"
5494 HTYPE = constants.HTYPE_INSTANCE
5497 def ExpandNames(self):
5498 self._ExpandAndLockInstance()
5500 def BuildHooksEnv(self):
5503 This runs on master, primary and secondary nodes of the instance.
5506 env = _BuildInstanceHookEnvByObject(self, self.instance)
5507 env["TIMEOUT"] = self.op.timeout
5510 def BuildHooksNodes(self):
5511 """Build hooks nodes.
5514 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5517 def CheckPrereq(self):
5518 """Check prerequisites.
5520 This checks that the instance is in the cluster.
5523 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5524 assert self.instance is not None, \
5525 "Cannot retrieve locked instance %s" % self.op.instance_name
5527 self.primary_offline = \
5528 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5530 if self.primary_offline and self.op.ignore_offline_nodes:
5531 self.proc.LogWarning("Ignoring offline primary node")
5533 _CheckNodeOnline(self, self.instance.primary_node)
5535 def Exec(self, feedback_fn):
5536 """Shutdown the instance.
5539 instance = self.instance
5540 node_current = instance.primary_node
5541 timeout = self.op.timeout
5543 self.cfg.MarkInstanceDown(instance.name)
5545 if self.primary_offline:
5546 assert self.op.ignore_offline_nodes
5547 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5549 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5550 msg = result.fail_msg
5552 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5554 _ShutdownInstanceDisks(self, instance)
5557 class LUInstanceReinstall(LogicalUnit):
5558 """Reinstall an instance.
5561 HPATH = "instance-reinstall"
5562 HTYPE = constants.HTYPE_INSTANCE
5565 def ExpandNames(self):
5566 self._ExpandAndLockInstance()
5568 def BuildHooksEnv(self):
5571 This runs on master, primary and secondary nodes of the instance.
5574 return _BuildInstanceHookEnvByObject(self, self.instance)
5576 def BuildHooksNodes(self):
5577 """Build hooks nodes.
5580 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5583 def CheckPrereq(self):
5584 """Check prerequisites.
5586 This checks that the instance is in the cluster and is not running.
5589 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5590 assert instance is not None, \
5591 "Cannot retrieve locked instance %s" % self.op.instance_name
5592 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5593 " offline, cannot reinstall")
5594 for node in instance.secondary_nodes:
5595 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5596 " cannot reinstall")
5598 if instance.disk_template == constants.DT_DISKLESS:
5599 raise errors.OpPrereqError("Instance '%s' has no disks" %
5600 self.op.instance_name,
5602 _CheckInstanceDown(self, instance, "cannot reinstall")
5604 if self.op.os_type is not None:
5606 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5607 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5608 instance_os = self.op.os_type
5610 instance_os = instance.os
5612 nodelist = list(instance.all_nodes)
5614 if self.op.osparams:
5615 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5616 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5617 self.os_inst = i_osdict # the new dict (without defaults)
5621 self.instance = instance
5623 def Exec(self, feedback_fn):
5624 """Reinstall the instance.
5627 inst = self.instance
5629 if self.op.os_type is not None:
5630 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5631 inst.os = self.op.os_type
5632 # Write to configuration
5633 self.cfg.Update(inst, feedback_fn)
5635 _StartInstanceDisks(self, inst, None)
5637 feedback_fn("Running the instance OS create scripts...")
5638 # FIXME: pass debug option from opcode to backend
5639 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5640 self.op.debug_level,
5641 osparams=self.os_inst)
5642 result.Raise("Could not install OS for instance %s on node %s" %
5643 (inst.name, inst.primary_node))
5645 _ShutdownInstanceDisks(self, inst)
5648 class LUInstanceRecreateDisks(LogicalUnit):
5649 """Recreate an instance's missing disks.
5652 HPATH = "instance-recreate-disks"
5653 HTYPE = constants.HTYPE_INSTANCE
5656 def ExpandNames(self):
5657 self._ExpandAndLockInstance()
5659 def BuildHooksEnv(self):
5662 This runs on master, primary and secondary nodes of the instance.
5665 return _BuildInstanceHookEnvByObject(self, self.instance)
5667 def BuildHooksNodes(self):
5668 """Build hooks nodes.
5671 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5674 def CheckPrereq(self):
5675 """Check prerequisites.
5677 This checks that the instance is in the cluster and is not running.
5680 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5681 assert instance is not None, \
5682 "Cannot retrieve locked instance %s" % self.op.instance_name
5683 _CheckNodeOnline(self, instance.primary_node)
5685 if instance.disk_template == constants.DT_DISKLESS:
5686 raise errors.OpPrereqError("Instance '%s' has no disks" %
5687 self.op.instance_name, errors.ECODE_INVAL)
5688 _CheckInstanceDown(self, instance, "cannot recreate disks")
5690 if not self.op.disks:
5691 self.op.disks = range(len(instance.disks))
5693 for idx in self.op.disks:
5694 if idx >= len(instance.disks):
5695 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5698 self.instance = instance
5700 def Exec(self, feedback_fn):
5701 """Recreate the disks.
5705 for idx, _ in enumerate(self.instance.disks):
5706 if idx not in self.op.disks: # disk idx has not been passed in
5710 _CreateDisks(self, self.instance, to_skip=to_skip)
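# Illustrative sketch (instance name is made up; the opcode name is assumed
# to mirror the LU name): recreating only selected disks, e.g. the first and
# third one, matches the index handling in CheckPrereq/Exec above, while an
# empty disks list defaults to all of the instance's disks:
#
#   op = opcodes.OpInstanceRecreateDisks(instance_name="web1.example.com",
#                                        disks=[0, 2])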
5713 class LUInstanceRename(LogicalUnit):
5714 """Rename an instance.
5717 HPATH = "instance-rename"
5718 HTYPE = constants.HTYPE_INSTANCE
5720 def CheckArguments(self):
5724 if self.op.ip_check and not self.op.name_check:
5725 # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)
5729 def BuildHooksEnv(self):
5732 This runs on master, primary and secondary nodes of the instance.
5735 env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
5746 def CheckPrereq(self):
5747 """Check prerequisites.
5749 This checks that the instance is in the cluster and is not running.
5752 self.op.instance_name = _ExpandInstanceName(self.cfg,
5753 self.op.instance_name)
5754 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5755 assert instance is not None
5756 _CheckNodeOnline(self, instance.primary_node)
5757 _CheckInstanceDown(self, instance, "cannot rename")
5758 self.instance = instance
5760 new_name = self.op.new_name
5761 if self.op.name_check:
5762 hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_NOTUNIQUE)
      new_name = self.op.new_name = hostname.name
5771 if (self.op.ip_check and
5772 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5773 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5774 (hostname.ip, new_name),
5775 errors.ECODE_NOTUNIQUE)
5777 instance_list = self.cfg.GetInstanceList()
5778 if new_name in instance_list and new_name != instance.name:
5779 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5780 new_name, errors.ECODE_EXISTS)
5782 def Exec(self, feedback_fn):
5783 """Rename the instance.
5786 inst = self.instance
5787 old_name = inst.name
5789 rename_file_storage = False
5790 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5791 self.op.new_name != inst.name):
5792 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5793 rename_file_storage = True
5795 self.cfg.RenameInstance(inst.name, self.op.new_name)
5796 # Change the instance lock. This is definitely safe while we hold the BGL.
5797 # Otherwise the new lock would have to be added in acquired mode.
5799 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5800 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5802 # re-read the instance from the configuration after rename
5803 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5805 if rename_file_storage:
5806 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5807 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5808 old_file_storage_dir,
5809 new_file_storage_dir)
5810 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5811 " (but the instance has been renamed in Ganeti)" %
5812 (inst.primary_node, old_file_storage_dir,
5813 new_file_storage_dir))
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
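    # A failure of the OS rename script is deliberately reported only as a
    # warning: at this point the instance has already been renamed in the
    # Ganeti configuration, so aborting here would not undo the rename.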
5831 class LUInstanceRemove(LogicalUnit):
5832 """Remove an instance.
5835 HPATH = "instance-remove"
5836 HTYPE = constants.HTYPE_INSTANCE
5839 def ExpandNames(self):
5840 self._ExpandAndLockInstance()
5841 self.needed_locks[locking.LEVEL_NODE] = []
5842 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5844 def DeclareLocks(self, level):
5845 if level == locking.LEVEL_NODE:
5846 self._LockInstancesNodes()
5848 def BuildHooksEnv(self):
5851 This runs on master, primary and secondary nodes of the instance.
5854 env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env
5858 def BuildHooksNodes(self):
5859 """Build hooks nodes.
5862 nl = [self.cfg.GetMasterNode()]
5863 nl_post = list(self.instance.all_nodes) + nl
5864 return (nl, nl_post)
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This checks that the instance is in the cluster.
5872 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5873 assert self.instance is not None, \
5874 "Cannot retrieve locked instance %s" % self.op.instance_name
5876 def Exec(self, feedback_fn):
5877 """Remove the instance.
5880 instance = self.instance
5881 logging.info("Shutting down instance %s on node %s",
5882 instance.name, instance.primary_node)
5884 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5885 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
5895 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5898 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5899 """Utility function to remove an instance.
5902 logging.info("Removing block devices for instance %s", instance.name)
5904 if not _RemoveDisks(lu, instance):
5905 if not ignore_failures:
5906 raise errors.OpExecError("Can't remove instance's disks")
5907 feedback_fn("Warning: can't remove instance's disks")
5909 logging.info("Removing instance %s out of cluster config", instance.name)
5911 lu.cfg.RemoveInstance(instance.name)
5913 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5914 "Instance lock removal conflict"
5916 # Remove lock for the instance
5917 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
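  # Scheduling the lock name in lu.remove_locks (instead of releasing it here)
  # is intentional: the per-instance lock is then dropped by the job processor
  # once the LU has finished, i.e. after the instance is already gone from the
  # configuration above.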
5920 class LUInstanceQuery(NoHooksLU):
5921 """Logical unit for querying instances.
5924 # pylint: disable-msg=W0142
5927 def CheckArguments(self):
5928 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5929 self.op.output_fields, self.op.use_locking)
5931 def ExpandNames(self):
5932 self.iq.ExpandNames(self)
5934 def DeclareLocks(self, level):
5935 self.iq.DeclareLocks(self, level)
5937 def Exec(self, feedback_fn):
5938 return self.iq.OldStyleQuery(self)
5941 class LUInstanceFailover(LogicalUnit):
5942 """Failover an instance.
5945 HPATH = "instance-failover"
5946 HTYPE = constants.HTYPE_INSTANCE
5949 def CheckArguments(self):
5950 """Check the arguments.
5953 self.iallocator = getattr(self.op, "iallocator", None)
5954 self.target_node = getattr(self.op, "target_node", None)
5956 def ExpandNames(self):
5957 self._ExpandAndLockInstance()
5959 if self.op.target_node is not None:
5960 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5962 self.needed_locks[locking.LEVEL_NODE] = []
5963 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5965 ignore_consistency = self.op.ignore_consistency
5966 shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       iallocator=self.op.iallocator,
                                       target_node=self.op.target_node,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
5974 self.tasklets = [self._migrater]
5976 def DeclareLocks(self, level):
5977 if level == locking.LEVEL_NODE:
5978 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5979 if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
5989 def BuildHooksEnv(self):
5992 This runs on master, primary and secondary nodes of the instance.
5995 instance = self._migrater.instance
5996 source_node = instance.primary_node
5997 target_node = self._migrater.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env
6015 def BuildHooksNodes(self):
6016 """Build hooks nodes.
6019 instance = self._migrater.instance
6020 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6021 return (nl, nl + [instance.primary_node])
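  # The two node lists returned above are the nodes on which the pre- and
  # post-hooks run; the old primary is added only to the post-hook list,
  # presumably because during a failover that node may be offline.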
6024 class LUInstanceMigrate(LogicalUnit):
6025 """Migrate an instance.
6027 This is migration without shutting down, compared to the failover,
6028 which is done with shutdown.
6031 HPATH = "instance-migrate"
6032 HTYPE = constants.HTYPE_INSTANCE
6035 def ExpandNames(self):
6036 self._ExpandAndLockInstance()
6038 if self.op.target_node is not None:
6039 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6041 self.needed_locks[locking.LEVEL_NODE] = []
6042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6044 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6045 cleanup=self.op.cleanup,
6046 iallocator=self.op.iallocator,
6047 target_node=self.op.target_node,
6049 fallback=self.op.allow_failover)
6050 self.tasklets = [self._migrater]
6052 def DeclareLocks(self, level):
6053 if level == locking.LEVEL_NODE:
6054 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6055 if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
6065 def BuildHooksEnv(self):
6068 This runs on master, primary and secondary nodes of the instance.
6071 instance = self._migrater.instance
6072 source_node = instance.primary_node
6073 target_node = self._migrater.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env
6090 def BuildHooksNodes(self):
6091 """Build hooks nodes.
6094 instance = self._migrater.instance
6095 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6096 return (nl, nl + [instance.primary_node])
6099 class LUInstanceMove(LogicalUnit):
6100 """Move an instance by data-copying.
6103 HPATH = "instance-move"
6104 HTYPE = constants.HTYPE_INSTANCE
6107 def ExpandNames(self):
6108 self._ExpandAndLockInstance()
6109 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6110 self.op.target_node = target_node
6111 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6112 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6114 def DeclareLocks(self, level):
6115 if level == locking.LEVEL_NODE:
6116 self._LockInstancesNodes(primary_only=True)
6118 def BuildHooksEnv(self):
6121 This runs on master, primary and secondary nodes of the instance.
6125 "TARGET_NODE": self.op.target_node,
6126 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6128 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6131 def BuildHooksNodes(self):
6132 """Build hooks nodes.
6136 self.cfg.GetMasterNode(),
6137 self.instance.primary_node,
6138 self.op.target_node,
6142 def CheckPrereq(self):
6143 """Check prerequisites.
6145 This checks that the instance is in the cluster.
6148 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6149 assert self.instance is not None, \
6150 "Cannot retrieve locked instance %s" % self.op.instance_name
6152 node = self.cfg.GetNodeInfo(self.op.target_node)
6153 assert node is not None, \
6154 "Cannot retrieve locked node %s" % self.op.target_node
6156 self.target_node = target_node = node.name
6158 if target_node == instance.primary_node:
6159 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6160 (instance.name, target_node),
6163 bep = self.cfg.GetClusterInfo().FillBE(instance)
6165 for idx, dsk in enumerate(instance.disks):
6166 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6167 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6168 " cannot copy" % idx, errors.ECODE_STATE)
6170 _CheckNodeOnline(self, target_node)
6171 _CheckNodeNotDrained(self, target_node)
6172 _CheckNodeVmCapable(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
6184 _CheckInstanceBridgesExist(self, instance, node=target_node)
6186 def Exec(self, feedback_fn):
6187 """Move an instance.
6189 The move is done by shutting it down on its present node, copying
6190 the data over (slow) and starting it on the new node.
6193 instance = self.instance
6195 source_node = instance.primary_node
6196 target_node = self.target_node
6198 self.LogInfo("Shutting down instance %s on source node %s",
6199 instance.name, source_node)
6201 result = self.rpc.call_instance_shutdown(source_node, instance,
6202 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))
    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))
6258 instance.primary_node = target_node
6259 self.cfg.Update(instance, feedback_fn)
6261 self.LogInfo("Removing the disks on the original node")
6262 _RemoveDisks(self, instance, target_node=source_node)
6264 # Only start the instance if it's marked as up
6265 if instance.admin_up:
6266 self.LogInfo("Starting instance %s on node %s",
6267 instance.name, target_node)
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6283 class LUNodeMigrate(LogicalUnit):
6284 """Migrate all instances from a node.
6287 HPATH = "node-migrate"
6288 HTYPE = constants.HTYPE_NODE
6291 def CheckArguments(self):
6292 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6294 def ExpandNames(self):
6295 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6297 self.needed_locks = {}
    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    self.lock_all_nodes = False
6305 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6306 logging.debug("Migrating instance %s", inst.name)
6307 names.append(inst.name)
6309 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6310 iallocator=self.op.iallocator,
6313 if inst.disk_template in constants.DTS_EXT_MIRROR:
6314 # We need to lock all nodes, as the iallocator will choose the
6315 # destination nodes afterwards
6316 self.lock_all_nodes = True
6318 self.tasklets = tasklets
6320 # Declare node locks
6321 if self.lock_all_nodes:
6322 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6324 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6327 # Declare instance locks
6328 self.needed_locks[locking.LEVEL_INSTANCE] = names
6330 def DeclareLocks(self, level):
6331 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6332 self._LockInstancesNodes()
6334 def BuildHooksEnv(self):
6337 This runs on the master, the primary and all the secondaries.
    return {
      "NODE_NAME": self.op.node_name,
      }
6344 def BuildHooksNodes(self):
6345 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)
6352 class TLMigrateInstance(Tasklet):
6353 """Tasklet class for instance migration.
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we should clean up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
      possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between the
      source and the target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, timeout of the shutdown

  """
6376 def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6377 target_node=None, failover=False, fallback=False,
6378 ignore_consistency=False,
6379 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6380 """Initializes this class.
6383 Tasklet.__init__(self, lu)
6386 self.instance_name = instance_name
6387 self.cleanup = cleanup
6388 self.live = False # will be overridden later
6389 self.iallocator = iallocator
6390 self.target_node = target_node
6391 self.failover = failover
6392 self.fallback = fallback
6393 self.ignore_consistency = ignore_consistency
6394 self.shutdown_timeout = shutdown_timeout
6396 def CheckPrereq(self):
6397 """Check prerequisites.
6399 This checks that the instance is in the cluster.
6402 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6403 instance = self.cfg.GetInstanceInfo(instance_name)
6404 assert instance is not None
6405 self.instance = instance
    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True
    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)
6422 if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.iallocator:
        self._RunAllocator()

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
6432 if len(self.lu.tasklets) == 1:
6433 # It is safe to remove locks only when we're the only tasklet in the LU
6434 nodes_keep = [instance.primary_node, self.target_node]
6435 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6436 if node not in nodes_keep]
6437 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6438 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      secondary_nodes = instance.secondary_nodes
6442 if not secondary_nodes:
6443 raise errors.ConfigurationError("No secondary node but using"
6444 " %s disk template" %
6445 instance.disk_template)
6446 target_node = secondary_nodes[0]
      if self.iallocator or (self.target_node and
                             self.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
6460 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6462 # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
6472 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6474 if not self.cleanup:
6475 _CheckNodeNotDrained(self.lu, target_node)
6476 if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)
6487 assert not (self.failover and self.cleanup)
6489 def _RunAllocator(self):
6490 """Run the allocator based on input opcode.
6493 ial = IAllocator(self.cfg, self.rpc,
6494 mode=constants.IALLOCATOR_MODE_RELOC,
6495 name=self.instance_name,
6496 # TODO See why hail breaks with a single node below
6497 relocate_from=[self.instance.primary_node,
6498 self.instance.primary_node],
    ial.Run(self.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.iallocator, ial.info),
                                 errors.ECODE_NORES)
6508 if len(ial.result) != ial.required_nodes:
6509 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6510 " of nodes (%s), required %s" %
6511 (self.iallocator, len(ial.result),
6512 ial.required_nodes), errors.ECODE_FAULT)
6513 self.target_node = ial.result[0]
6514 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6515 self.instance_name, self.iallocator,
6516 utils.CommaJoin(ial.result))
    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False
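    # Illustrative examples of the decision above: an explicit op.live=True
    # forces HT_MIGRATION_LIVE, op.live=False forces HT_MIGRATION_NONLIVE, and
    # when neither 'live' nor 'mode' is given the hypervisor's migration mode
    # parameter (HV_MIGRATION_MODE) supplies the default.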
6542 def _WaitUntilSync(self):
6543 """Poll with custom rpc for disk sync.
6545 This uses our own step-based rpc call.
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
6567 def _EnsureSecondary(self, node):
6568 """Demote a node to secondary.
6571 self.feedback_fn("* switching node %s to secondary mode" % node)
6573 for dev in self.instance.disks:
6574 self.cfg.SetDiskID(dev, node)
6576 result = self.rpc.call_blockdev_close(node, self.instance.name,
6577 self.instance.disks)
6578 result.Raise("Cannot change disk to secondary on node %s" % node)
6580 def _GoStandalone(self):
6581 """Disconnect from the network.
6584 self.feedback_fn("* changing into standalone mode")
6585 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6586 self.instance.disks)
6587 for node, nres in result.items():
6588 nres.Raise("Cannot disconnect disks node %s" % node)
6590 def _GoReconnect(self, multimaster):
6591 """Reconnect to the network.
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
6598 self.feedback_fn("* changing disks into %s mode" % msg)
6599 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6600 self.instance.disks,
6601 self.instance.name, multimaster)
6602 for node, nres in result.items():
6603 nres.Raise("Cannot change disks config on node %s" % node)
6605 def _ExecCleanup(self):
6606 """Try to cleanup after a failed migration.
6608 The cleanup is done by:
6609 - check that the instance is running only on one node
6610 (and update the config if needed)
6611 - change disks on its secondary node to secondary
6612 - wait until disks are fully synchronized
6613 - disconnect from the network
6614 - change disks into single-master mode
6615 - wait again until disks are fully synchronized
6618 instance = self.instance
6619 target_node = self.target_node
6620 source_node = self.source_node
6622 # check running on only one node
6623 self.feedback_fn("* checking where the instance actually runs"
6624 " (if this hangs, the hypervisor might be in"
6626 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6627 for node, result in ins_l.items():
6628 result.Raise("Can't contact node %s" % node)
6630 runningon_source = instance.name in ins_l[source_node].payload
6631 runningon_target = instance.name in ins_l[target_node].payload
6633 if runningon_source and runningon_target:
6634 raise errors.OpExecError("Instance seems to be running on two nodes,"
6635 " or the hypervisor is confused. You will have"
6636 " to ensure manually that it runs only on one"
6637 " and restart this operation.")
6639 if not (runningon_source or runningon_target):
6640 raise errors.OpExecError("Instance does not seem to be running at all."
6641 " In this case, it's safer to repair by"
6642 " running 'gnt-instance stop' to ensure disk"
6643 " shutdown, and then restarting it.")
6645 if runningon_target:
6646 # the migration has actually succeeded, we need to update the config
6647 self.feedback_fn("* instance running on secondary node (%s),"
6648 " updating config" % target_node)
6649 instance.primary_node = target_node
6650 self.cfg.Update(instance, self.feedback_fn)
6651 demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node
6657 if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
6666 self._GoReconnect(False)
6667 self._WaitUntilSync()
6669 self.feedback_fn("* done")
6671 def _RevertDiskStatus(self):
6672 """Try to revert the disk status after a failed migration.
6675 target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))
6690 def _AbortMigration(self):
6691 """Call the hypervisor code to abort a started migration.
6694 instance = self.instance
6695 target_node = self.target_node
6696 migration_info = self.migration_info
    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
6706 # Don't raise an exception here, as we stil have to try to revert the
6707 # disk status, even if this step failed.
6709 def _ExecMigration(self):
6710 """Migrate an instance.
6712 The migrate is done by:
6713 - change the disks into dual-master mode
6714 - wait until disks are fully synchronized again
6715 - migrate the instance
6716 - change disks on the new secondary node (the old primary) to secondary
6717 - wait until disks are fully synchronized
6718 - change disks into single-master mode
6721 instance = self.instance
6722 target_node = self.target_node
6723 source_node = self.source_node
6725 self.feedback_fn("* checking disk consistency between source and target")
6726 for dev in instance.disks:
6727 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6728 raise errors.OpExecError("Disk %s is degraded or not fully"
6729 " synchronized on target node,"
6730 " aborting migrate." % dev.iv_name)
6732 # First get the migration information from the remote node
6733 result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)
6741 self.migration_info = migration_info = result.payload
6743 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6744 # Then switch the disks to master/master mode
6745 self._EnsureSecondary(target_node)
6746 self._GoStandalone()
6747 self._GoReconnect(True)
6748 self._WaitUntilSync()
6750 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6751 result = self.rpc.call_accept_instance(target_node,
6754 self.nodes_ip[target_node])
6756 msg = result.fail_msg
6758 logging.error("Instance pre-migration failed, trying to revert"
6759 " disk status: %s", msg)
6760 self.feedback_fn("Pre-migration failed, aborting")
6761 self._AbortMigration()
6762 self._RevertDiskStatus()
6763 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6764 (instance.name, msg))
6766 self.feedback_fn("* migrating instance to %s" % target_node)
6767 result = self.rpc.call_instance_migrate(source_node, instance,
6768 self.nodes_ip[target_node],
6770 msg = result.fail_msg
6772 logging.error("Instance migration failed, trying to revert"
6773 " disk status: %s", msg)
6774 self.feedback_fn("Migration failed, aborting")
6775 self._AbortMigration()
6776 self._RevertDiskStatus()
6777 raise errors.OpExecError("Could not migrate instance %s: %s" %
6778 (instance.name, msg))
6780 instance.primary_node = target_node
6781 # distribute new instance config to the other nodes
6782 self.cfg.Update(instance, self.feedback_fn)
    result = self.rpc.call_finalize_migration(target_node, instance,
                                              migration_info, True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)
6795 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6796 self._EnsureSecondary(source_node)
6797 self._WaitUntilSync()
6798 self._GoStandalone()
6799 self._GoReconnect(False)
6800 self._WaitUntilSync()
6802 self.feedback_fn("* done")
6804 def _ExecFailover(self):
6805 """Failover an instance.
6807 The failover is done by shutting it down on its present node and
6808 starting it on the secondary.
6811 instance = self.instance
6812 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6814 source_node = instance.primary_node
6815 target_node = self.target_node
6817 if instance.admin_up:
6818 self.feedback_fn("* checking disk consistency between source and target")
6819 for dev in instance.disks:
6820 # for drbd, these are drbd over lvm
6821 if not _CheckDiskConsistency(self, dev, target_node, False):
        if not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")
6829 self.feedback_fn("* shutting down instance on source node")
6830 logging.info("Shutting down instance %s on node %s",
6831 instance.name, source_node)
6833 result = self.rpc.call_instance_shutdown(source_node, instance,
6834 self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))
6847 self.feedback_fn("* deactivating the instance's disks on source node")
6848 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6849 raise errors.OpExecError("Can't shut down the instance's disks.")
6851 instance.primary_node = target_node
6852 # distribute new instance config to the other nodes
6853 self.cfg.Update(instance, self.feedback_fn)
6855 # Only start the instance if it's marked as up
6856 if instance.admin_up:
6857 self.feedback_fn("* activating the instance's disks on target node")
6858 logging.info("Starting instance %s on node %s",
6859 instance.name, target_node)
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6875 def Exec(self, feedback_fn):
6876 """Perform the migration.
6879 self.feedback_fn = feedback_fn
6880 self.source_node = self.instance.primary_node
6882 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6883 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6884 self.target_node = self.instance.secondary_nodes[0]
6885 # Otherwise self.target_node has been populated either
6886 # directly, or through an iallocator.
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()
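    # The helper methods above rely on the attributes initialised here:
    # source_node/target_node for the direction of the move, all_nodes for the
    # DRBD RPCs, and nodes_ip for the inter-node DRBD endpoints (the nodes'
    # secondary IPs).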
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
6908 """Create a tree of block devices on a given node.
6910 If this device type has to be created on secondaries, create it and
6913 If not, just recurse to children keeping the same 'force' value.
6915 @param lu: the lu on whose behalf we execute
6916 @param node: the node on which to create the device
6917 @type instance: L{objects.Instance}
6918 @param instance: the instance which owns the device
6919 @type device: L{objects.Disk}
6920 @param device: the device to create
6921 @type force_create: boolean
6922 @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
6924 CreateOnSecondary() attribute
6925 @param info: the extra 'metadata' we should attach to the device
6926 (this will be represented as a LVM tag)
6927 @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
  if device.CreateOnSecondary():
    force_create = True
  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)
  if not force_create:
    return
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
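# Illustrative note: for a DRBD8 disk the recursion above first creates the LV
# children on each node; the DRBD device itself is then created everywhere,
# but force_open (and thus the device Open() call) is normally passed as True
# only on the primary node, as decided by the caller (see _CreateDisks below).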
6948 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6949 """Create a single block device on a given node.
6951 This will not recurse over children of the device, so they must be
6954 @param lu: the lu on whose behalf we execute
6955 @param node: the node on which to create the device
6956 @type instance: L{objects.Instance}
6957 @param instance: the instance which owns the device
6958 @type device: L{objects.Disk}
6959 @param device: the device to create
6960 @param info: the extra 'metadata' we should attach to the device
6961 (this will be represented as a LVM tag)
6962 @type force_open: boolean
6963 @param force_open: this parameter will be passes to the
6964 L{backend.BlockdevCreate} function where it specifies
6965 whether we run on primary or not, and it affects both
6966 the child assembly and the device own Open() execution
6969 lu.cfg.SetDiskID(device, node)
6970 result = lu.rpc.call_blockdev_create(node, device, device.size,
6971 instance.name, force_open, info)
6972 result.Raise("Can't create block device %s on"
6973 " node %s for instance %s" % (device, node, instance.name))
6974 if device.physical_id is None:
6975 device.physical_id = result.payload
6978 def _GenerateUniqueNames(lu, exts):
6979 """Generate a suitable LV name.
6981 This will generate a logical volume name for the given instance.
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
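# Illustrative example: _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns
# names of the form "<uuid>.disk0", "<uuid>.disk1", where each <uuid> comes
# from the cluster configuration's GenerateUniqueID.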
6991 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
6992 iv_name, p_minor, s_minor):
6993 """Generate a drbd8 device complete with its children.
6996 assert len(vgnames) == len(names) == 2
6997 port = lu.cfg.AllocatePort()
6998 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6999 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7000 logical_id=(vgnames[0], names[0]))
7001 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7002 logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
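# The 128 MB data size of dev_meta above is the per-disk DRBD metadata volume;
# the same 128 MB per disk shows up as overhead in _ComputeDiskSize and
# _ComputeDiskSizePerVG further down.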
7012 def _GenerateDiskTemplate(lu, template_name,
7013 instance_name, primary_node,
7014 secondary_nodes, disk_info,
7015 file_storage_dir, file_driver,
7016 base_index, feedback_fn):
7017 """Generate the entire disk layout for a given template type.
7020 #TODO: compute space requirements
7022 vgname = lu.cfg.GetVGName()
7023 disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
7027 elif template_name == constants.DT_PLAIN:
7028 if len(secondary_nodes) != 0:
7029 raise errors.ProgrammerError("Wrong template configuration")
7031 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7032 for i in range(disk_count)])
7033 for idx, disk in enumerate(disk_info):
7034 disk_index = idx + base_index
7035 vg = disk.get(constants.IDISK_VG, vgname)
7036 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7037 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7038 size=disk[constants.IDISK_SIZE],
7039 logical_id=(vg, names[idx]),
7040 iv_name="disk/%d" % disk_index,
7041 mode=disk[constants.IDISK_MODE])
7042 disks.append(disk_dev)
7043 elif template_name == constants.DT_DRBD8:
7044 if len(secondary_nodes) != 1:
7045 raise errors.ProgrammerError("Wrong template configuration")
7046 remote_node = secondary_nodes[0]
7047 minors = lu.cfg.AllocateDRBDMinor(
7048 [primary_node, remote_node] * len(disk_info), instance_name)
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7052 for i in range(disk_count)]):
7053 names.append(lv_prefix + "_data")
7054 names.append(lv_prefix + "_meta")
7055 for idx, disk in enumerate(disk_info):
7056 disk_index = idx + base_index
7057 data_vg = disk.get(constants.IDISK_VG, vgname)
7058 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7059 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7060 disk[constants.IDISK_SIZE],
7062 names[idx * 2:idx * 2 + 2],
7063 "disk/%d" % disk_index,
7064 minors[idx * 2], minors[idx * 2 + 1])
7065 disk_dev.mode = disk[constants.IDISK_MODE]
7066 disks.append(disk_dev)
7067 elif template_name == constants.DT_FILE:
7068 if len(secondary_nodes) != 0:
7069 raise errors.ProgrammerError("Wrong template configuration")
7071 opcodes.RequireFileStorage()
7073 for idx, disk in enumerate(disk_info):
7074 disk_index = idx + base_index
7075 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7076 size=disk[constants.IDISK_SIZE],
7077 iv_name="disk/%d" % disk_index,
7078 logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
7081 mode=disk[constants.IDISK_MODE])
7082 disks.append(disk_dev)
7083 elif template_name == constants.DT_SHARED_FILE:
7084 if len(secondary_nodes) != 0:
7085 raise errors.ProgrammerError("Wrong template configuration")
7087 opcodes.RequireSharedFileStorage()
7089 for idx, disk in enumerate(disk_info):
7090 disk_index = idx + base_index
7091 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7092 size=disk[constants.IDISK_SIZE],
7093 iv_name="disk/%d" % disk_index,
7094 logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
7097 mode=disk[constants.IDISK_MODE])
7098 disks.append(disk_dev)
7099 elif template_name == constants.DT_BLOCK:
7100 if len(secondary_nodes) != 0:
7101 raise errors.ProgrammerError("Wrong template configuration")
7103 for idx, disk in enumerate(disk_info):
7104 disk_index = idx + base_index
7105 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7106 size=disk[constants.IDISK_SIZE],
7107 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7108 disk[constants.IDISK_ADOPT]),
7109 iv_name="disk/%d" % disk_index,
7110 mode=disk[constants.IDISK_MODE])
7111 disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
7118 def _GetInstanceInfoText(instance):
  Compute the text that should be added to the disk's metadata.
7122 return "originstname+%s" % instance.name
7125 def _CalcEta(time_taken, written, total_size):
7126 """Calculates the ETA based on size written and total size.
7128 @param time_taken: The time taken so far
7129 @param written: amount written so far
7130 @param total_size: The total size of data to be written
7131 @return: The remaining time in seconds
7134 avg_time = time_taken / float(written)
7135 return (total_size - written) * avg_time
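# Worked example: if 1024 MiB out of 4096 MiB were written in 120 seconds,
# avg_time is 120/1024 ~= 0.117 s/MiB and the ETA is (4096 - 1024) * 0.117
# ~= 360 seconds.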
7138 def _WipeDisks(lu, instance):
7139 """Wipes instance disks.
7141 @type lu: L{LogicalUnit}
7142 @param lu: the logical unit on whose behalf we execute
7143 @type instance: L{objects.Instance}
7144 @param instance: the instance whose disks we should create
7145 @return: the success of the wipe
7148 node = instance.primary_node
7150 for device in instance.disks:
7151 lu.cfg.SetDiskID(device, node)
7153 logging.info("Pause sync of instance %s disks", instance.name)
7154 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7156 for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
7163 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7164 # MAX_WIPE_CHUNK at max
7165 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7166 constants.MIN_WIPE_CHUNK_PERCENT)
7167 # we _must_ make this an int, otherwise rounding errors will
7169 wipe_chunk_size = int(wipe_chunk_size)
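      # Illustrative example, assuming MIN_WIPE_CHUNK_PERCENT = 10 and
      # MAX_WIPE_CHUNK = 1024 MiB: a 20480 MiB disk is wiped in 1024 MiB
      # chunks (the cap applies), while a 5120 MiB disk is wiped in 512 MiB
      # chunks (10% of its size).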
7171 lu.LogInfo("* Wiping disk %d", idx)
7172 logging.info("Wiping disk %d for instance %s, node %s using"
7173 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()
7180 while offset < size:
7181 wipe_size = min(wipe_chunk_size, size - offset)
7182 logging.debug("Wiping disk %d, offset %s, chunk %s",
7183 idx, offset, wipe_size)
7184 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7185 result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
7189 if now - last_output >= 60:
7190 eta = _CalcEta(now - start_time, offset, size)
7191 lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)
7197 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7199 for idx, success in enumerate(result.payload):
    if not success:
      lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                    " look at the status and troubleshoot the issue.", idx)
      logging.warn("resume-sync of instance %s for disks %d failed",
                   instance.name, idx)
7207 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7208 """Create all disks for an instance.
7210 This abstracts away some work from AddInstance.
7212 @type lu: L{LogicalUnit}
7213 @param lu: the logical unit on whose behalf we execute
7214 @type instance: L{objects.Instance}
7215 @param instance: the instance whose disks we should create
7217 @param to_skip: list of indices to skip
7218 @type target_node: string
7219 @param target_node: if passed, overrides the target node for creation
7221 @return: the success of the creation
7224 info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]
7232 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7233 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7234 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7236 result.Raise("Failed to create directory '%s' on"
7237 " node %s" % (file_storage_dir, pnode))
7239 # Note: this needs to be kept in sync with adding of disks in
7240 # LUInstanceSetParams
7241 for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
7244 logging.info("Creating volume %s for instance %s",
7245 device.iv_name, instance.name)
7247 for node in all_nodes:
7248 f_create = node == pnode
7249 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
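    # f_create is True only on the primary node, so on secondary nodes the
    # recursion in _CreateBlockDev only materialises devices that declare
    # CreateOnSecondary() (e.g. the components of mirrored disk templates).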
7252 def _RemoveDisks(lu, instance, target_node=None):
7253 """Remove all disks for an instance.
7255 This abstracts away some work from `AddInstance()` and
7256 `RemoveInstance()`. Note that in case some of the devices couldn't
7257 be removed, the removal will continue with the other ones (compare
7258 with `_CreateDisks()`).
7260 @type lu: L{LogicalUnit}
7261 @param lu: the logical unit on whose behalf we execute
7262 @type instance: L{objects.Instance}
7263 @param instance: the instance whose disks we should remove
7264 @type target_node: string
7265 @param target_node: used to override the node on which to remove the disks
7267 @return: the success of the removal
7270 logging.info("Removing block devices for instance %s", instance.name)
  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
7301 def _ComputeDiskSizePerVG(disk_template, disks):
7302 """Compute disk size requirements in the volume group
7305 def _compute(disks, payload):
7306 """Universal algorithm.
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload
    return vgs
7316 # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }
7326 if disk_template not in req_size_dict:
7327 raise errors.ProgrammerError("Disk template '%s' size requirement"
7328 " is unknown" % disk_template)
7330 return req_size_dict[disk_template]
7333 def _ComputeDiskSize(disk_template, disks):
7334 """Compute disk size requirements in the volume group
7337 # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    }
7348 if disk_template not in req_size_dict:
7349 raise errors.ProgrammerError("Disk template '%s' size requirement"
7350 " is unknown" % disk_template)
7352 return req_size_dict[disk_template]
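# Illustrative example: for two disks of 10240 and 2048 MiB, DT_PLAIN requires
# 12288 MiB in the volume group while DT_DRBD8 requires 12288 + 2 * 128 =
# 12544 MiB because of the per-disk DRBD metadata.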
7355 def _FilterVmNodes(lu, nodenames):
7356 """Filters out non-vm_capable nodes from a list.
7358 @type lu: L{LogicalUnit}
7359 @param lu: the logical unit for which we check
7360 @type nodenames: list
7361 @param nodenames: the list of nodes on which we should check
7363 @return: the list of vm-capable nodes
7366 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7367 return [name for name in nodenames if name not in vm_nodes]
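# Note: despite its name, the local variable above holds the set of
# *non*-vm_capable nodes, which is then used as an exclusion list.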
7370 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7371 """Hypervisor parameter validation.
  This function abstracts the hypervisor parameter validation to be
7374 used in both instance create and instance modify.
7376 @type lu: L{LogicalUnit}
7377 @param lu: the logical unit for which we check
7378 @type nodenames: list
7379 @param nodenames: the list of nodes on which we should check
7380 @type hvname: string
7381 @param hvname: the name of the hypervisor we should use
7382 @type hvparams: dict
7383 @param hvparams: the parameters which we need to check
7384 @raise errors.OpPrereqError: if the parameters are not valid
7387 nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7398 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7399 """OS parameters validation.
7401 @type lu: L{LogicalUnit}
7402 @param lu: the logical unit for which we check
7403 @type required: boolean
7404 @param required: whether the validation should fail if the OS is not
7406 @type nodenames: list
7407 @param nodenames: the list of nodes on which we should check
7408 @type osname: string
  @param osname: the name of the OS we should use
7410 @type osparams: dict
7411 @param osparams: the parameters which we need to check
7412 @raise errors.OpPrereqError: if the parameters are not valid
7415 nodenames = _FilterVmNodes(lu, nodenames)
7416 result = lu.rpc.call_os_validate(required, nodenames, osname,
                                    [constants.OS_VALIDATE_PARAMETERS],
                                    osparams)
7419 for node, nres in result.items():
7420 # we don't check for offline cases since this should be run only
7421 # against the master node and/or an instance's nodes
7422 nres.Raise("OS Parameters validation failed on node %s" % node)
7423 if not nres.payload:
        lu.LogInfo("OS %s not found on node %s, validation skipped",
                   osname, node)
7428 class LUInstanceCreate(LogicalUnit):
7429 """Create an instance.
7432 HPATH = "instance-add"
7433 HTYPE = constants.HTYPE_INSTANCE
7436 def CheckArguments(self):
7440 # do not require name_check to ease forward/backward compatibility
7442 if self.op.no_install and self.op.start:
7443 self.LogInfo("No-installation mode selected, disabling startup")
7444 self.op.start = False
7445 # validate/normalize the instance name
7446 self.op.instance_name = \
7447 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7449 if self.op.ip_check and not self.op.name_check:
7450 # TODO: make the ip check more flexible and not depend on the name check
7451 raise errors.OpPrereqError("Cannot do ip check without a name check",
7454 # check nics' parameter names
7455 for nic in self.op.nics:
7456 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7458 # check disks. parameter names and consistent adopt/no-adopt strategy
7459 has_adopt = has_no_adopt = False
7460 for disk in self.op.disks:
7461 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
7466 if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)

    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7471 raise errors.OpPrereqError("Disk adoption is not supported for the"
7472 " '%s' disk template" %
7473 self.op.disk_template,
7475 if self.op.iallocator is not None:
7476 raise errors.OpPrereqError("Disk adoption not allowed with an"
7477 " iallocator script", errors.ECODE_INVAL)
7478 if self.op.mode == constants.INSTANCE_IMPORT:
7479 raise errors.OpPrereqError("Disk adoption not allowed for"
7480 " instance import", errors.ECODE_INVAL)
7482 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7483 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7484 " but no 'adopt' parameter given" %
7485 self.op.disk_template,
7488 self.adopt_disks = has_adopt
7490 # instance name verification
7491 if self.op.name_check:
7492 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7493 self.op.instance_name = self.hostname1.name
7494 # used in CheckPrereq for ip ping check
7495 self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None
7499 # file storage checks
7500 if (self.op.file_driver and
7501 not self.op.file_driver in constants.FILE_DRIVER):
7502 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7503 self.op.file_driver, errors.ECODE_INVAL)
7505 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7506 raise errors.OpPrereqError("File storage directory path not absolute",
7509 ### Node/iallocator related checks
7510 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7512 if self.op.pnode is not None:
7513 if self.op.disk_template in constants.DTS_INT_MIRROR:
7514 if self.op.snode is None:
7515 raise errors.OpPrereqError("The networked disk templates need"
7516 " a mirror node", errors.ECODE_INVAL)
7518 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7520 self.op.snode = None
7522 self._cds = _GetClusterDomainSecret()
7524 if self.op.mode == constants.INSTANCE_IMPORT:
7525 # On import force_variant must be True, because if we forced it at
7526 # initial install, our only chance when importing it back is that it
7528 self.op.force_variant = True
7530 if self.op.no_install:
7531 self.LogInfo("No-installation mode has no effect during import")
7533 elif self.op.mode == constants.INSTANCE_CREATE:
7534 if self.op.os_type is None:
7535 raise errors.OpPrereqError("No guest OS specified",
7537 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7538 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7539 " installation" % self.op.os_type,
7541 if self.op.disk_template is None:
7542 raise errors.OpPrereqError("No disk template specified",
7545 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7546 # Check handshake to ensure both clusters have the same domain secret
7547 src_handshake = self.op.source_handshake
7548 if not src_handshake:
7549 raise errors.OpPrereqError("Missing source handshake",
7552 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7555 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7558 # Load and check source CA
7559 self.source_x509_ca_pem = self.op.source_x509_ca
7560 if not self.source_x509_ca_pem:
7561 raise errors.OpPrereqError("Missing source X509 CA",
7565 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7567 except OpenSSL.crypto.Error, err:
7568 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7569 (err, ), errors.ECODE_INVAL)
7571 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7572 if errcode is not None:
7573 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7576 self.source_x509_ca = cert
7578 src_instance_name = self.op.source_instance_name
7579 if not src_instance_name:
7580 raise errors.OpPrereqError("Missing source instance name",
7583 self.source_instance_name = \
7584 netutils.GetHostname(name=src_instance_name).name
7587 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7588 self.op.mode, errors.ECODE_INVAL)
7590 def ExpandNames(self):
7591 """ExpandNames for CreateInstance.
7593 Figure out the right locks for instance creation.
7596 self.needed_locks = {}
7598 instance_name = self.op.instance_name
7599 # this is just a preventive check, but someone might still add this
7600 # instance in the meantime, and creation will fail at lock-add time
7601 if instance_name in self.cfg.GetInstanceList():
7602 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7603 instance_name, errors.ECODE_EXISTS)
7605 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7607 if self.op.iallocator:
7608 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7610 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7611 nodelist = [self.op.pnode]
7612 if self.op.snode is not None:
7613 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7614 nodelist.append(self.op.snode)
7615 self.needed_locks[locking.LEVEL_NODE] = nodelist
7617 # in case of import lock the source node too
7618 if self.op.mode == constants.INSTANCE_IMPORT:
7619 src_node = self.op.src_node
7620 src_path = self.op.src_path
7622 if src_path is None:
7623 self.op.src_path = src_path = self.op.instance_name
7625 if src_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7627 self.op.src_node = None
7628 if os.path.isabs(src_path):
7629 raise errors.OpPrereqError("Importing an instance from an absolute"
7630 " path requires a source node option.",
7633 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7634 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7635 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7636 if not os.path.isabs(src_path):
7637 self.op.src_path = src_path = \
7638 utils.PathJoin(constants.EXPORT_DIR, src_path)
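# Locking summary for instance creation: with an iallocator the target nodes
# are not known yet, so all node locks are acquired; with explicit nodes only
# the primary (and optional secondary) are locked; an import without an
# explicit source node also falls back to locking all nodes so that the
# export can be searched for.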
7640 def _RunAllocator(self):
7641 """Run the allocator based on input opcode.
7644 nics = [n.ToDict() for n in self.nics]
7645 ial = IAllocator(self.cfg, self.rpc,
7646 mode=constants.IALLOCATOR_MODE_ALLOC,
7647 name=self.op.instance_name,
7648 disk_template=self.op.disk_template,
7651 vcpus=self.be_full[constants.BE_VCPUS],
7652 mem_size=self.be_full[constants.BE_MEMORY],
7655 hypervisor=self.op.hypervisor,
7658 ial.Run(self.op.iallocator)
7661 raise errors.OpPrereqError("Can't compute nodes using"
7662 " iallocator '%s': %s" %
7663 (self.op.iallocator, ial.info),
7665 if len(ial.result) != ial.required_nodes:
7666 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7667 " of nodes (%s), required %s" %
7668 (self.op.iallocator, len(ial.result),
7669 ial.required_nodes), errors.ECODE_FAULT)
7670 self.op.pnode = ial.result[0]
7671 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7672 self.op.instance_name, self.op.iallocator,
7673 utils.CommaJoin(ial.result))
7674 if ial.required_nodes == 2:
7675 self.op.snode = ial.result[1]
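# For internally mirrored disk templates the allocator is expected to return
# two nodes: the first becomes the primary and the second the secondary.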
7677 def BuildHooksEnv(self):
7680 This runs on master, primary and secondary nodes of the instance.
7684 "ADD_MODE": self.op.mode,
7686 if self.op.mode == constants.INSTANCE_IMPORT:
7687 env["SRC_NODE"] = self.op.src_node
7688 env["SRC_PATH"] = self.op.src_path
7689 env["SRC_IMAGES"] = self.src_images
7691 env.update(_BuildInstanceHookEnv(
7692 name=self.op.instance_name,
7693 primary_node=self.op.pnode,
7694 secondary_nodes=self.secondaries,
7695 status=self.op.start,
7696 os_type=self.op.os_type,
7697 memory=self.be_full[constants.BE_MEMORY],
7698 vcpus=self.be_full[constants.BE_VCPUS],
7699 nics=_NICListToTuple(self, self.nics),
7700 disk_template=self.op.disk_template,
7701 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7702 for d in self.disks],
7705 hypervisor_name=self.op.hypervisor,
7710 def BuildHooksNodes(self):
7711 """Build hooks nodes.
7714 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7717 def _ReadExportInfo(self):
7718 """Reads the export information from disk.
7720 It will override the opcode source node and path with the actual
7721 information, if these two were not specified before.
7723 @return: the export information
7726 assert self.op.mode == constants.INSTANCE_IMPORT
7728 src_node = self.op.src_node
7729 src_path = self.op.src_path
7731 if src_node is None:
7732 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7733 exp_list = self.rpc.call_export_list(locked_nodes)
7735 for node in exp_list:
7736 if exp_list[node].fail_msg:
7738 if src_path in exp_list[node].payload:
7740 self.op.src_node = src_node = node
7741 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7745 raise errors.OpPrereqError("No export found for relative path %s" %
7746 src_path, errors.ECODE_INVAL)
7748 _CheckNodeOnline(self, src_node)
7749 result = self.rpc.call_export_info(src_node, src_path)
7750 result.Raise("No export or invalid export found in dir %s" % src_path)
7752 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7753 if not export_info.has_section(constants.INISECT_EXP):
7754 raise errors.ProgrammerError("Corrupted export config",
7755 errors.ECODE_ENVIRON)
7757 ei_version = export_info.get(constants.INISECT_EXP, "version")
7758 if (int(ei_version) != constants.EXPORT_VERSION):
7759 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7760 (ei_version, constants.EXPORT_VERSION),
7761 errors.ECODE_ENVIRON)
7764 def _ReadExportParams(self, einfo):
7765 """Use export parameters as defaults.
7767 In case the opcode doesn't specify (as in override) some instance
7768 parameters, then try to use them from the export information, if
7769 that declares them.
7772 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7774 if self.op.disk_template is None:
7775 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7776 self.op.disk_template = einfo.get(constants.INISECT_INS,
7779 raise errors.OpPrereqError("No disk template specified and the export"
7780 " is missing the disk_template information",
7783 if not self.op.disks:
7784 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7786 # TODO: import the disk iv_name too
7787 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7788 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7789 disks.append({constants.IDISK_SIZE: disk_sz})
7790 self.op.disks = disks
7792 raise errors.OpPrereqError("No disk info specified and the export"
7793 " is missing the disk information",
7796 if (not self.op.nics and
7797 einfo.has_option(constants.INISECT_INS, "nic_count")):
7799 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7801 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7802 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7807 if (self.op.hypervisor is None and
7808 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7809 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7810 if einfo.has_section(constants.INISECT_HYP):
7811 # use the export parameters but do not override the ones
7812 # specified by the user
7813 for name, value in einfo.items(constants.INISECT_HYP):
7814 if name not in self.op.hvparams:
7815 self.op.hvparams[name] = value
7817 if einfo.has_section(constants.INISECT_BEP):
7818 # use the parameters, without overriding
7819 for name, value in einfo.items(constants.INISECT_BEP):
7820 if name not in self.op.beparams:
7821 self.op.beparams[name] = value
7823 # try to read the parameters old style, from the main section
7824 for name in constants.BES_PARAMETERS:
7825 if (name not in self.op.beparams and
7826 einfo.has_option(constants.INISECT_INS, name)):
7827 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7829 if einfo.has_section(constants.INISECT_OSP):
7830 # use the parameters, without overriding
7831 for name, value in einfo.items(constants.INISECT_OSP):
7832 if name not in self.op.osparams:
7833 self.op.osparams[name] = value
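# Precedence when importing: values supplied in the opcode always win; the
# export file is only consulted for parameters that were left unset above.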
7835 def _RevertToDefaults(self, cluster):
7836 """Revert the instance parameters to the default values.
7840 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7841 for name in self.op.hvparams.keys():
7842 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7843 del self.op.hvparams[name]
7845 be_defs = cluster.SimpleFillBE({})
7846 for name in self.op.beparams.keys():
7847 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7848 del self.op.beparams[name]
7850 nic_defs = cluster.SimpleFillNIC({})
7851 for nic in self.op.nics:
7852 for name in constants.NICS_PARAMETERS:
7853 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7856 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7857 for name in self.op.osparams.keys():
7858 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7859 del self.op.osparams[name]
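# Dropping values identical to the current cluster defaults means the new
# instance keeps following the cluster-level defaults instead of pinning
# them; this is only done when the opcode requests it (identify_defaults,
# see CheckPrereq below).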
7861 def CheckPrereq(self):
7862 """Check prerequisites.
7865 if self.op.mode == constants.INSTANCE_IMPORT:
7866 export_info = self._ReadExportInfo()
7867 self._ReadExportParams(export_info)
7869 if (not self.cfg.GetVGName() and
7870 self.op.disk_template not in constants.DTS_NOT_LVM):
7871 raise errors.OpPrereqError("Cluster does not support lvm-based"
7872 " instances", errors.ECODE_STATE)
7874 if self.op.hypervisor is None:
7875 self.op.hypervisor = self.cfg.GetHypervisorType()
7877 cluster = self.cfg.GetClusterInfo()
7878 enabled_hvs = cluster.enabled_hypervisors
7879 if self.op.hypervisor not in enabled_hvs:
7880 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7881 " cluster (%s)" % (self.op.hypervisor,
7882 ",".join(enabled_hvs)),
7885 # check hypervisor parameter syntax (locally)
7886 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7887 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7889 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7890 hv_type.CheckParameterSyntax(filled_hvp)
7891 self.hv_full = filled_hvp
7892 # check that we don't specify global parameters on an instance
7893 _CheckGlobalHvParams(self.op.hvparams)
7895 # fill and remember the beparams dict
7896 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7897 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7899 # build os parameters
7900 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7902 # now that hvp/bep are in final format, let's reset to defaults,
7903 # if told to do so
7904 if self.op.identify_defaults:
7905 self._RevertToDefaults(cluster)
7909 for idx, nic in enumerate(self.op.nics):
7910 nic_mode_req = nic.get(constants.INIC_MODE, None)
7911 nic_mode = nic_mode_req
7912 if nic_mode is None:
7913 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7915 # in routed mode, for the first nic, the default ip is 'auto'
7916 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7917 default_ip_mode = constants.VALUE_AUTO
7919 default_ip_mode = constants.VALUE_NONE
7921 # ip validity checks
7922 ip = nic.get(constants.INIC_IP, default_ip_mode)
7923 if ip is None or ip.lower() == constants.VALUE_NONE:
7924 nic_ip = None
7925 elif ip.lower() == constants.VALUE_AUTO:
7926 if not self.op.name_check:
7927 raise errors.OpPrereqError("IP address set to auto but name checks"
7928 " have been skipped",
7929 errors.ECODE_INVAL)
7930 nic_ip = self.hostname1.ip
7931 else:
7932 if not netutils.IPAddress.IsValid(ip):
7933 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7934 errors.ECODE_INVAL)
7936 nic_ip = ip
7937 # TODO: check the ip address for uniqueness
7938 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7939 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7942 # MAC address verification
7943 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7944 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7945 mac = utils.NormalizeAndValidateMac(mac)
7948 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7949 except errors.ReservationError:
7950 raise errors.OpPrereqError("MAC address %s already in use"
7951 " in cluster" % mac,
7952 errors.ECODE_NOTUNIQUE)
7954 # Build nic parameters
7955 link = nic.get(constants.INIC_LINK, None)
7958 nicparams[constants.NIC_MODE] = nic_mode_req
7960 nicparams[constants.NIC_LINK] = link
7962 check_params = cluster.SimpleFillNIC(nicparams)
7963 objects.NIC.CheckParameterSyntax(check_params)
7964 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
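# For illustration, a single NIC specification in self.op.nics might look
# like the following (values are examples only):
#   {constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
#    constants.INIC_MAC: constants.VALUE_AUTO,
#    constants.INIC_IP: None,
#    constants.INIC_LINK: "xen-br0"}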
7966 # disk checks/pre-build
7967 default_vg = self.cfg.GetVGName()
7969 for disk in self.op.disks:
7970 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7971 if mode not in constants.DISK_ACCESS_SET:
7972 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7973 mode, errors.ECODE_INVAL)
7974 size = disk.get(constants.IDISK_SIZE, None)
7975 if size is None:
7976 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7977 try:
7978 size = int(size)
7979 except (TypeError, ValueError):
7980 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7981 errors.ECODE_INVAL)
7983 data_vg = disk.get(constants.IDISK_VG, default_vg)
7984 new_disk = {
7985 constants.IDISK_SIZE: size,
7986 constants.IDISK_MODE: mode,
7987 constants.IDISK_VG: data_vg,
7988 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
7989 }
7990 if constants.IDISK_ADOPT in disk:
7991 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7992 self.disks.append(new_disk)
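# For illustration, a fully-built entry in self.disks at this point might
# look like the following (sizes in MiB, values are examples only):
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg",
#    constants.IDISK_METAVG: "xenvg",
#    constants.IDISK_ADOPT: "existing-lv"}  # only present in adoption mode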
7994 if self.op.mode == constants.INSTANCE_IMPORT:
7996 # Check that the new instance doesn't have less disks than the export
7997 instance_disks = len(self.disks)
7998 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7999 if instance_disks < export_disks:
8000 raise errors.OpPrereqError("Not enough disks to import."
8001 " (instance: %d, export: %d)" %
8002 (instance_disks, export_disks),
8006 for idx in range(export_disks):
8007 option = 'disk%d_dump' % idx
8008 if export_info.has_option(constants.INISECT_INS, option):
8009 # FIXME: are the old os-es, disk sizes, etc. useful?
8010 export_name = export_info.get(constants.INISECT_INS, option)
8011 image = utils.PathJoin(self.op.src_path, export_name)
8012 disk_images.append(image)
8014 disk_images.append(False)
8016 self.src_images = disk_images
8018 old_name = export_info.get(constants.INISECT_INS, 'name')
8020 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8021 except (TypeError, ValueError), err:
8022 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8023 " an integer: %s" % str(err),
8025 if self.op.instance_name == old_name:
8026 for idx, nic in enumerate(self.nics):
8027 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8028 nic_mac_ini = 'nic%d_mac' % idx
8029 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8031 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8033 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8034 if self.op.ip_check:
8035 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8036 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8037 (self.check_ip, self.op.instance_name),
8038 errors.ECODE_NOTUNIQUE)
8040 #### mac address generation
8041 # By generating here the mac address both the allocator and the hooks get
8042 # the real final mac address rather than the 'auto' or 'generate' value.
8043 # There is a race condition between the generation and the instance object
8044 # creation, which means that we know the mac is valid now, but we're not
8045 # sure it will be when we actually add the instance. If things go bad
8046 # adding the instance will abort because of a duplicate mac, and the
8047 # creation job will fail.
8048 for nic in self.nics:
8049 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8050 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8054 if self.op.iallocator is not None:
8055 self._RunAllocator()
8057 #### node related checks
8059 # check primary node
8060 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8061 assert self.pnode is not None, \
8062 "Cannot retrieve locked node %s" % self.op.pnode
8064 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8065 pnode.name, errors.ECODE_STATE)
8067 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8068 pnode.name, errors.ECODE_STATE)
8069 if not pnode.vm_capable:
8070 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8071 " '%s'" % pnode.name, errors.ECODE_STATE)
8073 self.secondaries = []
8075 # mirror node verification
8076 if self.op.disk_template in constants.DTS_INT_MIRROR:
8077 if self.op.snode == pnode.name:
8078 raise errors.OpPrereqError("The secondary node cannot be the"
8079 " primary node.", errors.ECODE_INVAL)
8080 _CheckNodeOnline(self, self.op.snode)
8081 _CheckNodeNotDrained(self, self.op.snode)
8082 _CheckNodeVmCapable(self, self.op.snode)
8083 self.secondaries.append(self.op.snode)
8085 nodenames = [pnode.name] + self.secondaries
8087 if not self.adopt_disks:
8088 # Check lv size requirements, if not adopting
8089 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8090 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8092 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8093 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8094 disk[constants.IDISK_ADOPT])
8095 for disk in self.disks])
8096 if len(all_lvs) != len(self.disks):
8097 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8099 for lv_name in all_lvs:
8101 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8102 # to ReserveLV uses the same syntax
8103 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8104 except errors.ReservationError:
8105 raise errors.OpPrereqError("LV named %s used by another instance" %
8106 lv_name, errors.ECODE_NOTUNIQUE)
8108 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8109 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8111 node_lvs = self.rpc.call_lv_list([pnode.name],
8112 vg_names.payload.keys())[pnode.name]
8113 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8114 node_lvs = node_lvs.payload
8116 delta = all_lvs.difference(node_lvs.keys())
8118 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8119 utils.CommaJoin(delta),
8121 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8123 raise errors.OpPrereqError("Online logical volumes found, cannot"
8124 " adopt: %s" % utils.CommaJoin(online_lvs),
8126 # update the size of disk based on what is found
8127 for dsk in self.disks:
8128 dsk[constants.IDISK_SIZE] = \
8129 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8130 dsk[constants.IDISK_ADOPT])][0]))
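# As used above, the lv_list payload maps "vg/lv" names to tuples whose
# first element is the volume size (assumed to be in MiB) and whose third
# element is the "online" (in use) flag.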
8132 elif self.op.disk_template == constants.DT_BLOCK:
8133 # Normalize and de-duplicate device paths
8134 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8135 for disk in self.disks])
8136 if len(all_disks) != len(self.disks):
8137 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8139 baddisks = [d for d in all_disks
8140 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8142 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8143 " cannot be adopted" %
8144 (", ".join(baddisks),
8145 constants.ADOPTABLE_BLOCKDEV_ROOT),
8148 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8149 list(all_disks))[pnode.name]
8150 node_disks.Raise("Cannot get block device information from node %s" %
8152 node_disks = node_disks.payload
8153 delta = all_disks.difference(node_disks.keys())
8155 raise errors.OpPrereqError("Missing block device(s): %s" %
8156 utils.CommaJoin(delta),
8158 for dsk in self.disks:
8159 dsk[constants.IDISK_SIZE] = \
8160 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8162 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8164 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8165 # check OS parameters (remotely)
8166 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8168 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8170 # memory check on primary node
8172 _CheckNodeFreeMemory(self, self.pnode.name,
8173 "creating instance %s" % self.op.instance_name,
8174 self.be_full[constants.BE_MEMORY],
8177 self.dry_run_result = list(nodenames)
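# In dry-run mode the list of candidate nodes computed above becomes the
# LU's result, so callers can see where the instance would be placed.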
8179 def Exec(self, feedback_fn):
8180 """Create and add the instance to the cluster.
8183 instance = self.op.instance_name
8184 pnode_name = self.pnode.name
8186 ht_kind = self.op.hypervisor
8187 if ht_kind in constants.HTS_REQ_PORT:
8188 network_port = self.cfg.AllocatePort()
8192 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8193 # this is needed because os.path.join does not accept None arguments
8194 if self.op.file_storage_dir is None:
8195 string_file_storage_dir = ""
8197 string_file_storage_dir = self.op.file_storage_dir
8199 # build the full file storage dir path
8200 if self.op.disk_template == constants.DT_SHARED_FILE:
8201 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8203 get_fsd_fn = self.cfg.GetFileStorageDir
8205 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8206 string_file_storage_dir, instance)
8208 file_storage_dir = ""
8210 disks = _GenerateDiskTemplate(self,
8211 self.op.disk_template,
8212 instance, pnode_name,
8216 self.op.file_driver,
8220 iobj = objects.Instance(name=instance, os=self.op.os_type,
8221 primary_node=pnode_name,
8222 nics=self.nics, disks=disks,
8223 disk_template=self.op.disk_template,
8225 network_port=network_port,
8226 beparams=self.op.beparams,
8227 hvparams=self.op.hvparams,
8228 hypervisor=self.op.hypervisor,
8229 osparams=self.op.osparams,
8232 if self.adopt_disks:
8233 if self.op.disk_template == constants.DT_PLAIN:
8234 # rename LVs to the newly-generated names; we need to construct
8235 # 'fake' LV disks with the old data, plus the new unique_id
8236 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8238 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8239 rename_to.append(t_dsk.logical_id)
8240 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8241 self.cfg.SetDiskID(t_dsk, pnode_name)
8242 result = self.rpc.call_blockdev_rename(pnode_name,
8243 zip(tmp_disks, rename_to))
8244 result.Raise("Failed to rename adopted LVs")
8246 feedback_fn("* creating instance disks...")
8248 _CreateDisks(self, iobj)
8249 except errors.OpExecError:
8250 self.LogWarning("Device creation failed, reverting...")
8252 _RemoveDisks(self, iobj)
8254 self.cfg.ReleaseDRBDMinors(instance)
8257 feedback_fn("adding instance %s to cluster config" % instance)
8259 self.cfg.AddInstance(iobj, self.proc.GetECId())
8261 # Declare that we don't want to remove the instance lock anymore, as we've
8262 # added the instance to the config
8263 del self.remove_locks[locking.LEVEL_INSTANCE]
8264 # Unlock all the nodes
8265 if self.op.mode == constants.INSTANCE_IMPORT:
8266 nodes_keep = [self.op.src_node]
8267 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8268 if node != self.op.src_node]
8269 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8270 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8271 else:
8272 self.context.glm.release(locking.LEVEL_NODE)
8273 del self.acquired_locks[locking.LEVEL_NODE]
8276 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8277 feedback_fn("* wiping instance disks...")
8279 _WipeDisks(self, iobj)
8280 except errors.OpExecError, err:
8281 logging.exception("Wiping disks failed")
8282 self.LogWarning("Wiping instance disks failed (%s)", err)
8286 # Something is already wrong with the disks, don't do anything else
8288 elif self.op.wait_for_sync:
8289 disk_abort = not _WaitForSync(self, iobj)
8290 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8291 # make sure the disks are not degraded (still sync-ing is ok)
8293 feedback_fn("* checking mirrors status")
8294 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8299 _RemoveDisks(self, iobj)
8300 self.cfg.RemoveInstance(iobj.name)
8301 # Make sure the instance lock gets removed
8302 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8303 raise errors.OpExecError("There are some degraded disks for"
8306 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8307 if self.op.mode == constants.INSTANCE_CREATE:
8308 if not self.op.no_install:
8309 feedback_fn("* running the instance OS create scripts...")
8310 # FIXME: pass debug option from opcode to backend
8311 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8312 self.op.debug_level)
8313 result.Raise("Could not add os for instance %s"
8314 " on node %s" % (instance, pnode_name))
8316 elif self.op.mode == constants.INSTANCE_IMPORT:
8317 feedback_fn("* running the instance OS import scripts...")
8321 for idx, image in enumerate(self.src_images):
8325 # FIXME: pass debug option from opcode to backend
8326 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8327 constants.IEIO_FILE, (image, ),
8328 constants.IEIO_SCRIPT,
8329 (iobj.disks[idx], idx),
8331 transfers.append(dt)
8334 masterd.instance.TransferInstanceData(self, feedback_fn,
8335 self.op.src_node, pnode_name,
8336 self.pnode.secondary_ip,
8338 if not compat.all(import_result):
8339 self.LogWarning("Some disks for instance %s on node %s were not"
8340 " imported successfully" % (instance, pnode_name))
8342 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8343 feedback_fn("* preparing remote import...")
8344 # The source cluster will stop the instance before attempting to make a
8345 # connection. In some cases stopping an instance can take a long time,
8346 # hence the shutdown timeout is added to the connection timeout.
8347 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8348 self.op.source_shutdown_timeout)
8349 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8351 assert iobj.primary_node == self.pnode.name
8353 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8354 self.source_x509_ca,
8355 self._cds, timeouts)
8356 if not compat.all(disk_results):
8357 # TODO: Should the instance still be started, even if some disks
8358 # failed to import (valid for local imports, too)?
8359 self.LogWarning("Some disks for instance %s on node %s were not"
8360 " imported successfully" % (instance, pnode_name))
8362 # Run rename script on newly imported instance
8363 assert iobj.name == instance
8364 feedback_fn("Running rename script for %s" % instance)
8365 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8366 self.source_instance_name,
8367 self.op.debug_level)
8369 self.LogWarning("Failed to run rename script for %s on node"
8370 " %s: %s" % (instance, pnode_name, result.fail_msg))
8373 # also checked in the prereq part
8374 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8378 iobj.admin_up = True
8379 self.cfg.Update(iobj, feedback_fn)
8380 logging.info("Starting instance %s on node %s", instance, pnode_name)
8381 feedback_fn("* starting instance...")
8382 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8383 result.Raise("Could not start instance")
8385 return list(iobj.all_nodes)
8388 class LUInstanceConsole(NoHooksLU):
8389 """Connect to an instance's console.
8391 This is somewhat special in that it returns the command line that
8392 you need to run on the master node in order to connect to the
8393 console.
8398 def ExpandNames(self):
8399 self._ExpandAndLockInstance()
8401 def CheckPrereq(self):
8402 """Check prerequisites.
8404 This checks that the instance is in the cluster.
8407 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8408 assert self.instance is not None, \
8409 "Cannot retrieve locked instance %s" % self.op.instance_name
8410 _CheckNodeOnline(self, self.instance.primary_node)
8412 def Exec(self, feedback_fn):
8413 """Connect to the console of an instance
8416 instance = self.instance
8417 node = instance.primary_node
8419 node_insts = self.rpc.call_instance_list([node],
8420 [instance.hypervisor])[node]
8421 node_insts.Raise("Can't get node information from %s" % node)
8423 if instance.name not in node_insts.payload:
8424 if instance.admin_up:
8425 state = constants.INSTST_ERRORDOWN
8427 state = constants.INSTST_ADMINDOWN
8428 raise errors.OpExecError("Instance %s is not running (state %s)" %
8429 (instance.name, state))
8431 logging.debug("Connecting to console of %s on %s", instance.name, node)
8433 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8436 def _GetInstanceConsole(cluster, instance):
8437 """Returns console information for an instance.
8439 @type cluster: L{objects.Cluster}
8440 @type instance: L{objects.Instance}
8444 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8445 # beparams and hvparams are passed separately, to avoid editing the
8446 # instance and then saving the defaults in the instance itself.
8447 hvparams = cluster.FillHV(instance)
8448 beparams = cluster.FillBE(instance)
8449 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8451 assert console.instance == instance.name
8452 assert console.Validate()
8454 return console.ToDict()
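# The returned dictionary describes how to reach the instance's console
# (e.g. a command to run or a VNC endpoint, depending on the hypervisor);
# clients such as "gnt-instance console" consume it.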
8457 class LUInstanceReplaceDisks(LogicalUnit):
8458 """Replace the disks of an instance.
8461 HPATH = "mirrors-replace"
8462 HTYPE = constants.HTYPE_INSTANCE
8465 def CheckArguments(self):
8466 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8469 def ExpandNames(self):
8470 self._ExpandAndLockInstance()
8472 if self.op.iallocator is not None:
8473 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8475 elif self.op.remote_node is not None:
8476 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8477 self.op.remote_node = remote_node
8479 # Warning: do not remove the locking of the new secondary here
8480 # unless DRBD8.AddChildren is changed to work in parallel;
8481 # currently it doesn't since parallel invocations of
8482 # FindUnusedMinor will conflict
8483 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8484 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8487 self.needed_locks[locking.LEVEL_NODE] = []
8488 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8490 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8491 self.op.iallocator, self.op.remote_node,
8492 self.op.disks, False, self.op.early_release)
8494 self.tasklets = [self.replacer]
8496 def DeclareLocks(self, level):
8497 # If we're not already locking all nodes in the set we have to declare the
8498 # instance's primary/secondary nodes.
8499 if (level == locking.LEVEL_NODE and
8500 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8501 self._LockInstancesNodes()
8503 def BuildHooksEnv(self):
8506 This runs on the master, the primary and all the secondaries.
8509 instance = self.replacer.instance
8511 "MODE": self.op.mode,
8512 "NEW_SECONDARY": self.op.remote_node,
8513 "OLD_SECONDARY": instance.secondary_nodes[0],
8515 env.update(_BuildInstanceHookEnvByObject(self, instance))
8518 def BuildHooksNodes(self):
8519 """Build hooks nodes.
8522 instance = self.replacer.instance
8524 self.cfg.GetMasterNode(),
8525 instance.primary_node,
8527 if self.op.remote_node is not None:
8528 nl.append(self.op.remote_node)
8532 class TLReplaceDisks(Tasklet):
8533 """Replaces disks for an instance.
8535 Note: Locking is not within the scope of this class.
8538 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8539 disks, delay_iallocator, early_release):
8540 """Initializes this class.
8543 Tasklet.__init__(self, lu)
8546 self.instance_name = instance_name
8548 self.iallocator_name = iallocator_name
8549 self.remote_node = remote_node
8551 self.delay_iallocator = delay_iallocator
8552 self.early_release = early_release
8555 self.instance = None
8556 self.new_node = None
8557 self.target_node = None
8558 self.other_node = None
8559 self.remote_node_info = None
8560 self.node_secondary_ip = None
8563 def CheckArguments(mode, remote_node, iallocator):
8564 """Helper function for users of this class.
8567 # check for valid parameter combination
8568 if mode == constants.REPLACE_DISK_CHG:
8569 if remote_node is None and iallocator is None:
8570 raise errors.OpPrereqError("When changing the secondary either an"
8571 " iallocator script must be used or the"
8572 " new node given", errors.ECODE_INVAL)
8574 if remote_node is not None and iallocator is not None:
8575 raise errors.OpPrereqError("Give either the iallocator or the new"
8576 " secondary, not both", errors.ECODE_INVAL)
8578 elif remote_node is not None or iallocator is not None:
8579 # Not replacing the secondary
8580 raise errors.OpPrereqError("The iallocator and new node options can"
8581 " only be used when changing the"
8582 " secondary node", errors.ECODE_INVAL)
8585 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8586 """Compute a new secondary node using an IAllocator.
8589 ial = IAllocator(lu.cfg, lu.rpc,
8590 mode=constants.IALLOCATOR_MODE_RELOC,
8592 relocate_from=relocate_from)
8594 ial.Run(iallocator_name)
8597 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8598 " %s" % (iallocator_name, ial.info),
8601 if len(ial.result) != ial.required_nodes:
8602 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8603 " of nodes (%s), required %s" %
8605 len(ial.result), ial.required_nodes),
8608 remote_node_name = ial.result[0]
8610 lu.LogInfo("Selected new secondary for instance '%s': %s",
8611 instance_name, remote_node_name)
8613 return remote_node_name
8615 def _FindFaultyDisks(self, node_name):
8616 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8619 def _CheckDisksActivated(self, instance):
8620 """Checks if the instance disks are activated.
8622 @param instance: The instance to check disks
8623 @return: True if they are activated, False otherwise
8626 nodes = instance.all_nodes
8628 for idx, dev in enumerate(instance.disks):
8630 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8631 self.cfg.SetDiskID(dev, node)
8633 result = self.rpc.call_blockdev_find(node, dev)
8637 elif result.fail_msg or not result.payload:
8643 def CheckPrereq(self):
8644 """Check prerequisites.
8646 This checks that the instance is in the cluster.
8649 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8650 assert instance is not None, \
8651 "Cannot retrieve locked instance %s" % self.instance_name
8653 if instance.disk_template != constants.DT_DRBD8:
8654 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8655 " instances", errors.ECODE_INVAL)
8657 if len(instance.secondary_nodes) != 1:
8658 raise errors.OpPrereqError("The instance has a strange layout,"
8659 " expected one secondary but found %d" %
8660 len(instance.secondary_nodes),
8663 if not self.delay_iallocator:
8664 self._CheckPrereq2()
8666 def _CheckPrereq2(self):
8667 """Check prerequisites, second part.
8669 This function should always be part of CheckPrereq. It was separated and is
8670 now called from Exec because during node evacuation iallocator was only
8671 called with an unmodified cluster model, not taking planned changes into
8672 account.
8675 instance = self.instance
8676 secondary_node = instance.secondary_nodes[0]
8678 if self.iallocator_name is None:
8679 remote_node = self.remote_node
8681 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8682 instance.name, instance.secondary_nodes)
8684 if remote_node is not None:
8685 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8686 assert self.remote_node_info is not None, \
8687 "Cannot retrieve locked node %s" % remote_node
8689 self.remote_node_info = None
8691 if remote_node == self.instance.primary_node:
8692 raise errors.OpPrereqError("The specified node is the primary node of"
8693 " the instance.", errors.ECODE_INVAL)
8695 if remote_node == secondary_node:
8696 raise errors.OpPrereqError("The specified node is already the"
8697 " secondary node of the instance.",
8700 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8701 constants.REPLACE_DISK_CHG):
8702 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8705 if self.mode == constants.REPLACE_DISK_AUTO:
8706 if not self._CheckDisksActivated(instance):
8707 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8708 " first" % self.instance_name,
8710 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8711 faulty_secondary = self._FindFaultyDisks(secondary_node)
8713 if faulty_primary and faulty_secondary:
8714 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8715 " one node and can not be repaired"
8716 " automatically" % self.instance_name,
8720 self.disks = faulty_primary
8721 self.target_node = instance.primary_node
8722 self.other_node = secondary_node
8723 check_nodes = [self.target_node, self.other_node]
8724 elif faulty_secondary:
8725 self.disks = faulty_secondary
8726 self.target_node = secondary_node
8727 self.other_node = instance.primary_node
8728 check_nodes = [self.target_node, self.other_node]
8734 # Non-automatic modes
8735 if self.mode == constants.REPLACE_DISK_PRI:
8736 self.target_node = instance.primary_node
8737 self.other_node = secondary_node
8738 check_nodes = [self.target_node, self.other_node]
8740 elif self.mode == constants.REPLACE_DISK_SEC:
8741 self.target_node = secondary_node
8742 self.other_node = instance.primary_node
8743 check_nodes = [self.target_node, self.other_node]
8745 elif self.mode == constants.REPLACE_DISK_CHG:
8746 self.new_node = remote_node
8747 self.other_node = instance.primary_node
8748 self.target_node = secondary_node
8749 check_nodes = [self.new_node, self.other_node]
8751 _CheckNodeNotDrained(self.lu, remote_node)
8752 _CheckNodeVmCapable(self.lu, remote_node)
8754 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8755 assert old_node_info is not None
8756 if old_node_info.offline and not self.early_release:
8757 # doesn't make sense to delay the release
8758 self.early_release = True
8759 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8760 " early-release mode", secondary_node)
8763 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8766 # If not specified all disks should be replaced
8767 if not self.disks:
8768 self.disks = range(len(self.instance.disks))
8770 for node in check_nodes:
8771 _CheckNodeOnline(self.lu, node)
8773 # Check whether disks are valid
8774 for disk_idx in self.disks:
8775 instance.FindDisk(disk_idx)
8777 # Get secondary node IP addresses
8778 node_2nd_ip = {}
8780 for node_name in [self.target_node, self.other_node, self.new_node]:
8781 if node_name is not None:
8782 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8784 self.node_secondary_ip = node_2nd_ip
8786 def Exec(self, feedback_fn):
8787 """Execute disk replacement.
8789 This dispatches the disk replacement to the appropriate handler.
8792 if self.delay_iallocator:
8793 self._CheckPrereq2()
8795 if not self.disks:
8796 feedback_fn("No disks need replacement")
8797 return
8799 feedback_fn("Replacing disk(s) %s for %s" %
8800 (utils.CommaJoin(self.disks), self.instance.name))
8802 activate_disks = (not self.instance.admin_up)
8804 # Activate the instance disks if we're replacing them on a down instance
8805 if activate_disks:
8806 _StartInstanceDisks(self.lu, self.instance, True)
8808 try:
8809 # Should we replace the secondary node?
8810 if self.new_node is not None:
8811 fn = self._ExecDrbd8Secondary
8812 else:
8813 fn = self._ExecDrbd8DiskOnly
8815 return fn(feedback_fn)
8817 finally:
8818 # Deactivate the instance disks if we're replacing them on a
8819 # down instance
8820 if activate_disks:
8821 _SafeShutdownInstanceDisks(self.lu, self.instance)
8823 def _CheckVolumeGroup(self, nodes):
8824 self.lu.LogInfo("Checking volume groups")
8826 vgname = self.cfg.GetVGName()
8828 # Make sure volume group exists on all involved nodes
8829 results = self.rpc.call_vg_list(nodes)
8831 raise errors.OpExecError("Can't list volume groups on the nodes")
8835 res.Raise("Error checking node %s" % node)
8836 if vgname not in res.payload:
8837 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8840 def _CheckDisksExistence(self, nodes):
8841 # Check disk existence
8842 for idx, dev in enumerate(self.instance.disks):
8843 if idx not in self.disks:
8847 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8848 self.cfg.SetDiskID(dev, node)
8850 result = self.rpc.call_blockdev_find(node, dev)
8852 msg = result.fail_msg
8853 if msg or not result.payload:
8855 msg = "disk not found"
8856 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8859 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8860 for idx, dev in enumerate(self.instance.disks):
8861 if idx not in self.disks:
8864 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8867 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8869 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8870 " replace disks for instance %s" %
8871 (node_name, self.instance.name))
8873 def _CreateNewStorage(self, node_name):
8876 for idx, dev in enumerate(self.instance.disks):
8877 if idx not in self.disks:
8880 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8882 self.cfg.SetDiskID(dev, node_name)
8884 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8885 names = _GenerateUniqueNames(self.lu, lv_names)
8887 vg_data = dev.children[0].logical_id[0]
8888 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8889 logical_id=(vg_data, names[0]))
8890 vg_meta = dev.children[1].logical_id[0]
8891 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8892 logical_id=(vg_meta, names[1]))
8894 new_lvs = [lv_data, lv_meta]
8895 old_lvs = dev.children
8896 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8898 # we pass force_create=True to force the LVM creation
8899 for new_lv in new_lvs:
8900 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8901 _GetInstanceInfoText(self.instance), False)
8905 def _CheckDevices(self, node_name, iv_names):
8906 for name, (dev, _, _) in iv_names.iteritems():
8907 self.cfg.SetDiskID(dev, node_name)
8909 result = self.rpc.call_blockdev_find(node_name, dev)
8911 msg = result.fail_msg
8912 if msg or not result.payload:
8914 msg = "disk not found"
8915 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8918 if result.payload.is_degraded:
8919 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8921 def _RemoveOldStorage(self, node_name, iv_names):
8922 for name, (_, old_lvs, _) in iv_names.iteritems():
8923 self.lu.LogInfo("Remove logical volumes for %s" % name)
8926 self.cfg.SetDiskID(lv, node_name)
8928 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8930 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8931 hint="remove unused LVs manually")
8933 def _ReleaseNodeLock(self, node_name):
8934 """Releases the lock for a given node."""
8935 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8937 def _ExecDrbd8DiskOnly(self, feedback_fn):
8938 """Replace a disk on the primary or secondary for DRBD 8.
8940 The algorithm for replace is quite complicated:
8942 1. for each disk to be replaced:
8944 1. create new LVs on the target node with unique names
8945 1. detach old LVs from the drbd device
8946 1. rename old LVs to name_replaced.<time_t>
8947 1. rename new LVs to old LVs
8948 1. attach the new LVs (with the old names now) to the drbd device
8950 1. wait for sync across all devices
8952 1. for each modified disk:
8954 1. remove old LVs (which have the name name_replaced.<time_t>)
8956 Failures are not very well handled.
8961 # Step: check device activation
8962 self.lu.LogStep(1, steps_total, "Check device existence")
8963 self._CheckDisksExistence([self.other_node, self.target_node])
8964 self._CheckVolumeGroup([self.target_node, self.other_node])
8966 # Step: check other node consistency
8967 self.lu.LogStep(2, steps_total, "Check peer consistency")
8968 self._CheckDisksConsistency(self.other_node,
8969 self.other_node == self.instance.primary_node,
8972 # Step: create new storage
8973 self.lu.LogStep(3, steps_total, "Allocate new storage")
8974 iv_names = self._CreateNewStorage(self.target_node)
8976 # Step: for each lv, detach+rename*2+attach
8977 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8978 for dev, old_lvs, new_lvs in iv_names.itervalues():
8979 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8981 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8983 result.Raise("Can't detach drbd from local storage on node"
8984 " %s for device %s" % (self.target_node, dev.iv_name))
8986 #cfg.Update(instance)
8988 # ok, we created the new LVs, so now we know we have the needed
8989 # storage; as such, we proceed on the target node to rename
8990 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8991 # using the assumption that logical_id == physical_id (which in
8992 # turn is the unique_id on that node)
8994 # FIXME(iustin): use a better name for the replaced LVs
8995 temp_suffix = int(time.time())
8996 ren_fn = lambda d, suff: (d.physical_id[0],
8997 d.physical_id[1] + "_replaced-%s" % suff)
8999 # Build the rename list based on what LVs exist on the node
9000 rename_old_to_new = []
9001 for to_ren in old_lvs:
9002 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9003 if not result.fail_msg and result.payload:
9005 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9007 self.lu.LogInfo("Renaming the old LVs on the target node")
9008 result = self.rpc.call_blockdev_rename(self.target_node,
9010 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9012 # Now we rename the new LVs to the old LVs
9013 self.lu.LogInfo("Renaming the new LVs on the target node")
9014 rename_new_to_old = [(new, old.physical_id)
9015 for old, new in zip(old_lvs, new_lvs)]
9016 result = self.rpc.call_blockdev_rename(self.target_node,
9018 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9020 for old, new in zip(old_lvs, new_lvs):
9021 new.logical_id = old.logical_id
9022 self.cfg.SetDiskID(new, self.target_node)
9024 for disk in old_lvs:
9025 disk.logical_id = ren_fn(disk, temp_suffix)
9026 self.cfg.SetDiskID(disk, self.target_node)
9028 # Now that the new lvs have the old name, we can add them to the device
9029 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9030 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9032 msg = result.fail_msg
9034 for new_lv in new_lvs:
9035 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9038 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9039 hint=("cleanup manually the unused logical"
9041 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9043 dev.children = new_lvs
9045 self.cfg.Update(self.instance, feedback_fn)
9048 if self.early_release:
9049 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9051 self._RemoveOldStorage(self.target_node, iv_names)
9052 # WARNING: we release both node locks here, do not do other RPCs
9053 # than WaitForSync to the primary node
9054 self._ReleaseNodeLock([self.target_node, self.other_node])
9057 # This can fail as the old devices are degraded and _WaitForSync
9058 # does a combined result over all disks, so we don't check its return value
9059 self.lu.LogStep(cstep, steps_total, "Sync devices")
9061 _WaitForSync(self.lu, self.instance)
9063 # Check all devices manually
9064 self._CheckDevices(self.instance.primary_node, iv_names)
9066 # Step: remove old storage
9067 if not self.early_release:
9068 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9070 self._RemoveOldStorage(self.target_node, iv_names)
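# With early_release the old LVs are removed and the node locks dropped
# before the final resync, shortening the time locks are held; the trade-off
# is that the old copies are no longer available should the resync run into
# problems.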
9072 def _ExecDrbd8Secondary(self, feedback_fn):
9073 """Replace the secondary node for DRBD 8.
9075 The algorithm for replace is quite complicated:
9076 - for all disks of the instance:
9077 - create new LVs on the new node with same names
9078 - shutdown the drbd device on the old secondary
9079 - disconnect the drbd network on the primary
9080 - create the drbd device on the new secondary
9081 - network attach the drbd on the primary, using an artifice:
9082 the drbd code for Attach() will connect to the network if it
9083 finds a device which is connected to the good local disks but
9085 - wait for sync across all devices
9086 - remove all disks from the old secondary
9088 Failures are not very well handled.
9093 # Step: check device activation
9094 self.lu.LogStep(1, steps_total, "Check device existence")
9095 self._CheckDisksExistence([self.instance.primary_node])
9096 self._CheckVolumeGroup([self.instance.primary_node])
9098 # Step: check other node consistency
9099 self.lu.LogStep(2, steps_total, "Check peer consistency")
9100 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9102 # Step: create new storage
9103 self.lu.LogStep(3, steps_total, "Allocate new storage")
9104 for idx, dev in enumerate(self.instance.disks):
9105 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9106 (self.new_node, idx))
9107 # we pass force_create=True to force LVM creation
9108 for new_lv in dev.children:
9109 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9110 _GetInstanceInfoText(self.instance), False)
9112 # Step 4: drbd minors and drbd setup changes
9113 # after this, we must manually remove the drbd minors on both the
9114 # error and the success paths
9115 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9116 minors = self.cfg.AllocateDRBDMinor([self.new_node
9117 for dev in self.instance.disks],
9119 logging.debug("Allocated minors %r", minors)
9122 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9123 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9124 (self.new_node, idx))
9125 # create new devices on new_node; note that we create two IDs:
9126 # one without port, so the drbd will be activated without
9127 # networking information on the new node at this stage, and one
9128 # with network, for the latter activation in step 4
9129 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9130 if self.instance.primary_node == o_node1:
9131 p_minor = o_minor1
9132 else:
9133 assert self.instance.primary_node == o_node2, "Three-node instance?"
9134 p_minor = o_minor2
9136 new_alone_id = (self.instance.primary_node, self.new_node, None,
9137 p_minor, new_minor, o_secret)
9138 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9139 p_minor, new_minor, o_secret)
9141 iv_names[idx] = (dev, dev.children, new_net_id)
9142 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9144 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9145 logical_id=new_alone_id,
9146 children=dev.children,
9149 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9150 _GetInstanceInfoText(self.instance), False)
9151 except errors.GenericError:
9152 self.cfg.ReleaseDRBDMinors(self.instance.name)
9155 # We have new devices, shutdown the drbd on the old secondary
9156 for idx, dev in enumerate(self.instance.disks):
9157 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9158 self.cfg.SetDiskID(dev, self.target_node)
9159 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9160 if msg:
9161 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9162 " node: %s" % (idx, msg),
9163 hint=("Please cleanup this device manually as"
9164 " soon as possible"))
9166 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9167 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9168 self.node_secondary_ip,
9169 self.instance.disks)\
9170 [self.instance.primary_node]
9172 msg = result.fail_msg
9174 # detaches didn't succeed (unlikely)
9175 self.cfg.ReleaseDRBDMinors(self.instance.name)
9176 raise errors.OpExecError("Can't detach the disks from the network on"
9177 " old node: %s" % (msg,))
9179 # if we managed to detach at least one, we update all the disks of
9180 # the instance to point to the new secondary
9181 self.lu.LogInfo("Updating instance configuration")
9182 for dev, _, new_logical_id in iv_names.itervalues():
9183 dev.logical_id = new_logical_id
9184 self.cfg.SetDiskID(dev, self.instance.primary_node)
9186 self.cfg.Update(self.instance, feedback_fn)
9188 # and now perform the drbd attach
9189 self.lu.LogInfo("Attaching primary drbds to new secondary"
9190 " (standalone => connected)")
9191 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9193 self.node_secondary_ip,
9194 self.instance.disks,
9197 for to_node, to_result in result.items():
9198 msg = to_result.fail_msg
9200 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9202 hint=("please do a gnt-instance info to see the"
9203 " status of disks"))
9205 if self.early_release:
9206 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9208 self._RemoveOldStorage(self.target_node, iv_names)
9209 # WARNING: we release all node locks here, do not do other RPCs
9210 # than WaitForSync to the primary node
9211 self._ReleaseNodeLock([self.instance.primary_node,
9216 # This can fail as the old devices are degraded and _WaitForSync
9217 # does a combined result over all disks, so we don't check its return value
9218 self.lu.LogStep(cstep, steps_total, "Sync devices")
9220 _WaitForSync(self.lu, self.instance)
9222 # Check all devices manually
9223 self._CheckDevices(self.instance.primary_node, iv_names)
9225 # Step: remove old storage
9226 if not self.early_release:
9227 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9228 self._RemoveOldStorage(self.target_node, iv_names)
9231 class LURepairNodeStorage(NoHooksLU):
9232 """Repairs the volume group on a node.
9237 def CheckArguments(self):
9238 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9240 storage_type = self.op.storage_type
9242 if (constants.SO_FIX_CONSISTENCY not in
9243 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9244 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9245 " repaired" % storage_type,
9248 def ExpandNames(self):
9249 self.needed_locks = {
9250 locking.LEVEL_NODE: [self.op.node_name],
9253 def _CheckFaultyDisks(self, instance, node_name):
9254 """Ensure faulty disks abort the opcode or at least warn."""
9256 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9258 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9259 " node '%s'" % (instance.name, node_name),
9261 except errors.OpPrereqError, err:
9262 if self.op.ignore_consistency:
9263 self.proc.LogWarning(str(err.args[0]))
9267 def CheckPrereq(self):
9268 """Check prerequisites.
9271 # Check whether any instance on this node has faulty disks
9272 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9273 if not inst.admin_up:
9275 check_nodes = set(inst.all_nodes)
9276 check_nodes.discard(self.op.node_name)
9277 for inst_node_name in check_nodes:
9278 self._CheckFaultyDisks(inst, inst_node_name)
9280 def Exec(self, feedback_fn):
9281 feedback_fn("Repairing storage unit '%s' on %s ..." %
9282 (self.op.name, self.op.node_name))
9284 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9285 result = self.rpc.call_storage_execute(self.op.node_name,
9286 self.op.storage_type, st_args,
9288 constants.SO_FIX_CONSISTENCY)
9289 result.Raise("Failed to repair storage unit '%s' on %s" %
9290 (self.op.name, self.op.node_name))
9293 class LUNodeEvacStrategy(NoHooksLU):
9294 """Computes the node evacuation strategy.
9299 def CheckArguments(self):
9300 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9302 def ExpandNames(self):
9303 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9304 self.needed_locks = locks = {}
9305 if self.op.remote_node is None:
9306 locks[locking.LEVEL_NODE] = locking.ALL_SET
9308 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9309 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9311 def Exec(self, feedback_fn):
9312 if self.op.remote_node is not None:
9314 for node in self.op.nodes:
9315 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9318 if i.primary_node == self.op.remote_node:
9319 raise errors.OpPrereqError("Node %s is the primary node of"
9320 " instance %s, cannot use it as"
9322 (self.op.remote_node, i.name),
9324 result.append([i.name, self.op.remote_node])
9326 ial = IAllocator(self.cfg, self.rpc,
9327 mode=constants.IALLOCATOR_MODE_MEVAC,
9328 evac_nodes=self.op.nodes)
9329 ial.Run(self.op.iallocator, validate=True)
9331 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9337 class LUInstanceGrowDisk(LogicalUnit):
9338 """Grow a disk of an instance.
9342 HTYPE = constants.HTYPE_INSTANCE
9345 def ExpandNames(self):
9346 self._ExpandAndLockInstance()
9347 self.needed_locks[locking.LEVEL_NODE] = []
9348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9350 def DeclareLocks(self, level):
9351 if level == locking.LEVEL_NODE:
9352 self._LockInstancesNodes()
9354 def BuildHooksEnv(self):
9357 This runs on the master, the primary and all the secondaries.
9361 "DISK": self.op.disk,
9362 "AMOUNT": self.op.amount,
9364 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9367 def BuildHooksNodes(self):
9368 """Build hooks nodes.
9371 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9374 def CheckPrereq(self):
9375 """Check prerequisites.
9377 This checks that the instance is in the cluster.
9380 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9381 assert instance is not None, \
9382 "Cannot retrieve locked instance %s" % self.op.instance_name
9383 nodenames = list(instance.all_nodes)
9384 for node in nodenames:
9385 _CheckNodeOnline(self, node)
9387 self.instance = instance
9389 if instance.disk_template not in constants.DTS_GROWABLE:
9390 raise errors.OpPrereqError("Instance's disk layout does not support"
9391 " growing.", errors.ECODE_INVAL)
9393 self.disk = instance.FindDisk(self.op.disk)
9395 if instance.disk_template not in (constants.DT_FILE,
9396 constants.DT_SHARED_FILE):
9397 # TODO: check the free disk space for file, when that feature will be
9399 _CheckNodesFreeDiskPerVG(self, nodenames,
9400 self.disk.ComputeGrowth(self.op.amount))
9402 def Exec(self, feedback_fn):
9403 """Execute disk grow.
9406 instance = self.instance
9409 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9411 raise errors.OpExecError("Cannot activate block device to grow")
9413 for node in instance.all_nodes:
9414 self.cfg.SetDiskID(disk, node)
9415 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9416 result.Raise("Grow request failed to node %s" % node)
9418 # TODO: Rewrite code to work properly
9419 # DRBD goes into sync mode for a short amount of time after executing the
9420 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9421 # calling "resize" in sync mode fails. Sleeping for a short amount of
9422 # time is a work-around.
9425 disk.RecordGrow(self.op.amount)
9426 self.cfg.Update(instance, feedback_fn)
9427 if self.op.wait_for_sync:
9428 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9430 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9431 " status.\nPlease check the instance.")
9432 if not instance.admin_up:
9433 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9434 elif not instance.admin_up:
9435 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9436 " not supposed to be running because no wait for"
9437 " sync mode was requested.")
9440 class LUInstanceQueryData(NoHooksLU):
9441 """Query runtime instance data.
9446 def ExpandNames(self):
9447 self.needed_locks = {}
9449 # Use locking if requested or when non-static information is wanted
9450 if not (self.op.static or self.op.use_locking):
9451 self.LogWarning("Non-static data requested, locks need to be acquired")
9452 self.op.use_locking = True
9454 if self.op.instances or not self.op.use_locking:
9455 # Expand instance names right here
9456 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9458 # Will use acquired locks
9459 self.wanted_names = None
9461 if self.op.use_locking:
9462 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9464 if self.wanted_names is None:
9465 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9467 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9469 self.needed_locks[locking.LEVEL_NODE] = []
9470 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9473 def DeclareLocks(self, level):
9474 if self.op.use_locking and level == locking.LEVEL_NODE:
9475 self._LockInstancesNodes()
9477 def CheckPrereq(self):
9478 """Check prerequisites.
9480 This only checks the optional instance list against the existing names.
9483 if self.wanted_names is None:
9484 assert self.op.use_locking, "Locking was not used"
9485 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9487 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9488 for name in self.wanted_names]
9490 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9491 """Returns the status of a block device
9494 if self.op.static or not node:
9497 self.cfg.SetDiskID(dev, node)
9499 result = self.rpc.call_blockdev_find(node, dev)
9503 result.Raise("Can't compute disk status for %s" % instance_name)
9505 status = result.payload
9509 return (status.dev_path, status.major, status.minor,
9510 status.sync_percent, status.estimated_time,
9511 status.is_degraded, status.ldisk_status)
9513 def _ComputeDiskStatus(self, instance, snode, dev):
9514 """Compute block device status.
9517 if dev.dev_type in constants.LDS_DRBD:
9518 # we change the snode then (otherwise we use the one passed in)
9519 if dev.logical_id[0] == instance.primary_node:
9520 snode = dev.logical_id[1]
9522 snode = dev.logical_id[0]
9524 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9526 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9529 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9530 for child in dev.children]
9535 "iv_name": dev.iv_name,
9536 "dev_type": dev.dev_type,
9537 "logical_id": dev.logical_id,
9538 "physical_id": dev.physical_id,
9539 "pstatus": dev_pstatus,
9540 "sstatus": dev_sstatus,
9541 "children": dev_children,
9546 def Exec(self, feedback_fn):
9547 """Gather and return data"""
9550 cluster = self.cfg.GetClusterInfo()
9552 for instance in self.wanted_instances:
9553 if not self.op.static:
9554 remote_info = self.rpc.call_instance_info(instance.primary_node,
9556 instance.hypervisor)
9557 remote_info.Raise("Error checking node %s" % instance.primary_node)
9558 remote_info = remote_info.payload
9559 if remote_info and "state" in remote_info:
9562 remote_state = "down"
9565 if instance.admin_up:
9568 config_state = "down"
9570 disks = [self._ComputeDiskStatus(instance, None, device)
9571 for device in instance.disks]
9573 result[instance.name] = {
9574 "name": instance.name,
9575 "config_state": config_state,
9576 "run_state": remote_state,
9577 "pnode": instance.primary_node,
9578 "snodes": instance.secondary_nodes,
9580 # this happens to be the same format used for hooks
9581 "nics": _NICListToTuple(self, instance.nics),
9582 "disk_template": instance.disk_template,
9584 "hypervisor": instance.hypervisor,
9585 "network_port": instance.network_port,
9586 "hv_instance": instance.hvparams,
9587 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9588 "be_instance": instance.beparams,
9589 "be_actual": cluster.FillBE(instance),
9590 "os_instance": instance.osparams,
9591 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9592 "serial_no": instance.serial_no,
9593 "mtime": instance.mtime,
9594 "ctime": instance.ctime,
9595 "uuid": instance.uuid,
9601 class LUInstanceSetParams(LogicalUnit):
9602 """Modifies an instances's parameters.
9605 HPATH = "instance-modify"
9606 HTYPE = constants.HTYPE_INSTANCE
9609 def CheckArguments(self):
9610 if not (self.op.nics or self.op.disks or self.op.disk_template or
9611 self.op.hvparams or self.op.beparams or self.op.os_name):
9612 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9614 if self.op.hvparams:
9615 _CheckGlobalHvParams(self.op.hvparams)
9619 for disk_op, disk_dict in self.op.disks:
9620 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9621 if disk_op == constants.DDM_REMOVE:
9624 elif disk_op == constants.DDM_ADD:
9627 if not isinstance(disk_op, int):
9628 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9629 if not isinstance(disk_dict, dict):
9630 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9631 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9633 if disk_op == constants.DDM_ADD:
9634 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9635 if mode not in constants.DISK_ACCESS_SET:
9636 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9638 size = disk_dict.get(constants.IDISK_SIZE, None)
9640 raise errors.OpPrereqError("Required disk parameter size missing",
9644 except (TypeError, ValueError), err:
9645 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9646 str(err), errors.ECODE_INVAL)
9647 disk_dict[constants.IDISK_SIZE] = size
9649 # modification of disk
9650 if constants.IDISK_SIZE in disk_dict:
9651 raise errors.OpPrereqError("Disk size change not possible, use"
9652 " grow-disk", errors.ECODE_INVAL)
9654 if disk_addremove > 1:
9655 raise errors.OpPrereqError("Only one disk add or remove operation"
9656 " supported at a time", errors.ECODE_INVAL)
9658 if self.op.disks and self.op.disk_template is not None:
9659 raise errors.OpPrereqError("Disk template conversion and other disk"
9660 " changes not supported at the same time",
9663 if (self.op.disk_template and
9664 self.op.disk_template in constants.DTS_INT_MIRROR and
9665 self.op.remote_node is None):
9666 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9667 " one requires specifying a secondary node",
9672 for nic_op, nic_dict in self.op.nics:
9673 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9674 if nic_op == constants.DDM_REMOVE:
9677 elif nic_op == constants.DDM_ADD:
9680 if not isinstance(nic_op, int):
9681 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9682 if not isinstance(nic_dict, dict):
9683 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9684 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9686 # nic_dict should be a dict
9687 nic_ip = nic_dict.get(constants.INIC_IP, None)
9688 if nic_ip is not None:
9689 if nic_ip.lower() == constants.VALUE_NONE:
9690 nic_dict[constants.INIC_IP] = None
9692 if not netutils.IPAddress.IsValid(nic_ip):
9693 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9696 nic_bridge = nic_dict.get('bridge', None)
9697 nic_link = nic_dict.get(constants.INIC_LINK, None)
9698 if nic_bridge and nic_link:
9699 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9700 " at the same time", errors.ECODE_INVAL)
9701 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9702 nic_dict['bridge'] = None
9703 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9704 nic_dict[constants.INIC_LINK] = None
9706 if nic_op == constants.DDM_ADD:
9707 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9709 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9711 if constants.INIC_MAC in nic_dict:
9712 nic_mac = nic_dict[constants.INIC_MAC]
9713 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9714 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9716 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9717 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9718 " modifying an existing nic",
9721 if nic_addremove > 1:
9722 raise errors.OpPrereqError("Only one NIC add or remove operation"
9723 " supported at a time", errors.ECODE_INVAL)
9725 def ExpandNames(self):
9726 self._ExpandAndLockInstance()
9727 self.needed_locks[locking.LEVEL_NODE] = []
9728 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9730 def DeclareLocks(self, level):
9731 if level == locking.LEVEL_NODE:
9732 self._LockInstancesNodes()
9733 if self.op.disk_template and self.op.remote_node:
9734 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9735 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9737 def BuildHooksEnv(self):
9740 This runs on the master, primary and secondaries.
9744 if constants.BE_MEMORY in self.be_new:
9745 args['memory'] = self.be_new[constants.BE_MEMORY]
9746 if constants.BE_VCPUS in self.be_new:
9747 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9748 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9749 # information at all.
9752 nic_override = dict(self.op.nics)
9753 for idx, nic in enumerate(self.instance.nics):
9754 if idx in nic_override:
9755 this_nic_override = nic_override[idx]
9757 this_nic_override = {}
9758 if constants.INIC_IP in this_nic_override:
9759 ip = this_nic_override[constants.INIC_IP]
9762 if constants.INIC_MAC in this_nic_override:
9763 mac = this_nic_override[constants.INIC_MAC]
9766 if idx in self.nic_pnew:
9767 nicparams = self.nic_pnew[idx]
9769 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9770 mode = nicparams[constants.NIC_MODE]
9771 link = nicparams[constants.NIC_LINK]
9772 args['nics'].append((ip, mac, mode, link))
9773 if constants.DDM_ADD in nic_override:
9774 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9775 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9776 nicparams = self.nic_pnew[constants.DDM_ADD]
9777 mode = nicparams[constants.NIC_MODE]
9778 link = nicparams[constants.NIC_LINK]
9779 args['nics'].append((ip, mac, mode, link))
9780 elif constants.DDM_REMOVE in nic_override:
9781 del args['nics'][-1]
9783 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9784 if self.op.disk_template:
9785 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9789 def BuildHooksNodes(self):
9790 """Build hooks nodes.
9793 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9796 def CheckPrereq(self):
9797 """Check prerequisites.
9799 This only checks the instance list against the existing names.
9802 # checking the new params on the primary/secondary nodes
9804 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9805 cluster = self.cluster = self.cfg.GetClusterInfo()
9806 assert self.instance is not None, \
9807 "Cannot retrieve locked instance %s" % self.op.instance_name
9808 pnode = instance.primary_node
9809 nodelist = list(instance.all_nodes)
9812 if self.op.os_name and not self.op.force:
9813 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9814 self.op.force_variant)
9815 instance_os = self.op.os_name
9817 instance_os = instance.os
9819 if self.op.disk_template:
9820 if instance.disk_template == self.op.disk_template:
9821 raise errors.OpPrereqError("Instance already has disk template %s" %
9822 instance.disk_template, errors.ECODE_INVAL)
9824 if (instance.disk_template,
9825 self.op.disk_template) not in self._DISK_CONVERSIONS:
9826 raise errors.OpPrereqError("Unsupported disk template conversion from"
9827 " %s to %s" % (instance.disk_template,
9828 self.op.disk_template),
9830 _CheckInstanceDown(self, instance, "cannot change disk template")
9831 if self.op.disk_template in constants.DTS_INT_MIRROR:
9832 if self.op.remote_node == pnode:
9833 raise errors.OpPrereqError("Given new secondary node %s is the same"
9834 " as the primary node of the instance" %
9835 self.op.remote_node, errors.ECODE_STATE)
9836 _CheckNodeOnline(self, self.op.remote_node)
9837 _CheckNodeNotDrained(self, self.op.remote_node)
9838 # FIXME: here we assume that the old instance type is DT_PLAIN
9839 assert instance.disk_template == constants.DT_PLAIN
9840 disks = [{constants.IDISK_SIZE: d.size,
9841 constants.IDISK_VG: d.logical_id[0]}
9842 for d in instance.disks]
9843 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9844 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9846 # hvparams processing
9847 if self.op.hvparams:
9848 hv_type = instance.hypervisor
9849 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9850 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9851 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9854 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9855 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9856 self.hv_new = hv_new # the new actual values
9857 self.hv_inst = i_hvdict # the new dict (without defaults)
9859 self.hv_new = self.hv_inst = {}
9861 # beparams processing
9862 if self.op.beparams:
9863 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9865 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9866 be_new = cluster.SimpleFillBE(i_bedict)
9867 self.be_new = be_new # the new actual values
9868 self.be_inst = i_bedict # the new dict (without defaults)
9870 self.be_new = self.be_inst = {}
9872 # osparams processing
9873 if self.op.osparams:
9874 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9875 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9876 self.os_inst = i_osdict # the new dict (without defaults)
9882 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9883 mem_check_list = [pnode]
9884 if be_new[constants.BE_AUTO_BALANCE]:
9885 # either we changed auto_balance to yes or it was from before
9886 mem_check_list.extend(instance.secondary_nodes)
9887 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9888 instance.hypervisor)
9889 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9890 instance.hypervisor)
9891 pninfo = nodeinfo[pnode]
9892 msg = pninfo.fail_msg
9894 # Assume the primary node is unreachable and go ahead
9895 self.warn.append("Can't get info from primary node %s: %s" %
9897 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9898 self.warn.append("Node data from primary node %s doesn't contain"
9899 " free memory information" % pnode)
9900 elif instance_info.fail_msg:
9901 self.warn.append("Can't get instance runtime information: %s" %
9902 instance_info.fail_msg)
9904 if instance_info.payload:
9905 current_mem = int(instance_info.payload['memory'])
9907 # Assume instance not running
9908 # (there is a slight race condition here, but it's not very probable,
9909 # and we have no other way to check)
9911 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9912 pninfo.payload['memory_free'])
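# Added comment: miss_mem is the shortfall on the primary node, i.e. the
# requested memory minus what the instance already uses minus what the node
# has free; a positive value means the instance could no longer be started.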
9914 raise errors.OpPrereqError("This change will prevent the instance"
9915 " from starting, due to %d MB of memory"
9916 " missing on its primary node" % miss_mem,
9919 if be_new[constants.BE_AUTO_BALANCE]:
9920 for node, nres in nodeinfo.items():
9921 if node not in instance.secondary_nodes:
9925 self.warn.append("Can't get info from secondary node %s: %s" %
9927 elif not isinstance(nres.payload.get('memory_free', None), int):
9928 self.warn.append("Secondary node %s didn't return free"
9929 " memory information" % node)
9930 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9931 self.warn.append("Not enough memory to failover instance to"
9932 " secondary node %s" % node)
9937 for nic_op, nic_dict in self.op.nics:
9938 if nic_op == constants.DDM_REMOVE:
9939 if not instance.nics:
9940 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9943 if nic_op != constants.DDM_ADD:
9945 if not instance.nics:
9946 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9947 " no NICs" % nic_op,
9949 if nic_op < 0 or nic_op >= len(instance.nics):
9950 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9952 (nic_op, len(instance.nics) - 1),
9954 old_nic_params = instance.nics[nic_op].nicparams
9955 old_nic_ip = instance.nics[nic_op].ip
9960 update_params_dict = dict([(key, nic_dict[key])
9961 for key in constants.NICS_PARAMETERS
9962 if key in nic_dict])
9964 if 'bridge' in nic_dict:
9965 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9967 new_nic_params = _GetUpdatedParams(old_nic_params,
9969 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9970 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9971 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9972 self.nic_pinst[nic_op] = new_nic_params
9973 self.nic_pnew[nic_op] = new_filled_nic_params
9974 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9976 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9977 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9978 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9980 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9982 self.warn.append(msg)
9984 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9985 if new_nic_mode == constants.NIC_MODE_ROUTED:
9986 if constants.INIC_IP in nic_dict:
9987 nic_ip = nic_dict[constants.INIC_IP]
9991 raise errors.OpPrereqError('Cannot set the nic ip to None'
9992 ' on a routed nic', errors.ECODE_INVAL)
9993 if constants.INIC_MAC in nic_dict:
9994 nic_mac = nic_dict[constants.INIC_MAC]
9996 raise errors.OpPrereqError('Cannot set the nic mac to None',
9998 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9999 # otherwise generate the mac
10000 nic_dict[constants.INIC_MAC] = \
10001 self.cfg.GenerateMAC(self.proc.GetECId())
10003 # or validate/reserve the current one
10005 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10006 except errors.ReservationError:
10007 raise errors.OpPrereqError("MAC address %s already in use"
10008 " in cluster" % nic_mac,
10009 errors.ECODE_NOTUNIQUE)
10012 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10013 raise errors.OpPrereqError("Disk operations not supported for"
10014 " diskless instances",
10015 errors.ECODE_INVAL)
10016 for disk_op, _ in self.op.disks:
10017 if disk_op == constants.DDM_REMOVE:
10018 if len(instance.disks) == 1:
10019 raise errors.OpPrereqError("Cannot remove the last disk of"
10020 " an instance", errors.ECODE_INVAL)
10021 _CheckInstanceDown(self, instance, "cannot remove disks")
10023 if (disk_op == constants.DDM_ADD and
10024 len(instance.disks) >= constants.MAX_DISKS):
10025 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10026 " add more" % constants.MAX_DISKS,
10027 errors.ECODE_STATE)
10028 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10030 if disk_op < 0 or disk_op >= len(instance.disks):
10031 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10033 (disk_op, len(instance.disks)),
10034 errors.ECODE_INVAL)
10038 def _ConvertPlainToDrbd(self, feedback_fn):
10039 """Converts an instance from plain to drbd.
10042 feedback_fn("Converting template to drbd")
10043 instance = self.instance
10044 pnode = instance.primary_node
10045 snode = self.op.remote_node
10047 # create a fake disk info for _GenerateDiskTemplate
10048 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10049 constants.IDISK_VG: d.logical_id[0]}
10050 for d in instance.disks]
10051 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10052 instance.name, pnode, [snode],
10053 disk_info, None, None, 0, feedback_fn)
10054 info = _GetInstanceInfoText(instance)
10055 feedback_fn("Creating aditional volumes...")
10056 # first, create the missing data and meta devices
10057 for disk in new_disks:
10058 # unfortunately this is... not too nice
10059 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10061 for child in disk.children:
10062 _CreateSingleBlockDev(self, snode, instance, child, info, True)
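# Added comment: the net effect of the loop above is that the primary node
# only gains the new metadata LV (its existing data LV is reused and renamed
# below), while the new secondary gets both the data and the metadata LV
# created from scratch.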
10063 # at this stage, all new LVs have been created, we can rename the
10065 feedback_fn("Renaming original volumes...")
10066 rename_list = [(o, n.children[0].logical_id)
10067 for (o, n) in zip(instance.disks, new_disks)]
10068 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10069 result.Raise("Failed to rename original LVs")
10071 feedback_fn("Initializing DRBD devices...")
10072 # all child devices are in place, we can now create the DRBD devices
10073 for disk in new_disks:
10074 for node in [pnode, snode]:
10075 f_create = node == pnode
10076 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10078 # at this point, the instance has been modified
10079 instance.disk_template = constants.DT_DRBD8
10080 instance.disks = new_disks
10081 self.cfg.Update(instance, feedback_fn)
10083 # disks are created, waiting for sync
10084 disk_abort = not _WaitForSync(self, instance)
10086 raise errors.OpExecError("There are some degraded disks for"
10087 " this instance, please cleanup manually")
10089 def _ConvertDrbdToPlain(self, feedback_fn):
10090 """Converts an instance from drbd to plain.
10093 instance = self.instance
10094 assert len(instance.secondary_nodes) == 1
10095 pnode = instance.primary_node
10096 snode = instance.secondary_nodes[0]
10097 feedback_fn("Converting template to plain")
10099 old_disks = instance.disks
10100 new_disks = [d.children[0] for d in old_disks]
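# Added comment: for the DRBD8 disks built by _ConvertPlainToDrbd, children[0]
# is the data LV and children[1] the metadata LV, so keeping children[0] turns
# each disk back into a plain LV.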
10102 # copy over size and mode
10103 for parent, child in zip(old_disks, new_disks):
10104 child.size = parent.size
10105 child.mode = parent.mode
10107 # update instance structure
10108 instance.disks = new_disks
10109 instance.disk_template = constants.DT_PLAIN
10110 self.cfg.Update(instance, feedback_fn)
10112 feedback_fn("Removing volumes on the secondary node...")
10113 for disk in old_disks:
10114 self.cfg.SetDiskID(disk, snode)
10115 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10117 self.LogWarning("Could not remove block device %s on node %s,"
10118 " continuing anyway: %s", disk.iv_name, snode, msg)
10120 feedback_fn("Removing unneeded volumes on the primary node...")
10121 for idx, disk in enumerate(old_disks):
10122 meta = disk.children[1]
10123 self.cfg.SetDiskID(meta, pnode)
10124 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10126 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10127 " continuing anyway: %s", idx, pnode, msg)
10129 def Exec(self, feedback_fn):
10130 """Modifies an instance.
10132 All parameters take effect only at the next restart of the instance.
10135 # Process here the warnings from CheckPrereq, as we don't have a
10136 # feedback_fn there.
10137 for warn in self.warn:
10138 feedback_fn("WARNING: %s" % warn)
10141 instance = self.instance
10143 for disk_op, disk_dict in self.op.disks:
10144 if disk_op == constants.DDM_REMOVE:
10145 # remove the last disk
10146 device = instance.disks.pop()
10147 device_idx = len(instance.disks)
10148 for node, disk in device.ComputeNodeTree(instance.primary_node):
10149 self.cfg.SetDiskID(disk, node)
10150 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10152 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10153 " continuing anyway", device_idx, node, msg)
10154 result.append(("disk/%d" % device_idx, "remove"))
10155 elif disk_op == constants.DDM_ADD:
10157 if instance.disk_template in (constants.DT_FILE,
10158 constants.DT_SHARED_FILE):
10159 file_driver, file_path = instance.disks[0].logical_id
10160 file_path = os.path.dirname(file_path)
10162 file_driver = file_path = None
10163 disk_idx_base = len(instance.disks)
10164 new_disk = _GenerateDiskTemplate(self,
10165 instance.disk_template,
10166 instance.name, instance.primary_node,
10167 instance.secondary_nodes,
10171 disk_idx_base, feedback_fn)[0]
10172 instance.disks.append(new_disk)
10173 info = _GetInstanceInfoText(instance)
10175 logging.info("Creating volume %s for instance %s",
10176 new_disk.iv_name, instance.name)
10177 # Note: this needs to be kept in sync with _CreateDisks
10179 for node in instance.all_nodes:
10180 f_create = node == instance.primary_node
10182 _CreateBlockDev(self, node, instance, new_disk,
10183 f_create, info, f_create)
10184 except errors.OpExecError, err:
10185 self.LogWarning("Failed to create volume %s (%s) on"
10187 new_disk.iv_name, new_disk, node, err)
10188 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10189 (new_disk.size, new_disk.mode)))
10191 # change a given disk
10192 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10193 result.append(("disk.mode/%d" % disk_op,
10194 disk_dict[constants.IDISK_MODE]))
10196 if self.op.disk_template:
10197 r_shut = _ShutdownInstanceDisks(self, instance)
10199 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10200 " proceed with disk template conversion")
10201 mode = (instance.disk_template, self.op.disk_template)
10203 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10205 self.cfg.ReleaseDRBDMinors(instance.name)
10207 result.append(("disk_template", self.op.disk_template))
10210 for nic_op, nic_dict in self.op.nics:
10211 if nic_op == constants.DDM_REMOVE:
10212 # remove the last nic
10213 del instance.nics[-1]
10214 result.append(("nic.%d" % len(instance.nics), "remove"))
10215 elif nic_op == constants.DDM_ADD:
10216 # mac and bridge should be set by now
10217 mac = nic_dict[constants.INIC_MAC]
10218 ip = nic_dict.get(constants.INIC_IP, None)
10219 nicparams = self.nic_pinst[constants.DDM_ADD]
10220 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10221 instance.nics.append(new_nic)
10222 result.append(("nic.%d" % (len(instance.nics) - 1),
10223 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10224 (new_nic.mac, new_nic.ip,
10225 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10226 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10229 for key in (constants.INIC_MAC, constants.INIC_IP):
10230 if key in nic_dict:
10231 setattr(instance.nics[nic_op], key, nic_dict[key])
10232 if nic_op in self.nic_pinst:
10233 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10234 for key, val in nic_dict.iteritems():
10235 result.append(("nic.%s/%d" % (key, nic_op), val))
10238 if self.op.hvparams:
10239 instance.hvparams = self.hv_inst
10240 for key, val in self.op.hvparams.iteritems():
10241 result.append(("hv/%s" % key, val))
10244 if self.op.beparams:
10245 instance.beparams = self.be_inst
10246 for key, val in self.op.beparams.iteritems():
10247 result.append(("be/%s" % key, val))
10250 if self.op.os_name:
10251 instance.os = self.op.os_name
10254 if self.op.osparams:
10255 instance.osparams = self.os_inst
10256 for key, val in self.op.osparams.iteritems():
10257 result.append(("os/%s" % key, val))
10259 self.cfg.Update(instance, feedback_fn)
10263 _DISK_CONVERSIONS = {
10264 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10265 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
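# Added comment: keys are (old_template, new_template) pairs and values are
# the conversion methods; Exec above looks the handler up as
# self._DISK_CONVERSIONS[mode] and calls it with feedback_fn, so only the
# plain<->drbd conversions listed here are supported.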
10269 class LUBackupQuery(NoHooksLU):
10270 """Query the exports list
10275 def ExpandNames(self):
10276 self.needed_locks = {}
10277 self.share_locks[locking.LEVEL_NODE] = 1
10278 if not self.op.nodes:
10279 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10281 self.needed_locks[locking.LEVEL_NODE] = \
10282 _GetWantedNodes(self, self.op.nodes)
10284 def Exec(self, feedback_fn):
10285 """Compute the list of all the exported system images.
10288 @return: a dictionary with the structure node->(export-list)
10289 where export-list is a list of the instances exported on
10293 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10294 rpcresult = self.rpc.call_export_list(self.nodes)
10296 for node in rpcresult:
10297 if rpcresult[node].fail_msg:
10298 result[node] = False
10300 result[node] = rpcresult[node].payload
10305 class LUBackupPrepare(NoHooksLU):
10306 """Prepares an instance for an export and returns useful information.
10311 def ExpandNames(self):
10312 self._ExpandAndLockInstance()
10314 def CheckPrereq(self):
10315 """Check prerequisites.
10318 instance_name = self.op.instance_name
10320 self.instance = self.cfg.GetInstanceInfo(instance_name)
10321 assert self.instance is not None, \
10322 "Cannot retrieve locked instance %s" % self.op.instance_name
10323 _CheckNodeOnline(self, self.instance.primary_node)
10325 self._cds = _GetClusterDomainSecret()
10327 def Exec(self, feedback_fn):
10328 """Prepares an instance for an export.
10331 instance = self.instance
10333 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10334 salt = utils.GenerateSecret(8)
10336 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10337 result = self.rpc.call_x509_cert_create(instance.primary_node,
10338 constants.RIE_CERT_VALIDITY)
10339 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10341 (name, cert_pem) = result.payload
10343 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10347 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10348 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10350 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10356 class LUBackupExport(LogicalUnit):
10357 """Export an instance to an image in the cluster.
10360 HPATH = "instance-export"
10361 HTYPE = constants.HTYPE_INSTANCE
10364 def CheckArguments(self):
10365 """Check the arguments.
10368 self.x509_key_name = self.op.x509_key_name
10369 self.dest_x509_ca_pem = self.op.destination_x509_ca
10371 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10372 if not self.x509_key_name:
10373 raise errors.OpPrereqError("Missing X509 key name for encryption",
10374 errors.ECODE_INVAL)
10376 if not self.dest_x509_ca_pem:
10377 raise errors.OpPrereqError("Missing destination X509 CA",
10378 errors.ECODE_INVAL)
10380 def ExpandNames(self):
10381 self._ExpandAndLockInstance()
10383 # Lock all nodes for local exports
10384 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10385 # FIXME: lock only instance primary and destination node
10387 # Sad but true, for now we have to lock all nodes, as we don't know where
10388 # the previous export might be, and in this LU we search for it and
10389 # remove it from its current node. In the future we could fix this by:
10390 # - making a tasklet to search (share-lock all), then create the
10391 # new one, then one to remove, after
10392 # - removing the removal operation altogether
10393 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10395 def DeclareLocks(self, level):
10396 """Last minute lock declaration."""
10397 # All nodes are locked anyway, so nothing to do here.
10399 def BuildHooksEnv(self):
10400 """Build hooks env.
10402 This will run on the master, primary node and target node.
10406 "EXPORT_MODE": self.op.mode,
10407 "EXPORT_NODE": self.op.target_node,
10408 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10409 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10410 # TODO: Generic function for boolean env variables
10411 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10414 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10418 def BuildHooksNodes(self):
10419 """Build hooks nodes.
10422 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10424 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10425 nl.append(self.op.target_node)
10429 def CheckPrereq(self):
10430 """Check prerequisites.
10432 This checks that the instance and node names are valid.
10435 instance_name = self.op.instance_name
10437 self.instance = self.cfg.GetInstanceInfo(instance_name)
10438 assert self.instance is not None, \
10439 "Cannot retrieve locked instance %s" % self.op.instance_name
10440 _CheckNodeOnline(self, self.instance.primary_node)
10442 if (self.op.remove_instance and self.instance.admin_up and
10443 not self.op.shutdown):
10444 raise errors.OpPrereqError("Can not remove instance without shutting it"
10447 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10448 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10449 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10450 assert self.dst_node is not None
10452 _CheckNodeOnline(self, self.dst_node.name)
10453 _CheckNodeNotDrained(self, self.dst_node.name)
10456 self.dest_disk_info = None
10457 self.dest_x509_ca = None
10459 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10460 self.dst_node = None
10462 if len(self.op.target_node) != len(self.instance.disks):
10463 raise errors.OpPrereqError(("Received destination information for %s"
10464 " disks, but instance %s has %s disks") %
10465 (len(self.op.target_node), instance_name,
10466 len(self.instance.disks)),
10467 errors.ECODE_INVAL)
10469 cds = _GetClusterDomainSecret()
10471 # Check X509 key name
10473 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10474 except (TypeError, ValueError), err:
10475 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10477 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10478 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10479 errors.ECODE_INVAL)
10481 # Load and verify CA
10483 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10484 except OpenSSL.crypto.Error, err:
10485 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10486 (err, ), errors.ECODE_INVAL)
10488 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10489 if errcode is not None:
10490 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10491 (msg, ), errors.ECODE_INVAL)
10493 self.dest_x509_ca = cert
10495 # Verify target information
10497 for idx, disk_data in enumerate(self.op.target_node):
10499 (host, port, magic) = \
10500 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10501 except errors.GenericError, err:
10502 raise errors.OpPrereqError("Target info for disk %s: %s" %
10503 (idx, err), errors.ECODE_INVAL)
10505 disk_info.append((host, port, magic))
10507 assert len(disk_info) == len(self.op.target_node)
10508 self.dest_disk_info = disk_info
10511 raise errors.ProgrammerError("Unhandled export mode %r" %
10514 # instance disk type verification
10515 # TODO: Implement export support for file-based disks
10516 for disk in self.instance.disks:
10517 if disk.dev_type == constants.LD_FILE:
10518 raise errors.OpPrereqError("Export not supported for instances with"
10519 " file-based disks", errors.ECODE_INVAL)
10521 def _CleanupExports(self, feedback_fn):
10522 """Removes exports of current instance from all other nodes.
10524 If an instance in a cluster with nodes A..D was exported to node C, its
10525 exports will be removed from the nodes A, B and D.
10528 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10530 nodelist = self.cfg.GetNodeList()
10531 nodelist.remove(self.dst_node.name)
10533 # on one-node clusters nodelist will be empty after the removal
10534 # if we proceed the backup would be removed because OpBackupQuery
10535 # substitutes an empty list with the full cluster node list.
10536 iname = self.instance.name
10538 feedback_fn("Removing old exports for instance %s" % iname)
10539 exportlist = self.rpc.call_export_list(nodelist)
10540 for node in exportlist:
10541 if exportlist[node].fail_msg:
10543 if iname in exportlist[node].payload:
10544 msg = self.rpc.call_export_remove(node, iname).fail_msg
10546 self.LogWarning("Could not remove older export for instance %s"
10547 " on node %s: %s", iname, node, msg)
10549 def Exec(self, feedback_fn):
10550 """Export an instance to an image in the cluster.
10553 assert self.op.mode in constants.EXPORT_MODES
10555 instance = self.instance
10556 src_node = instance.primary_node
10558 if self.op.shutdown:
10559 # shutdown the instance, but not the disks
10560 feedback_fn("Shutting down instance %s" % instance.name)
10561 result = self.rpc.call_instance_shutdown(src_node, instance,
10562 self.op.shutdown_timeout)
10563 # TODO: Maybe ignore failures if ignore_remove_failures is set
10564 result.Raise("Could not shutdown instance %s on"
10565 " node %s" % (instance.name, src_node))
10567 # set the disks ID correctly since call_instance_start needs the
10568 # correct drbd minor to create the symlinks
10569 for disk in instance.disks:
10570 self.cfg.SetDiskID(disk, src_node)
10572 activate_disks = (not instance.admin_up)
10575 # Activate the instance disks if we're exporting a stopped instance
10576 feedback_fn("Activating disks for %s" % instance.name)
10577 _StartInstanceDisks(self, instance, None)
10580 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10583 helper.CreateSnapshots()
10585 if (self.op.shutdown and instance.admin_up and
10586 not self.op.remove_instance):
10587 assert not activate_disks
10588 feedback_fn("Starting instance %s" % instance.name)
10589 result = self.rpc.call_instance_start(src_node, instance, None, None)
10590 msg = result.fail_msg
10592 feedback_fn("Failed to start instance: %s" % msg)
10593 _ShutdownInstanceDisks(self, instance)
10594 raise errors.OpExecError("Could not start instance: %s" % msg)
10596 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10597 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10598 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10599 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10600 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10602 (key_name, _, _) = self.x509_key_name
10605 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10608 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10609 key_name, dest_ca_pem,
10614 # Check for backwards compatibility
10615 assert len(dresults) == len(instance.disks)
10616 assert compat.all(isinstance(i, bool) for i in dresults), \
10617 "Not all results are boolean: %r" % dresults
10621 feedback_fn("Deactivating disks for %s" % instance.name)
10622 _ShutdownInstanceDisks(self, instance)
10624 if not (compat.all(dresults) and fin_resu):
10627 failures.append("export finalization")
10628 if not compat.all(dresults):
10629 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10631 failures.append("disk export: disk(s) %s" % fdsk)
10633 raise errors.OpExecError("Export failed, errors in %s" %
10634 utils.CommaJoin(failures))
10636 # At this point, the export was successful, we can cleanup/finish
10638 # Remove instance if requested
10639 if self.op.remove_instance:
10640 feedback_fn("Removing instance %s" % instance.name)
10641 _RemoveInstance(self, feedback_fn, instance,
10642 self.op.ignore_remove_failures)
10644 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10645 self._CleanupExports(feedback_fn)
10647 return fin_resu, dresults
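# Illustrative usage sketch (not in the original source): assuming the usual
# Op<->LU name mapping, a local export to another node would be submitted
# roughly as follows; instance and node names are made up for the example.
#   op = opcodes.OpBackupExport(instance_name="inst1.example.com",
#                               target_node="node2.example.com",
#                               mode=constants.EXPORT_MODE_LOCAL,
#                               shutdown=True)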
10650 class LUBackupRemove(NoHooksLU):
10651 """Remove exports related to the named instance.
10656 def ExpandNames(self):
10657 self.needed_locks = {}
10658 # We need all nodes to be locked in order for RemoveExport to work, but we
10659 # don't need to lock the instance itself, as nothing will happen to it (and
10660 # we can remove exports also for a removed instance)
10661 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10663 def Exec(self, feedback_fn):
10664 """Remove any export.
10667 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10668 # If the instance was not found we'll try with the name that was passed in.
10669 # This will only work if it was an FQDN, though.
10671 if not instance_name:
10673 instance_name = self.op.instance_name
10675 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10676 exportlist = self.rpc.call_export_list(locked_nodes)
10678 for node in exportlist:
10679 msg = exportlist[node].fail_msg
10681 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10683 if instance_name in exportlist[node].payload:
10685 result = self.rpc.call_export_remove(node, instance_name)
10686 msg = result.fail_msg
10688 logging.error("Could not remove export for instance %s"
10689 " on node %s: %s", instance_name, node, msg)
10691 if fqdn_warn and not found:
10692 feedback_fn("Export not found. If trying to remove an export belonging"
10693 " to a deleted instance please use its Fully Qualified"
10697 class LUGroupAdd(LogicalUnit):
10698 """Logical unit for creating node groups.
10701 HPATH = "group-add"
10702 HTYPE = constants.HTYPE_GROUP
10705 def ExpandNames(self):
10706 # We need the new group's UUID here so that we can create and acquire the
10707 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10708 # that it should not check whether the UUID exists in the configuration.
10709 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10710 self.needed_locks = {}
10711 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10713 def CheckPrereq(self):
10714 """Check prerequisites.
10716 This checks that the given group name is not an existing node group
10721 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10722 except errors.OpPrereqError:
10725 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10726 " node group (UUID: %s)" %
10727 (self.op.group_name, existing_uuid),
10728 errors.ECODE_EXISTS)
10730 if self.op.ndparams:
10731 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10733 def BuildHooksEnv(self):
10734 """Build hooks env.
10738 "GROUP_NAME": self.op.group_name,
10741 def BuildHooksNodes(self):
10742 """Build hooks nodes.
10745 mn = self.cfg.GetMasterNode()
10746 return ([mn], [mn])
10748 def Exec(self, feedback_fn):
10749 """Add the node group to the cluster.
10752 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10753 uuid=self.group_uuid,
10754 alloc_policy=self.op.alloc_policy,
10755 ndparams=self.op.ndparams)
10757 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10758 del self.remove_locks[locking.LEVEL_NODEGROUP]
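# Illustrative usage sketch (not in the original source): assuming the usual
# Op<->LU name mapping, adding a group would be submitted roughly as follows
# (the group name is made up for the example).
#   op = opcodes.OpGroupAdd(group_name="rack1",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)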
10761 class LUGroupAssignNodes(NoHooksLU):
10762 """Logical unit for assigning nodes to groups.
10767 def ExpandNames(self):
10768 # These raise errors.OpPrereqError on their own:
10769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10770 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10772 # We want to lock all the affected nodes and groups. We have readily
10773 # available the list of nodes, and the *destination* group. To gather the
10774 # list of "source" groups, we need to fetch node information.
10775 self.node_data = self.cfg.GetAllNodesInfo()
10776 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10777 affected_groups.add(self.group_uuid)
10779 self.needed_locks = {
10780 locking.LEVEL_NODEGROUP: list(affected_groups),
10781 locking.LEVEL_NODE: self.op.nodes,
10784 def CheckPrereq(self):
10785 """Check prerequisites.
10788 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10789 instance_data = self.cfg.GetAllInstancesInfo()
10791 if self.group is None:
10792 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10793 (self.op.group_name, self.group_uuid))
10795 (new_splits, previous_splits) = \
10796 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10797 for node in self.op.nodes],
10798 self.node_data, instance_data)
10801 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10803 if not self.op.force:
10804 raise errors.OpExecError("The following instances get split by this"
10805 " change and --force was not given: %s" %
10808 self.LogWarning("This operation will split the following instances: %s",
10811 if previous_splits:
10812 self.LogWarning("In addition, these already-split instances continue"
10813 " to be split across groups: %s",
10814 utils.CommaJoin(utils.NiceSort(previous_splits)))
10816 def Exec(self, feedback_fn):
10817 """Assign nodes to a new group.
10820 for node in self.op.nodes:
10821 self.node_data[node].group = self.group_uuid
10823 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10826 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10827 """Check for split instances after a node assignment.
10829 This method considers a series of node assignments as an atomic operation,
10830 and returns information about split instances after applying the set of
10833 In particular, it returns information about newly split instances, and
10834 instances that were already split, and remain so after the change.
10836 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10839 @type changes: list of (node_name, new_group_uuid) pairs.
10840 @param changes: list of node assignments to consider.
10841 @param node_data: a dict with data for all nodes
10842 @param instance_data: a dict with all instances to consider
10843 @rtype: a two-tuple
10844 @return: a list of instances that were previously okay and become split as a
10845 consequence of this change, and a list of instances that were previously
10846 split and that this change does not fix.
10849 changed_nodes = dict((node, group) for node, group in changes
10850 if node_data[node].group != group)
10852 all_split_instances = set()
10853 previously_split_instances = set()
10855 def InstanceNodes(instance):
10856 return [instance.primary_node] + list(instance.secondary_nodes)
10858 for inst in instance_data.values():
10859 if inst.disk_template not in constants.DTS_INT_MIRROR:
10862 instance_nodes = InstanceNodes(inst)
10864 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10865 previously_split_instances.add(inst.name)
10867 if len(set(changed_nodes.get(node, node_data[node].group)
10868 for node in instance_nodes)) > 1:
10869 all_split_instances.add(inst.name)
10871 return (list(all_split_instances - previously_split_instances),
10872 list(previously_split_instances & all_split_instances))
10875 class _GroupQuery(_QueryBase):
10876 FIELDS = query.GROUP_FIELDS
10878 def ExpandNames(self, lu):
10879 lu.needed_locks = {}
10881 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10882 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10885 self.wanted = [name_to_uuid[name]
10886 for name in utils.NiceSort(name_to_uuid.keys())]
10888 # Accept names to be either names or UUIDs.
10891 all_uuid = frozenset(self._all_groups.keys())
10893 for name in self.names:
10894 if name in all_uuid:
10895 self.wanted.append(name)
10896 elif name in name_to_uuid:
10897 self.wanted.append(name_to_uuid[name])
10899 missing.append(name)
10902 raise errors.OpPrereqError("Some groups do not exist: %s" %
10903 utils.CommaJoin(missing),
10904 errors.ECODE_NOENT)
10906 def DeclareLocks(self, lu, level):
10909 def _GetQueryData(self, lu):
10910 """Computes the list of node groups and their attributes.
10913 do_nodes = query.GQ_NODE in self.requested_data
10914 do_instances = query.GQ_INST in self.requested_data
10916 group_to_nodes = None
10917 group_to_instances = None
10919 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10920 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10921 # latter GetAllInstancesInfo() is not enough, for we have to go through
10922 # instance->node. Hence, we will need to process nodes even if we only need
10923 # instance information.
10924 if do_nodes or do_instances:
10925 all_nodes = lu.cfg.GetAllNodesInfo()
10926 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10929 for node in all_nodes.values():
10930 if node.group in group_to_nodes:
10931 group_to_nodes[node.group].append(node.name)
10932 node_to_group[node.name] = node.group
10935 all_instances = lu.cfg.GetAllInstancesInfo()
10936 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10938 for instance in all_instances.values():
10939 node = instance.primary_node
10940 if node in node_to_group:
10941 group_to_instances[node_to_group[node]].append(instance.name)
10944 # Do not pass on node information if it was not requested.
10945 group_to_nodes = None
10947 return query.GroupQueryData([self._all_groups[uuid]
10948 for uuid in self.wanted],
10949 group_to_nodes, group_to_instances)
10952 class LUGroupQuery(NoHooksLU):
10953 """Logical unit for querying node groups.
10958 def CheckArguments(self):
10959 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10960 self.op.output_fields, False)
10962 def ExpandNames(self):
10963 self.gq.ExpandNames(self)
10965 def Exec(self, feedback_fn):
10966 return self.gq.OldStyleQuery(self)
10969 class LUGroupSetParams(LogicalUnit):
10970 """Modifies the parameters of a node group.
10973 HPATH = "group-modify"
10974 HTYPE = constants.HTYPE_GROUP
10977 def CheckArguments(self):
10980 self.op.alloc_policy,
10983 if all_changes.count(None) == len(all_changes):
10984 raise errors.OpPrereqError("Please pass at least one modification",
10985 errors.ECODE_INVAL)
10987 def ExpandNames(self):
10988 # This raises errors.OpPrereqError on its own:
10989 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10991 self.needed_locks = {
10992 locking.LEVEL_NODEGROUP: [self.group_uuid],
10995 def CheckPrereq(self):
10996 """Check prerequisites.
10999 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11001 if self.group is None:
11002 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11003 (self.op.group_name, self.group_uuid))
11005 if self.op.ndparams:
11006 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11007 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11008 self.new_ndparams = new_ndparams
11010 def BuildHooksEnv(self):
11011 """Build hooks env.
11015 "GROUP_NAME": self.op.group_name,
11016 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11019 def BuildHooksNodes(self):
11020 """Build hooks nodes.
11023 mn = self.cfg.GetMasterNode()
11024 return ([mn], [mn])
11026 def Exec(self, feedback_fn):
11027 """Modifies the node group.
11032 if self.op.ndparams:
11033 self.group.ndparams = self.new_ndparams
11034 result.append(("ndparams", str(self.group.ndparams)))
11036 if self.op.alloc_policy:
11037 self.group.alloc_policy = self.op.alloc_policy
11039 self.cfg.Update(self.group, feedback_fn)
11044 class LUGroupRemove(LogicalUnit):
11045 HPATH = "group-remove"
11046 HTYPE = constants.HTYPE_GROUP
11049 def ExpandNames(self):
11050 # This raises errors.OpPrereqError on its own:
11051 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11052 self.needed_locks = {
11053 locking.LEVEL_NODEGROUP: [self.group_uuid],
11056 def CheckPrereq(self):
11057 """Check prerequisites.
11059 This checks that the given group name exists as a node group, that it is
11060 empty (i.e., contains no nodes), and that it is not the last group of the
11064 # Verify that the group is empty.
11065 group_nodes = [node.name
11066 for node in self.cfg.GetAllNodesInfo().values()
11067 if node.group == self.group_uuid]
11070 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11072 (self.op.group_name,
11073 utils.CommaJoin(utils.NiceSort(group_nodes))),
11074 errors.ECODE_STATE)
11076 # Verify the cluster would not be left group-less.
11077 if len(self.cfg.GetNodeGroupList()) == 1:
11078 raise errors.OpPrereqError("Group '%s' is the only group,"
11079 " cannot be removed" %
11080 self.op.group_name,
11081 errors.ECODE_STATE)
11083 def BuildHooksEnv(self):
11084 """Build hooks env.
11088 "GROUP_NAME": self.op.group_name,
11091 def BuildHooksNodes(self):
11092 """Build hooks nodes.
11095 mn = self.cfg.GetMasterNode()
11096 return ([mn], [mn])
11098 def Exec(self, feedback_fn):
11099 """Remove the node group.
11103 self.cfg.RemoveNodeGroup(self.group_uuid)
11104 except errors.ConfigurationError:
11105 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11106 (self.op.group_name, self.group_uuid))
11108 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11111 class LUGroupRename(LogicalUnit):
11112 HPATH = "group-rename"
11113 HTYPE = constants.HTYPE_GROUP
11116 def ExpandNames(self):
11117 # This raises errors.OpPrereqError on its own:
11118 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11120 self.needed_locks = {
11121 locking.LEVEL_NODEGROUP: [self.group_uuid],
11124 def CheckPrereq(self):
11125 """Check prerequisites.
11127 Ensures requested new name is not yet used.
11131 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11132 except errors.OpPrereqError:
11135 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11136 " node group (UUID: %s)" %
11137 (self.op.new_name, new_name_uuid),
11138 errors.ECODE_EXISTS)
11140 def BuildHooksEnv(self):
11141 """Build hooks env.
11145 "OLD_NAME": self.op.group_name,
11146 "NEW_NAME": self.op.new_name,
11149 def BuildHooksNodes(self):
11150 """Build hooks nodes.
11153 mn = self.cfg.GetMasterNode()
11155 all_nodes = self.cfg.GetAllNodesInfo()
11156 all_nodes.pop(mn, None)
11159 run_nodes.extend(node.name for node in all_nodes.values()
11160 if node.group == self.group_uuid)
11162 return (run_nodes, run_nodes)
11164 def Exec(self, feedback_fn):
11165 """Rename the node group.
11168 group = self.cfg.GetNodeGroup(self.group_uuid)
11171 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11172 (self.op.group_name, self.group_uuid))
11174 group.name = self.op.new_name
11175 self.cfg.Update(group, feedback_fn)
11177 return self.op.new_name
11180 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11181 """Generic tags LU.
11183 This is an abstract class which is the parent of all the other tags LUs.
11186 def ExpandNames(self):
11187 self.group_uuid = None
11188 self.needed_locks = {}
11189 if self.op.kind == constants.TAG_NODE:
11190 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11191 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11192 elif self.op.kind == constants.TAG_INSTANCE:
11193 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11194 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11195 elif self.op.kind == constants.TAG_NODEGROUP:
11196 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11198 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11199 # not possible to acquire the BGL based on opcode parameters)
11201 def CheckPrereq(self):
11202 """Check prerequisites.
11205 if self.op.kind == constants.TAG_CLUSTER:
11206 self.target = self.cfg.GetClusterInfo()
11207 elif self.op.kind == constants.TAG_NODE:
11208 self.target = self.cfg.GetNodeInfo(self.op.name)
11209 elif self.op.kind == constants.TAG_INSTANCE:
11210 self.target = self.cfg.GetInstanceInfo(self.op.name)
11211 elif self.op.kind == constants.TAG_NODEGROUP:
11212 self.target = self.cfg.GetNodeGroup(self.group_uuid)
11214 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11215 str(self.op.kind), errors.ECODE_INVAL)
11218 class LUTagsGet(TagsLU):
11219 """Returns the tags of a given object.
11224 def ExpandNames(self):
11225 TagsLU.ExpandNames(self)
11227 # Share locks as this is only a read operation
11228 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11230 def Exec(self, feedback_fn):
11231 """Returns the tag list.
11234 return list(self.target.GetTags())
11237 class LUTagsSearch(NoHooksLU):
11238 """Searches the tags for a given pattern.
11243 def ExpandNames(self):
11244 self.needed_locks = {}
11246 def CheckPrereq(self):
11247 """Check prerequisites.
11249 This checks that the given pattern is valid by compiling it.
11253 self.re = re.compile(self.op.pattern)
11254 except re.error, err:
11255 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11256 (self.op.pattern, err), errors.ECODE_INVAL)
11258 def Exec(self, feedback_fn):
11259 """Returns the tag list.
11263 tgts = [("/cluster", cfg.GetClusterInfo())]
11264 ilist = cfg.GetAllInstancesInfo().values()
11265 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11266 nlist = cfg.GetAllNodesInfo().values()
11267 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11268 tgts.extend(("/nodegroup/%s" % n.name, n)
11269 for n in cfg.GetAllNodeGroupsInfo().values())
11271 for path, target in tgts:
11272 for tag in target.GetTags():
11273 if self.re.search(tag):
11274 results.append((path, tag))
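# Illustrative sketch (not part of the LU): the accumulated result is a list
# of (path, tag) pairs; e.g. a pattern such as "^web" might yield (names made
# up):
#   [("/instances/inst1.example.com", "webserver"),
#    ("/nodes/node1.example.com", "web-rack")]
# using the path prefixes built above: /cluster, /instances/<name>,
# /nodes/<name> and /nodegroup/<name>.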
11278 class LUTagsSet(TagsLU):
11279 """Sets a tag on a given object.
11284 def CheckPrereq(self):
11285 """Check prerequisites.
11287 This checks the type and length of the tag name and value.
11290 TagsLU.CheckPrereq(self)
11291 for tag in self.op.tags:
11292 objects.TaggableObject.ValidateTag(tag)
11294 def Exec(self, feedback_fn):
11299 for tag in self.op.tags:
11300 self.target.AddTag(tag)
11301 except errors.TagError, err:
11302 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11303 self.cfg.Update(self.target, feedback_fn)
11306 class LUTagsDel(TagsLU):
11307 """Delete a list of tags from a given object.
11312 def CheckPrereq(self):
11313 """Check prerequisites.
11315 This checks that we have the given tag.
11318 TagsLU.CheckPrereq(self)
11319 for tag in self.op.tags:
11320 objects.TaggableObject.ValidateTag(tag)
11321 del_tags = frozenset(self.op.tags)
11322 cur_tags = self.target.GetTags()
11324 diff_tags = del_tags - cur_tags
11326 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11327 raise errors.OpPrereqError("Tag(s) %s not found" %
11328 (utils.CommaJoin(diff_names), ),
11329 errors.ECODE_NOENT)
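# Illustrative sketch of the check above: plain set arithmetic decides whether
# all requested tags are present, e.g. (values made up):
#   del_tags = frozenset(["a", "b"]); cur_tags = set(["a"])
#   del_tags - cur_tags  ->  frozenset(["b"])   # reported as "Tag(s) 'b' not found"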
11331 def Exec(self, feedback_fn):
11332 """Remove the tag from the object.
11335 for tag in self.op.tags:
11336 self.target.RemoveTag(tag)
11337 self.cfg.Update(self.target, feedback_fn)
11340 class LUTestDelay(NoHooksLU):
11341 """Sleep for a specified amount of time.
11343 This LU sleeps on the master and/or nodes for a specified amount of time.
11349 def ExpandNames(self):
11350 """Expand names and set required locks.
11352 This expands the node list, if any.
11355 self.needed_locks = {}
11356 if self.op.on_nodes:
11357 # _GetWantedNodes can be used here, but is not always appropriate to use
11358 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11359 # more information.
11360 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11361 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11363 def _TestDelay(self):
11364 """Do the actual sleep.
11367 if self.op.on_master:
11368 if not utils.TestDelay(self.op.duration):
11369 raise errors.OpExecError("Error during master delay test")
11370 if self.op.on_nodes:
11371 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11372 for node, node_result in result.items():
11373 node_result.Raise("Failure during rpc call to node %s" % node)
11375 def Exec(self, feedback_fn):
11376 """Execute the test delay opcode, with the wanted repetitions.
11379 if self.op.repeat == 0:
11382 top_value = self.op.repeat - 1
11383 for i in range(self.op.repeat):
11384 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11388 class LUTestJqueue(NoHooksLU):
11389 """Utility LU to test some aspects of the job queue.
11394 # Must be lower than default timeout for WaitForJobChange to see whether it
11395 # notices changed jobs
11396 _CLIENT_CONNECT_TIMEOUT = 20.0
11397 _CLIENT_CONFIRM_TIMEOUT = 60.0
11400 def _NotifyUsingSocket(cls, cb, errcls):
11401 """Opens a Unix socket and waits for another program to connect.
11404 @param cb: Callback to send socket name to client
11405 @type errcls: class
11406 @param errcls: Exception class to use for errors
11409 # Using a temporary directory as there's no easy way to create temporary
11410 # sockets without writing a custom loop around tempfile.mktemp and
11412 tmpdir = tempfile.mkdtemp()
11414 tmpsock = utils.PathJoin(tmpdir, "sock")
11416 logging.debug("Creating temporary socket at %s", tmpsock)
11417 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11422 # Send details to client
11425 # Wait for client to connect before continuing
11426 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11428 (conn, _) = sock.accept()
11429 except socket.error, err:
11430 raise errcls("Client didn't connect in time (%s)" % err)
11434 # Remove as soon as client is connected
11435 shutil.rmtree(tmpdir)
11437 # Wait for client to close
11440 # pylint: disable-msg=E1101
11441 # Instance of '_socketobject' has no ... member
11442 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11444 except socket.error, err:
11445 raise errcls("Client failed to confirm notification (%s)" % err)
11449 def _SendNotification(self, test, arg, sockname):
11450 """Sends a notification to the client.
11453 @param test: Test name
11454 @param arg: Test argument (depends on test)
11455 @type sockname: string
11456 @param sockname: Socket path
11459 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
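# Illustrative client-side sketch (not part of this module; assumes the test
# harness has extracted the socket path from the ELOG_JQUEUE_TEST message):
#   import socket
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(sockname)   # unblocks the accept() in _NotifyUsingSocket
#   sock.close()             # closing the connection confirms the notification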
11461 def _Notify(self, prereq, test, arg):
11462 """Notifies the client of a test.
11465 @param prereq: Whether this is a prereq-phase test
11467 @param test: Test name
11468 @param arg: Test argument (depends on test)
11472 errcls = errors.OpPrereqError
11474 errcls = errors.OpExecError
11476 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11480 def CheckArguments(self):
11481 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11482 self.expandnames_calls = 0
11484 def ExpandNames(self):
11485 checkargs_calls = getattr(self, "checkargs_calls", 0)
11486 if checkargs_calls < 1:
11487 raise errors.ProgrammerError("CheckArguments was not called")
11489 self.expandnames_calls += 1
11491 if self.op.notify_waitlock:
11492 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11494 self.LogInfo("Expanding names")
11496 # Get lock on master node (just to get a lock, not for a particular reason)
11497 self.needed_locks = {
11498 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11501 def Exec(self, feedback_fn):
11502 if self.expandnames_calls < 1:
11503 raise errors.ProgrammerError("ExpandNames was not called")
11505 if self.op.notify_exec:
11506 self._Notify(False, constants.JQT_EXEC, None)
11508 self.LogInfo("Executing")
11510 if self.op.log_messages:
11511 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11512 for idx, msg in enumerate(self.op.log_messages):
11513 self.LogInfo("Sending log message %s", idx + 1)
11514 feedback_fn(constants.JQT_MSGPREFIX + msg)
11515 # Report how many test messages have been sent
11516 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11519 raise errors.OpExecError("Opcode failure was requested")
11524 class IAllocator(object):
11525 """IAllocator framework.
11527 An IAllocator instance has four sets of attributes:
11528 - cfg that is needed to query the cluster
11529 - input data (all members of the _KEYS class attribute are required)
11530 - four buffer attributes (in|out_data|text), that represent the
11531 input (to the external script) in text and data structure format,
11532 and the output from it, again in two formats
11533 - the result variables from the script (success, info, result) for easy usage
11537 # pylint: disable-msg=R0902
11538 # lots of instance attributes
11540 "name", "mem_size", "disks", "disk_template",
11541 "os", "tags", "nics", "vcpus", "hypervisor",
11544 "name", "relocate_from",
11550 def __init__(self, cfg, rpc, mode, **kwargs):
11553 # init buffer variables
11554 self.in_text = self.out_text = self.in_data = self.out_data = None
11555 # init all input fields so that pylint is happy
11557 self.mem_size = self.disks = self.disk_template = None
11558 self.os = self.tags = self.nics = self.vcpus = None
11559 self.hypervisor = None
11560 self.relocate_from = None
11562 self.evac_nodes = None
11564 self.required_nodes = None
11565 # init result fields
11566 self.success = self.info = self.result = None
11567 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11568 keyset = self._ALLO_KEYS
11569 fn = self._AddNewInstance
11570 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11571 keyset = self._RELO_KEYS
11572 fn = self._AddRelocateInstance
11573 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11574 keyset = self._EVAC_KEYS
11575 fn = self._AddEvacuateNodes
11577 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11578 " IAllocator" % self.mode)
11580 if key not in keyset:
11581 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11582 " IAllocator" % key)
11583 setattr(self, key, kwargs[key])
11586 if key not in kwargs:
11587 raise errors.ProgrammerError("Missing input parameter '%s' to"
11588 " IAllocator" % key)
11589 self._BuildInputData(fn)
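# Illustrative construction sketch from within an LU (mirrors
# LUTestAllocator.Exec further down; all values are made up):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", mem_size=512, vcpus=1,
#                    os="debian-image", tags=[], nics=[{}],
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8, hypervisor=None)
#   ial.Run("hail")   # "hail" being one possible iallocator script name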
11591 def _ComputeClusterData(self):
11592 """Compute the generic allocator input data.
11594 This is the data that is independent of the actual operation.
11598 cluster_info = cfg.GetClusterInfo()
11601 "version": constants.IALLOCATOR_VERSION,
11602 "cluster_name": cfg.GetClusterName(),
11603 "cluster_tags": list(cluster_info.GetTags()),
11604 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11605 # we don't have job IDs
11607 ninfo = cfg.GetAllNodesInfo()
11608 iinfo = cfg.GetAllInstancesInfo().values()
11609 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11612 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11614 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11615 hypervisor_name = self.hypervisor
11616 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11617 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11618 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11619 hypervisor_name = cluster_info.enabled_hypervisors[0]
11621 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11624 self.rpc.call_all_instances_info(node_list,
11625 cluster_info.enabled_hypervisors)
11627 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11629 config_ndata = self._ComputeBasicNodeData(ninfo)
11630 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11631 i_list, config_ndata)
11632 assert len(data["nodes"]) == len(ninfo), \
11633 "Incomplete node data computed"
11635 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11637 self.in_data = data
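# Illustrative sketch of the structure built above (abridged):
#   self.in_data = {
#     "version": ..., "cluster_name": ..., "cluster_tags": [...],
#     "enabled_hypervisors": [...],
#     "nodegroups": {group_uuid: {...}},
#     "nodes": {node_name: {...}},
#     "instances": {instance_name: {...}},
#   }
# The mode-specific "request" section is added later by _BuildInputData.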
11640 def _ComputeNodeGroupData(cfg):
11641 """Compute node groups data.
11645 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11647 "name": gdata.name,
11648 "alloc_policy": gdata.alloc_policy,
11653 def _ComputeBasicNodeData(node_cfg):
11654 """Compute global node data.
11657 @returns: a dict of name: (node dict, node config)
11661 for ninfo in node_cfg.values():
11662 # fill in static (config-based) values
11664 "tags": list(ninfo.GetTags()),
11665 "primary_ip": ninfo.primary_ip,
11666 "secondary_ip": ninfo.secondary_ip,
11667 "offline": ninfo.offline,
11668 "drained": ninfo.drained,
11669 "master_candidate": ninfo.master_candidate,
11670 "group": ninfo.group,
11671 "master_capable": ninfo.master_capable,
11672 "vm_capable": ninfo.vm_capable,
11675 node_results[ninfo.name] = pnr
11677 return node_results
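# Illustrative sketch of one entry of the returned dict (static, config-only
# fields; values are made up):
#   node_results["node1.example.com"] = {
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "192.0.2.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group uuid>", "master_capable": True, "vm_capable": True,
#   }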
11680 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11682 """Compute global node data.
11684 @param node_results: the basic node structures as filled from the config
11687 # make a copy of the current dict
11688 node_results = dict(node_results)
11689 for nname, nresult in node_data.items():
11690 assert nname in node_results, "Missing basic data for node %s" % nname
11691 ninfo = node_cfg[nname]
11693 if not (ninfo.offline or ninfo.drained):
11694 nresult.Raise("Can't get data for node %s" % nname)
11695 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11697 remote_info = nresult.payload
11699 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11700 'vg_size', 'vg_free', 'cpu_total']:
11701 if attr not in remote_info:
11702 raise errors.OpExecError("Node '%s' didn't return attribute"
11703 " '%s'" % (nname, attr))
11704 if not isinstance(remote_info[attr], int):
11705 raise errors.OpExecError("Node '%s' returned invalid value"
11707 (nname, attr, remote_info[attr]))
11708 # compute memory used by primary instances
11709 i_p_mem = i_p_up_mem = 0
11710 for iinfo, beinfo in i_list:
11711 if iinfo.primary_node == nname:
11712 i_p_mem += beinfo[constants.BE_MEMORY]
11713 if iinfo.name not in node_iinfo[nname].payload:
11716 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11717 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11718 remote_info['memory_free'] -= max(0, i_mem_diff)
11721 i_p_up_mem += beinfo[constants.BE_MEMORY]
11723 # compute memory used by instances
11725 "total_memory": remote_info['memory_total'],
11726 "reserved_memory": remote_info['memory_dom0'],
11727 "free_memory": remote_info['memory_free'],
11728 "total_disk": remote_info['vg_size'],
11729 "free_disk": remote_info['vg_free'],
11730 "total_cpus": remote_info['cpu_total'],
11731 "i_pri_memory": i_p_mem,
11732 "i_pri_up_memory": i_p_up_mem,
11734 pnr_dyn.update(node_results[nname])
11735 node_results[nname] = pnr_dyn
11737 return node_results
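# Illustrative sketch of the runtime-derived fields merged into each reachable
# node's entry above (memory and disk figures as reported by the node,
# typically in MiB):
#   {"total_memory": ..., "reserved_memory": ..., "free_memory": ...,
#    "total_disk": ..., "free_disk": ..., "total_cpus": ...,
#    "i_pri_memory": ..., "i_pri_up_memory": ...}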
11740 def _ComputeInstanceData(cluster_info, i_list):
11741 """Compute global instance data.
11745 for iinfo, beinfo in i_list:
11747 for nic in iinfo.nics:
11748 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11749 nic_dict = {"mac": nic.mac,
11751 "mode": filled_params[constants.NIC_MODE],
11752 "link": filled_params[constants.NIC_LINK],
11754 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11755 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11756 nic_data.append(nic_dict)
11758 "tags": list(iinfo.GetTags()),
11759 "admin_up": iinfo.admin_up,
11760 "vcpus": beinfo[constants.BE_VCPUS],
11761 "memory": beinfo[constants.BE_MEMORY],
11763 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11765 "disks": [{constants.IDISK_SIZE: dsk.size,
11766 constants.IDISK_MODE: dsk.mode}
11767 for dsk in iinfo.disks],
11768 "disk_template": iinfo.disk_template,
11769 "hypervisor": iinfo.hypervisor,
11771 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11773 instance_data[iinfo.name] = pir
11775 return instance_data
11777 def _AddNewInstance(self):
11778 """Add new instance data to allocator structure.
11780 This, in combination with _ComputeClusterData, will create the
11781 correct structure needed as input for the allocator.
11783 The checks for the completeness of the opcode must have already been done.
11787 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11789 if self.disk_template in constants.DTS_INT_MIRROR:
11790 self.required_nodes = 2
11792 self.required_nodes = 1
11795 "disk_template": self.disk_template,
11798 "vcpus": self.vcpus,
11799 "memory": self.mem_size,
11800 "disks": self.disks,
11801 "disk_space_total": disk_space,
11803 "required_nodes": self.required_nodes,
11807 def _AddRelocateInstance(self):
11808 """Add relocate instance data to allocator structure.
11810 This, in combination with _ComputeClusterData, will create the
11811 correct structure needed as input for the allocator.
11813 The checks for the completeness of the opcode must have already been done.
11817 instance = self.cfg.GetInstanceInfo(self.name)
11818 if instance is None:
11819 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11820 " IAllocator" % self.name)
11822 if instance.disk_template not in constants.DTS_MIRRORED:
11823 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11824 errors.ECODE_INVAL)
11826 if instance.disk_template in constants.DTS_INT_MIRROR and \
11827 len(instance.secondary_nodes) != 1:
11828 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11829 errors.ECODE_STATE)
11831 self.required_nodes = 1
11832 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11833 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11837 "disk_space_total": disk_space,
11838 "required_nodes": self.required_nodes,
11839 "relocate_from": self.relocate_from,
11843 def _AddEvacuateNodes(self):
11844 """Add evacuate nodes data to allocator structure.
11848 "evac_nodes": self.evac_nodes
11852 def _BuildInputData(self, fn):
11853 """Build input data structures.
11856 self._ComputeClusterData()
11859 request["type"] = self.mode
11860 self.in_data["request"] = request
11862 self.in_text = serializer.Dump(self.in_data)
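# Illustrative sketch: for an allocation request the serialized input ends
# with a "request" section roughly like (cf. _AddNewInstance; keys abridged):
#   {"type": <self.mode>, "name": ..., "disk_template": ..., "vcpus": ...,
#    "memory": ..., "disks": [...], "disk_space_total": ...,
#    "required_nodes": 1 or 2, ...}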
11864 def Run(self, name, validate=True, call_fn=None):
11865 """Run an instance allocator and return the results.
11868 if call_fn is None:
11869 call_fn = self.rpc.call_iallocator_runner
11871 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11872 result.Raise("Failure while running the iallocator script")
11874 self.out_text = result.payload
11876 self._ValidateResult()
11878 def _ValidateResult(self):
11879 """Process the allocator results.
11881 This will process and if successful save the result in
11882 self.out_data and the other parameters.
11886 rdict = serializer.Load(self.out_text)
11887 except Exception, err:
11888 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11890 if not isinstance(rdict, dict):
11891 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11893 # TODO: remove backwards compatibility in later versions
11894 if "nodes" in rdict and "result" not in rdict:
11895 rdict["result"] = rdict["nodes"]
11898 for key in "success", "info", "result":
11899 if key not in rdict:
11900 raise errors.OpExecError("Can't parse iallocator results:"
11901 " missing key '%s'" % key)
11902 setattr(self, key, rdict[key])
11904 if not isinstance(rdict["result"], list):
11905 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11908 if self.mode == constants.IALLOCATOR_MODE_RELOC:
11909 assert self.relocate_from is not None
11910 assert self.required_nodes == 1
11912 node2group = dict((name, ndata["group"])
11913 for (name, ndata) in self.in_data["nodes"].items())
11915 fn = compat.partial(self._NodesToGroups, node2group,
11916 self.in_data["nodegroups"])
11918 request_groups = fn(self.relocate_from)
11919 result_groups = fn(rdict["result"])
11921 if result_groups != request_groups:
11922 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11923 " differ from original groups (%s)" %
11924 (utils.CommaJoin(result_groups),
11925 utils.CommaJoin(request_groups)))
11927 self.out_data = rdict
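# Illustrative sketch of a well-formed allocator reply, after deserialization
# (node names made up):
#   {"success": True, "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}
# "success", "info" and "result" are mandatory and "result" must be a list.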
11930 def _NodesToGroups(node2group, groups, nodes):
11931 """Returns a list of unique group names for a list of nodes.
11933 @type node2group: dict
11934 @param node2group: Map from node name to group UUID
11936 @param groups: Group information
11938 @param nodes: Node names
11945 group_uuid = node2group[node]
11947 # Ignore unknown node
11951 group = groups[group_uuid]
11953 # Can't find group, let's use UUID
11954 group_name = group_uuid
11956 group_name = group["name"]
11958 result.add(group_name)
11960 return sorted(result)
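# Illustrative sketch of _NodesToGroups (all names made up):
#   node2group = {"n1": "uuid-a", "n2": "uuid-b", "n3": "uuid-a"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   _NodesToGroups(node2group, groups, ["n1", "n2", "n3", "unknown"])
#   -> ["default", "rack2"]   # sorted and unique; unknown nodes are ignored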
11963 class LUTestAllocator(NoHooksLU):
11964 """Run allocator tests.
11966 This LU runs the allocator tests
11969 def CheckPrereq(self):
11970 """Check prerequisites.
11972 This checks the opcode parameters depending on the requested direction and mode.
11975 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11976 for attr in ["mem_size", "disks", "disk_template",
11977 "os", "tags", "nics", "vcpus"]:
11978 if not hasattr(self.op, attr):
11979 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11980 attr, errors.ECODE_INVAL)
11981 iname = self.cfg.ExpandInstanceName(self.op.name)
11982 if iname is not None:
11983 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11984 iname, errors.ECODE_EXISTS)
11985 if not isinstance(self.op.nics, list):
11986 raise errors.OpPrereqError("Invalid parameter 'nics'",
11987 errors.ECODE_INVAL)
11988 if not isinstance(self.op.disks, list):
11989 raise errors.OpPrereqError("Invalid parameter 'disks'",
11990 errors.ECODE_INVAL)
11991 for row in self.op.disks:
11992 if (not isinstance(row, dict) or
11993 "size" not in row or
11994 not isinstance(row["size"], int) or
11995 "mode" not in row or
11996 row["mode"] not in ['r', 'w']):
11997 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11998 " parameter", errors.ECODE_INVAL)
11999 if self.op.hypervisor is None:
12000 self.op.hypervisor = self.cfg.GetHypervisorType()
12001 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12002 fname = _ExpandInstanceName(self.cfg, self.op.name)
12003 self.op.name = fname
12004 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12005 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12006 if not hasattr(self.op, "evac_nodes"):
12007 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12008 " opcode input", errors.ECODE_INVAL)
12010 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12011 self.op.mode, errors.ECODE_INVAL)
12013 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12014 if self.op.allocator is None:
12015 raise errors.OpPrereqError("Missing allocator name",
12016 errors.ECODE_INVAL)
12017 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12018 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12019 self.op.direction, errors.ECODE_INVAL)
12021 def Exec(self, feedback_fn):
12022 """Run the allocator test.
12025 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12026 ial = IAllocator(self.cfg, self.rpc,
12029 mem_size=self.op.mem_size,
12030 disks=self.op.disks,
12031 disk_template=self.op.disk_template,
12035 vcpus=self.op.vcpus,
12036 hypervisor=self.op.hypervisor,
12038 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12039 ial = IAllocator(self.cfg, self.rpc,
12042 relocate_from=list(self.relocate_from),
12044 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12045 ial = IAllocator(self.cfg, self.rpc,
12047 evac_nodes=self.op.evac_nodes)
12049 raise errors.ProgrammerError("Uncatched mode %s in"
12050 " LUTestAllocator.Exec", self.op.mode)
12052 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12053 result = ial.in_text
12055 ial.Run(self.op.allocator, validate=False)
12056 result = ial.out_text
12060 #: Query type implementations
_QUERY_IMPL = {
12062 constants.QR_INSTANCE: _InstanceQuery,
12063 constants.QR_NODE: _NodeQuery,
12064 constants.QR_GROUP: _GroupQuery,
12065 constants.QR_OS: _OsQuery,
}
12068 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12071 def _GetQueryImplementation(name):
12072 """Returns the implemtnation for a query type.
12074 @param name: Query type, must be one of L{constants.QR_VIA_OP}
12078 return _QUERY_IMPL[name]
12080 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12081 errors.ECODE_INVAL)
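# Illustrative usage sketch (not part of the module):
#   _GetQueryImplementation(constants.QR_NODE)    # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")   # raises OpPrereqError (ECODE_INVAL)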