# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import logging
import operator
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))

#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: a required node group name (for single-group LUs)
_PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
188 """Returns the SshRunner object
192 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
195 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.
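
    A minimal illustrative sketch (the C{mode} attribute and its values
    are made-up names, not part of this base class)::

      def CheckArguments(self):
        if self.op.mode not in ("plain", "mirrored"):
          raise errors.OpPrereqError("Invalid mode '%s'" % self.op.mode,
                                     errors.ECODE_INVAL)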

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    # self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.
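
    A typical implementation recalculates the node locks once the instance
    locks are held, as suggested in L{_LockInstancesNodes} (sketch)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()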

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty node list should be returned as an empty list (and not
    None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.
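
    An illustrative sketch (the feedback message is made up)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("Post-execution hooks have run")
        return lu_result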

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
450 """Tasklet base class.
452 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
453 they can mix legacy code with tasklets. Locking needs to be done in the LU,
454 tasklets know nothing about locks.
456 Subclasses must follow these rules:
457 - Implement CheckPrereq
461 def __init__(self, lu):
468 def CheckPrereq(self):
469 """Check prerequisites for this tasklets.
471 This method should check whether the prerequisites for the execution of
472 this tasklet are fulfilled. It can do internode communication, but it
473 should be idempotent - no cluster or system changes are allowed.
475 The method should raise errors.OpPrereqError in case something is not
476 fulfilled. Its return value is ignored.
478 This method should also update all parameters to their canonical form if it
479 hasn't been done before.
484 def Exec(self, feedback_fn):
485 """Execute the tasklet.
487 This method should implement the actual work. It should raise
488 errors.OpExecError for failures that are somewhat dealt with in code, or
492 raise NotImplementedError
496 """Base for query utility classes.
499 #: Attribute holding field definitions
502 def __init__(self, names, fields, use_locking):
503 """Initializes this class.
507 self.use_locking = use_locking
509 self.query = query.Query(self.FIELDS, fields)
510 self.requested_data = self.query.RequestedData()
512 self.do_locking = None
515 def _GetNames(self, lu, all_names, lock_level):
516 """Helper function to determine names asked for in the query.
520 names = lu.acquired_locks[lock_level]
524 if self.wanted == locking.ALL_SET:
525 assert not self.names
526 # caller didn't specify names, so ordering is not important
527 return utils.NiceSort(names)
529 # caller specified names and we must keep the same order
531 assert not self.do_locking or lu.acquired_locks[lock_level]
533 missing = set(self.wanted).difference(names)
535 raise errors.OpExecError("Some items were removed before retrieving"
536 " their data: %s" % missing)
538 # Return expanded names
542 def FieldsQuery(cls, fields):
543 """Returns list of available fields.
545 @return: List of L{objects.QueryFieldDefinition}
549 # Client requests all fields, sort by name
550 fdefs = sorted(query.GetAllFields(cls.FIELDS.values()),
551 key=operator.attrgetter("name"))
553 # Keep order as requested by client
554 fdefs = query.Query(cls.FIELDS, fields).GetFields()
556 return objects.QueryFieldsResponse(fields=fdefs).ToDict()
558 def ExpandNames(self, lu):
559 """Expand names for this query.
561 See L{LogicalUnit.ExpandNames}.
564 raise NotImplementedError()
566 def DeclareLocks(self, lu, level):
567 """Declare locks for this query.
569 See L{LogicalUnit.DeclareLocks}.
572 raise NotImplementedError()
574 def _GetQueryData(self, lu):
575 """Collects all data for this query.
577 @return: Query data object
580 raise NotImplementedError()
582 def NewStyleQuery(self, lu):
583 """Collect data and execute query.
586 data = self._GetQueryData(lu)
588 return objects.QueryResponse(data=self.query.Query(data),
589 fields=self.query.GetFields()).ToDict()
591 def OldStyleQuery(self, lu):
592 """Collect data and execute query.
595 return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.
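
  For example, a sketch of a few of the resulting keys (the values shown
  are made up)::

    {
      "OP_TARGET": "instance1.example.com",
      "INSTANCE_NAME": "instance1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      ...
    }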

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
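
    For example, a sketch of the two output styles for the same error
    (the node name is made up)::

      # with op.error_codes: "ERROR:ENODENET:node:node1:cannot reach the master IP"
      # without:             "ERROR: node node1: cannot reach the master IP"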

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
1875 def _VerifyNodeOS(self, ninfo, nimg, base):
1876 """Verifies the node OS list.
1878 @type ninfo: L{objects.Node}
1879 @param ninfo: the node to check
1880 @param nimg: the node image object
1881 @param base: the 'template' node we match against (e.g. from the master)
1885 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1889 for os_name, os_data in nimg.oslist.items():
1890 assert os_data, "Empty OS status for OS %s?!" % os_name
1891 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1892 _ErrorIf(not f_status, self.ENODEOS, node,
1893 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1894 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1895 "OS '%s' has multiple entries (first one shadows the rest): %s",
1896 os_name, utils.CommaJoin([v[0] for v in os_data]))
1897 # this will catched in backend too
1898 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1899 and not f_var, self.ENODEOS, node,
1900 "OS %s with API at least %d does not declare any variant",
1901 os_name, constants.OS_API_V15)
1902 # comparisons with the 'base' image
1903 test = os_name not in base.oslist
1904 _ErrorIf(test, self.ENODEOS, node,
1905 "Extra OS %s not present on reference node (%s)",
1909 assert base.oslist[os_name], "Base node has empty OS status?"
1910 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1911 if not b_status:
1912 # base OS is invalid, skipping
1913 continue
1914 for kind, a, b in [("API version", f_api, b_api),
1915 ("variants list", f_var, b_var),
1916 ("parameters", f_param, b_param)]:
1917 _ErrorIf(a != b, self.ENODEOS, node,
1918 "OS %s %s differs from reference node %s: %s vs. %s",
1919 kind, os_name, base.name,
1920 utils.CommaJoin(a), utils.CommaJoin(b))
1922 # check any missing OSes
1923 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1924 _ErrorIf(missing, self.ENODEOS, node,
1925 "OSes present on reference node %s but missing on this node: %s",
1926 base.name, utils.CommaJoin(missing))
1928 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1929 """Verifies and updates the node volume data.
1931 This function will update a L{NodeImage}'s internal structures
1932 with data from the remote call.
1934 @type ninfo: L{objects.Node}
1935 @param ninfo: the node to check
1936 @param nresult: the remote results for the node
1937 @param nimg: the node image object
1938 @param vg_name: the configured VG name
1941 node = ninfo.name
1942 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1944 nimg.lvm_fail = True
1945 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1946 if vg_name is None:
1947 pass
1948 elif isinstance(lvdata, basestring):
1949 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1950 utils.SafeEncode(lvdata))
1951 elif not isinstance(lvdata, dict):
1952 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1953 else:
1954 nimg.volumes = lvdata
1955 nimg.lvm_fail = False
1957 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1958 """Verifies and updates the node instance list.
1960 If the listing was successful, then updates this node's instance
1961 list. Otherwise, it marks the RPC call as failed for the instance
1962 list key.
1964 @type ninfo: L{objects.Node}
1965 @param ninfo: the node to check
1966 @param nresult: the remote results for the node
1967 @param nimg: the node image object
1970 idata = nresult.get(constants.NV_INSTANCELIST, None)
1971 test = not isinstance(idata, list)
1972 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1973 " (instancelist): %s", utils.SafeEncode(str(idata)))
1974 if test:
1975 nimg.hyp_fail = True
1976 else:
1977 nimg.instances = idata
1979 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1980 """Verifies and computes a node information map
1982 @type ninfo: L{objects.Node}
1983 @param ninfo: the node to check
1984 @param nresult: the remote results for the node
1985 @param nimg: the node image object
1986 @param vg_name: the configured VG name
1989 node = ninfo.name
1990 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1992 # try to read free memory (from the hypervisor)
1993 hv_info = nresult.get(constants.NV_HVINFO, None)
1994 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1995 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1996 if not test:
1997 try:
1998 nimg.mfree = int(hv_info["memory_free"])
1999 except (ValueError, TypeError):
2000 _ErrorIf(True, self.ENODERPC, node,
2001 "node returned invalid nodeinfo, check hypervisor")
2003 # FIXME: devise a free space model for file based instances as well
2004 if vg_name is not None:
2005 test = (constants.NV_VGLIST not in nresult or
2006 vg_name not in nresult[constants.NV_VGLIST])
2007 _ErrorIf(test, self.ENODELVM, node,
2008 "node didn't return data for the volume group '%s'"
2009 " - it is either missing or broken", vg_name)
2010 if not test:
2011 try:
2012 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2013 except (ValueError, TypeError):
2014 _ErrorIf(True, self.ENODERPC, node,
2015 "node returned invalid LVM info, check LVM status")
2017 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2018 """Gets per-disk status information for all instances.
2020 @type nodelist: list of strings
2021 @param nodelist: Node names
2022 @type node_image: dict of (name, L{objects.Node})
2023 @param node_image: Node objects
2024 @type instanceinfo: dict of (name, L{objects.Instance})
2025 @param instanceinfo: Instance objects
2028 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2030 node_disks = {}
2031 node_disks_devonly = {}
2033 for nname in nodelist:
2034 disks = [(inst, disk)
2035 for instlist in [node_image[nname].pinst,
2036 node_image[nname].sinst]
2037 for inst in instlist
2038 for disk in instanceinfo[inst].disks]
2040 if not disks:
2041 # No need to collect data
2042 continue
2044 node_disks[nname] = disks
2046 # Creating copies as SetDiskID below will modify the objects and that can
2047 # lead to incorrect data returned from nodes
2048 devonly = [dev.Copy() for (_, dev) in disks]
2050 for dev in devonly:
2051 self.cfg.SetDiskID(dev, nname)
2053 node_disks_devonly[nname] = devonly
2055 assert len(node_disks) == len(node_disks_devonly)
2057 # Collect data from all nodes with disks
2058 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2059 node_disks_devonly)
2061 assert len(result) == len(node_disks)
2063 instdisk = {}
2065 for (nname, nres) in result.items():
2066 if nres.offline:
2067 # Ignore offline node
2068 continue
2070 disks = node_disks[nname]
2072 msg = nres.fail_msg
2073 _ErrorIf(msg, self.ENODERPC, nname,
2074 "while getting disk information: %s", nres.fail_msg)
2075 if msg:
2076 # No data from this node
2077 data = len(disks) * [None]
2078 else:
2079 data = nres.payload
2081 for ((inst, _), status) in zip(disks, data):
2082 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2084 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2085 len(nnames) <= len(instanceinfo[inst].all_nodes)
2086 for inst, nnames in instdisk.items()
2087 for nname, statuses in nnames.items())
2089 return instdisk
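# Illustrative sketch (not part of the original module): instdisk maps
# instance -> node -> list of per-disk status results (None entries when the
# node returned no data), e.g. for a hypothetical two-disk DRBD instance
# "inst1" living on "node1"/"node2":
#
#   instdisk = {"inst1": {"node1": [status_disk0, status_disk1],
#                         "node2": [status_disk0, status_disk1]}}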
2091 def BuildHooksEnv(self):
2092 """Build hooks env.
2094 Cluster-Verify hooks run in the post phase only; when they fail, their
2095 output is logged in the verify output and the verification fails.
2098 all_nodes = self.cfg.GetNodeList()
2100 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2102 for node in self.cfg.GetAllNodesInfo().values():
2103 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2105 return env, [], all_nodes
2107 def Exec(self, feedback_fn):
2108 """Verify integrity of cluster, performing various test on nodes.
2111 self.bad = False
2112 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2113 verbose = self.op.verbose
2114 self._feedback_fn = feedback_fn
2115 feedback_fn("* Verifying global settings")
2116 for msg in self.cfg.VerifyConfig():
2117 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2119 # Check the cluster certificates
2120 for cert_filename in constants.ALL_CERT_FILES:
2121 (errcode, msg) = _VerifyCertificate(cert_filename)
2122 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2124 vg_name = self.cfg.GetVGName()
2125 drbd_helper = self.cfg.GetDRBDHelper()
2126 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2127 cluster = self.cfg.GetClusterInfo()
2128 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2129 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2130 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2131 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2132 for iname in instancelist)
2133 i_non_redundant = [] # Non redundant instances
2134 i_non_a_balanced = [] # Non auto-balanced instances
2135 n_offline = 0 # Count of offline nodes
2136 n_drained = 0 # Count of nodes being drained
2137 node_vol_should = {}
2139 # FIXME: verify OS list
2140 # do local checksums
2141 master_files = [constants.CLUSTER_CONF_FILE]
2142 master_node = self.master_node = self.cfg.GetMasterNode()
2143 master_ip = self.cfg.GetMasterIP()
2145 file_names = ssconf.SimpleStore().GetFileList()
2146 file_names.extend(constants.ALL_CERT_FILES)
2147 file_names.extend(master_files)
2148 if cluster.modify_etc_hosts:
2149 file_names.append(constants.ETC_HOSTS)
2151 local_checksums = utils.FingerprintFiles(file_names)
2153 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2154 node_verify_param = {
2155 constants.NV_FILELIST: file_names,
2156 constants.NV_NODELIST: [node.name for node in nodeinfo
2157 if not node.offline],
2158 constants.NV_HYPERVISOR: hypervisors,
2159 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2160 node.secondary_ip) for node in nodeinfo
2161 if not node.offline],
2162 constants.NV_INSTANCELIST: hypervisors,
2163 constants.NV_VERSION: None,
2164 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2165 constants.NV_NODESETUP: None,
2166 constants.NV_TIME: None,
2167 constants.NV_MASTERIP: (master_node, master_ip),
2168 constants.NV_OSLIST: None,
2169 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2170 }
2172 if vg_name is not None:
2173 node_verify_param[constants.NV_VGLIST] = None
2174 node_verify_param[constants.NV_LVLIST] = vg_name
2175 node_verify_param[constants.NV_PVLIST] = [vg_name]
2176 node_verify_param[constants.NV_DRBDLIST] = None
2179 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2181 # Build our expected cluster state
2182 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2183 name=node.name,
2184 vm_capable=node.vm_capable))
2185 for node in nodeinfo)
2187 for instance in instancelist:
2188 inst_config = instanceinfo[instance]
2190 for nname in inst_config.all_nodes:
2191 if nname not in node_image:
2192 # ghost node
2193 gnode = self.NodeImage(name=nname)
2194 gnode.ghost = True
2195 node_image[nname] = gnode
2197 inst_config.MapLVsByNode(node_vol_should)
2199 pnode = inst_config.primary_node
2200 node_image[pnode].pinst.append(instance)
2202 for snode in inst_config.secondary_nodes:
2203 nimg = node_image[snode]
2204 nimg.sinst.append(instance)
2205 if pnode not in nimg.sbp:
2206 nimg.sbp[pnode] = []
2207 nimg.sbp[pnode].append(instance)
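# Illustrative sketch (not part of the original module): for a hypothetical
# DRBD instance "inst1" with primary "node1" and secondary "node2", the node
# images now satisfy:
#
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp == {"node1": ["inst1"]}  # secondaries by primary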
2209 # At this point, we have the in-memory data structures complete,
2210 # except for the runtime information, which we'll gather next
2212 # Due to the way our RPC system works, exact response times cannot be
2213 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2214 # time before and after executing the request, we can at least have a time
2215 # window.
2216 nvinfo_starttime = time.time()
2217 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2218 self.cfg.GetClusterName())
2219 nvinfo_endtime = time.time()
2221 all_drbd_map = self.cfg.ComputeDRBDMap()
2223 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2224 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2226 feedback_fn("* Verifying node status")
2230 for node_i in nodeinfo:
2231 node = node_i.name
2232 nimg = node_image[node]
2236 feedback_fn("* Skipping offline node %s" % (node,))
2240 if node == master_node:
2241 ntype = "master"
2242 elif node_i.master_candidate:
2243 ntype = "master candidate"
2244 elif node_i.drained:
2245 ntype = "drained"
2246 n_drained += 1
2247 else:
2248 ntype = "regular"
2249 if verbose:
2250 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2252 msg = all_nvinfo[node].fail_msg
2253 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2254 if msg:
2255 nimg.rpc_fail = True
2256 continue
2258 nresult = all_nvinfo[node].payload
2260 nimg.call_ok = self._VerifyNode(node_i, nresult)
2261 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2262 self._VerifyNodeNetwork(node_i, nresult)
2263 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2264 master_files)
2266 if nimg.vm_capable:
2267 self._VerifyNodeLVM(node_i, nresult, vg_name)
2268 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2269 all_drbd_map)
2271 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2272 self._UpdateNodeInstances(node_i, nresult, nimg)
2273 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2274 self._UpdateNodeOS(node_i, nresult, nimg)
2275 if not nimg.os_fail:
2276 if refos_img is None:
2277 refos_img = nimg
2278 self._VerifyNodeOS(node_i, nimg, refos_img)
2280 feedback_fn("* Verifying instance status")
2281 for instance in instancelist:
2283 feedback_fn("* Verifying instance %s" % instance)
2284 inst_config = instanceinfo[instance]
2285 self._VerifyInstance(instance, inst_config, node_image,
2286 instdisk[instance])
2287 inst_nodes_offline = []
2289 pnode = inst_config.primary_node
2290 pnode_img = node_image[pnode]
2291 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2292 self.ENODERPC, pnode, "instance %s, connection to"
2293 " primary node failed", instance)
2295 if pnode_img.offline:
2296 inst_nodes_offline.append(pnode)
2298 # If the instance is non-redundant we cannot survive losing its primary
2299 # node, so we are not N+1 compliant. On the other hand we have no disk
2300 # templates with more than one secondary so that situation is not well
2301 # supported either.
2302 # FIXME: does not support file-backed instances
2303 if not inst_config.secondary_nodes:
2304 i_non_redundant.append(instance)
2305 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2306 instance, "instance has multiple secondary nodes: %s",
2307 utils.CommaJoin(inst_config.secondary_nodes),
2308 code=self.ETYPE_WARNING)
2310 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2311 i_non_a_balanced.append(instance)
2313 for snode in inst_config.secondary_nodes:
2314 s_img = node_image[snode]
2315 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2316 "instance %s, connection to secondary node failed", instance)
2318 if s_img.offline:
2319 inst_nodes_offline.append(snode)
2321 # warn that the instance lives on offline nodes
2322 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2323 "instance lives on offline node(s) %s",
2324 utils.CommaJoin(inst_nodes_offline))
2325 # ... or ghost/non-vm_capable nodes
2326 for node in inst_config.all_nodes:
2327 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2328 "instance lives on ghost node %s", node)
2329 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2330 instance, "instance lives on non-vm_capable node %s", node)
2332 feedback_fn("* Verifying orphan volumes")
2333 reserved = utils.FieldSet(*cluster.reserved_lvs)
2334 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2336 feedback_fn("* Verifying orphan instances")
2337 self._VerifyOrphanInstances(instancelist, node_image)
2339 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2340 feedback_fn("* Verifying N+1 Memory redundancy")
2341 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2343 feedback_fn("* Other Notes")
2345 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2346 % len(i_non_redundant))
2348 if i_non_a_balanced:
2349 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2350 % len(i_non_a_balanced))
2353 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2356 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2360 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2361 """Analyze the post-hooks' result
2363 This method analyses the hook result, handles it, and sends some
2364 nicely-formatted feedback back to the user.
2366 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2367 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2368 @param hooks_results: the results of the multi-node hooks rpc call
2369 @param feedback_fn: function used to send feedback back to the caller
2370 @param lu_result: previous Exec result
2371 @return: the new Exec result, based on the previous result
2375 # We only really run POST phase hooks, and are only interested in
2376 # their results.
2377 if phase == constants.HOOKS_PHASE_POST:
2378 # Used to change hooks' output to proper indentation
2379 feedback_fn("* Hooks Results")
2380 assert hooks_results, "invalid result from hooks"
2382 for node_name in hooks_results:
2383 res = hooks_results[node_name]
2384 msg = res.fail_msg
2385 test = msg and not res.offline
2386 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2387 "Communication failure in hooks execution: %s", msg)
2388 if res.offline or msg:
2389 # No need to investigate payload if node is offline or gave an error.
2390 # manually override lu_result here, as _ErrorIf only
2391 # overrides self.bad
2392 lu_result = 1
2393 continue
2394 for script, hkr, output in res.payload:
2395 test = hkr == constants.HKR_FAIL
2396 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2397 "Script %s failed, output:", script)
2398 if test:
2399 output = self._HOOKS_INDENT_RE.sub(' ', output)
2400 feedback_fn("%s" % output)
2401 lu_result = 0
2403 return lu_result
2406 class LUVerifyDisks(NoHooksLU):
2407 """Verifies the cluster disks status.
2412 def ExpandNames(self):
2413 self.needed_locks = {
2414 locking.LEVEL_NODE: locking.ALL_SET,
2415 locking.LEVEL_INSTANCE: locking.ALL_SET,
2416 }
2417 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2419 def Exec(self, feedback_fn):
2420 """Verify integrity of cluster disks.
2422 @rtype: tuple of three items
2423 @return: a tuple of (dict of node-to-node_error, list of instances
2424 which need activate-disks, dict of instance: (node, volume) for
2425 missing volumes)
2428 result = res_nodes, res_instances, res_missing = {}, [], {}
2430 nodes = utils.NiceSort(self.cfg.GetNodeList())
2431 instances = [self.cfg.GetInstanceInfo(name)
2432 for name in self.cfg.GetInstanceList()]
2434 nv_dict = {}
2435 for inst in instances:
2436 inst_lvs = {}
2437 if (not inst.admin_up or
2438 inst.disk_template not in constants.DTS_NET_MIRROR):
2439 continue
2440 inst.MapLVsByNode(inst_lvs)
2441 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2442 for node, vol_list in inst_lvs.iteritems():
2443 for vol in vol_list:
2444 nv_dict[(node, vol)] = inst
2449 vg_names = self.rpc.call_vg_list(nodes)
2450 vg_names.Raise("Cannot get list of VGs")
2452 for node in nodes:
2454 node_res = self.rpc.call_lv_list([node],
2455 vg_names[node].payload.keys())[node]
2456 if node_res.offline:
2457 continue
2458 msg = node_res.fail_msg
2459 if msg:
2460 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2461 res_nodes[node] = msg
2462 continue
2464 lvs = node_res.payload
2465 for lv_name, (_, _, lv_online) in lvs.items():
2466 inst = nv_dict.pop((node, lv_name), None)
2467 if (not lv_online and inst is not None
2468 and inst.name not in res_instances):
2469 res_instances.append(inst.name)
2471 # any leftover items in nv_dict are missing LVs, let's arrange the
2473 for key, inst in nv_dict.iteritems():
2474 if inst.name not in res_missing:
2475 res_missing[inst.name] = []
2476 res_missing[inst.name].append(key)
2478 return result
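# Illustrative sketch (not part of the original module): a possible return
# value, for hypothetical nodes/instances where "node2" failed the LV
# listing, "inst2" has offline LVs and "inst1" has a volume missing on
# "node1":
#
#   ({"node2": "error message"},             # node -> enumeration error
#    ["inst2"],                              # instances needing activate-disks
#    {"inst1": [("node1", "xenvg/disk0")]})  # instance -> missing (node, LV)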
2481 class LURepairDiskSizes(NoHooksLU):
2482 """Verifies the cluster disks sizes.
2485 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2488 def ExpandNames(self):
2489 if self.op.instances:
2490 self.wanted_names = []
2491 for name in self.op.instances:
2492 full_name = _ExpandInstanceName(self.cfg, name)
2493 self.wanted_names.append(full_name)
2494 self.needed_locks = {
2495 locking.LEVEL_NODE: [],
2496 locking.LEVEL_INSTANCE: self.wanted_names,
2497 }
2498 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2499 else:
2500 self.wanted_names = None
2501 self.needed_locks = {
2502 locking.LEVEL_NODE: locking.ALL_SET,
2503 locking.LEVEL_INSTANCE: locking.ALL_SET,
2504 }
2505 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2507 def DeclareLocks(self, level):
2508 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2509 self._LockInstancesNodes(primary_only=True)
2511 def CheckPrereq(self):
2512 """Check prerequisites.
2514 This only checks the optional instance list against the existing names.
2517 if self.wanted_names is None:
2518 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2520 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2521 in self.wanted_names]
2523 def _EnsureChildSizes(self, disk):
2524 """Ensure children of the disk have the needed disk size.
2526 This is valid mainly for DRBD8 and fixes an issue where the
2527 children have smaller disk size.
2529 @param disk: an L{ganeti.objects.Disk} object
2532 if disk.dev_type == constants.LD_DRBD8:
2533 assert disk.children, "Empty children for DRBD8?"
2534 fchild = disk.children[0]
2535 mismatch = fchild.size < disk.size
2536 if mismatch:
2537 self.LogInfo("Child disk has size %d, parent %d, fixing",
2538 fchild.size, disk.size)
2539 fchild.size = disk.size
2541 # and we recurse on this child only, not on the metadev
2542 return self._EnsureChildSizes(fchild) or mismatch
2543 else:
2544 return False
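# Illustrative sketch (not part of the original module): for a hypothetical
# DRBD8 disk of size 1024 MiB whose data child reports only 1000 MiB, the
# method above grows the child and reports the mismatch:
#
#   disk = objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                       children=[data_child, meta_child])  # data_child.size == 1000
#   self._EnsureChildSizes(disk)  # returns True; data_child.size is now 1024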
2546 def Exec(self, feedback_fn):
2547 """Verify the size of cluster disks.
2550 # TODO: check child disks too
2551 # TODO: check differences in size between primary/secondary nodes
2552 per_node_disks = {}
2553 for instance in self.wanted_instances:
2554 pnode = instance.primary_node
2555 if pnode not in per_node_disks:
2556 per_node_disks[pnode] = []
2557 for idx, disk in enumerate(instance.disks):
2558 per_node_disks[pnode].append((instance, idx, disk))
2560 changed = []
2561 for node, dskl in per_node_disks.items():
2562 newl = [v[2].Copy() for v in dskl]
2563 for dsk in newl:
2564 self.cfg.SetDiskID(dsk, node)
2565 result = self.rpc.call_blockdev_getsizes(node, newl)
2567 self.LogWarning("Failure in blockdev_getsizes call to node"
2568 " %s, ignoring", node)
2570 if len(result.data) != len(dskl):
2571 self.LogWarning("Invalid result from node %s, ignoring node results",
2574 for ((instance, idx, disk), size) in zip(dskl, result.data):
2576 self.LogWarning("Disk %d of instance %s did not return size"
2577 " information, ignoring", idx, instance.name)
2578 continue
2579 if not isinstance(size, (int, long)):
2580 self.LogWarning("Disk %d of instance %s did not return valid"
2581 " size information, ignoring", idx, instance.name)
2582 continue
2583 size = size >> 20
2584 if size != disk.size:
2585 self.LogInfo("Disk %d of instance %s has mismatched size,"
2586 " correcting: recorded %d, actual %d", idx,
2587 instance.name, disk.size, size)
2588 disk.size = size
2589 self.cfg.Update(instance, feedback_fn)
2590 changed.append((instance.name, idx, size))
2591 if self._EnsureChildSizes(disk):
2592 self.cfg.Update(instance, feedback_fn)
2593 changed.append((instance.name, idx, disk.size))
2595 return changed
2597 class LURenameCluster(LogicalUnit):
2598 """Rename the cluster.
2601 HPATH = "cluster-rename"
2602 HTYPE = constants.HTYPE_CLUSTER
2603 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2605 def BuildHooksEnv(self):
2610 "OP_TARGET": self.cfg.GetClusterName(),
2611 "NEW_NAME": self.op.name,
2613 mn = self.cfg.GetMasterNode()
2614 all_nodes = self.cfg.GetNodeList()
2615 return env, [mn], all_nodes
2617 def CheckPrereq(self):
2618 """Verify that the passed name is a valid one.
2621 hostname = netutils.GetHostname(name=self.op.name,
2622 family=self.cfg.GetPrimaryIPFamily())
2624 new_name = hostname.name
2625 self.ip = new_ip = hostname.ip
2626 old_name = self.cfg.GetClusterName()
2627 old_ip = self.cfg.GetMasterIP()
2628 if new_name == old_name and new_ip == old_ip:
2629 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2630 " cluster has changed",
2632 if new_ip != old_ip:
2633 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2634 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2635 " reachable on the network" %
2636 new_ip, errors.ECODE_NOTUNIQUE)
2638 self.op.name = new_name
2640 def Exec(self, feedback_fn):
2641 """Rename the cluster.
2644 clustername = self.op.name
2645 ip = self.ip
2647 # shutdown the master IP
2648 master = self.cfg.GetMasterNode()
2649 result = self.rpc.call_node_stop_master(master, False)
2650 result.Raise("Could not disable the master role")
2652 try:
2653 cluster = self.cfg.GetClusterInfo()
2654 cluster.cluster_name = clustername
2655 cluster.master_ip = ip
2656 self.cfg.Update(cluster, feedback_fn)
2658 # update the known hosts file
2659 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2660 node_list = self.cfg.GetOnlineNodeList()
2661 try:
2662 node_list.remove(master)
2663 except ValueError:
2664 pass
2665 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2666 finally:
2667 result = self.rpc.call_node_start_master(master, False, False)
2668 msg = result.fail_msg
2669 if msg:
2670 self.LogWarning("Could not re-enable the master role on"
2671 " the master, please restart manually: %s", msg)
2673 return clustername
2676 class LUSetClusterParams(LogicalUnit):
2677 """Change the parameters of the cluster.
2680 HPATH = "cluster-modify"
2681 HTYPE = constants.HTYPE_CLUSTER
2683 ("vg_name", None, ht.TMaybeString),
2684 ("enabled_hypervisors", None,
2685 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2687 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2689 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2690 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2692 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2694 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2695 ("uid_pool", None, ht.NoType),
2696 ("add_uids", None, ht.NoType),
2697 ("remove_uids", None, ht.NoType),
2698 ("maintain_node_health", None, ht.TMaybeBool),
2699 ("prealloc_wipe_disks", None, ht.TMaybeBool),
2700 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2701 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2702 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2703 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2704 ("master_netdev", None, ht.TOr(ht.TString, ht.TNone)),
2705 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2706 ("hidden_os", None, ht.TOr(ht.TListOf(\
2709 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2711 ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2714 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
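# Illustrative note (not part of the original module): every triple above is
# (attribute name, default value, type check); ht.NoDefault marks an
# attribute as mandatory. A hypothetical optional string-list parameter
# would be declared as:
#
#   ("example_param", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone))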
2719 def CheckArguments(self):
2723 if self.op.uid_pool:
2724 uidpool.CheckUidPool(self.op.uid_pool)
2726 if self.op.add_uids:
2727 uidpool.CheckUidPool(self.op.add_uids)
2729 if self.op.remove_uids:
2730 uidpool.CheckUidPool(self.op.remove_uids)
2732 def ExpandNames(self):
2733 # FIXME: in the future maybe other cluster params won't require checking on
2734 # all nodes to be modified.
2735 self.needed_locks = {
2736 locking.LEVEL_NODE: locking.ALL_SET,
2737 }
2738 self.share_locks[locking.LEVEL_NODE] = 1
2740 def BuildHooksEnv(self):
2745 "OP_TARGET": self.cfg.GetClusterName(),
2746 "NEW_VG_NAME": self.op.vg_name,
2748 mn = self.cfg.GetMasterNode()
2749 return env, [mn], [mn]
2751 def CheckPrereq(self):
2752 """Check prerequisites.
2754 This checks whether the given params don't conflict and
2755 if the given volume group is valid.
2758 if self.op.vg_name is not None and not self.op.vg_name:
2759 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2760 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2761 " instances exist", errors.ECODE_INVAL)
2763 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2764 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2765 raise errors.OpPrereqError("Cannot disable drbd helper while"
2766 " drbd-based instances exist",
2769 node_list = self.acquired_locks[locking.LEVEL_NODE]
2771 # if vg_name not None, checks given volume group on all nodes
2772 if self.op.vg_name:
2773 vglist = self.rpc.call_vg_list(node_list)
2774 for node in node_list:
2775 msg = vglist[node].fail_msg
2776 if msg:
2777 # ignoring down node
2778 self.LogWarning("Error while gathering data on node %s"
2779 " (ignoring node): %s", node, msg)
2781 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2782 self.op.vg_name,
2783 constants.MIN_VG_SIZE)
2785 raise errors.OpPrereqError("Error on node '%s': %s" %
2786 (node, vgstatus), errors.ECODE_ENVIRON)
2788 if self.op.drbd_helper:
2789 # checks given drbd helper on all nodes
2790 helpers = self.rpc.call_drbd_helper(node_list)
2791 for node in node_list:
2792 ninfo = self.cfg.GetNodeInfo(node)
2794 self.LogInfo("Not checking drbd helper on offline node %s", node)
2796 msg = helpers[node].fail_msg
2798 raise errors.OpPrereqError("Error checking drbd helper on node"
2799 " '%s': %s" % (node, msg),
2800 errors.ECODE_ENVIRON)
2801 node_helper = helpers[node].payload
2802 if node_helper != self.op.drbd_helper:
2803 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2804 (node, node_helper), errors.ECODE_ENVIRON)
2806 self.cluster = cluster = self.cfg.GetClusterInfo()
2807 # validate params changes
2808 if self.op.beparams:
2809 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2810 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2812 if self.op.ndparams:
2813 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2814 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2816 if self.op.nicparams:
2817 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2818 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2819 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2820 nic_errors = []
2822 # check all instances for consistency
2823 for instance in self.cfg.GetAllInstancesInfo().values():
2824 for nic_idx, nic in enumerate(instance.nics):
2825 params_copy = copy.deepcopy(nic.nicparams)
2826 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2828 # check parameter syntax
2829 try:
2830 objects.NIC.CheckParameterSyntax(params_filled)
2831 except errors.ConfigurationError, err:
2832 nic_errors.append("Instance %s, nic/%d: %s" %
2833 (instance.name, nic_idx, err))
2835 # if we're moving instances to routed, check that they have an ip
2836 target_mode = params_filled[constants.NIC_MODE]
2837 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2838 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2839 (instance.name, nic_idx))
2841 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2842 "\n".join(nic_errors))
2844 # hypervisor list/parameters
2845 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2846 if self.op.hvparams:
2847 for hv_name, hv_dict in self.op.hvparams.items():
2848 if hv_name not in self.new_hvparams:
2849 self.new_hvparams[hv_name] = hv_dict
2850 else:
2851 self.new_hvparams[hv_name].update(hv_dict)
2853 # os hypervisor parameters
2854 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2855 if self.op.os_hvp:
2856 for os_name, hvs in self.op.os_hvp.items():
2857 if os_name not in self.new_os_hvp:
2858 self.new_os_hvp[os_name] = hvs
2859 else:
2860 for hv_name, hv_dict in hvs.items():
2861 if hv_name not in self.new_os_hvp[os_name]:
2862 self.new_os_hvp[os_name][hv_name] = hv_dict
2863 else:
2864 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2867 self.new_osp = objects.FillDict(cluster.osparams, {})
2868 if self.op.osparams:
2869 for os_name, osp in self.op.osparams.items():
2870 if os_name not in self.new_osp:
2871 self.new_osp[os_name] = {}
2873 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2874 use_none=True)
2876 if not self.new_osp[os_name]:
2877 # we removed all parameters
2878 del self.new_osp[os_name]
2879 else:
2880 # check the parameter validity (remote check)
2881 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2882 os_name, self.new_osp[os_name])
2884 # changes to the hypervisor list
2885 if self.op.enabled_hypervisors is not None:
2886 self.hv_list = self.op.enabled_hypervisors
2887 for hv in self.hv_list:
2888 # if the hypervisor doesn't already exist in the cluster
2889 # hvparams, we initialize it to empty, and then (in both
2890 # cases) we make sure to fill the defaults, as we might not
2891 # have a complete defaults list if the hypervisor wasn't
2893 if hv not in new_hvp:
2894 new_hvp[hv] = {}
2895 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2896 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2897 else:
2898 self.hv_list = cluster.enabled_hypervisors
2900 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2901 # either the enabled list has changed, or the parameters have, validate
2902 for hv_name, hv_params in self.new_hvparams.items():
2903 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2904 (self.op.enabled_hypervisors and
2905 hv_name in self.op.enabled_hypervisors)):
2906 # either this is a new hypervisor, or its parameters have changed
2907 hv_class = hypervisor.GetHypervisor(hv_name)
2908 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2909 hv_class.CheckParameterSyntax(hv_params)
2910 _CheckHVParams(self, node_list, hv_name, hv_params)
2912 if self.op.os_hvp:
2913 # no need to check any newly-enabled hypervisors, since the
2914 # defaults have already been checked in the above code-block
2915 for os_name, os_hvp in self.new_os_hvp.items():
2916 for hv_name, hv_params in os_hvp.items():
2917 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2918 # we need to fill in the new os_hvp on top of the actual hv_p
2919 cluster_defaults = self.new_hvparams.get(hv_name, {})
2920 new_osp = objects.FillDict(cluster_defaults, hv_params)
2921 hv_class = hypervisor.GetHypervisor(hv_name)
2922 hv_class.CheckParameterSyntax(new_osp)
2923 _CheckHVParams(self, node_list, hv_name, new_osp)
2925 if self.op.default_iallocator:
2926 alloc_script = utils.FindFile(self.op.default_iallocator,
2927 constants.IALLOCATOR_SEARCH_PATH,
2928 os.path.isfile)
2929 if alloc_script is None:
2930 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2931 " specified" % self.op.default_iallocator,
2934 def Exec(self, feedback_fn):
2935 """Change the parameters of the cluster.
2938 if self.op.vg_name is not None:
2939 new_volume = self.op.vg_name
2940 if not new_volume:
2941 new_volume = None
2942 if new_volume != self.cfg.GetVGName():
2943 self.cfg.SetVGName(new_volume)
2944 else:
2945 feedback_fn("Cluster LVM configuration already in desired"
2946 " state, not changing")
2947 if self.op.drbd_helper is not None:
2948 new_helper = self.op.drbd_helper
2949 if not new_helper:
2950 new_helper = None
2951 if new_helper != self.cfg.GetDRBDHelper():
2952 self.cfg.SetDRBDHelper(new_helper)
2953 else:
2954 feedback_fn("Cluster DRBD helper already in desired state,"
2955 " not changing")
2956 if self.op.hvparams:
2957 self.cluster.hvparams = self.new_hvparams
2958 if self.op.os_hvp:
2959 self.cluster.os_hvp = self.new_os_hvp
2960 if self.op.enabled_hypervisors is not None:
2961 self.cluster.hvparams = self.new_hvparams
2962 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2963 if self.op.beparams:
2964 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2965 if self.op.nicparams:
2966 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2967 if self.op.osparams:
2968 self.cluster.osparams = self.new_osp
2969 if self.op.ndparams:
2970 self.cluster.ndparams = self.new_ndparams
2972 if self.op.candidate_pool_size is not None:
2973 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2974 # we need to update the pool size here, otherwise the save will fail
2975 _AdjustCandidatePool(self, [])
2977 if self.op.maintain_node_health is not None:
2978 self.cluster.maintain_node_health = self.op.maintain_node_health
2980 if self.op.prealloc_wipe_disks is not None:
2981 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2983 if self.op.add_uids is not None:
2984 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2986 if self.op.remove_uids is not None:
2987 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2989 if self.op.uid_pool is not None:
2990 self.cluster.uid_pool = self.op.uid_pool
2992 if self.op.default_iallocator is not None:
2993 self.cluster.default_iallocator = self.op.default_iallocator
2995 if self.op.reserved_lvs is not None:
2996 self.cluster.reserved_lvs = self.op.reserved_lvs
2998 def helper_os(aname, mods, desc):
2999 desc += " OS list"
3000 lst = getattr(self.cluster, aname)
3001 for key, val in mods:
3002 if key == constants.DDM_ADD:
3003 if val in lst:
3004 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3005 else:
3006 lst.append(val)
3007 elif key == constants.DDM_REMOVE:
3008 if val in lst:
3009 lst.remove(val)
3010 else:
3011 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3012 else:
3013 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3015 if self.op.hidden_os:
3016 helper_os("hidden_os", self.op.hidden_os, "hidden")
3018 if self.op.blacklisted_os:
3019 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3021 if self.op.master_netdev:
3022 master = self.cfg.GetMasterNode()
3023 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3024 self.cluster.master_netdev)
3025 result = self.rpc.call_node_stop_master(master, False)
3026 result.Raise("Could not disable the master ip")
3027 feedback_fn("Changing master_netdev from %s to %s" %
3028 (self.cluster.master_netdev, self.op.master_netdev))
3029 self.cluster.master_netdev = self.op.master_netdev
3031 self.cfg.Update(self.cluster, feedback_fn)
3033 if self.op.master_netdev:
3034 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3035 self.op.master_netdev)
3036 result = self.rpc.call_node_start_master(master, False, False)
3038 self.LogWarning("Could not re-enable the master ip on"
3039 " the master, please restart manually: %s",
3043 def _UploadHelper(lu, nodes, fname):
3044 """Helper for uploading a file and showing warnings.
3047 if os.path.exists(fname):
3048 result = lu.rpc.call_upload_file(nodes, fname)
3049 for to_node, to_result in result.items():
3050 msg = to_result.fail_msg
3052 msg = ("Copy of file %s to node %s failed: %s" %
3053 (fname, to_node, msg))
3054 lu.proc.LogWarning(msg)
3057 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3058 """Distribute additional files which are part of the cluster configuration.
3060 ConfigWriter takes care of distributing the config and ssconf files, but
3061 there are more files which should be distributed to all nodes. This function
3062 makes sure those are copied.
3064 @param lu: calling logical unit
3065 @param additional_nodes: list of nodes not in the config to distribute to
3066 @type additional_vm: boolean
3067 @param additional_vm: whether the additional nodes are vm-capable or not
3070 # 1. Gather target nodes
3071 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3072 dist_nodes = lu.cfg.GetOnlineNodeList()
3073 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3074 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3075 if additional_nodes is not None:
3076 dist_nodes.extend(additional_nodes)
3077 if additional_vm:
3078 vm_nodes.extend(additional_nodes)
3079 if myself.name in dist_nodes:
3080 dist_nodes.remove(myself.name)
3081 if myself.name in vm_nodes:
3082 vm_nodes.remove(myself.name)
3084 # 2. Gather files to distribute
3085 dist_files = set([constants.ETC_HOSTS,
3086 constants.SSH_KNOWN_HOSTS_FILE,
3087 constants.RAPI_CERT_FILE,
3088 constants.RAPI_USERS_FILE,
3089 constants.CONFD_HMAC_KEY,
3090 constants.CLUSTER_DOMAIN_SECRET_FILE,
3091 ])
3093 vm_files = set()
3094 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3095 for hv_name in enabled_hypervisors:
3096 hv_class = hypervisor.GetHypervisor(hv_name)
3097 vm_files.update(hv_class.GetAncillaryFiles())
3099 # 3. Perform the files upload
3100 for fname in dist_files:
3101 _UploadHelper(lu, dist_nodes, fname)
3102 for fname in vm_files:
3103 _UploadHelper(lu, vm_nodes, fname)
3106 class LURedistributeConfig(NoHooksLU):
3107 """Force the redistribution of cluster configuration.
3109 This is a very simple LU.
3114 def ExpandNames(self):
3115 self.needed_locks = {
3116 locking.LEVEL_NODE: locking.ALL_SET,
3117 }
3118 self.share_locks[locking.LEVEL_NODE] = 1
3120 def Exec(self, feedback_fn):
3121 """Redistribute the configuration.
3124 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3125 _RedistributeAncillaryFiles(self)
3128 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3129 """Sleep and poll for an instance's disk to sync.
3132 if not instance.disks or disks is not None and not disks:
3133 return True
3135 disks = _ExpandCheckDisks(instance, disks)
3138 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3140 node = instance.primary_node
3142 for dev in disks:
3143 lu.cfg.SetDiskID(dev, node)
3145 # TODO: Convert to utils.Retry
3147 retries = 0
3148 degr_retries = 10 # in seconds, as we sleep 1 second each time
3149 while True:
3150 max_time = 0
3151 done = True
3152 cumul_degraded = False
3153 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3154 msg = rstats.fail_msg
3156 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3159 raise errors.RemoteError("Can't contact node %s for mirror data,"
3160 " aborting." % node)
3163 rstats = rstats.payload
3164 retries = 0
3165 for i, mstat in enumerate(rstats):
3166 if mstat is None:
3167 lu.LogWarning("Can't compute data for node %s/%s",
3168 node, disks[i].iv_name)
3169 continue
3171 cumul_degraded = (cumul_degraded or
3172 (mstat.is_degraded and mstat.sync_percent is None))
3173 if mstat.sync_percent is not None:
3174 done = False
3175 if mstat.estimated_time is not None:
3176 rem_time = ("%s remaining (estimated)" %
3177 utils.FormatSeconds(mstat.estimated_time))
3178 max_time = mstat.estimated_time
3179 else:
3180 rem_time = "no time estimate"
3181 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3182 (disks[i].iv_name, mstat.sync_percent, rem_time))
3184 # if we're done but degraded, let's do a few small retries, to
3185 # make sure we see a stable and not transient situation; therefore
3186 # we force restart of the loop
3187 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3188 logging.info("Degraded disks found, %d retries left", degr_retries)
3196 time.sleep(min(60, max_time))
3199 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3200 return not cumul_degraded
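# Usage sketch (not part of the original module): callers typically wait for
# a full resync after creating or replacing disks, e.g.:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Some disks of the instance are degraded!")
#
# With oneshot=True the state is polled once instead of looping until the
# sync completes.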
3203 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3204 """Check that mirrors are not degraded.
3206 The ldisk parameter, if True, will change the test from the
3207 is_degraded attribute (which represents overall non-ok status for
3208 the device(s)) to the ldisk (representing the local storage status).
3211 lu.cfg.SetDiskID(dev, node)
3213 result = True
3215 if on_primary or dev.AssembleOnSecondary():
3216 rstats = lu.rpc.call_blockdev_find(node, dev)
3217 msg = rstats.fail_msg
3219 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3221 elif not rstats.payload:
3222 lu.LogWarning("Can't find disk on node %s", node)
3226 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3228 result = result and not rstats.payload.is_degraded
3230 if dev.children:
3231 for child in dev.children:
3232 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3234 return result
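# Usage sketch (not part of the original module): callers check each disk of
# an instance on one node, e.g. before a failover/migration; ldisk=True
# restricts the check to the local storage status:
#
#   for dev in instance.disks:
#     if not _CheckDiskConsistency(self, dev, node, False, ldisk=True):
#       raise errors.OpExecError("Disk %s is degraded" % dev.iv_name)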
3237 class LUOobCommand(NoHooksLU):
3238 """Logical unit for OOB handling.
3243 ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
3244 ("timeout", constants.OOB_TIMEOUT, ht.TInt),
3248 def CheckPrereq(self):
3249 """Check prerequisites.
3251 This checks:
3252 - the node exists in the configuration
3253 - OOB is supported
3255 Any errors are signaled by raising errors.OpPrereqError.
3258 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3259 node = self.cfg.GetNodeInfo(self.op.node_name)
3262 raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3264 self.oob_program = self.cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
3266 if not self.oob_program:
3267 raise errors.OpPrereqError("OOB is not supported for node %s" %
3272 def ExpandNames(self):
3273 """Gather locks we need.
3276 node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3277 self.needed_locks = {
3278 locking.LEVEL_NODE: [node_name],
3279 }
3281 def Exec(self, feedback_fn):
3282 """Execute OOB and return result if we expect any.
3285 master_node = self.cfg.GetMasterNode()
3286 node = self.node
3288 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3289 self.op.command, self.oob_program, self.op.node_name)
3290 result = self.rpc.call_run_oob(master_node, self.oob_program,
3291 self.op.command, self.op.node_name,
3292 self.op.timeout)
3294 result.Raise("An error occurred on execution of OOB helper")
3296 self._CheckPayload(result)
3298 if self.op.command == constants.OOB_HEALTH:
3299 # For health we should log important events
3300 for item, status in result.payload:
3301 if status in [constants.OOB_STATUS_WARNING,
3302 constants.OOB_STATUS_CRITICAL]:
3303 logging.warning("On node '%s' item '%s' has status '%s'",
3304 self.op.node_name, item, status)
3306 if self.op.command == constants.OOB_POWER_ON:
3307 node.powered = True
3308 elif self.op.command == constants.OOB_POWER_OFF:
3309 node.powered = False
3310 elif self.op.command == constants.OOB_POWER_STATUS:
3311 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3312 if powered != self.node.powered:
3313 logging.warning(("Recorded power state (%s) of node '%s' does not match"
3314 " actual power state (%s)"), node.powered,
3315 self.op.node_name, powered)
3317 self.cfg.Update(node, feedback_fn)
3319 return result.payload
3321 def _CheckPayload(self, result):
3322 """Checks if the payload is valid.
3324 @param result: RPC result
3325 @raises errors.OpExecError: If payload is not valid
3328 errs = []
3329 if self.op.command == constants.OOB_HEALTH:
3330 if not isinstance(result.payload, list):
3331 errs.append("command 'health' is expected to return a list but got %s" %
3332 type(result.payload))
3333 for item, status in result.payload:
3334 if status not in constants.OOB_STATUSES:
3335 errs.append("health item '%s' has invalid status '%s'" %
3338 if self.op.command == constants.OOB_POWER_STATUS:
3339 if not isinstance(result.payload, dict):
3340 errs.append("power-status is expected to return a dict but got %s" %
3341 type(result.payload))
3343 if self.op.command in [
3344 constants.OOB_POWER_ON,
3345 constants.OOB_POWER_OFF,
3346 constants.OOB_POWER_CYCLE,
3348 if result.payload is not None:
3349 errs.append("%s is expected to not return payload but got '%s'" %
3350 (self.op.command, result.payload))
3352 if errs:
3353 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3354 utils.CommaJoin(errs))
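# Illustrative summary (not part of the original module) of the payloads
# accepted by _CheckPayload above:
#
#   OOB_HEALTH             -> list of (item, status), status in OOB_STATUSES
#   OOB_POWER_STATUS       -> dict with the OOB_POWER_STATUS_POWERED key
#   OOB_POWER_ON/OFF/CYCLE -> no payload (None)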
3358 class LUDiagnoseOS(NoHooksLU):
3359 """Logical unit for OS diagnose/query.
3364 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3368 _BLK = "blacklisted"
3370 _FIELDS_STATIC = utils.FieldSet()
3371 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3372 "parameters", "api_versions", _HID, _BLK)
3374 def CheckArguments(self):
3376 raise errors.OpPrereqError("Selective OS query not supported",
3379 _CheckOutputFields(static=self._FIELDS_STATIC,
3380 dynamic=self._FIELDS_DYNAMIC,
3381 selected=self.op.output_fields)
3383 def ExpandNames(self):
3384 # Lock all nodes, in shared mode
3385 # Temporary removal of locks, should be reverted later
3386 # TODO: reintroduce locks when they are lighter-weight
3387 self.needed_locks = {}
3388 #self.share_locks[locking.LEVEL_NODE] = 1
3389 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3391 @staticmethod
3392 def _DiagnoseByOS(rlist):
3393 """Remaps a per-node return list into a per-os per-node dictionary
3395 @param rlist: a map with node names as keys and OS objects as values
3398 @return: a dictionary with osnames as keys and as value another
3399 map, with nodes as keys and tuples of (path, status, diagnose,
3400 variants, parameters, api_versions) as values, eg::
3402 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3403 (/srv/..., False, "invalid api")],
3404 "node2": [(/srv/..., True, "", [], [])]}
3408 all_os = {}
3409 # we build here the list of nodes that didn't fail the RPC (at RPC
3410 # level), so that nodes with a non-responding node daemon don't
3411 # make all OSes invalid
3412 good_nodes = [node_name for node_name in rlist
3413 if not rlist[node_name].fail_msg]
3414 for node_name, nr in rlist.items():
3415 if nr.fail_msg or not nr.payload:
3416 continue
3417 for (name, path, status, diagnose, variants,
3418 params, api_versions) in nr.payload:
3419 if name not in all_os:
3420 # build a list of nodes for this os containing empty lists
3421 # for each node in node_list
3422 all_os[name] = {}
3423 for nname in good_nodes:
3424 all_os[name][nname] = []
3425 # convert params from [name, help] to (name, help)
3426 params = [tuple(v) for v in params]
3427 all_os[name][node_name].append((path, status, diagnose,
3428 variants, params, api_versions))
3430 return all_os
3431 def Exec(self, feedback_fn):
3432 """Compute the list of OSes.
3435 valid_nodes = self.cfg.GetOnlineNodeList()
3436 node_data = self.rpc.call_os_diagnose(valid_nodes)
3437 pol = self._DiagnoseByOS(node_data)
3438 output = []
3439 cluster = self.cfg.GetClusterInfo()
3441 for os_name in utils.NiceSort(pol.keys()):
3442 os_data = pol[os_name]
3443 row = []
3444 valid = True
3445 (variants, params, api_versions) = null_state = (set(), set(), set())
3446 for idx, osl in enumerate(os_data.values()):
3447 valid = bool(valid and osl and osl[0][1])
3448 if not valid:
3449 (variants, params, api_versions) = null_state
3450 break
3451 node_variants, node_params, node_api = osl[0][3:6]
3452 if idx == 0: # first entry
3453 variants = set(node_variants)
3454 params = set(node_params)
3455 api_versions = set(node_api)
3456 else: # keep consistency
3457 variants.intersection_update(node_variants)
3458 params.intersection_update(node_params)
3459 api_versions.intersection_update(node_api)
3461 is_hid = os_name in cluster.hidden_os
3462 is_blk = os_name in cluster.blacklisted_os
3463 if ((self._HID not in self.op.output_fields and is_hid) or
3464 (self._BLK not in self.op.output_fields and is_blk) or
3465 (self._VLD not in self.op.output_fields and not valid)):
3466 continue
3468 for field in self.op.output_fields:
3469 if field == "name":
3470 val = os_name
3471 elif field == self._VLD:
3472 val = valid
3473 elif field == "node_status":
3474 # this is just a copy of the dict
3475 val = {}
3476 for node_name, nos_list in os_data.items():
3477 val[node_name] = nos_list
3478 elif field == "variants":
3479 val = utils.NiceSort(list(variants))
3480 elif field == "parameters":
3481 val = utils.NiceSort(list(params))
3482 elif field == "api_versions":
3483 val = list(api_versions)
3484 elif field == self._HID:
3485 val = is_hid
3486 elif field == self._BLK:
3487 val = is_blk
3488 else:
3489 raise errors.ParameterError(field)
3490 row.append(val)
3491 output.append(row)
3493 return output
3496 class LURemoveNode(LogicalUnit):
3497 """Logical unit for removing a node.
3500 HPATH = "node-remove"
3501 HTYPE = constants.HTYPE_NODE
3502 _OP_PARAMS = [
3503 _PNodeName,
3504 ]
3506 def BuildHooksEnv(self):
3509 This doesn't run on the target node in the pre phase as a failed
3510 node would then be impossible to remove.
3514 "OP_TARGET": self.op.node_name,
3515 "NODE_NAME": self.op.node_name,
3517 all_nodes = self.cfg.GetNodeList()
3518 try:
3519 all_nodes.remove(self.op.node_name)
3520 except ValueError:
3521 logging.warning("Node %s which is about to be removed not found"
3522 " in the all nodes list", self.op.node_name)
3523 return env, all_nodes, all_nodes
3525 def CheckPrereq(self):
3526 """Check prerequisites.
3529 - the node exists in the configuration
3530 - it does not have primary or secondary instances
3531 - it's not the master
3533 Any errors are signaled by raising errors.OpPrereqError.
3536 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3537 node = self.cfg.GetNodeInfo(self.op.node_name)
3538 assert node is not None
3540 instance_list = self.cfg.GetInstanceList()
3542 masternode = self.cfg.GetMasterNode()
3543 if node.name == masternode:
3544 raise errors.OpPrereqError("Node is the master node,"
3545 " you need to failover first.",
3548 for instance_name in instance_list:
3549 instance = self.cfg.GetInstanceInfo(instance_name)
3550 if node.name in instance.all_nodes:
3551 raise errors.OpPrereqError("Instance %s is still running on the node,"
3552 " please remove first." % instance_name,
3554 self.op.node_name = node.name
3555 self.node = node
3557 def Exec(self, feedback_fn):
3558 """Removes the node from the cluster.
3562 logging.info("Stopping the node daemon and removing configs from node %s",
3565 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3567 # Promote nodes to master candidate as needed
3568 _AdjustCandidatePool(self, exceptions=[node.name])
3569 self.context.RemoveNode(node.name)
3571 # Run post hooks on the node before it's removed
3572 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3573 try:
3574 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3575 except:
3576 # pylint: disable-msg=W0702
3577 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3579 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3580 msg = result.fail_msg
3582 self.LogWarning("Errors encountered on the remote node while leaving"
3583 " the cluster: %s", msg)
3585 # Remove node from our /etc/hosts
3586 if self.cfg.GetClusterInfo().modify_etc_hosts:
3587 master_node = self.cfg.GetMasterNode()
3588 result = self.rpc.call_etc_hosts_modify(master_node,
3589 constants.ETC_HOSTS_REMOVE,
3590 node.name, None)
3591 result.Raise("Can't update hosts file with new host data")
3592 _RedistributeAncillaryFiles(self)
3595 class _NodeQuery(_QueryBase):
3596 FIELDS = query.NODE_FIELDS
3598 def ExpandNames(self, lu):
3599 lu.needed_locks = {}
3600 lu.share_locks[locking.LEVEL_NODE] = 1
3602 if self.names:
3603 self.wanted = _GetWantedNodes(lu, self.names)
3604 else:
3605 self.wanted = locking.ALL_SET
3607 self.do_locking = (self.use_locking and
3608 query.NQ_LIVE in self.requested_data)
3610 if self.do_locking:
3611 # if we don't request only static fields, we need to lock the nodes
3612 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3614 def DeclareLocks(self, lu, level):
3615 pass
3617 def _GetQueryData(self, lu):
3618 """Computes the list of nodes and their attributes.
3621 all_info = lu.cfg.GetAllNodesInfo()
3623 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3625 # Gather data as requested
3626 if query.NQ_LIVE in self.requested_data:
3627 node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3628 lu.cfg.GetHypervisorType())
3629 live_data = dict((name, nresult.payload)
3630 for (name, nresult) in node_data.items()
3631 if not nresult.fail_msg and nresult.payload)
3632 else:
3633 live_data = None
3635 if query.NQ_INST in self.requested_data:
3636 node_to_primary = dict([(name, set()) for name in nodenames])
3637 node_to_secondary = dict([(name, set()) for name in nodenames])
3639 inst_data = lu.cfg.GetAllInstancesInfo()
3641 for inst in inst_data.values():
3642 if inst.primary_node in node_to_primary:
3643 node_to_primary[inst.primary_node].add(inst.name)
3644 for secnode in inst.secondary_nodes:
3645 if secnode in node_to_secondary:
3646 node_to_secondary[secnode].add(inst.name)
3648 node_to_primary = None
3649 node_to_secondary = None
3651 if query.NQ_GROUP in self.requested_data:
3652 groups = lu.cfg.GetAllNodeGroupsInfo()
3653 else:
3654 groups = {}
3656 return query.NodeQueryData([all_info[name] for name in nodenames],
3657 live_data, lu.cfg.GetMasterNode(),
3658 node_to_primary, node_to_secondary, groups)
3661 class LUQueryNodes(NoHooksLU):
3662 """Logical unit for querying nodes.
3665 # pylint: disable-msg=W0142
3668 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3669 ("use_locking", False, ht.TBool),
3673 def CheckArguments(self):
3674 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3675 self.op.use_locking)
3677 def ExpandNames(self):
3678 self.nq.ExpandNames(self)
3680 def Exec(self, feedback_fn):
3681 return self.nq.OldStyleQuery(self)
3684 class LUQueryNodeVolumes(NoHooksLU):
3685 """Logical unit for getting volumes on node(s).
3690 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3693 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3694 _FIELDS_STATIC = utils.FieldSet("node")
3696 def CheckArguments(self):
3697 _CheckOutputFields(static=self._FIELDS_STATIC,
3698 dynamic=self._FIELDS_DYNAMIC,
3699 selected=self.op.output_fields)
3701 def ExpandNames(self):
3702 self.needed_locks = {}
3703 self.share_locks[locking.LEVEL_NODE] = 1
3704 if not self.op.nodes:
3705 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3706 else:
3707 self.needed_locks[locking.LEVEL_NODE] = \
3708 _GetWantedNodes(self, self.op.nodes)
3710 def Exec(self, feedback_fn):
3711 """Computes the list of nodes and their attributes.
3714 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3715 volumes = self.rpc.call_node_volumes(nodenames)
3717 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3718 in self.cfg.GetInstanceList()]
3720 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3722 output = []
3723 for node in nodenames:
3724 nresult = volumes[node]
3725 if nresult.offline:
3726 continue
3727 msg = nresult.fail_msg
3728 if msg:
3729 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3730 continue
3732 node_vols = nresult.payload[:]
3733 node_vols.sort(key=lambda vol: vol['dev'])
3735 for vol in node_vols:
3736 node_output = []
3737 for field in self.op.output_fields:
3738 if field == "node":
3739 val = node
3740 elif field == "phys":
3741 val = vol['dev']
3742 elif field == "vg":
3743 val = vol['vg']
3744 elif field == "name":
3745 val = vol['name']
3746 elif field == "size":
3747 val = int(float(vol['size']))
3748 elif field == "instance":
3749 for inst in ilist:
3750 if node not in lv_by_node[inst]:
3751 continue
3752 if vol['name'] in lv_by_node[inst][node]:
3753 val = inst.name
3754 break
3755 else:
3756 val = '-'
3757 else:
3758 raise errors.ParameterError(field)
3759 node_output.append(str(val))
3761 output.append(node_output)
3763 return output
3766 class LUQueryNodeStorage(NoHooksLU):
3767 """Logical unit for getting information on storage units on node(s).
3770 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3773 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3774 ("storage_type", ht.NoDefault, _CheckStorageType),
3775 ("name", None, ht.TMaybeString),
3779 def CheckArguments(self):
3780 _CheckOutputFields(static=self._FIELDS_STATIC,
3781 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3782 selected=self.op.output_fields)
3784 def ExpandNames(self):
3785 self.needed_locks = {}
3786 self.share_locks[locking.LEVEL_NODE] = 1
3788 if self.op.nodes:
3789 self.needed_locks[locking.LEVEL_NODE] = \
3790 _GetWantedNodes(self, self.op.nodes)
3791 else:
3792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3794 def Exec(self, feedback_fn):
3795 """Computes the list of nodes and their attributes.
3798 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3800 # Always get name to sort by
3801 if constants.SF_NAME in self.op.output_fields:
3802 fields = self.op.output_fields[:]
3803 else:
3804 fields = [constants.SF_NAME] + self.op.output_fields
3806 # Never ask for node or type as it's only known to the LU
3807 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3808 while extra in fields:
3809 fields.remove(extra)
3811 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3812 name_idx = field_idx[constants.SF_NAME]
3814 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3815 data = self.rpc.call_storage_list(self.nodes,
3816 self.op.storage_type, st_args,
3817 self.op.name, fields)
3821 for node in utils.NiceSort(self.nodes):
3822 nresult = data[node]
3826 msg = nresult.fail_msg
3827 if msg:
3828 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3829 continue
3831 rows = dict([(row[name_idx], row) for row in nresult.payload])
3833 for name in utils.NiceSort(rows.keys()):
3834 row = rows[name]
3838 for field in self.op.output_fields:
3839 if field == constants.SF_NODE:
3840 val = node
3841 elif field == constants.SF_TYPE:
3842 val = self.op.storage_type
3843 elif field in field_idx:
3844 val = row[field_idx[field]]
3845 else:
3846 raise errors.ParameterError(field)
3855 class _InstanceQuery(_QueryBase):
3856 FIELDS = query.INSTANCE_FIELDS
3858 def ExpandNames(self, lu):
3859 lu.needed_locks = {}
3860 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3861 lu.share_locks[locking.LEVEL_NODE] = 1
3863 if self.names:
3864 self.wanted = _GetWantedInstances(lu, self.names)
3865 else:
3866 self.wanted = locking.ALL_SET
3868 self.do_locking = (self.use_locking and
3869 query.IQ_LIVE in self.requested_data)
3871 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3872 lu.needed_locks[locking.LEVEL_NODE] = []
3873 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3875 def DeclareLocks(self, lu, level):
3876 if level == locking.LEVEL_NODE and self.do_locking:
3877 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3879 def _GetQueryData(self, lu):
3880 """Computes the list of instances and their attributes.
3883 all_info = lu.cfg.GetAllInstancesInfo()
3885 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3887 instance_list = [all_info[name] for name in instance_names]
3888 nodes = frozenset([inst.primary_node for inst in instance_list])
3889 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3891 bad_nodes = []
3892 offline_nodes = []
3893 # Gather data as requested
3894 if query.IQ_LIVE in self.requested_data:
3895 live_data = {}
3896 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3897 for name in nodes:
3898 result = node_data[name]
3899 if result.offline:
3900 # offline nodes will be in both lists
3901 assert result.fail_msg
3902 offline_nodes.append(name)
3903 if result.fail_msg:
3904 bad_nodes.append(name)
3905 elif result.payload:
3906 live_data.update(result.payload)
3907 # else no instance is alive
3908 else:
3909 live_data = None
3911 if query.IQ_DISKUSAGE in self.requested_data:
3912 disk_usage = dict((inst.name,
3913 _ComputeDiskSize(inst.disk_template,
3914 [{"size": disk.size}
3915 for disk in inst.disks]))
3916 for inst in instance_list)
3917 else:
3918 disk_usage = None
3920 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3921 disk_usage, offline_nodes, bad_nodes,
3922 live_data)
3925 #: Query type implementations
3926 _QUERY_IMPL = {
3927 constants.QR_INSTANCE: _InstanceQuery,
3928 constants.QR_NODE: _NodeQuery,
3929 }
3932 def _GetQueryImplementation(name):
3933 """Returns the implemtnation for a query type.
3935 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3938 try:
3939 return _QUERY_IMPL[name]
3940 except KeyError:
3941 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3942 errors.ECODE_INVAL)
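# Editorial usage sketch (not part of the original module); QR_NODE and
# the "name" field are illustrative values only:
#
#   qcls = _GetQueryImplementation(constants.QR_NODE)
#   fdefs = qcls.FieldsQuery(["name"])  # field definitions, no locking
#
# Unknown resource names surface as OpPrereqError rather than KeyError.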
3945 class LUQuery(NoHooksLU):
3946 """Query for resources/items of a certain kind.
3949 # pylint: disable-msg=W0142
3951 ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3952 ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3953 ("filter", None, ht.TOr(ht.TNone,
3954 ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3958 def CheckArguments(self):
3959 qcls = _GetQueryImplementation(self.op.what)
3960 names = qlang.ReadSimpleFilter("name", self.op.filter)
3962 self.impl = qcls(names, self.op.fields, False)
3964 def ExpandNames(self):
3965 self.impl.ExpandNames(self)
3967 def DeclareLocks(self, level):
3968 self.impl.DeclareLocks(self, level)
3970 def Exec(self, feedback_fn):
3971 return self.impl.NewStyleQuery(self)
3974 class LUQueryFields(NoHooksLU):
3975 """Query for resources/items of a certain kind.
3978 # pylint: disable-msg=W0142
3980 ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3981 ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3985 def CheckArguments(self):
3986 self.qcls = _GetQueryImplementation(self.op.what)
3988 def ExpandNames(self):
3989 self.needed_locks = {}
3991 def Exec(self, feedback_fn):
3992 return self.qcls.FieldsQuery(self.op.fields)
3995 class LUModifyNodeStorage(NoHooksLU):
3996 """Logical unit for modifying a storage volume on a node.
4001 ("storage_type", ht.NoDefault, _CheckStorageType),
4002 ("name", ht.NoDefault, ht.TNonEmptyString),
4003 ("changes", ht.NoDefault, ht.TDict),
4007 def CheckArguments(self):
4008 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4010 storage_type = self.op.storage_type
4012 try:
4013 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4014 except KeyError:
4015 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4016 " modified" % storage_type,
4019 diff = set(self.op.changes.keys()) - modifiable
4020 if diff:
4021 raise errors.OpPrereqError("The following fields can not be modified for"
4022 " storage units of type '%s': %r" %
4023 (storage_type, list(diff)),
4024 errors.ECODE_INVAL)
4026 def ExpandNames(self):
4027 self.needed_locks = {
4028 locking.LEVEL_NODE: self.op.node_name,
4031 def Exec(self, feedback_fn):
4032 """Computes the list of nodes and their attributes.
4035 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4036 result = self.rpc.call_storage_modify(self.op.node_name,
4037 self.op.storage_type, st_args,
4038 self.op.name, self.op.changes)
4039 result.Raise("Failed to modify storage unit '%s' on %s" %
4040 (self.op.name, self.op.node_name))
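# Editorial example (assumes a storage type whose only modifiable field
# is "allocatable", e.g. LVM physical volumes): changes ==
# {"allocatable": False} passes the CheckArguments validation above,
# while changes == {"size": 10} leaves a non-empty diff and is rejected
# with OpPrereqError.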
4043 class LUAddNode(LogicalUnit):
4044 """Logical unit for adding node to the cluster.
4048 HTYPE = constants.HTYPE_NODE
4051 ("primary_ip", None, ht.NoType),
4052 ("secondary_ip", None, ht.TMaybeString),
4053 ("readd", False, ht.TBool),
4054 ("group", None, ht.TMaybeString),
4055 ("master_capable", None, ht.TMaybeBool),
4056 ("vm_capable", None, ht.TMaybeBool),
4057 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4059 _NFLAGS = ["master_capable", "vm_capable"]
4061 def CheckArguments(self):
4062 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4063 # validate/normalize the node name
4064 self.hostname = netutils.GetHostname(name=self.op.node_name,
4065 family=self.primary_ip_family)
4066 self.op.node_name = self.hostname.name
4067 if self.op.readd and self.op.group:
4068 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4069 " being readded", errors.ECODE_INVAL)
4071 def BuildHooksEnv(self):
4074 This will run on all nodes before, and on all nodes + the new node after.
4078 "OP_TARGET": self.op.node_name,
4079 "NODE_NAME": self.op.node_name,
4080 "NODE_PIP": self.op.primary_ip,
4081 "NODE_SIP": self.op.secondary_ip,
4082 "MASTER_CAPABLE": str(self.op.master_capable),
4083 "VM_CAPABLE": str(self.op.vm_capable),
4085 nodes_0 = self.cfg.GetNodeList()
4086 nodes_1 = nodes_0 + [self.op.node_name, ]
4087 return env, nodes_0, nodes_1
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4093 - the new node is not already in the config
4095 - its parameters (single/dual homed) match the cluster
4097 Any errors are signaled by raising errors.OpPrereqError.
4101 hostname = self.hostname
4102 node = hostname.name
4103 primary_ip = self.op.primary_ip = hostname.ip
4104 if self.op.secondary_ip is None:
4105 if self.primary_ip_family == netutils.IP6Address.family:
4106 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4107 " IPv4 address must be given as secondary",
4109 self.op.secondary_ip = primary_ip
4111 secondary_ip = self.op.secondary_ip
4112 if not netutils.IP4Address.IsValid(secondary_ip):
4113 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4114 " address" % secondary_ip, errors.ECODE_INVAL)
4116 node_list = cfg.GetNodeList()
4117 if not self.op.readd and node in node_list:
4118 raise errors.OpPrereqError("Node %s is already in the configuration" %
4119 node, errors.ECODE_EXISTS)
4120 elif self.op.readd and node not in node_list:
4121 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4124 self.changed_primary_ip = False
4126 for existing_node_name in node_list:
4127 existing_node = cfg.GetNodeInfo(existing_node_name)
4129 if self.op.readd and node == existing_node_name:
4130 if existing_node.secondary_ip != secondary_ip:
4131 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4132 " address configuration as before",
4134 if existing_node.primary_ip != primary_ip:
4135 self.changed_primary_ip = True
4139 if (existing_node.primary_ip == primary_ip or
4140 existing_node.secondary_ip == primary_ip or
4141 existing_node.primary_ip == secondary_ip or
4142 existing_node.secondary_ip == secondary_ip):
4143 raise errors.OpPrereqError("New node ip address(es) conflict with"
4144 " existing node %s" % existing_node.name,
4145 errors.ECODE_NOTUNIQUE)
4147 # After this 'if' block, None is no longer a valid value for the
4148 # _capable op attributes
4149 if self.op.readd:
4150 old_node = self.cfg.GetNodeInfo(node)
4151 assert old_node is not None, "Can't retrieve locked node %s" % node
4152 for attr in self._NFLAGS:
4153 if getattr(self.op, attr) is None:
4154 setattr(self.op, attr, getattr(old_node, attr))
4155 else:
4156 for attr in self._NFLAGS:
4157 if getattr(self.op, attr) is None:
4158 setattr(self.op, attr, True)
4160 if self.op.readd and not self.op.vm_capable:
4161 pri, sec = cfg.GetNodeInstances(node)
4162 if pri or sec:
4163 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4164 " flag set to false, but it already holds"
4165 " instances" % node,
4168 # check that the type of the node (single versus dual homed) is the
4169 # same as for the master
4170 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4171 master_singlehomed = myself.secondary_ip == myself.primary_ip
4172 newbie_singlehomed = secondary_ip == primary_ip
4173 if master_singlehomed != newbie_singlehomed:
4174 if master_singlehomed:
4175 raise errors.OpPrereqError("The master has no secondary ip but the"
4176 " new node has one",
4179 raise errors.OpPrereqError("The master has a secondary ip but the"
4180 " new node doesn't have one",
4183 # checks reachability
4184 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4185 raise errors.OpPrereqError("Node not reachable by ping",
4186 errors.ECODE_ENVIRON)
4188 if not newbie_singlehomed:
4189 # check reachability from my secondary ip to newbie's secondary ip
4190 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4191 source=myself.secondary_ip):
4192 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4193 " based ping to node daemon port",
4194 errors.ECODE_ENVIRON)
4201 if self.op.master_capable:
4202 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4203 else:
4204 self.master_candidate = False
4206 if self.op.readd:
4207 self.new_node = old_node
4208 else:
4209 node_group = cfg.LookupNodeGroup(self.op.group)
4210 self.new_node = objects.Node(name=node,
4211 primary_ip=primary_ip,
4212 secondary_ip=secondary_ip,
4213 master_candidate=self.master_candidate,
4214 offline=False, drained=False,
4217 if self.op.ndparams:
4218 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4220 def Exec(self, feedback_fn):
4221 """Adds the new node to the cluster.
4224 new_node = self.new_node
4225 node = new_node.name
4227 # We are adding a new node, so we assume it is powered
4228 new_node.powered = True
4230 # for re-adds, reset the offline/drained/master-candidate flags;
4231 # we need to reset here, otherwise offline would prevent RPC calls
4232 # later in the procedure; this also means that if the re-add
4233 # fails, we are left with a non-offlined, broken node
4234 if self.op.readd:
4235 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4236 self.LogInfo("Readding a node, the offline/drained flags were reset")
4237 # if we demote the node, we do cleanup later in the procedure
4238 new_node.master_candidate = self.master_candidate
4239 if self.changed_primary_ip:
4240 new_node.primary_ip = self.op.primary_ip
4242 # copy the master/vm_capable flags
4243 for attr in self._NFLAGS:
4244 setattr(new_node, attr, getattr(self.op, attr))
4246 # notify the user about any possible mc promotion
4247 if new_node.master_candidate:
4248 self.LogInfo("Node will be a master candidate")
4250 if self.op.ndparams:
4251 new_node.ndparams = self.op.ndparams
4253 # check connectivity
4254 result = self.rpc.call_version([node])[node]
4255 result.Raise("Can't get version information from node %s" % node)
4256 if constants.PROTOCOL_VERSION == result.payload:
4257 logging.info("Communication to node %s fine, sw version %s match",
4258 node, result.payload)
4259 else:
4260 raise errors.OpExecError("Version mismatch master version %s,"
4261 " node version %s" %
4262 (constants.PROTOCOL_VERSION, result.payload))
4264 # Add node to our /etc/hosts, and add key to known_hosts
4265 if self.cfg.GetClusterInfo().modify_etc_hosts:
4266 master_node = self.cfg.GetMasterNode()
4267 result = self.rpc.call_etc_hosts_modify(master_node,
4268 constants.ETC_HOSTS_ADD,
4271 result.Raise("Can't update hosts file with new host data")
4273 if new_node.secondary_ip != new_node.primary_ip:
4274 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4277 node_verify_list = [self.cfg.GetMasterNode()]
4278 node_verify_param = {
4279 constants.NV_NODELIST: [node],
4280 # TODO: do a node-net-test as well?
4283 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4284 self.cfg.GetClusterName())
4285 for verifier in node_verify_list:
4286 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4287 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4288 if nl_payload:
4289 for failed in nl_payload:
4290 feedback_fn("ssh/hostname verification failed"
4291 " (checking from %s): %s" %
4292 (verifier, nl_payload[failed]))
4293 raise errors.OpExecError("ssh/hostname verification failed.")
4295 if self.op.readd:
4296 _RedistributeAncillaryFiles(self)
4297 self.context.ReaddNode(new_node)
4298 # make sure we redistribute the config
4299 self.cfg.Update(new_node, feedback_fn)
4300 # and make sure the new node will not have old files around
4301 if not new_node.master_candidate:
4302 result = self.rpc.call_node_demote_from_mc(new_node.name)
4303 msg = result.fail_msg
4304 if msg:
4305 self.LogWarning("Node failed to demote itself from master"
4306 " candidate status: %s" % msg)
4307 else:
4308 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4309 additional_vm=self.op.vm_capable)
4310 self.context.AddNode(new_node, self.proc.GetECId())
4313 class LUSetNodeParams(LogicalUnit):
4314 """Modifies the parameters of a node.
4316 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4317 to the node role (as _ROLE_*)
4318 @cvar _R2F: a dictionary from node role to tuples of flags
4319 @cvar _FLAGS: a list of attribute names corresponding to the flags
4322 HPATH = "node-modify"
4323 HTYPE = constants.HTYPE_NODE
4326 ("master_candidate", None, ht.TMaybeBool),
4327 ("offline", None, ht.TMaybeBool),
4328 ("drained", None, ht.TMaybeBool),
4329 ("auto_promote", False, ht.TBool),
4330 ("master_capable", None, ht.TMaybeBool),
4331 ("vm_capable", None, ht.TMaybeBool),
4332 ("secondary_ip", None, ht.TMaybeString),
4333 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4337 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4338 _F2R = {
4339 (True, False, False): _ROLE_CANDIDATE,
4340 (False, True, False): _ROLE_DRAINED,
4341 (False, False, True): _ROLE_OFFLINE,
4342 (False, False, False): _ROLE_REGULAR,
4343 }
4344 _R2F = dict((v, k) for k, v in _F2R.items())
4345 _FLAGS = ["master_candidate", "drained", "offline"]
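# Editorial example: a node with flags (master_candidate=False,
# drained=True, offline=False) maps via _F2R to _ROLE_DRAINED, and
# _R2F[_ROLE_DRAINED] recovers the flag tuple (False, True, False).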
4347 def CheckArguments(self):
4348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4349 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4350 self.op.master_capable, self.op.vm_capable,
4351 self.op.secondary_ip, self.op.ndparams]
4352 if all_mods.count(None) == len(all_mods):
4353 raise errors.OpPrereqError("Please pass at least one modification",
4355 if all_mods.count(True) > 1:
4356 raise errors.OpPrereqError("Can't set the node into more than one"
4357 " state at the same time",
4360 # Boolean value that tells us whether we might be demoting from MC
4361 self.might_demote = (self.op.master_candidate == False or
4362 self.op.offline == True or
4363 self.op.drained == True or
4364 self.op.master_capable == False)
4366 if self.op.secondary_ip:
4367 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4368 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4369 " address" % self.op.secondary_ip,
4372 self.lock_all = self.op.auto_promote and self.might_demote
4373 self.lock_instances = self.op.secondary_ip is not None
4375 def ExpandNames(self):
4376 if self.lock_all:
4377 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4378 else:
4379 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4381 if self.lock_instances:
4382 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4384 def DeclareLocks(self, level):
4385 # If we have locked all instances, before waiting to lock nodes, release
4386 # all the ones living on nodes unrelated to the current operation.
4387 if level == locking.LEVEL_NODE and self.lock_instances:
4388 instances_release = []
4389 instances_keep = []
4390 self.affected_instances = []
4391 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4392 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4393 instance = self.context.cfg.GetInstanceInfo(instance_name)
4394 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4395 if i_mirrored and self.op.node_name in instance.all_nodes:
4396 instances_keep.append(instance_name)
4397 self.affected_instances.append(instance)
4398 else:
4399 instances_release.append(instance_name)
4400 if instances_release:
4401 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4402 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4404 def BuildHooksEnv(self):
4407 This runs on the master node.
4411 "OP_TARGET": self.op.node_name,
4412 "MASTER_CANDIDATE": str(self.op.master_candidate),
4413 "OFFLINE": str(self.op.offline),
4414 "DRAINED": str(self.op.drained),
4415 "MASTER_CAPABLE": str(self.op.master_capable),
4416 "VM_CAPABLE": str(self.op.vm_capable),
4418 nl = [self.cfg.GetMasterNode(),
4419 self.op.node_name]
4420 return env, nl, nl
4422 def CheckPrereq(self):
4423 """Check prerequisites.
4425 This only checks the instance list against the existing names.
4428 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4430 if (self.op.master_candidate is not None or
4431 self.op.drained is not None or
4432 self.op.offline is not None):
4433 # we can't change the master's node flags
4434 if self.op.node_name == self.cfg.GetMasterNode():
4435 raise errors.OpPrereqError("The master role can be changed"
4436 " only via master-failover",
4439 if self.op.master_candidate and not node.master_capable:
4440 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4441 " it a master candidate" % node.name,
4444 if self.op.vm_capable == False:
4445 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4446 if ipri or isec:
4447 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4448 " the vm_capable flag" % node.name,
4451 if node.master_candidate and self.might_demote and not self.lock_all:
4452 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4453 # check if after removing the current node, we're missing master
4455 (mc_remaining, mc_should, _) = \
4456 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4457 if mc_remaining < mc_should:
4458 raise errors.OpPrereqError("Not enough master candidates, please"
4459 " pass auto_promote to allow promotion",
4462 self.old_flags = old_flags = (node.master_candidate,
4463 node.drained, node.offline)
4464 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4465 self.old_role = old_role = self._F2R[old_flags]
4467 # Check for ineffective changes
4468 for attr in self._FLAGS:
4469 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4470 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4471 setattr(self.op, attr, None)
4473 # Past this point, any flag change to False means a transition
4474 # away from the respective state, as only real changes are kept
4476 # If we're being deofflined/drained, we'll MC ourself if needed
4477 if (self.op.drained == False or self.op.offline == False or
4478 (self.op.master_capable and not node.master_capable)):
4479 if _DecideSelfPromotion(self):
4480 self.op.master_candidate = True
4481 self.LogInfo("Auto-promoting node to master candidate")
4483 # If we're no longer master capable, we'll demote ourselves from MC
4484 if self.op.master_capable == False and node.master_candidate:
4485 self.LogInfo("Demoting from master candidate")
4486 self.op.master_candidate = False
4489 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4490 if self.op.master_candidate:
4491 new_role = self._ROLE_CANDIDATE
4492 elif self.op.drained:
4493 new_role = self._ROLE_DRAINED
4494 elif self.op.offline:
4495 new_role = self._ROLE_OFFLINE
4496 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4497 # False is still in new flags, which means we're un-setting (the
4498 # offline state, for example)
4499 new_role = self._ROLE_REGULAR
4500 else: # no new flags, nothing, keep old role
4501 new_role = old_role
4503 self.new_role = new_role
4505 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4506 # Trying to transition out of offline status
4507 result = self.rpc.call_version([node.name])[node.name]
4508 if result.fail_msg:
4509 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4510 " to report its version: %s" %
4511 (node.name, result.fail_msg),
4514 self.LogWarning("Transitioning node from offline to online state"
4515 " without using re-add. Please make sure the node"
4518 if self.op.secondary_ip:
4519 # Ok even without locking, because this can't be changed by any LU
4520 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4521 master_singlehomed = master.secondary_ip == master.primary_ip
4522 if master_singlehomed and self.op.secondary_ip:
4523 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4524 " homed cluster", errors.ECODE_INVAL)
4526 if node.offline:
4527 if self.affected_instances:
4528 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4529 " node has instances (%s) configured"
4530 " to use it" % self.affected_instances)
4531 else:
4532 # On online nodes, check that no instances are running, and that
4533 # the node has the new ip and we can reach it.
4534 for instance in self.affected_instances:
4535 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4537 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4538 if master.name != node.name:
4539 # check reachability from master secondary ip to new secondary ip
4540 if not netutils.TcpPing(self.op.secondary_ip,
4541 constants.DEFAULT_NODED_PORT,
4542 source=master.secondary_ip):
4543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4544 " based ping to node daemon port",
4545 errors.ECODE_ENVIRON)
4547 if self.op.ndparams:
4548 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4549 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4550 self.new_ndparams = new_ndparams
4552 def Exec(self, feedback_fn):
4557 old_role = self.old_role
4558 new_role = self.new_role
4560 node = self.node
4561 result = []
4562 if self.op.ndparams:
4563 node.ndparams = self.new_ndparams
4565 for attr in ["master_capable", "vm_capable"]:
4566 val = getattr(self.op, attr)
4567 if val is not None:
4568 setattr(node, attr, val)
4569 result.append((attr, str(val)))
4571 if new_role != old_role:
4572 # Tell the node to demote itself, if no longer MC and not offline
4573 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4574 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4575 if msg:
4576 self.LogWarning("Node failed to demote itself: %s", msg)
4578 new_flags = self._R2F[new_role]
4579 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4580 if of != nf:
4581 result.append((desc, str(nf)))
4582 (node.master_candidate, node.drained, node.offline) = new_flags
4584 # we locked all nodes, we adjust the CP before updating this node
4585 if self.lock_all:
4586 _AdjustCandidatePool(self, [node.name])
4588 if self.op.secondary_ip:
4589 node.secondary_ip = self.op.secondary_ip
4590 result.append(("secondary_ip", self.op.secondary_ip))
4592 # this will trigger configuration file update, if needed
4593 self.cfg.Update(node, feedback_fn)
4595 # this will trigger job queue propagation or cleanup if the mc
4596 # flag changed
4597 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4598 self.context.ReaddNode(node)
4600 return result
4603 class LUPowercycleNode(NoHooksLU):
4604 """Powercycles a node.
4613 def CheckArguments(self):
4614 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4615 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4616 raise errors.OpPrereqError("The node is the master and the force"
4617 " parameter was not set",
4620 def ExpandNames(self):
4621 """Locking for PowercycleNode.
4623 This is a last-resort option and shouldn't block on other
4624 jobs. Therefore, we grab no locks.
4627 self.needed_locks = {}
4629 def Exec(self, feedback_fn):
4633 result = self.rpc.call_node_powercycle(self.op.node_name,
4634 self.cfg.GetHypervisorType())
4635 result.Raise("Failed to schedule the reboot")
4636 return result.payload
4639 class LUQueryClusterInfo(NoHooksLU):
4640 """Query cluster configuration.
4645 def ExpandNames(self):
4646 self.needed_locks = {}
4648 def Exec(self, feedback_fn):
4649 """Return cluster config.
4652 cluster = self.cfg.GetClusterInfo()
4654 os_hvp = {}
4655 # Filter just for enabled hypervisors
4656 for os_name, hv_dict in cluster.os_hvp.items():
4657 os_hvp[os_name] = {}
4658 for hv_name, hv_params in hv_dict.items():
4659 if hv_name in cluster.enabled_hypervisors:
4660 os_hvp[os_name][hv_name] = hv_params
4662 # Convert ip_family to ip_version
4663 primary_ip_version = constants.IP4_VERSION
4664 if cluster.primary_ip_family == netutils.IP6Address.family:
4665 primary_ip_version = constants.IP6_VERSION
4668 "software_version": constants.RELEASE_VERSION,
4669 "protocol_version": constants.PROTOCOL_VERSION,
4670 "config_version": constants.CONFIG_VERSION,
4671 "os_api_version": max(constants.OS_API_VERSIONS),
4672 "export_version": constants.EXPORT_VERSION,
4673 "architecture": (platform.architecture()[0], platform.machine()),
4674 "name": cluster.cluster_name,
4675 "master": cluster.master_node,
4676 "default_hypervisor": cluster.enabled_hypervisors[0],
4677 "enabled_hypervisors": cluster.enabled_hypervisors,
4678 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4679 for hypervisor_name in cluster.enabled_hypervisors]),
4681 "beparams": cluster.beparams,
4682 "osparams": cluster.osparams,
4683 "nicparams": cluster.nicparams,
4684 "candidate_pool_size": cluster.candidate_pool_size,
4685 "master_netdev": cluster.master_netdev,
4686 "volume_group_name": cluster.volume_group_name,
4687 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4688 "file_storage_dir": cluster.file_storage_dir,
4689 "maintain_node_health": cluster.maintain_node_health,
4690 "ctime": cluster.ctime,
4691 "mtime": cluster.mtime,
4692 "uuid": cluster.uuid,
4693 "tags": list(cluster.GetTags()),
4694 "uid_pool": cluster.uid_pool,
4695 "default_iallocator": cluster.default_iallocator,
4696 "reserved_lvs": cluster.reserved_lvs,
4697 "primary_ip_version": primary_ip_version,
4698 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4704 class LUQueryConfigValues(NoHooksLU):
4705 """Return configuration values.
4708 _OP_PARAMS = [_POutputFields]
4710 _FIELDS_DYNAMIC = utils.FieldSet()
4711 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4712 "watcher_pause", "volume_group_name")
4714 def CheckArguments(self):
4715 _CheckOutputFields(static=self._FIELDS_STATIC,
4716 dynamic=self._FIELDS_DYNAMIC,
4717 selected=self.op.output_fields)
4719 def ExpandNames(self):
4720 self.needed_locks = {}
4722 def Exec(self, feedback_fn):
4723 """Dump a representation of the cluster config to the standard output.
4726 values = []
4727 for field in self.op.output_fields:
4728 if field == "cluster_name":
4729 entry = self.cfg.GetClusterName()
4730 elif field == "master_node":
4731 entry = self.cfg.GetMasterNode()
4732 elif field == "drain_flag":
4733 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4734 elif field == "watcher_pause":
4735 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4736 elif field == "volume_group_name":
4737 entry = self.cfg.GetVGName()
4739 raise errors.ParameterError(field)
4740 values.append(entry)
4742 return values
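# Editorial example (hypothetical cluster): output_fields ==
# ["cluster_name", "master_node"] would return e.g.
# ["cluster1.example.com", "node1.example.com"], in the requested order.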
4744 class LUActivateInstanceDisks(NoHooksLU):
4745 """Bring up an instance's disks.
4750 ("ignore_size", False, ht.TBool),
4754 def ExpandNames(self):
4755 self._ExpandAndLockInstance()
4756 self.needed_locks[locking.LEVEL_NODE] = []
4757 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4759 def DeclareLocks(self, level):
4760 if level == locking.LEVEL_NODE:
4761 self._LockInstancesNodes()
4763 def CheckPrereq(self):
4764 """Check prerequisites.
4766 This checks that the instance is in the cluster.
4769 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4770 assert self.instance is not None, \
4771 "Cannot retrieve locked instance %s" % self.op.instance_name
4772 _CheckNodeOnline(self, self.instance.primary_node)
4774 def Exec(self, feedback_fn):
4775 """Activate the disks.
4778 disks_ok, disks_info = \
4779 _AssembleInstanceDisks(self, self.instance,
4780 ignore_size=self.op.ignore_size)
4781 if not disks_ok:
4782 raise errors.OpExecError("Cannot activate block devices")
4784 return disks_info
4787 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4789 """Prepare the block devices for an instance.
4791 This sets up the block devices on all nodes.
4793 @type lu: L{LogicalUnit}
4794 @param lu: the logical unit on whose behalf we execute
4795 @type instance: L{objects.Instance}
4796 @param instance: the instance for whose disks we assemble
4797 @type disks: list of L{objects.Disk} or None
4798 @param disks: which disks to assemble (or all, if None)
4799 @type ignore_secondaries: boolean
4800 @param ignore_secondaries: if true, errors on secondary nodes
4801 won't result in an error return from the function
4802 @type ignore_size: boolean
4803 @param ignore_size: if true, the current known size of the disk
4804 will not be used during the disk activation, useful for cases
4805 when the size is wrong
4806 @return: False if the operation failed, otherwise a list of
4807 (host, instance_visible_name, node_visible_name)
4808 with the mapping from node devices to instance devices
4811 device_info = []
4812 disks_ok = True
4813 iname = instance.name
4814 disks = _ExpandCheckDisks(instance, disks)
4816 # With the two passes mechanism we try to reduce the window of
4817 # opportunity for the race condition of switching DRBD to primary
4818 # before handshaking occurred, but we do not eliminate it
4820 # The proper fix would be to wait (with some limits) until the
4821 # connection has been made and drbd transitions from WFConnection
4822 # into any other network-connected state (Connected, SyncTarget,
4825 # 1st pass, assemble on all nodes in secondary mode
4826 for inst_disk in disks:
4827 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4828 if ignore_size:
4829 node_disk = node_disk.Copy()
4830 node_disk.UnsetSize()
4831 lu.cfg.SetDiskID(node_disk, node)
4832 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4833 msg = result.fail_msg
4834 if msg:
4835 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4836 " (is_primary=False, pass=1): %s",
4837 inst_disk.iv_name, node, msg)
4838 if not ignore_secondaries:
4839 disks_ok = False
4841 # FIXME: race condition on drbd migration to primary
4843 # 2nd pass, do only the primary node
4844 for inst_disk in disks:
4847 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4848 if node != instance.primary_node:
4849 continue
4850 if ignore_size:
4851 node_disk = node_disk.Copy()
4852 node_disk.UnsetSize()
4853 lu.cfg.SetDiskID(node_disk, node)
4854 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4855 msg = result.fail_msg
4856 if msg:
4857 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4858 " (is_primary=True, pass=2): %s",
4859 inst_disk.iv_name, node, msg)
4860 disks_ok = False
4861 else:
4862 dev_path = result.payload
4864 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4866 # leave the disks configured for the primary node
4867 # this is a workaround that would be fixed better by
4868 # improving the logical/physical id handling
4869 for disk in disks:
4870 lu.cfg.SetDiskID(disk, instance.primary_node)
4872 return disks_ok, device_info
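# Editorial usage sketch (not part of the original module): callers are
# expected to check the boolean result and roll back on failure, e.g.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("disk %s on %s is %s" % (iv_name, node, dev_path))
#
# where feedback_fn stands in for any reporting callback.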
4875 def _StartInstanceDisks(lu, instance, force):
4876 """Start the disks of an instance.
4879 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4880 ignore_secondaries=force)
4881 if not disks_ok:
4882 _ShutdownInstanceDisks(lu, instance)
4883 if force is not None and not force:
4884 lu.proc.LogWarning("", hint="If the message above refers to a"
4886 " you can retry the operation using '--force'.")
4887 raise errors.OpExecError("Disk consistency error")
4890 class LUDeactivateInstanceDisks(NoHooksLU):
4891 """Shutdown an instance's disks.
4899 def ExpandNames(self):
4900 self._ExpandAndLockInstance()
4901 self.needed_locks[locking.LEVEL_NODE] = []
4902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4904 def DeclareLocks(self, level):
4905 if level == locking.LEVEL_NODE:
4906 self._LockInstancesNodes()
4908 def CheckPrereq(self):
4909 """Check prerequisites.
4911 This checks that the instance is in the cluster.
4914 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4915 assert self.instance is not None, \
4916 "Cannot retrieve locked instance %s" % self.op.instance_name
4918 def Exec(self, feedback_fn):
4919 """Deactivate the disks
4922 instance = self.instance
4923 _SafeShutdownInstanceDisks(self, instance)
4926 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4927 """Shutdown block devices of an instance.
4929 This function checks that the instance is not running before calling
4930 _ShutdownInstanceDisks.
4933 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4934 _ShutdownInstanceDisks(lu, instance, disks=disks)
4937 def _ExpandCheckDisks(instance, disks):
4938 """Return the instance disks selected by the disks list
4940 @type disks: list of L{objects.Disk} or None
4941 @param disks: selected disks
4942 @rtype: list of L{objects.Disk}
4943 @return: selected instance disks to act on
4946 if disks is None:
4947 return instance.disks
4949 if not set(disks).issubset(instance.disks):
4950 raise errors.ProgrammerError("Can only act on disks belonging to the"
4951 " owned instance")
4953 return disks
4955 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4956 """Shutdown block devices of an instance.
4958 This does the shutdown on all nodes of the instance.
4960 If ignore_primary is false, errors on the primary node are treated as
4961 fatal and cause the function to report failure.
4965 disks = _ExpandCheckDisks(instance, disks)
4966 all_result = True
4967 for disk in disks:
4968 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4969 lu.cfg.SetDiskID(top_disk, node)
4970 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4971 msg = result.fail_msg
4972 if msg:
4973 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4974 disk.iv_name, node, msg)
4975 if not ignore_primary or node != instance.primary_node:
4976 all_result = False
4978 return all_result
4980 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4981 """Checks if a node has enough free memory.
4983 This function checks if a given node has the needed amount of free
4984 memory. In case the node has less memory or we cannot get the
4985 information from the node, this function raises an OpPrereqError
4988 @type lu: C{LogicalUnit}
4989 @param lu: a logical unit from which we get configuration data
4991 @param node: the node to check
4992 @type reason: C{str}
4993 @param reason: string to use in the error message
4994 @type requested: C{int}
4995 @param requested: the amount of memory in MiB to check for
4996 @type hypervisor_name: C{str}
4997 @param hypervisor_name: the hypervisor to ask for memory stats
4998 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4999 we cannot check the node
5002 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5003 nodeinfo[node].Raise("Can't get data from node %s" % node,
5004 prereq=True, ecode=errors.ECODE_ENVIRON)
5005 free_mem = nodeinfo[node].payload.get('memory_free', None)
5006 if not isinstance(free_mem, int):
5007 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5008 " was '%s'" % (node, free_mem),
5009 errors.ECODE_ENVIRON)
5010 if requested > free_mem:
5011 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5012 " needed %s MiB, available %s MiB" %
5013 (node, reason, requested, free_mem),
5014 errors.ECODE_NORES)
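# Editorial example (hypothetical amount): requiring 1024 MiB free on an
# instance's primary node before starting it; OpPrereqError is raised if
# the node cannot provide or report that much memory.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        1024, instance.hypervisor)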
5017 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5018 """Checks if nodes have enough free disk space in the all VGs.
5020 This function checks if all given nodes have the needed amount of
5021 free disk. In case any node has less disk or we cannot get the
5022 information from the node, this function raises an OpPrereqError
5025 @type lu: C{LogicalUnit}
5026 @param lu: a logical unit from which we get configuration data
5027 @type nodenames: C{list}
5028 @param nodenames: the list of node names to check
5029 @type req_sizes: C{dict}
5030 @param req_sizes: the hash of vg and corresponding amount of disk in
5032 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5033 or we cannot check the node
5036 if req_sizes is not None:
5037 for vg, req_size in req_sizes.iteritems():
5038 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
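# Editorial example (hypothetical volume groups and sizes): req_sizes ==
# {"xenvg": 10240, "data": 2048} checks every node in nodenames for
# 10 GiB free in "xenvg" and 2 GiB free in "data", issuing one
# _CheckNodesFreeDiskOnVG call per volume group.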
5041 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5042 """Checks if nodes have enough free disk space in the specified VG.
5044 This function checks if all given nodes have the needed amount of
5045 free disk. In case any node has less disk or we cannot get the
5046 information from the node, this function raises an OpPrereqError
5049 @type lu: C{LogicalUnit}
5050 @param lu: a logical unit from which we get configuration data
5051 @type nodenames: C{list}
5052 @param nodenames: the list of node names to check
5054 @param vg: the volume group to check
5055 @type requested: C{int}
5056 @param requested: the amount of disk in MiB to check for
5057 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5058 or we cannot check the node
5061 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5062 for node in nodenames:
5063 info = nodeinfo[node]
5064 info.Raise("Cannot get current information from node %s" % node,
5065 prereq=True, ecode=errors.ECODE_ENVIRON)
5066 vg_free = info.payload.get("vg_free", None)
5067 if not isinstance(vg_free, int):
5068 raise errors.OpPrereqError("Can't compute free disk space on node"
5069 " %s for vg %s, result was '%s'" %
5070 (node, vg, vg_free), errors.ECODE_ENVIRON)
5071 if requested > vg_free:
5072 raise errors.OpPrereqError("Not enough disk space on target node %s"
5073 " vg %s: required %d MiB, available %d MiB" %
5074 (node, vg, requested, vg_free),
5075 errors.ECODE_NORES)
5078 class LUStartupInstance(LogicalUnit):
5079 """Starts an instance.
5082 HPATH = "instance-start"
5083 HTYPE = constants.HTYPE_INSTANCE
5087 _PIgnoreOfflineNodes,
5088 ("hvparams", ht.EmptyDict, ht.TDict),
5089 ("beparams", ht.EmptyDict, ht.TDict),
5093 def CheckArguments(self):
5095 if self.op.beparams:
5096 # fill the beparams dict
5097 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5099 def ExpandNames(self):
5100 self._ExpandAndLockInstance()
5102 def BuildHooksEnv(self):
5105 This runs on master, primary and secondary nodes of the instance.
5109 "FORCE": self.op.force,
5111 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5112 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5113 return env, nl, nl
5115 def CheckPrereq(self):
5116 """Check prerequisites.
5118 This checks that the instance is in the cluster.
5121 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5122 assert self.instance is not None, \
5123 "Cannot retrieve locked instance %s" % self.op.instance_name
5126 if self.op.hvparams:
5127 # check hypervisor parameter syntax (locally)
5128 cluster = self.cfg.GetClusterInfo()
5129 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5130 filled_hvp = cluster.FillHV(instance)
5131 filled_hvp.update(self.op.hvparams)
5132 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5133 hv_type.CheckParameterSyntax(filled_hvp)
5134 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5136 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5138 if self.primary_offline and self.op.ignore_offline_nodes:
5139 self.proc.LogWarning("Ignoring offline primary node")
5141 if self.op.hvparams or self.op.beparams:
5142 self.proc.LogWarning("Overridden parameters are ignored")
5143 else:
5144 _CheckNodeOnline(self, instance.primary_node)
5146 bep = self.cfg.GetClusterInfo().FillBE(instance)
5148 # check bridges existence
5149 _CheckInstanceBridgesExist(self, instance)
5151 remote_info = self.rpc.call_instance_info(instance.primary_node,
5153 instance.hypervisor)
5154 remote_info.Raise("Error checking node %s" % instance.primary_node,
5155 prereq=True, ecode=errors.ECODE_ENVIRON)
5156 if not remote_info.payload: # not running already
5157 _CheckNodeFreeMemory(self, instance.primary_node,
5158 "starting instance %s" % instance.name,
5159 bep[constants.BE_MEMORY], instance.hypervisor)
5161 def Exec(self, feedback_fn):
5162 """Start the instance.
5165 instance = self.instance
5166 force = self.op.force
5168 self.cfg.MarkInstanceUp(instance.name)
5170 if self.primary_offline:
5171 assert self.op.ignore_offline_nodes
5172 self.proc.LogInfo("Primary node offline, marked instance as started")
5173 else:
5174 node_current = instance.primary_node
5176 _StartInstanceDisks(self, instance, force)
5178 result = self.rpc.call_instance_start(node_current, instance,
5179 self.op.hvparams, self.op.beparams)
5180 msg = result.fail_msg
5181 if msg:
5182 _ShutdownInstanceDisks(self, instance)
5183 raise errors.OpExecError("Could not start instance: %s" % msg)
5186 class LURebootInstance(LogicalUnit):
5187 """Reboot an instance.
5190 HPATH = "instance-reboot"
5191 HTYPE = constants.HTYPE_INSTANCE
5194 ("ignore_secondaries", False, ht.TBool),
5195 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
5200 def ExpandNames(self):
5201 self._ExpandAndLockInstance()
5203 def BuildHooksEnv(self):
5206 This runs on master, primary and secondary nodes of the instance.
5210 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5211 "REBOOT_TYPE": self.op.reboot_type,
5212 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5214 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5215 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5216 return env, nl, nl
5218 def CheckPrereq(self):
5219 """Check prerequisites.
5221 This checks that the instance is in the cluster.
5224 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5225 assert self.instance is not None, \
5226 "Cannot retrieve locked instance %s" % self.op.instance_name
5228 _CheckNodeOnline(self, instance.primary_node)
5230 # check bridges existence
5231 _CheckInstanceBridgesExist(self, instance)
5233 def Exec(self, feedback_fn):
5234 """Reboot the instance.
5237 instance = self.instance
5238 ignore_secondaries = self.op.ignore_secondaries
5239 reboot_type = self.op.reboot_type
5241 node_current = instance.primary_node
5243 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5244 constants.INSTANCE_REBOOT_HARD]:
5245 for disk in instance.disks:
5246 self.cfg.SetDiskID(disk, node_current)
5247 result = self.rpc.call_instance_reboot(node_current, instance,
5248 reboot_type,
5249 self.op.shutdown_timeout)
5250 result.Raise("Could not reboot instance")
5251 else:
5252 result = self.rpc.call_instance_shutdown(node_current, instance,
5253 self.op.shutdown_timeout)
5254 result.Raise("Could not shutdown instance for full reboot")
5255 _ShutdownInstanceDisks(self, instance)
5256 _StartInstanceDisks(self, instance, ignore_secondaries)
5257 result = self.rpc.call_instance_start(node_current, instance, None, None)
5258 msg = result.fail_msg
5259 if msg:
5260 _ShutdownInstanceDisks(self, instance)
5261 raise errors.OpExecError("Could not start instance for"
5262 " full reboot: %s" % msg)
5264 self.cfg.MarkInstanceUp(instance.name)
5267 class LUShutdownInstance(LogicalUnit):
5268 """Shutdown an instance.
5271 HPATH = "instance-stop"
5272 HTYPE = constants.HTYPE_INSTANCE
5275 _PIgnoreOfflineNodes,
5276 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
5280 def ExpandNames(self):
5281 self._ExpandAndLockInstance()
5283 def BuildHooksEnv(self):
5286 This runs on master, primary and secondary nodes of the instance.
5289 env = _BuildInstanceHookEnvByObject(self, self.instance)
5290 env["TIMEOUT"] = self.op.timeout
5291 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5292 return env, nl, nl
5294 def CheckPrereq(self):
5295 """Check prerequisites.
5297 This checks that the instance is in the cluster.
5300 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5301 assert self.instance is not None, \
5302 "Cannot retrieve locked instance %s" % self.op.instance_name
5304 self.primary_offline = \
5305 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5307 if self.primary_offline and self.op.ignore_offline_nodes:
5308 self.proc.LogWarning("Ignoring offline primary node")
5309 else:
5310 _CheckNodeOnline(self, self.instance.primary_node)
5312 def Exec(self, feedback_fn):
5313 """Shutdown the instance.
5316 instance = self.instance
5317 node_current = instance.primary_node
5318 timeout = self.op.timeout
5320 self.cfg.MarkInstanceDown(instance.name)
5322 if self.primary_offline:
5323 assert self.op.ignore_offline_nodes
5324 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5325 else:
5326 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5327 msg = result.fail_msg
5328 if msg:
5329 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5331 _ShutdownInstanceDisks(self, instance)
5334 class LUReinstallInstance(LogicalUnit):
5335 """Reinstall an instance.
5338 HPATH = "instance-reinstall"
5339 HTYPE = constants.HTYPE_INSTANCE
5342 ("os_type", None, ht.TMaybeString),
5343 ("force_variant", False, ht.TBool),
5344 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5348 def ExpandNames(self):
5349 self._ExpandAndLockInstance()
5351 def BuildHooksEnv(self):
5354 This runs on master, primary and secondary nodes of the instance.
5357 env = _BuildInstanceHookEnvByObject(self, self.instance)
5358 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5359 return env, nl, nl
5361 def CheckPrereq(self):
5362 """Check prerequisites.
5364 This checks that the instance is in the cluster and is not running.
5367 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5368 assert instance is not None, \
5369 "Cannot retrieve locked instance %s" % self.op.instance_name
5370 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5371 " offline, cannot reinstall")
5372 for node in instance.secondary_nodes:
5373 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5374 " cannot reinstall")
5376 if instance.disk_template == constants.DT_DISKLESS:
5377 raise errors.OpPrereqError("Instance '%s' has no disks" %
5378 self.op.instance_name,
5380 _CheckInstanceDown(self, instance, "cannot reinstall")
5382 if self.op.os_type is not None:
5384 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5385 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5386 instance_os = self.op.os_type
5387 else:
5388 instance_os = instance.os
5390 nodelist = list(instance.all_nodes)
5392 if self.op.osparams:
5393 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5394 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5395 self.os_inst = i_osdict # the new dict (without defaults)
5396 else:
5397 self.os_inst = {}
5399 self.instance = instance
5401 def Exec(self, feedback_fn):
5402 """Reinstall the instance.
5405 inst = self.instance
5407 if self.op.os_type is not None:
5408 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5409 inst.os = self.op.os_type
5410 # Write to configuration
5411 self.cfg.Update(inst, feedback_fn)
5413 _StartInstanceDisks(self, inst, None)
5415 feedback_fn("Running the instance OS create scripts...")
5416 # FIXME: pass debug option from opcode to backend
5417 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5418 self.op.debug_level,
5419 osparams=self.os_inst)
5420 result.Raise("Could not install OS for instance %s on node %s" %
5421 (inst.name, inst.primary_node))
5423 _ShutdownInstanceDisks(self, inst)
5426 class LURecreateInstanceDisks(LogicalUnit):
5427 """Recreate an instance's missing disks.
5430 HPATH = "instance-recreate-disks"
5431 HTYPE = constants.HTYPE_INSTANCE
5434 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5438 def ExpandNames(self):
5439 self._ExpandAndLockInstance()
5441 def BuildHooksEnv(self):
5444 This runs on master, primary and secondary nodes of the instance.
5447 env = _BuildInstanceHookEnvByObject(self, self.instance)
5448 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5449 return env, nl, nl
5451 def CheckPrereq(self):
5452 """Check prerequisites.
5454 This checks that the instance is in the cluster and is not running.
5457 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5458 assert instance is not None, \
5459 "Cannot retrieve locked instance %s" % self.op.instance_name
5460 _CheckNodeOnline(self, instance.primary_node)
5462 if instance.disk_template == constants.DT_DISKLESS:
5463 raise errors.OpPrereqError("Instance '%s' has no disks" %
5464 self.op.instance_name, errors.ECODE_INVAL)
5465 _CheckInstanceDown(self, instance, "cannot recreate disks")
5467 if not self.op.disks:
5468 self.op.disks = range(len(instance.disks))
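# Editorial note: an instance with three disks and no explicit "disks"
# argument therefore recreates indices [0, 1, 2].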
5470 for idx in self.op.disks:
5471 if idx >= len(instance.disks):
5472 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5475 self.instance = instance
5477 def Exec(self, feedback_fn):
5478 """Recreate the disks.
5481 to_skip = []
5482 for idx, _ in enumerate(self.instance.disks):
5483 if idx not in self.op.disks: # disk idx has not been passed in
5484 to_skip.append(idx)
5487 _CreateDisks(self, self.instance, to_skip=to_skip)
5490 class LURenameInstance(LogicalUnit):
5491 """Rename an instance.
5494 HPATH = "instance-rename"
5495 HTYPE = constants.HTYPE_INSTANCE
5498 ("new_name", ht.NoDefault, ht.TNonEmptyString),
5499 ("ip_check", False, ht.TBool),
5500 ("name_check", True, ht.TBool),
5503 def CheckArguments(self):
5507 if self.op.ip_check and not self.op.name_check:
5508 # TODO: make the ip check more flexible and not depend on the name check
5509 raise errors.OpPrereqError("Cannot do ip check without a name check",
5512 def BuildHooksEnv(self):
5515 This runs on master, primary and secondary nodes of the instance.
5518 env = _BuildInstanceHookEnvByObject(self, self.instance)
5519 env["INSTANCE_NEW_NAME"] = self.op.new_name
5520 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5521 return env, nl, nl
5523 def CheckPrereq(self):
5524 """Check prerequisites.
5526 This checks that the instance is in the cluster and is not running.
5529 self.op.instance_name = _ExpandInstanceName(self.cfg,
5530 self.op.instance_name)
5531 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5532 assert instance is not None
5533 _CheckNodeOnline(self, instance.primary_node)
5534 _CheckInstanceDown(self, instance, "cannot rename")
5535 self.instance = instance
5537 new_name = self.op.new_name
5538 if self.op.name_check:
5539 hostname = netutils.GetHostname(name=new_name)
5540 new_name = self.op.new_name = hostname.name
5541 if (self.op.ip_check and
5542 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5543 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5544 (hostname.ip, new_name),
5545 errors.ECODE_NOTUNIQUE)
5547 instance_list = self.cfg.GetInstanceList()
5548 if new_name in instance_list and new_name != instance.name:
5549 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5550 new_name, errors.ECODE_EXISTS)
5552 def Exec(self, feedback_fn):
5553 """Reinstall the instance.
5556 inst = self.instance
5557 old_name = inst.name
5559 rename_file_storage = False
5560 if (inst.disk_template == constants.DT_FILE and
5561 self.op.new_name != inst.name):
5562 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5563 rename_file_storage = True
5565 self.cfg.RenameInstance(inst.name, self.op.new_name)
5566 # Change the instance lock. This is definitely safe while we hold the BGL
5567 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5568 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5570 # re-read the instance from the configuration after rename
5571 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5573 if rename_file_storage:
5574 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5575 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5576 old_file_storage_dir,
5577 new_file_storage_dir)
5578 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5579 " (but the instance has been renamed in Ganeti)" %
5580 (inst.primary_node, old_file_storage_dir,
5581 new_file_storage_dir))
5583 _StartInstanceDisks(self, inst, None)
5585 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5586 old_name, self.op.debug_level)
5587 msg = result.fail_msg
5588 if msg:
5589 msg = ("Could not run OS rename script for instance %s on node %s"
5590 " (but the instance has been renamed in Ganeti): %s" %
5591 (inst.name, inst.primary_node, msg))
5592 self.proc.LogWarning(msg)
5594 _ShutdownInstanceDisks(self, inst)
5596 return inst.name
5599 class LURemoveInstance(LogicalUnit):
5600 """Remove an instance.
5603 HPATH = "instance-remove"
5604 HTYPE = constants.HTYPE_INSTANCE
5607 ("ignore_failures", False, ht.TBool),
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5614 self.needed_locks[locking.LEVEL_NODE] = []
5615 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5617 def DeclareLocks(self, level):
5618 if level == locking.LEVEL_NODE:
5619 self._LockInstancesNodes()
5621 def BuildHooksEnv(self):
5624 This runs on master, primary and secondary nodes of the instance.
5627 env = _BuildInstanceHookEnvByObject(self, self.instance)
5628 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5629 nl = [self.cfg.GetMasterNode()]
5630 nl_post = list(self.instance.all_nodes) + nl
5631 return env, nl, nl_post
5633 def CheckPrereq(self):
5634 """Check prerequisites.
5636 This checks that the instance is in the cluster.
5639 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5640 assert self.instance is not None, \
5641 "Cannot retrieve locked instance %s" % self.op.instance_name
5643 def Exec(self, feedback_fn):
5644 """Remove the instance.
5647 instance = self.instance
5648 logging.info("Shutting down instance %s on node %s",
5649 instance.name, instance.primary_node)
5651 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5652 self.op.shutdown_timeout)
5653 msg = result.fail_msg
5654 if msg:
5655 if self.op.ignore_failures:
5656 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5657 else:
5658 raise errors.OpExecError("Could not shutdown instance %s on"
5659 " node %s: %s" %
5660 (instance.name, instance.primary_node, msg))
5662 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5665 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5666 """Utility function to remove an instance.
5669 logging.info("Removing block devices for instance %s", instance.name)
5671 if not _RemoveDisks(lu, instance):
5672 if not ignore_failures:
5673 raise errors.OpExecError("Can't remove instance's disks")
5674 feedback_fn("Warning: can't remove instance's disks")
5676 logging.info("Removing instance %s out of cluster config", instance.name)
5678 lu.cfg.RemoveInstance(instance.name)
5680 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5681 "Instance lock removal conflict"
5683 # Remove lock for the instance
5684 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5687 class LUQueryInstances(NoHooksLU):
5688 """Logical unit for querying instances.
5691 # pylint: disable-msg=W0142
5694 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5695 ("use_locking", False, ht.TBool),
5699 def CheckArguments(self):
5700 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5701 self.op.use_locking)
5703 def ExpandNames(self):
5704 self.iq.ExpandNames(self)
5706 def DeclareLocks(self, level):
5707 self.iq.DeclareLocks(self, level)
5709 def Exec(self, feedback_fn):
5710 return self.iq.OldStyleQuery(self)
5713 class LUFailoverInstance(LogicalUnit):
5714 """Failover an instance.
5717 HPATH = "instance-failover"
5718 HTYPE = constants.HTYPE_INSTANCE
5721 ("ignore_consistency", False, ht.TBool),
5726 def ExpandNames(self):
5727 self._ExpandAndLockInstance()
5728 self.needed_locks[locking.LEVEL_NODE] = []
5729 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5731 def DeclareLocks(self, level):
5732 if level == locking.LEVEL_NODE:
5733 self._LockInstancesNodes()
5735 def BuildHooksEnv(self):
5738 This runs on master, primary and secondary nodes of the instance.
5741 instance = self.instance
5742 source_node = instance.primary_node
5743 target_node = instance.secondary_nodes[0]
5745 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5746 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5747 "OLD_PRIMARY": source_node,
5748 "OLD_SECONDARY": target_node,
5749 "NEW_PRIMARY": target_node,
5750 "NEW_SECONDARY": source_node,
5752 env.update(_BuildInstanceHookEnvByObject(self, instance))
5753 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5755 nl_post.append(source_node)
5756 return env, nl, nl_post
5758 def CheckPrereq(self):
5759 """Check prerequisites.
5761 This checks that the instance is in the cluster.
5764 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5765 assert self.instance is not None, \
5766 "Cannot retrieve locked instance %s" % self.op.instance_name
5768 bep = self.cfg.GetClusterInfo().FillBE(instance)
5769 if instance.disk_template not in constants.DTS_NET_MIRROR:
5770 raise errors.OpPrereqError("Instance's disk layout is not"
5771 " network mirrored, cannot failover.",
5774 secondary_nodes = instance.secondary_nodes
5775 if not secondary_nodes:
5776 raise errors.ProgrammerError("no secondary node but using "
5777 "a mirrored disk template")
5779 target_node = secondary_nodes[0]
5780 _CheckNodeOnline(self, target_node)
5781 _CheckNodeNotDrained(self, target_node)
5782 if instance.admin_up:
5783 # check memory requirements on the secondary node
5784 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5785 instance.name, bep[constants.BE_MEMORY],
5786 instance.hypervisor)
5788 self.LogInfo("Not checking memory on the secondary node as"
5789 " instance will not be started")
5791 # check bridge existence
5792 _CheckInstanceBridgesExist(self, instance, node=target_node)
5794 def Exec(self, feedback_fn):
5795 """Failover an instance.
5797 The failover is done by shutting it down on its present node and
5798 starting it on the secondary.
5801 instance = self.instance
5802 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5804 source_node = instance.primary_node
5805 target_node = instance.secondary_nodes[0]
5807 if instance.admin_up:
5808 feedback_fn("* checking disk consistency between source and target")
5809 for dev in instance.disks:
5810 # for drbd, these are drbd over lvm
5811 if not _CheckDiskConsistency(self, dev, target_node, False):
5812 if not self.op.ignore_consistency:
5813 raise errors.OpExecError("Disk %s is degraded on target node,"
5814 " aborting failover." % dev.iv_name)
5816 feedback_fn("* not checking disk consistency as instance is not running")
5818 feedback_fn("* shutting down instance on source node")
5819 logging.info("Shutting down instance %s on node %s",
5820 instance.name, source_node)
5822 result = self.rpc.call_instance_shutdown(source_node, instance,
5823 self.op.shutdown_timeout)
5824 msg = result.fail_msg
5826 if self.op.ignore_consistency or primary_node.offline:
5827 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5828 " Proceeding anyway. Please make sure node"
5829 " %s is down. Error details: %s",
5830 instance.name, source_node, source_node, msg)
5832 raise errors.OpExecError("Could not shutdown instance %s on"
5834 (instance.name, source_node, msg))
5836 feedback_fn("* deactivating the instance's disks on source node")
5837 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5838 raise errors.OpExecError("Can't shut down the instance's disks.")
5840 instance.primary_node = target_node
5841 # distribute new instance config to the other nodes
5842 self.cfg.Update(instance, feedback_fn)
5844 # Only start the instance if it's marked as up
5845 if instance.admin_up:
5846 feedback_fn("* activating the instance's disks on target node")
5847 logging.info("Starting instance %s on node %s",
5848 instance.name, target_node)
5850 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5851 ignore_secondaries=True)
5853 _ShutdownInstanceDisks(self, instance)
5854 raise errors.OpExecError("Can't activate the instance's disks")
5856 feedback_fn("* starting the instance on the target node")
5857 result = self.rpc.call_instance_start(target_node, instance, None, None)
5858 msg = result.fail_msg
5860 _ShutdownInstanceDisks(self, instance)
5861 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5862 (instance.name, target_node, msg))
5865 class LUMigrateInstance(LogicalUnit):
5866 """Migrate an instance.
5868 This migrates the instance without shutting it down; failover, by
5869 contrast, requires a shutdown.
5872 HPATH = "instance-migrate"
5873 HTYPE = constants.HTYPE_INSTANCE
5878 ("cleanup", False, ht.TBool),
5883 def ExpandNames(self):
5884 self._ExpandAndLockInstance()
5886 self.needed_locks[locking.LEVEL_NODE] = []
5887 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5889 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5891 self.tasklets = [self._migrater]
5893 def DeclareLocks(self, level):
5894 if level == locking.LEVEL_NODE:
5895 self._LockInstancesNodes()
5897 def BuildHooksEnv(self):
5900 This runs on master, primary and secondary nodes of the instance.
5903 instance = self._migrater.instance
5904 source_node = instance.primary_node
5905 target_node = instance.secondary_nodes[0]
5906 env = _BuildInstanceHookEnvByObject(self, instance)
5907 env["MIGRATE_LIVE"] = self._migrater.live
5908 env["MIGRATE_CLEANUP"] = self.op.cleanup
5910 "OLD_PRIMARY": source_node,
5911 "OLD_SECONDARY": target_node,
5912 "NEW_PRIMARY": target_node,
5913 "NEW_SECONDARY": source_node,
5915 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5917 nl_post.append(source_node)
5918 return env, nl, nl_post
5921 class LUMoveInstance(LogicalUnit):
5922 """Move an instance by data-copying.
5925 HPATH = "instance-move"
5926 HTYPE = constants.HTYPE_INSTANCE
5929 ("target_node", ht.NoDefault, ht.TNonEmptyString),
5934 def ExpandNames(self):
5935 self._ExpandAndLockInstance()
5936 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5937 self.op.target_node = target_node
5938 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5939 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5941 def DeclareLocks(self, level):
5942 if level == locking.LEVEL_NODE:
5943 self._LockInstancesNodes(primary_only=True)
5945 def BuildHooksEnv(self):
5948 This runs on master, primary and secondary nodes of the instance.
5952 "TARGET_NODE": self.op.target_node,
5953 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5955 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5956 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5957 self.op.target_node]
5960 def CheckPrereq(self):
5961 """Check prerequisites.
5963 This checks that the instance is in the cluster.
5966 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5967 assert self.instance is not None, \
5968 "Cannot retrieve locked instance %s" % self.op.instance_name
5970 node = self.cfg.GetNodeInfo(self.op.target_node)
5971 assert node is not None, \
5972 "Cannot retrieve locked node %s" % self.op.target_node
5974 self.target_node = target_node = node.name
5976 if target_node == instance.primary_node:
5977 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5978 (instance.name, target_node),
5981 bep = self.cfg.GetClusterInfo().FillBE(instance)
5983 for idx, dsk in enumerate(instance.disks):
5984 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5985 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5986 " cannot copy" % idx, errors.ECODE_STATE)
5988 _CheckNodeOnline(self, target_node)
5989 _CheckNodeNotDrained(self, target_node)
5990 _CheckNodeVmCapable(self, target_node)
5992 if instance.admin_up:
5993 # check memory requirements on the target node
5994 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5995 instance.name, bep[constants.BE_MEMORY],
5996 instance.hypervisor)
5998 self.LogInfo("Not checking memory on the target node as"
5999 " instance will not be started")
6001 # check bridge existence
6002 _CheckInstanceBridgesExist(self, instance, node=target_node)
6004 def Exec(self, feedback_fn):
6005 """Move an instance.
6007 The move is done by shutting it down on its present node, copying
6008 the data over (slow) and starting it on the new node.
6011 instance = self.instance
6013 source_node = instance.primary_node
6014 target_node = self.target_node
6016 self.LogInfo("Shutting down instance %s on source node %s",
6017 instance.name, source_node)
6019 result = self.rpc.call_instance_shutdown(source_node, instance,
6020 self.op.shutdown_timeout)
6021 msg = result.fail_msg
6023 if self.op.ignore_consistency:
6024 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6025 " Proceeding anyway. Please make sure node"
6026 " %s is down. Error details: %s",
6027 instance.name, source_node, source_node, msg)
6029 raise errors.OpExecError("Could not shutdown instance %s on"
6031 (instance.name, source_node, msg))
6033 # create the target disks
6035 _CreateDisks(self, instance, target_node=target_node)
6036 except errors.OpExecError:
6037 self.LogWarning("Device creation failed, reverting...")
6039 _RemoveDisks(self, instance, target_node=target_node)
6041 self.cfg.ReleaseDRBDMinors(instance.name)
6044 cluster_name = self.cfg.GetClusterInfo().cluster_name
6047 # activate, get path, copy the data over
6048 for idx, disk in enumerate(instance.disks):
6049 self.LogInfo("Copying data for disk %d", idx)
6050 result = self.rpc.call_blockdev_assemble(target_node, disk,
6051 instance.name, True)
6053 self.LogWarning("Can't assemble newly created disk %d: %s",
6054 idx, result.fail_msg)
6055 errs.append(result.fail_msg)
6057 dev_path = result.payload
6058 result = self.rpc.call_blockdev_export(source_node, disk,
6059 target_node, dev_path,
6062 self.LogWarning("Can't copy data over for disk %d: %s",
6063 idx, result.fail_msg)
6064 errs.append(result.fail_msg)
6068 self.LogWarning("Some disks failed to copy, aborting")
6070 _RemoveDisks(self, instance, target_node=target_node)
6072 self.cfg.ReleaseDRBDMinors(instance.name)
6073 raise errors.OpExecError("Errors during disk copy: %s" %
6076 instance.primary_node = target_node
6077 self.cfg.Update(instance, feedback_fn)
6079 self.LogInfo("Removing the disks on the original node")
6080 _RemoveDisks(self, instance, target_node=source_node)
6082 # Only start the instance if it's marked as up
6083 if instance.admin_up:
6084 self.LogInfo("Starting instance %s on node %s",
6085 instance.name, target_node)
6087 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6088 ignore_secondaries=True)
6090 _ShutdownInstanceDisks(self, instance)
6091 raise errors.OpExecError("Can't activate the instance's disks")
6093 result = self.rpc.call_instance_start(target_node, instance, None, None)
6094 msg = result.fail_msg
6096 _ShutdownInstanceDisks(self, instance)
6097 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6098 (instance.name, target_node, msg))
6101 class LUMigrateNode(LogicalUnit):
6102 """Migrate all instances from a node.
6105 HPATH = "node-migrate"
6106 HTYPE = constants.HTYPE_NODE
6114 def ExpandNames(self):
6115 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6117 self.needed_locks = {
6118 locking.LEVEL_NODE: [self.op.node_name],
6121 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6123 # Create tasklets for migrating all instances on this node
6127 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6128 logging.debug("Migrating instance %s", inst.name)
6129 names.append(inst.name)
6131 tasklets.append(TLMigrateInstance(self, inst.name, False))
6133 self.tasklets = tasklets
6135 # Declare instance locks
6136 self.needed_locks[locking.LEVEL_INSTANCE] = names
6138 def DeclareLocks(self, level):
6139 if level == locking.LEVEL_NODE:
6140 self._LockInstancesNodes()
6142 def BuildHooksEnv(self):
6145 This runs on the master, the primary and all the secondaries.
6149 "NODE_NAME": self.op.node_name,
6152 nl = [self.cfg.GetMasterNode()]
6154 return (env, nl, nl)
6157 class TLMigrateInstance(Tasklet):
6158 """Tasklet class for instance migration.
6161 @ivar live: whether the migration will be done live or non-live;
6162 this variable is initialized only after CheckPrereq has run
6165 def __init__(self, lu, instance_name, cleanup):
6166 """Initializes this class.
6169 Tasklet.__init__(self, lu)
6172 self.instance_name = instance_name
6173 self.cleanup = cleanup
6174 self.live = False # will be overridden later
6176 def CheckPrereq(self):
6177 """Check prerequisites.
6179 This checks that the instance is in the cluster.
6182 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6183 instance = self.cfg.GetInstanceInfo(instance_name)
6184 assert instance is not None
6186 if instance.disk_template != constants.DT_DRBD8:
6187 raise errors.OpPrereqError("Instance's disk layout is not"
6188 " drbd8, cannot migrate.", errors.ECODE_STATE)
6190 secondary_nodes = instance.secondary_nodes
6191 if not secondary_nodes:
6192 raise errors.ConfigurationError("No secondary node but using"
6193 " drbd8 disk template")
6195 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6197 target_node = secondary_nodes[0]
6198 # check memory requirements on the secondary node
6199 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6200 instance.name, i_be[constants.BE_MEMORY],
6201 instance.hypervisor)
6203 # check bridge existence
6204 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6206 if not self.cleanup:
6207 _CheckNodeNotDrained(self.lu, target_node)
6208 result = self.rpc.call_instance_migratable(instance.primary_node,
6210 result.Raise("Can't migrate, please use failover",
6211 prereq=True, ecode=errors.ECODE_STATE)
6213 self.instance = instance
6215 if self.lu.op.live is not None and self.lu.op.mode is not None:
6216 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6217 " parameters are accepted",
6219 if self.lu.op.live is not None:
6221 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6223 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6224 # reset the 'live' parameter to None so that repeated
6225 # invocations of CheckPrereq do not raise an exception
6226 self.lu.op.live = None
6227 elif self.lu.op.mode is None:
6228 # read the default value from the hypervisor
6229 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6230 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6232 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
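# Illustrative summary (a sketch, not part of the original module) of the
# live/mode resolution implemented above:
#
#   live=None,  mode=None  -> mode taken from the hypervisor's
#                             HV_MIGRATION_MODE default
#   live=True,  mode=None  -> mode=HT_MIGRATION_LIVE
#   live=False, mode=None  -> mode=HT_MIGRATION_NONLIVE
#   live set,   mode set   -> OpPrereqError (mutually exclusive)
#
# after which self.live is simply (mode == HT_MIGRATION_LIVE).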
6234 def _WaitUntilSync(self):
6235 """Poll with custom rpc for disk sync.
6237 This uses our own step-based rpc call.
6240 self.feedback_fn("* wait until resync is done")
6244 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6246 self.instance.disks)
6248 for node, nres in result.items():
6249 nres.Raise("Cannot resync disks on node %s" % node)
6250 node_done, node_percent = nres.payload
6251 all_done = all_done and node_done
6252 if node_percent is not None:
6253 min_percent = min(min_percent, node_percent)
6255 if min_percent < 100:
6256 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6259 def _EnsureSecondary(self, node):
6260 """Demote a node to secondary.
6263 self.feedback_fn("* switching node %s to secondary mode" % node)
6265 for dev in self.instance.disks:
6266 self.cfg.SetDiskID(dev, node)
6268 result = self.rpc.call_blockdev_close(node, self.instance.name,
6269 self.instance.disks)
6270 result.Raise("Cannot change disk to secondary on node %s" % node)
6272 def _GoStandalone(self):
6273 """Disconnect from the network.
6276 self.feedback_fn("* changing into standalone mode")
6277 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6278 self.instance.disks)
6279 for node, nres in result.items():
6280 nres.Raise("Cannot disconnect disks on node %s" % node)
6282 def _GoReconnect(self, multimaster):
6283 """Reconnect to the network.
6289 msg = "single-master"
6290 self.feedback_fn("* changing disks into %s mode" % msg)
6291 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6292 self.instance.disks,
6293 self.instance.name, multimaster)
6294 for node, nres in result.items():
6295 nres.Raise("Cannot change disks config on node %s" % node)
6297 def _ExecCleanup(self):
6298 """Try to cleanup after a failed migration.
6300 The cleanup is done by:
6301 - check that the instance is running only on one node
6302 (and update the config if needed)
6303 - change disks on its secondary node to secondary
6304 - wait until disks are fully synchronized
6305 - disconnect from the network
6306 - change disks into single-master mode
6307 - wait again until disks are fully synchronized
6310 instance = self.instance
6311 target_node = self.target_node
6312 source_node = self.source_node
6314 # check running on only one node
6315 self.feedback_fn("* checking where the instance actually runs"
6316 " (if this hangs, the hypervisor might be in"
6318 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6319 for node, result in ins_l.items():
6320 result.Raise("Can't contact node %s" % node)
6322 runningon_source = instance.name in ins_l[source_node].payload
6323 runningon_target = instance.name in ins_l[target_node].payload
6325 if runningon_source and runningon_target:
6326 raise errors.OpExecError("Instance seems to be running on two nodes,"
6327 " or the hypervisor is confused. You will have"
6328 " to ensure manually that it runs only on one"
6329 " and restart this operation.")
6331 if not (runningon_source or runningon_target):
6332 raise errors.OpExecError("Instance does not seem to be running at all."
6333 " In this case, it's safer to repair by"
6334 " running 'gnt-instance stop' to ensure disk"
6335 " shutdown, and then restarting it.")
6337 if runningon_target:
6338 # the migration has actually succeeded, we need to update the config
6339 self.feedback_fn("* instance running on secondary node (%s),"
6340 " updating config" % target_node)
6341 instance.primary_node = target_node
6342 self.cfg.Update(instance, self.feedback_fn)
6343 demoted_node = source_node
6345 self.feedback_fn("* instance confirmed to be running on its"
6346 " primary node (%s)" % source_node)
6347 demoted_node = target_node
6349 self._EnsureSecondary(demoted_node)
6351 self._WaitUntilSync()
6352 except errors.OpExecError:
6353 # we ignore errors here, since if the device is standalone, it
6354 # won't be able to sync
6356 self._GoStandalone()
6357 self._GoReconnect(False)
6358 self._WaitUntilSync()
6360 self.feedback_fn("* done")
6362 def _RevertDiskStatus(self):
6363 """Try to revert the disk status after a failed migration.
6366 target_node = self.target_node
6368 self._EnsureSecondary(target_node)
6369 self._GoStandalone()
6370 self._GoReconnect(False)
6371 self._WaitUntilSync()
6372 except errors.OpExecError, err:
6373 self.lu.LogWarning("Migration failed and I can't reconnect the"
6374 " drives: error '%s'\n"
6375 "Please look and recover the instance status" %
6378 def _AbortMigration(self):
6379 """Call the hypervisor code to abort a started migration.
6382 instance = self.instance
6383 target_node = self.target_node
6384 migration_info = self.migration_info
6386 abort_result = self.rpc.call_finalize_migration(target_node,
6390 abort_msg = abort_result.fail_msg
6392 logging.error("Aborting migration failed on target node %s: %s",
6393 target_node, abort_msg)
6394 # Don't raise an exception here, as we still have to try to revert the
6395 # disk status, even if this step failed.
6397 def _ExecMigration(self):
6398 """Migrate an instance.
6400 The migration is done by:
6401 - change the disks into dual-master mode
6402 - wait until disks are fully synchronized again
6403 - migrate the instance
6404 - change disks on the new secondary node (the old primary) to secondary
6405 - wait until disks are fully synchronized
6406 - change disks into single-master mode
6409 instance = self.instance
6410 target_node = self.target_node
6411 source_node = self.source_node
6413 self.feedback_fn("* checking disk consistency between source and target")
6414 for dev in instance.disks:
6415 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6416 raise errors.OpExecError("Disk %s is degraded or not fully"
6417 " synchronized on target node,"
6418 " aborting migrate." % dev.iv_name)
6420 # First get the migration information from the remote node
6421 result = self.rpc.call_migration_info(source_node, instance)
6422 msg = result.fail_msg
6424 log_err = ("Failed fetching source migration information from %s: %s" %
6426 logging.error(log_err)
6427 raise errors.OpExecError(log_err)
6429 self.migration_info = migration_info = result.payload
6431 # Then switch the disks to master/master mode
6432 self._EnsureSecondary(target_node)
6433 self._GoStandalone()
6434 self._GoReconnect(True)
6435 self._WaitUntilSync()
6437 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6438 result = self.rpc.call_accept_instance(target_node,
6441 self.nodes_ip[target_node])
6443 msg = result.fail_msg
6445 logging.error("Instance pre-migration failed, trying to revert"
6446 " disk status: %s", msg)
6447 self.feedback_fn("Pre-migration failed, aborting")
6448 self._AbortMigration()
6449 self._RevertDiskStatus()
6450 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6451 (instance.name, msg))
6453 self.feedback_fn("* migrating instance to %s" % target_node)
6455 result = self.rpc.call_instance_migrate(source_node, instance,
6456 self.nodes_ip[target_node],
6458 msg = result.fail_msg
6460 logging.error("Instance migration failed, trying to revert"
6461 " disk status: %s", msg)
6462 self.feedback_fn("Migration failed, aborting")
6463 self._AbortMigration()
6464 self._RevertDiskStatus()
6465 raise errors.OpExecError("Could not migrate instance %s: %s" %
6466 (instance.name, msg))
6469 instance.primary_node = target_node
6470 # distribute new instance config to the other nodes
6471 self.cfg.Update(instance, self.feedback_fn)
6473 result = self.rpc.call_finalize_migration(target_node,
6477 msg = result.fail_msg
6479 logging.error("Instance migration succeeded, but finalization failed:"
6481 raise errors.OpExecError("Could not finalize instance migration: %s" %
6484 self._EnsureSecondary(source_node)
6485 self._WaitUntilSync()
6486 self._GoStandalone()
6487 self._GoReconnect(False)
6488 self._WaitUntilSync()
6490 self.feedback_fn("* done")
6492 def Exec(self, feedback_fn):
6493 """Perform the migration.
6496 feedback_fn("Migrating instance %s" % self.instance.name)
6498 self.feedback_fn = feedback_fn
6500 self.source_node = self.instance.primary_node
6501 self.target_node = self.instance.secondary_nodes[0]
6502 self.all_nodes = [self.source_node, self.target_node]
6504 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6505 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6509 return self._ExecCleanup()
6511 return self._ExecMigration()
6514 def _CreateBlockDev(lu, node, instance, device, force_create,
6516 """Create a tree of block devices on a given node.
6518 If this device type has to be created on secondaries, create it and all its children.
6521 If not, just recurse to children keeping the same 'force' value.
6523 @param lu: the lu on whose behalf we execute
6524 @param node: the node on which to create the device
6525 @type instance: L{objects.Instance}
6526 @param instance: the instance which owns the device
6527 @type device: L{objects.Disk}
6528 @param device: the device to create
6529 @type force_create: boolean
6530 @param force_create: whether to force creation of this device; this
6531 will be changed to True whenever we find a device whose
6532 CreateOnSecondary() method returns True
6533 @param info: the extra 'metadata' we should attach to the device
6534 (this will be represented as an LVM tag)
6535 @type force_open: boolean
6536 @param force_open: this parameter will be passed to the
6537 L{backend.BlockdevCreate} function where it specifies
6538 whether we run on primary or not, and it affects both
6539 the child assembly and the device's own Open() execution
6542 if device.CreateOnSecondary():
6546 for child in device.children:
6547 _CreateBlockDev(lu, node, instance, child, force_create,
6550 if not force_create:
6553 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
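# Illustrative sketch (hypothetical node name and variables): for a DRBD8
# disk, CreateOnSecondary() is true, so force_create flips to True and the
# whole tree is created even on the secondary; a plain LV keeps the
# caller's force_create value and is thus skipped on secondaries:
#
#   # drbd_dev.children == [dev_data, dev_meta], both LVs
#   _CreateBlockDev(lu, "node2.example.com", inst, drbd_dev,
#                   force_create=False, info=info, force_open=False)
#   # -> creates dev_data, dev_meta, then drbd_dev itself on node2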
6556 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6557 """Create a single block device on a given node.
6559 This will not recurse over children of the device, so they must be
6562 @param lu: the lu on whose behalf we execute
6563 @param node: the node on which to create the device
6564 @type instance: L{objects.Instance}
6565 @param instance: the instance which owns the device
6566 @type device: L{objects.Disk}
6567 @param device: the device to create
6568 @param info: the extra 'metadata' we should attach to the device
6569 (this will be represented as an LVM tag)
6570 @type force_open: boolean
6571 @param force_open: this parameter will be passed to the
6572 L{backend.BlockdevCreate} function where it specifies
6573 whether we run on primary or not, and it affects both
6574 the child assembly and the device's own Open() execution
6577 lu.cfg.SetDiskID(device, node)
6578 result = lu.rpc.call_blockdev_create(node, device, device.size,
6579 instance.name, force_open, info)
6580 result.Raise("Can't create block device %s on"
6581 " node %s for instance %s" % (device, node, instance.name))
6582 if device.physical_id is None:
6583 device.physical_id = result.payload
6586 def _GenerateUniqueNames(lu, exts):
6587 """Generate a suitable LV name.
6589 This will generate a logical volume name for the given instance.
6594 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6595 results.append("%s%s" % (new_id, val))
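# Example (a sketch; the UUIDs are made up): a fresh unique id is generated
# for every extension, so sibling names do not share a prefix:
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   -> ["<uuid-a>.disk0", "<uuid-b>.disk1"]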
6599 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6601 """Generate a drbd8 device complete with its children.
6604 port = lu.cfg.AllocatePort()
6605 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6606 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6607 logical_id=(vgname, names[0]))
6608 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6609 logical_id=(vgname, names[1]))
6610 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6611 logical_id=(primary, secondary, port,
6614 children=[dev_data, dev_meta],
6619 def _GenerateDiskTemplate(lu, template_name,
6620 instance_name, primary_node,
6621 secondary_nodes, disk_info,
6622 file_storage_dir, file_driver,
6623 base_index, feedback_fn):
6624 """Generate the entire disk layout for a given template type.
6627 # TODO: compute space requirements
6629 vgname = lu.cfg.GetVGName()
6630 disk_count = len(disk_info)
6632 if template_name == constants.DT_DISKLESS:
6634 elif template_name == constants.DT_PLAIN:
6635 if len(secondary_nodes) != 0:
6636 raise errors.ProgrammerError("Wrong template configuration")
6638 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6639 for i in range(disk_count)])
6640 for idx, disk in enumerate(disk_info):
6641 disk_index = idx + base_index
6642 vg = disk.get("vg", vgname)
6643 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6644 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6645 logical_id=(vg, names[idx]),
6646 iv_name="disk/%d" % disk_index,
6648 disks.append(disk_dev)
6649 elif template_name == constants.DT_DRBD8:
6650 if len(secondary_nodes) != 1:
6651 raise errors.ProgrammerError("Wrong template configuration")
6652 remote_node = secondary_nodes[0]
6653 minors = lu.cfg.AllocateDRBDMinor(
6654 [primary_node, remote_node] * len(disk_info), instance_name)
6657 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6658 for i in range(disk_count)]):
6659 names.append(lv_prefix + "_data")
6660 names.append(lv_prefix + "_meta")
6661 for idx, disk in enumerate(disk_info):
6662 disk_index = idx + base_index
6663 vg = disk.get("vg", vgname)
6664 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6665 disk["size"], vg, names[idx*2:idx*2+2],
6666 "disk/%d" % disk_index,
6667 minors[idx*2], minors[idx*2+1])
6668 disk_dev.mode = disk["mode"]
6669 disks.append(disk_dev)
6670 elif template_name == constants.DT_FILE:
6671 if len(secondary_nodes) != 0:
6672 raise errors.ProgrammerError("Wrong template configuration")
6674 _RequireFileStorage()
6676 for idx, disk in enumerate(disk_info):
6677 disk_index = idx + base_index
6678 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6679 iv_name="disk/%d" % disk_index,
6680 logical_id=(file_driver,
6681 "%s/disk%d" % (file_storage_dir,
6684 disks.append(disk_dev)
6686 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
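# Worked example for DT_DRBD8 with two disks (hypothetical names/minors):
# every unique prefix is expanded into a data and a metadata LV, and DRBD
# minors are allocated pairwise, (primary, secondary) per disk:
#
#   names  = ["<uuid-a>.disk0_data", "<uuid-a>.disk0_meta",
#             "<uuid-b>.disk1_data", "<uuid-b>.disk1_meta"]
#   minors = [0, 0, 1, 1]   # disk0 -> minors[0:2], disk1 -> minors[2:4]
#
# with each metadata LV a fixed 128 MiB (see _GenerateDRBD8Branch above).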
6690 def _GetInstanceInfoText(instance):
6691 """Compute that text that should be added to the disk's metadata.
6694 return "originstname+%s" % instance.name
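# Example: for an instance named "web1.example.com" (hypothetical), the
# resulting LVM tag would be "originstname+web1.example.com".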
6697 def _CalcEta(time_taken, written, total_size):
6698 """Calculates the ETA based on size written and total size.
6700 @param time_taken: The time taken so far
6701 @param written: amount written so far
6702 @param total_size: The total size of data to be written
6703 @return: The remaining time in seconds
6706 avg_time = time_taken / float(written)
6707 return (total_size - written) * avg_time
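# Worked example (hypothetical numbers): after 120s spent writing 2048 of
# 10240 units, the linear extrapolation above yields
#
#   _CalcEta(120.0, 2048, 10240) == (10240 - 2048) * (120.0 / 2048) == 480.0
#
# i.e. eight more minutes; note that the formula assumes written > 0.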
6710 def _WipeDisks(lu, instance):
6711 """Wipes instance disks.
6713 @type lu: L{LogicalUnit}
6714 @param lu: the logical unit on whose behalf we execute
6715 @type instance: L{objects.Instance}
6716 @param instance: the instance whose disks we should wipe
6717 @return: the success of the wipe
6720 node = instance.primary_node
6721 for idx, device in enumerate(instance.disks):
6722 lu.LogInfo("* Wiping disk %d", idx)
6723 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6725 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6726 # MAX_WIPE_CHUNK at max
6727 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6728 constants.MIN_WIPE_CHUNK_PERCENT)
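# Worked example (assuming the illustrative values MAX_WIPE_CHUNK = 1024
# MiB and MIN_WIPE_CHUNK_PERCENT = 10): a 5000 MiB disk is wiped in
# min(1024, 5000 / 100.0 * 10) = 500 MiB chunks, while a 102400 MiB disk
# is capped at 1024 MiB per chunk.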
6733 start_time = time.time()
6735 while offset < size:
6736 wipe_size = min(wipe_chunk_size, size - offset)
6737 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6738 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6739 (idx, offset, wipe_size))
6742 if now - last_output >= 60:
6743 eta = _CalcEta(now - start_time, offset, size)
6744 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6745 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6749 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6750 """Create all disks for an instance.
6752 This abstracts away some work from AddInstance.
6754 @type lu: L{LogicalUnit}
6755 @param lu: the logical unit on whose behalf we execute
6756 @type instance: L{objects.Instance}
6757 @param instance: the instance whose disks we should create
6759 @param to_skip: list of indices to skip
6760 @type target_node: string
6761 @param target_node: if passed, overrides the target node for creation
6763 @return: the success of the creation
6766 info = _GetInstanceInfoText(instance)
6767 if target_node is None:
6768 pnode = instance.primary_node
6769 all_nodes = instance.all_nodes
6774 if instance.disk_template == constants.DT_FILE:
6775 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6776 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6778 result.Raise("Failed to create directory '%s' on"
6779 " node %s" % (file_storage_dir, pnode))
6781 # Note: this needs to be kept in sync with adding of disks in
6782 # LUSetInstanceParams
6783 for idx, device in enumerate(instance.disks):
6784 if to_skip and idx in to_skip:
6786 logging.info("Creating volume %s for instance %s",
6787 device.iv_name, instance.name)
6789 for node in all_nodes:
6790 f_create = node == pnode
6791 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6794 def _RemoveDisks(lu, instance, target_node=None):
6795 """Remove all disks for an instance.
6797 This abstracts away some work from `AddInstance()` and
6798 `RemoveInstance()`. Note that in case some of the devices couldn't
6799 be removed, the removal will continue with the other ones (compare
6800 with `_CreateDisks()`).
6802 @type lu: L{LogicalUnit}
6803 @param lu: the logical unit on whose behalf we execute
6804 @type instance: L{objects.Instance}
6805 @param instance: the instance whose disks we should remove
6806 @type target_node: string
6807 @param target_node: used to override the node on which to remove the disks
6809 @return: the success of the removal
6812 logging.info("Removing block devices for instance %s", instance.name)
6815 for device in instance.disks:
6817 edata = [(target_node, device)]
6819 edata = device.ComputeNodeTree(instance.primary_node)
6820 for node, disk in edata:
6821 lu.cfg.SetDiskID(disk, node)
6822 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6824 lu.LogWarning("Could not remove block device %s on node %s,"
6825 " continuing anyway: %s", device.iv_name, node, msg)
6828 if instance.disk_template == constants.DT_FILE:
6829 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6833 tgt = instance.primary_node
6834 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6836 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6837 file_storage_dir, instance.primary_node, result.fail_msg)
6843 def _ComputeDiskSizePerVG(disk_template, disks):
6844 """Compute disk size requirements in the volume group
6847 def _compute(disks, payload):
6848 """Universal algorithm
6853 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6857 # Required free disk space as a function of disk and swap space
6859 constants.DT_DISKLESS: None,
6860 constants.DT_PLAIN: _compute(disks, 0),
6861 # 128 MB are added for drbd metadata for each disk
6862 constants.DT_DRBD8: _compute(disks, 128),
6863 constants.DT_FILE: None,
6866 if disk_template not in req_size_dict:
6867 raise errors.ProgrammerError("Disk template '%s' size requirement"
6868 " is unknown" % disk_template)
6870 return req_size_dict[disk_template]
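# Worked example (hypothetical VG names): for DT_DRBD8 with
#   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048},
#            {"vg": "ssdvg", "size": 512}]
# the 128 MiB metadata payload is added per disk, giving
#   {"xenvg": 1024 + 128 + 2048 + 128, "ssdvg": 512 + 128}
#   == {"xenvg": 3328, "ssdvg": 640}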
6873 def _ComputeDiskSize(disk_template, disks):
6874 """Compute disk size requirements in the volume group
6877 # Required free disk space as a function of disk and swap space
6879 constants.DT_DISKLESS: None,
6880 constants.DT_PLAIN: sum(d["size"] for d in disks),
6881 # 128 MB are added for drbd metadata for each disk
6882 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6883 constants.DT_FILE: None,
6886 if disk_template not in req_size_dict:
6887 raise errors.ProgrammerError("Disk template '%s' size requirement"
6888 " is unknown" % disk_template)
6890 return req_size_dict[disk_template]
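# Worked example: for two disks of 1024 and 2048 MiB, DT_PLAIN requires
# 1024 + 2048 = 3072 MiB, while DT_DRBD8 requires
# (1024 + 128) + (2048 + 128) = 3328 MiB; DT_DISKLESS and DT_FILE have no
# volume group requirement (None).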
6893 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6894 """Hypervisor parameter validation.
6896 This function abstracts the hypervisor parameter validation to be
6897 used in both instance create and instance modify.
6899 @type lu: L{LogicalUnit}
6900 @param lu: the logical unit for which we check
6901 @type nodenames: list
6902 @param nodenames: the list of nodes on which we should check
6903 @type hvname: string
6904 @param hvname: the name of the hypervisor we should use
6905 @type hvparams: dict
6906 @param hvparams: the parameters which we need to check
6907 @raise errors.OpPrereqError: if the parameters are not valid
6910 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6913 for node in nodenames:
6917 info.Raise("Hypervisor parameter validation failed on node %s" % node)
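# Typical use (an illustrative sketch; 'nodenames' and 'filled_hvp' are
# assumed to have been computed by the caller, as during instance
# creation):
#
#   _CheckHVParams(self, nodenames, self.op.hypervisor, filled_hvp)
#
# which raises an error as soon as any node rejects the parameters.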
6920 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6921 """OS parameters validation.
6923 @type lu: L{LogicalUnit}
6924 @param lu: the logical unit for which we check
6925 @type required: boolean
6926 @param required: whether the validation should fail if the OS is not found
6928 @type nodenames: list
6929 @param nodenames: the list of nodes on which we should check
6930 @type osname: string
6931 @param osname: the name of the OS we should use
6932 @type osparams: dict
6933 @param osparams: the parameters which we need to check
6934 @raise errors.OpPrereqError: if the parameters are not valid
6937 result = lu.rpc.call_os_validate(required, nodenames, osname,
6938 [constants.OS_VALIDATE_PARAMETERS],
6940 for node, nres in result.items():
6941 # we don't check for offline cases since this should be run only
6942 # against the master node and/or an instance's nodes
6943 nres.Raise("OS Parameters validation failed on node %s" % node)
6944 if not nres.payload:
6945 lu.LogInfo("OS %s not found on node %s, validation skipped",
6949 class LUCreateInstance(LogicalUnit):
6950 """Create an instance.
6953 HPATH = "instance-add"
6954 HTYPE = constants.HTYPE_INSTANCE
6957 ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6958 ("start", True, ht.TBool),
6959 ("wait_for_sync", True, ht.TBool),
6960 ("ip_check", True, ht.TBool),
6961 ("name_check", True, ht.TBool),
6962 ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6963 ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6964 ("hvparams", ht.EmptyDict, ht.TDict),
6965 ("beparams", ht.EmptyDict, ht.TDict),
6966 ("osparams", ht.EmptyDict, ht.TDict),
6967 ("no_install", None, ht.TMaybeBool),
6968 ("os_type", None, ht.TMaybeString),
6969 ("force_variant", False, ht.TBool),
6970 ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6971 ("source_x509_ca", None, ht.TMaybeString),
6972 ("source_instance_name", None, ht.TMaybeString),
6973 ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
6975 ("src_node", None, ht.TMaybeString),
6976 ("src_path", None, ht.TMaybeString),
6977 ("pnode", None, ht.TMaybeString),
6978 ("snode", None, ht.TMaybeString),
6979 ("iallocator", None, ht.TMaybeString),
6980 ("hypervisor", None, ht.TMaybeString),
6981 ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6982 ("identify_defaults", False, ht.TBool),
6983 ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6984 ("file_storage_dir", None, ht.TMaybeString),
6988 def CheckArguments(self):
6992 # do not require name_check to ease forward/backward compatibility
6994 if self.op.no_install and self.op.start:
6995 self.LogInfo("No-installation mode selected, disabling startup")
6996 self.op.start = False
6997 # validate/normalize the instance name
6998 self.op.instance_name = \
6999 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7001 if self.op.ip_check and not self.op.name_check:
7002 # TODO: make the ip check more flexible and not depend on the name check
7003 raise errors.OpPrereqError("Cannot do ip check without a name check",
7006 # check nics' parameter names
7007 for nic in self.op.nics:
7008 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7010 # check disks: parameter names and consistent adopt/no-adopt strategy
7011 has_adopt = has_no_adopt = False
7012 for disk in self.op.disks:
7013 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7018 if has_adopt and has_no_adopt:
7019 raise errors.OpPrereqError("Either all disks are adopted or none is",
7022 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7023 raise errors.OpPrereqError("Disk adoption is not supported for the"
7024 " '%s' disk template" %
7025 self.op.disk_template,
7027 if self.op.iallocator is not None:
7028 raise errors.OpPrereqError("Disk adoption not allowed with an"
7029 " iallocator script", errors.ECODE_INVAL)
7030 if self.op.mode == constants.INSTANCE_IMPORT:
7031 raise errors.OpPrereqError("Disk adoption not allowed for"
7032 " instance import", errors.ECODE_INVAL)
7034 self.adopt_disks = has_adopt
7036 # instance name verification
7037 if self.op.name_check:
7038 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7039 self.op.instance_name = self.hostname1.name
7040 # used in CheckPrereq for ip ping check
7041 self.check_ip = self.hostname1.ip
7043 self.check_ip = None
7045 # file storage checks
7046 if (self.op.file_driver and
7047 not self.op.file_driver in constants.FILE_DRIVER):
7048 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7049 self.op.file_driver, errors.ECODE_INVAL)
7051 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7052 raise errors.OpPrereqError("File storage directory path must not be absolute",
7055 ### Node/iallocator related checks
7056 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7058 if self.op.pnode is not None:
7059 if self.op.disk_template in constants.DTS_NET_MIRROR:
7060 if self.op.snode is None:
7061 raise errors.OpPrereqError("The networked disk templates need"
7062 " a mirror node", errors.ECODE_INVAL)
7064 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7066 self.op.snode = None
7068 self._cds = _GetClusterDomainSecret()
7070 if self.op.mode == constants.INSTANCE_IMPORT:
7071 # On import force_variant must be True, because if we forced it at
7072 # initial install, our only chance when importing it back is that it
7074 self.op.force_variant = True
7076 if self.op.no_install:
7077 self.LogInfo("No-installation mode has no effect during import")
7079 elif self.op.mode == constants.INSTANCE_CREATE:
7080 if self.op.os_type is None:
7081 raise errors.OpPrereqError("No guest OS specified",
7083 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7084 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7085 " installation" % self.op.os_type,
7087 if self.op.disk_template is None:
7088 raise errors.OpPrereqError("No disk template specified",
7091 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7092 # Check handshake to ensure both clusters have the same domain secret
7093 src_handshake = self.op.source_handshake
7094 if not src_handshake:
7095 raise errors.OpPrereqError("Missing source handshake",
7098 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7101 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7104 # Load and check source CA
7105 self.source_x509_ca_pem = self.op.source_x509_ca
7106 if not self.source_x509_ca_pem:
7107 raise errors.OpPrereqError("Missing source X509 CA",
7111 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7113 except OpenSSL.crypto.Error, err:
7114 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7115 (err, ), errors.ECODE_INVAL)
7117 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7118 if errcode is not None:
7119 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7122 self.source_x509_ca = cert
7124 src_instance_name = self.op.source_instance_name
7125 if not src_instance_name:
7126 raise errors.OpPrereqError("Missing source instance name",
7129 self.source_instance_name = \
7130 netutils.GetHostname(name=src_instance_name).name
7133 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7134 self.op.mode, errors.ECODE_INVAL)
7136 def ExpandNames(self):
7137 """ExpandNames for CreateInstance.
7139 Figure out the right locks for instance creation.
7142 self.needed_locks = {}
7144 instance_name = self.op.instance_name
7145 # this is just a preventive check, but someone might still add this
7146 # instance in the meantime, and creation will fail at lock-add time
7147 if instance_name in self.cfg.GetInstanceList():
7148 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7149 instance_name, errors.ECODE_EXISTS)
7151 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7153 if self.op.iallocator:
7154 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7156 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7157 nodelist = [self.op.pnode]
7158 if self.op.snode is not None:
7159 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7160 nodelist.append(self.op.snode)
7161 self.needed_locks[locking.LEVEL_NODE] = nodelist
7163 # in case of import lock the source node too
7164 if self.op.mode == constants.INSTANCE_IMPORT:
7165 src_node = self.op.src_node
7166 src_path = self.op.src_path
7168 if src_path is None:
7169 self.op.src_path = src_path = self.op.instance_name
7171 if src_node is None:
7172 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7173 self.op.src_node = None
7174 if os.path.isabs(src_path):
7175 raise errors.OpPrereqError("Importing an instance from an absolute"
7176 " path requires a source node option.",
7179 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7180 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7181 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7182 if not os.path.isabs(src_path):
7183 self.op.src_path = src_path = \
7184 utils.PathJoin(constants.EXPORT_DIR, src_path)
7186 def _RunAllocator(self):
7187 """Run the allocator based on input opcode.
7190 nics = [n.ToDict() for n in self.nics]
7191 ial = IAllocator(self.cfg, self.rpc,
7192 mode=constants.IALLOCATOR_MODE_ALLOC,
7193 name=self.op.instance_name,
7194 disk_template=self.op.disk_template,
7197 vcpus=self.be_full[constants.BE_VCPUS],
7198 mem_size=self.be_full[constants.BE_MEMORY],
7201 hypervisor=self.op.hypervisor,
7204 ial.Run(self.op.iallocator)
7207 raise errors.OpPrereqError("Can't compute nodes using"
7208 " iallocator '%s': %s" %
7209 (self.op.iallocator, ial.info),
7211 if len(ial.result) != ial.required_nodes:
7212 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7213 " of nodes (%s), required %s" %
7214 (self.op.iallocator, len(ial.result),
7215 ial.required_nodes), errors.ECODE_FAULT)
7216 self.op.pnode = ial.result[0]
7217 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7218 self.op.instance_name, self.op.iallocator,
7219 utils.CommaJoin(ial.result))
7220 if ial.required_nodes == 2:
7221 self.op.snode = ial.result[1]
7223 def BuildHooksEnv(self):
7226 This runs on master, primary and secondary nodes of the instance.
7230 "ADD_MODE": self.op.mode,
7232 if self.op.mode == constants.INSTANCE_IMPORT:
7233 env["SRC_NODE"] = self.op.src_node
7234 env["SRC_PATH"] = self.op.src_path
7235 env["SRC_IMAGES"] = self.src_images
7237 env.update(_BuildInstanceHookEnv(
7238 name=self.op.instance_name,
7239 primary_node=self.op.pnode,
7240 secondary_nodes=self.secondaries,
7241 status=self.op.start,
7242 os_type=self.op.os_type,
7243 memory=self.be_full[constants.BE_MEMORY],
7244 vcpus=self.be_full[constants.BE_VCPUS],
7245 nics=_NICListToTuple(self, self.nics),
7246 disk_template=self.op.disk_template,
7247 disks=[(d["size"], d["mode"]) for d in self.disks],
7250 hypervisor_name=self.op.hypervisor,
7253 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7257 def _ReadExportInfo(self):
7258 """Reads the export information from disk.
7260 It will override the opcode source node and path with the actual
7261 information, if these two were not specified before.
7263 @return: the export information
7266 assert self.op.mode == constants.INSTANCE_IMPORT
7268 src_node = self.op.src_node
7269 src_path = self.op.src_path
7271 if src_node is None:
7272 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7273 exp_list = self.rpc.call_export_list(locked_nodes)
7275 for node in exp_list:
7276 if exp_list[node].fail_msg:
7278 if src_path in exp_list[node].payload:
7280 self.op.src_node = src_node = node
7281 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7285 raise errors.OpPrereqError("No export found for relative path %s" %
7286 src_path, errors.ECODE_INVAL)
7288 _CheckNodeOnline(self, src_node)
7289 result = self.rpc.call_export_info(src_node, src_path)
7290 result.Raise("No export or invalid export found in dir %s" % src_path)
7292 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7293 if not export_info.has_section(constants.INISECT_EXP):
7294 raise errors.ProgrammerError("Corrupted export config",
7295 errors.ECODE_ENVIRON)
7297 ei_version = export_info.get(constants.INISECT_EXP, "version")
7298 if (int(ei_version) != constants.EXPORT_VERSION):
7299 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7300 (ei_version, constants.EXPORT_VERSION),
7301 errors.ECODE_ENVIRON)
7304 def _ReadExportParams(self, einfo):
7305 """Use export parameters as defaults.
7307 If the opcode doesn't specify (i.e. override) some instance
7308 parameters, try to take them from the export information, if
7312 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7314 if self.op.disk_template is None:
7315 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7316 self.op.disk_template = einfo.get(constants.INISECT_INS,
7319 raise errors.OpPrereqError("No disk template specified and the export"
7320 " is missing the disk_template information",
7323 if not self.op.disks:
7324 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7326 # TODO: import the disk iv_name too
7327 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7328 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7329 disks.append({"size": disk_sz})
7330 self.op.disks = disks
7332 raise errors.OpPrereqError("No disk info specified and the export"
7333 " is missing the disk information",
7336 if (not self.op.nics and
7337 einfo.has_option(constants.INISECT_INS, "nic_count")):
7339 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7341 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7342 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7347 if (self.op.hypervisor is None and
7348 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7349 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7350 if einfo.has_section(constants.INISECT_HYP):
7351 # use the export parameters but do not override the ones
7352 # specified by the user
7353 for name, value in einfo.items(constants.INISECT_HYP):
7354 if name not in self.op.hvparams:
7355 self.op.hvparams[name] = value
7357 if einfo.has_section(constants.INISECT_BEP):
7358 # use the parameters, without overriding
7359 for name, value in einfo.items(constants.INISECT_BEP):
7360 if name not in self.op.beparams:
7361 self.op.beparams[name] = value
7363 # try to read the parameters old style, from the main section
7364 for name in constants.BES_PARAMETERS:
7365 if (name not in self.op.beparams and
7366 einfo.has_option(constants.INISECT_INS, name)):
7367 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7369 if einfo.has_section(constants.INISECT_OSP):
7370 # use the parameters, without overriding
7371 for name, value in einfo.items(constants.INISECT_OSP):
7372 if name not in self.op.osparams:
7373 self.op.osparams[name] = value
7375 def _RevertToDefaults(self, cluster):
7376 """Revert the instance parameters to the default values.
7380 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7381 for name in self.op.hvparams.keys():
7382 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7383 del self.op.hvparams[name]
7385 be_defs = cluster.SimpleFillBE({})
7386 for name in self.op.beparams.keys():
7387 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7388 del self.op.beparams[name]
7390 nic_defs = cluster.SimpleFillNIC({})
7391 for nic in self.op.nics:
7392 for name in constants.NICS_PARAMETERS:
7393 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7396 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7397 for name in self.op.osparams.keys():
7398 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7399 del self.op.osparams[name]
7401 def CheckPrereq(self):
7402 """Check prerequisites.
7405 if self.op.mode == constants.INSTANCE_IMPORT:
7406 export_info = self._ReadExportInfo()
7407 self._ReadExportParams(export_info)
7409 _CheckDiskTemplate(self.op.disk_template)
7411 if (not self.cfg.GetVGName() and
7412 self.op.disk_template not in constants.DTS_NOT_LVM):
7413 raise errors.OpPrereqError("Cluster does not support lvm-based"
7414 " instances", errors.ECODE_STATE)
7416 if self.op.hypervisor is None:
7417 self.op.hypervisor = self.cfg.GetHypervisorType()
7419 cluster = self.cfg.GetClusterInfo()
7420 enabled_hvs = cluster.enabled_hypervisors
7421 if self.op.hypervisor not in enabled_hvs:
7422 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7423 " cluster (%s)" % (self.op.hypervisor,
7424 ",".join(enabled_hvs)),
7427 # check hypervisor parameter syntax (locally)
7428 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7429 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7431 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7432 hv_type.CheckParameterSyntax(filled_hvp)
7433 self.hv_full = filled_hvp
7434 # check that we don't specify global parameters on an instance
7435 _CheckGlobalHvParams(self.op.hvparams)
7437 # fill and remember the beparams dict
7438 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7439 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7441 # build os parameters
7442 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7444 # now that hvp/bep are in final format, let's reset to defaults,
7446 if self.op.identify_defaults:
7447 self._RevertToDefaults(cluster)
7451 for idx, nic in enumerate(self.op.nics):
7452 nic_mode_req = nic.get("mode", None)
7453 nic_mode = nic_mode_req
7454 if nic_mode is None:
7455 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7457 # in routed mode, for the first nic, the default ip is 'auto'
7458 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7459 default_ip_mode = constants.VALUE_AUTO
7461 default_ip_mode = constants.VALUE_NONE
7463 # ip validity checks
7464 ip = nic.get("ip", default_ip_mode)
7465 if ip is None or ip.lower() == constants.VALUE_NONE:
7467 elif ip.lower() == constants.VALUE_AUTO:
7468 if not self.op.name_check:
7469 raise errors.OpPrereqError("IP address set to auto but name checks"
7470 " have been skipped",
7472 nic_ip = self.hostname1.ip
7474 if not netutils.IPAddress.IsValid(ip):
7475 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7479 # TODO: check the ip address for uniqueness
7480 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7481 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7484 # MAC address verification
7485 mac = nic.get("mac", constants.VALUE_AUTO)
7486 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7487 mac = utils.NormalizeAndValidateMac(mac)
7490 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7491 except errors.ReservationError:
7492 raise errors.OpPrereqError("MAC address %s already in use"
7493 " in cluster" % mac,
7494 errors.ECODE_NOTUNIQUE)
7496 # bridge verification
7497 bridge = nic.get("bridge", None)
7498 link = nic.get("link", None)
7500 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7501 " at the same time", errors.ECODE_INVAL)
7502 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7503 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7510 nicparams[constants.NIC_MODE] = nic_mode_req
7512 nicparams[constants.NIC_LINK] = link
7514 check_params = cluster.SimpleFillNIC(nicparams)
7515 objects.NIC.CheckParameterSyntax(check_params)
7516 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
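# Illustrative sketch (hypothetical values): an input nic dict such as
#   {"mode": constants.NIC_MODE_BRIDGED, "link": "xen-br0", "mac": "auto"}
# is stored as objects.NIC(mac="auto", ip=None,
#                          nicparams={"mode": "bridged", "link": "xen-br0"}),
# with the "auto" MAC replaced by a generated one further down.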
7518 # disk checks/pre-build
7520 for disk in self.op.disks:
7521 mode = disk.get("mode", constants.DISK_RDWR)
7522 if mode not in constants.DISK_ACCESS_SET:
7523 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7524 mode, errors.ECODE_INVAL)
7525 size = disk.get("size", None)
7527 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7530 except (TypeError, ValueError):
7531 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7533 vg = disk.get("vg", self.cfg.GetVGName())
7534 new_disk = {"size": size, "mode": mode, "vg": vg}
7536 new_disk["adopt"] = disk["adopt"]
7537 self.disks.append(new_disk)
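# Illustrative sketch (assuming a hypothetical cluster default VG "xenvg"):
# an input disk dict {"size": 1024, "mode": "rw"} is normalized to
#   {"size": 1024, "mode": "rw", "vg": "xenvg"}
# plus an "adopt" key when disk adoption was requested.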
7539 if self.op.mode == constants.INSTANCE_IMPORT:
7541 # Check that the new instance doesn't have less disks than the export
7542 instance_disks = len(self.disks)
7543 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7544 if instance_disks < export_disks:
7545 raise errors.OpPrereqError("Not enough disks to import."
7546 " (instance: %d, export: %d)" %
7547 (instance_disks, export_disks),
7551 for idx in range(export_disks):
7552 option = 'disk%d_dump' % idx
7553 if export_info.has_option(constants.INISECT_INS, option):
7554 # FIXME: are the old os-es, disk sizes, etc. useful?
7555 export_name = export_info.get(constants.INISECT_INS, option)
7556 image = utils.PathJoin(self.op.src_path, export_name)
7557 disk_images.append(image)
7558 else:
7559 disk_images.append(False)
7561 self.src_images = disk_images
7563 old_name = export_info.get(constants.INISECT_INS, 'name')
7564 try:
7565 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7566 except (TypeError, ValueError), err:
7567 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7568 " an integer: %s" % str(err),
7569 errors.ECODE_STATE)
7570 if self.op.instance_name == old_name:
7571 for idx, nic in enumerate(self.nics):
7572 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7573 nic_mac_ini = 'nic%d_mac' % idx
7574 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7576 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7578 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7579 if self.op.ip_check:
7580 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7581 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7582 (self.check_ip, self.op.instance_name),
7583 errors.ECODE_NOTUNIQUE)
7585 #### mac address generation
7586 # By generating the mac address here, both the allocator and the hooks get
7587 # the real final mac address rather than the 'auto' or 'generate' value.
7588 # There is a race condition between the generation and the instance object
7589 # creation, which means that we know the mac is valid now, but we're not
7590 # sure it will be when we actually add the instance. If things go bad
7591 # adding the instance will abort because of a duplicate mac, and the
7592 # creation job will fail.
7593 for nic in self.nics:
7594 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7595 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7599 if self.op.iallocator is not None:
7600 self._RunAllocator()
7602 #### node related checks
7604 # check primary node
7605 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7606 assert self.pnode is not None, \
7607 "Cannot retrieve locked node %s" % self.op.pnode
7608 if pnode.offline:
7609 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7610 pnode.name, errors.ECODE_STATE)
7611 if pnode.drained:
7612 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7613 pnode.name, errors.ECODE_STATE)
7614 if not pnode.vm_capable:
7615 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7616 " '%s'" % pnode.name, errors.ECODE_STATE)
7618 self.secondaries = []
7620 # mirror node verification
7621 if self.op.disk_template in constants.DTS_NET_MIRROR:
7622 if self.op.snode == pnode.name:
7623 raise errors.OpPrereqError("The secondary node cannot be the"
7624 " primary node.", errors.ECODE_INVAL)
7625 _CheckNodeOnline(self, self.op.snode)
7626 _CheckNodeNotDrained(self, self.op.snode)
7627 _CheckNodeVmCapable(self, self.op.snode)
7628 self.secondaries.append(self.op.snode)
7630 nodenames = [pnode.name] + self.secondaries
7632 if not self.adopt_disks:
7633 # Check lv size requirements, if not adopting
7634 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7635 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7637 else: # instead, we must check the adoption data
7638 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7639 if len(all_lvs) != len(self.disks):
7640 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7641 errors.ECODE_INVAL)
7642 for lv_name in all_lvs:
7643 try:
7644 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
7645 # to ReserveLV use the same syntax
7646 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7647 except errors.ReservationError:
7648 raise errors.OpPrereqError("LV named %s used by another instance" %
7649 lv_name, errors.ECODE_NOTUNIQUE)
7651 vg_names = self.rpc.call_vg_list([pnode.name])
7652 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7654 node_lvs = self.rpc.call_lv_list([pnode.name],
7655 vg_names[pnode.name].payload.keys()
7656 )[pnode.name]
7657 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7658 node_lvs = node_lvs.payload
7660 delta = all_lvs.difference(node_lvs.keys())
7661 if delta:
7662 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7663 utils.CommaJoin(delta),
7664 errors.ECODE_INVAL)
7665 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7666 if online_lvs:
7667 raise errors.OpPrereqError("Online logical volumes found, cannot"
7668 " adopt: %s" % utils.CommaJoin(online_lvs),
7669 errors.ECODE_STATE)
7670 # update the size of each disk based on what was found on the node
7671 for dsk in self.disks:
7672 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
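# Sketch of the adoption data (assumed payload layout): the lv_list RPC
# returns a dict keyed by "vg/lv_name" whose values are tuples of
# (size, inactive, online), which is why node_lvs[...][0] is used above as
# the size and [...][2] as the online flag, e.g.
#   {"xenvg/mydata": (10240.0, False, False)}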
7674 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7676 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7677 # check OS parameters (remotely)
7678 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7680 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7682 # memory check on primary node
7684 _CheckNodeFreeMemory(self, self.pnode.name,
7685 "creating instance %s" % self.op.instance_name,
7686 self.be_full[constants.BE_MEMORY],
7687 self.op.hypervisor)
7689 self.dry_run_result = list(nodenames)
7691 def Exec(self, feedback_fn):
7692 """Create and add the instance to the cluster.
7695 instance = self.op.instance_name
7696 pnode_name = self.pnode.name
7698 ht_kind = self.op.hypervisor
7699 if ht_kind in constants.HTS_REQ_PORT:
7700 network_port = self.cfg.AllocatePort()
7701 else:
7702 network_port = None
7704 if constants.ENABLE_FILE_STORAGE:
7705 # this is needed because os.path.join does not accept None arguments
7706 if self.op.file_storage_dir is None:
7707 string_file_storage_dir = ""
7708 else:
7709 string_file_storage_dir = self.op.file_storage_dir
7711 # build the full file storage dir path
7712 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7713 string_file_storage_dir, instance)
7714 else:
7715 file_storage_dir = ""
7717 disks = _GenerateDiskTemplate(self,
7718 self.op.disk_template,
7719 instance, pnode_name,
7720 self.secondaries,
7721 self.disks,
7722 file_storage_dir,
7723 self.op.file_driver,
7724 0,
7725 feedback_fn)
7727 iobj = objects.Instance(name=instance, os=self.op.os_type,
7728 primary_node=pnode_name,
7729 nics=self.nics, disks=disks,
7730 disk_template=self.op.disk_template,
7731 admin_up=False,
7732 network_port=network_port,
7733 beparams=self.op.beparams,
7734 hvparams=self.op.hvparams,
7735 hypervisor=self.op.hypervisor,
7736 osparams=self.op.osparams,
7737 )
7739 if self.adopt_disks:
7740 # rename LVs to the newly-generated names; we need to construct
7741 # 'fake' LV disks with the old data, plus the new unique_id
7742 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7743 rename_to = []
7744 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7745 rename_to.append(t_dsk.logical_id)
7746 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7747 self.cfg.SetDiskID(t_dsk, pnode_name)
7748 result = self.rpc.call_blockdev_rename(pnode_name,
7749 zip(tmp_disks, rename_to))
7750 result.Raise("Failed to rename adopted LVs")
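# Illustrative example (hypothetical names): for an adopted volume
# "xenvg/mydata" and a generated logical_id ("xenvg", "<uuid>.disk0"),
# the rename above renames the existing on-node LV "mydata" to the
# Ganeti-generated name, so from this point on the adopted storage is
# indistinguishable from a freshly created instance disk.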
7752 feedback_fn("* creating instance disks...")
7753 try:
7754 _CreateDisks(self, iobj)
7755 except errors.OpExecError:
7756 self.LogWarning("Device creation failed, reverting...")
7757 try:
7758 _RemoveDisks(self, iobj)
7759 finally:
7760 self.cfg.ReleaseDRBDMinors(instance)
7761 raise
7763 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7764 feedback_fn("* wiping instance disks...")
7765 try:
7766 _WipeDisks(self, iobj)
7767 except errors.OpExecError:
7768 self.LogWarning("Device wiping failed, reverting...")
7769 try:
7770 _RemoveDisks(self, iobj)
7771 finally:
7772 self.cfg.ReleaseDRBDMinors(instance)
7773 raise
7775 feedback_fn("adding instance %s to cluster config" % instance)
7777 self.cfg.AddInstance(iobj, self.proc.GetECId())
7779 # Declare that we don't want to remove the instance lock anymore, as we've
7780 # added the instance to the config
7781 del self.remove_locks[locking.LEVEL_INSTANCE]
7782 # Unlock all the nodes
7783 if self.op.mode == constants.INSTANCE_IMPORT:
7784 nodes_keep = [self.op.src_node]
7785 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7786 if node != self.op.src_node]
7787 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7788 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7789 else:
7790 self.context.glm.release(locking.LEVEL_NODE)
7791 del self.acquired_locks[locking.LEVEL_NODE]
7793 if self.op.wait_for_sync:
7794 disk_abort = not _WaitForSync(self, iobj)
7795 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7796 # make sure the disks are not degraded (still sync-ing is ok)
7797 time.sleep(15)
7798 feedback_fn("* checking mirrors status")
7799 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7800 else:
7801 disk_abort = False
7803 if disk_abort:
7804 _RemoveDisks(self, iobj)
7805 self.cfg.RemoveInstance(iobj.name)
7806 # Make sure the instance lock gets removed
7807 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7808 raise errors.OpExecError("There are some degraded disks for"
7809 " this instance")
7811 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7812 if self.op.mode == constants.INSTANCE_CREATE:
7813 if not self.op.no_install:
7814 feedback_fn("* running the instance OS create scripts...")
7815 # FIXME: pass debug option from opcode to backend
7816 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7817 self.op.debug_level)
7818 result.Raise("Could not add os for instance %s"
7819 " on node %s" % (instance, pnode_name))
7821 elif self.op.mode == constants.INSTANCE_IMPORT:
7822 feedback_fn("* running the instance OS import scripts...")
7824 transfers = []
7826 for idx, image in enumerate(self.src_images):
7827 if not image:
7828 continue
7830 # FIXME: pass debug option from opcode to backend
7831 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7832 constants.IEIO_FILE, (image, ),
7833 constants.IEIO_SCRIPT,
7834 (iobj.disks[idx], idx),
7835 None)
7836 transfers.append(dt)
7838 import_result = \
7839 masterd.instance.TransferInstanceData(self, feedback_fn,
7840 self.op.src_node, pnode_name,
7841 self.pnode.secondary_ip,
7842 iobj, transfers)
7843 if not compat.all(import_result):
7844 self.LogWarning("Some disks for instance %s on node %s were not"
7845 " imported successfully" % (instance, pnode_name))
7847 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7848 feedback_fn("* preparing remote import...")
7849 # The source cluster will stop the instance before attempting to make a
7850 # connection. In some cases stopping an instance can take a long time,
7851 # hence the shutdown timeout is added to the connection timeout.
7852 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7853 self.op.source_shutdown_timeout)
7854 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7856 assert iobj.primary_node == self.pnode.name
7857 disk_results = \
7858 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7859 self.source_x509_ca,
7860 self._cds, timeouts)
7861 if not compat.all(disk_results):
7862 # TODO: Should the instance still be started, even if some disks
7863 # failed to import (valid for local imports, too)?
7864 self.LogWarning("Some disks for instance %s on node %s were not"
7865 " imported successfully" % (instance, pnode_name))
7867 # Run rename script on newly imported instance
7868 assert iobj.name == instance
7869 feedback_fn("Running rename script for %s" % instance)
7870 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7871 self.source_instance_name,
7872 self.op.debug_level)
7873 if result.fail_msg:
7874 self.LogWarning("Failed to run rename script for %s on node"
7875 " %s: %s" % (instance, pnode_name, result.fail_msg))
7877 else:
7878 # also checked in the prereq part
7879 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7880 % self.op.mode)
7882 if self.op.start:
7883 iobj.admin_up = True
7884 self.cfg.Update(iobj, feedback_fn)
7885 logging.info("Starting instance %s on node %s", instance, pnode_name)
7886 feedback_fn("* starting instance...")
7887 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7888 result.Raise("Could not start instance")
7890 return list(iobj.all_nodes)
7893 class LUConnectConsole(NoHooksLU):
7894 """Connect to an instance's console.
7896 This is somewhat special in that it returns the command line that
7897 you need to run on the master node in order to connect to the
7898 console.
7900 """
7906 def ExpandNames(self):
7907 self._ExpandAndLockInstance()
7909 def CheckPrereq(self):
7910 """Check prerequisites.
7912 This checks that the instance is in the cluster.
7914 """
7915 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7916 assert self.instance is not None, \
7917 "Cannot retrieve locked instance %s" % self.op.instance_name
7918 _CheckNodeOnline(self, self.instance.primary_node)
7920 def Exec(self, feedback_fn):
7921 """Connect to the console of an instance
7924 instance = self.instance
7925 node = instance.primary_node
7927 node_insts = self.rpc.call_instance_list([node],
7928 [instance.hypervisor])[node]
7929 node_insts.Raise("Can't get node information from %s" % node)
7931 if instance.name not in node_insts.payload:
7932 if instance.admin_up:
7933 state = "ERROR_down"
7934 else:
7935 state = "ADMIN_down"
7936 raise errors.OpExecError("Instance %s is not running (state %s)" %
7937 (instance.name, state))
7939 logging.debug("Connecting to console of %s on %s", instance.name, node)
7941 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7942 cluster = self.cfg.GetClusterInfo()
7943 # beparams and hvparams are passed separately, to avoid editing the
7944 # instance and then saving the defaults in the instance itself.
7945 hvparams = cluster.FillHV(instance)
7946 beparams = cluster.FillBE(instance)
7947 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7950 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
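# Illustrative example (hypothetical hostnames; the exact console command
# depends on the hypervisor): for a Xen instance the returned value is an
# SSH command line built by BuildCmd, roughly equivalent to
#   ssh -t root@node1.example.com 'xm console inst1.example.com'
# which the caller is expected to execute on the master node.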
7953 class LUReplaceDisks(LogicalUnit):
7954 """Replace the disks of an instance.
7957 HPATH = "mirrors-replace"
7958 HTYPE = constants.HTYPE_INSTANCE
7961 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7962 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7963 ("remote_node", None, ht.TMaybeString),
7964 ("iallocator", None, ht.TMaybeString),
7965 ("early_release", False, ht.TBool),
7969 def CheckArguments(self):
7970 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7971 self.op.iallocator)
7973 def ExpandNames(self):
7974 self._ExpandAndLockInstance()
7976 if self.op.iallocator is not None:
7977 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7979 elif self.op.remote_node is not None:
7980 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7981 self.op.remote_node = remote_node
7983 # Warning: do not remove the locking of the new secondary here
7984 # unless DRBD8.AddChildren is changed to work in parallel;
7985 # currently it doesn't since parallel invocations of
7986 # FindUnusedMinor will conflict
7987 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7988 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7990 else:
7991 self.needed_locks[locking.LEVEL_NODE] = []
7992 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7994 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7995 self.op.iallocator, self.op.remote_node,
7996 self.op.disks, False, self.op.early_release)
7998 self.tasklets = [self.replacer]
8000 def DeclareLocks(self, level):
8001 # If we're not already locking all nodes in the set we have to declare the
8002 # instance's primary/secondary nodes.
8003 if (level == locking.LEVEL_NODE and
8004 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8005 self._LockInstancesNodes()
8007 def BuildHooksEnv(self):
8008 """Build hooks env.
8010 This runs on the master, the primary and all the secondaries.
8012 """
8013 instance = self.replacer.instance
8014 env = {
8015 "MODE": self.op.mode,
8016 "NEW_SECONDARY": self.op.remote_node,
8017 "OLD_SECONDARY": instance.secondary_nodes[0],
8018 }
8019 env.update(_BuildInstanceHookEnvByObject(self, instance))
8020 nl = [
8021 self.cfg.GetMasterNode(),
8022 instance.primary_node,
8023 ]
8024 if self.op.remote_node is not None:
8025 nl.append(self.op.remote_node)
8026 return env, nl, nl
8029 class TLReplaceDisks(Tasklet):
8030 """Replaces disks for an instance.
8032 Note: Locking is not within the scope of this class.
8034 """
8035 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8036 disks, delay_iallocator, early_release):
8037 """Initializes this class.
8040 Tasklet.__init__(self, lu)
8043 self.instance_name = instance_name
8045 self.iallocator_name = iallocator_name
8046 self.remote_node = remote_node
8048 self.delay_iallocator = delay_iallocator
8049 self.early_release = early_release
8052 self.instance = None
8053 self.new_node = None
8054 self.target_node = None
8055 self.other_node = None
8056 self.remote_node_info = None
8057 self.node_secondary_ip = None
8059 @staticmethod
8060 def CheckArguments(mode, remote_node, iallocator):
8061 """Helper function for users of this class.
8063 """
8064 # check for valid parameter combination
8065 if mode == constants.REPLACE_DISK_CHG:
8066 if remote_node is None and iallocator is None:
8067 raise errors.OpPrereqError("When changing the secondary either an"
8068 " iallocator script must be used or the"
8069 " new node given", errors.ECODE_INVAL)
8071 if remote_node is not None and iallocator is not None:
8072 raise errors.OpPrereqError("Give either the iallocator or the new"
8073 " secondary, not both", errors.ECODE_INVAL)
8075 elif remote_node is not None or iallocator is not None:
8076 # Not replacing the secondary
8077 raise errors.OpPrereqError("The iallocator and new node options can"
8078 " only be used when changing the"
8079 " secondary node", errors.ECODE_INVAL)
8081 @staticmethod
8082 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8083 """Compute a new secondary node using an IAllocator.
8085 """
8086 ial = IAllocator(lu.cfg, lu.rpc,
8087 mode=constants.IALLOCATOR_MODE_RELOC,
8088 name=instance_name,
8089 relocate_from=relocate_from)
8091 ial.Run(iallocator_name)
8093 if not ial.success:
8094 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8095 " %s" % (iallocator_name, ial.info),
8096 errors.ECODE_NORES)
8098 if len(ial.result) != ial.required_nodes:
8099 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8100 " of nodes (%s), required %s" %
8102 len(ial.result), ial.required_nodes),
8105 remote_node_name = ial.result[0]
8107 lu.LogInfo("Selected new secondary for instance '%s': %s",
8108 instance_name, remote_node_name)
8110 return remote_node_name
8112 def _FindFaultyDisks(self, node_name):
8113 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8114 node_name, True)
8116 def CheckPrereq(self):
8117 """Check prerequisites.
8119 This checks that the instance is in the cluster.
8121 """
8122 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8123 assert instance is not None, \
8124 "Cannot retrieve locked instance %s" % self.instance_name
8126 if instance.disk_template != constants.DT_DRBD8:
8127 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8128 " instances", errors.ECODE_INVAL)
8130 if len(instance.secondary_nodes) != 1:
8131 raise errors.OpPrereqError("The instance has a strange layout,"
8132 " expected one secondary but found %d" %
8133 len(instance.secondary_nodes),
8134 errors.ECODE_FAULT)
8136 if not self.delay_iallocator:
8137 self._CheckPrereq2()
8139 def _CheckPrereq2(self):
8140 """Check prerequisites, second part.
8142 This function should always be part of CheckPrereq. It was separated and is
8143 now called from Exec because during node evacuation iallocator was only
8144 called with an unmodified cluster model, not taking planned changes into
8145 account.
8147 """
8148 instance = self.instance
8149 secondary_node = instance.secondary_nodes[0]
8151 if self.iallocator_name is None:
8152 remote_node = self.remote_node
8153 else:
8154 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8155 instance.name, instance.secondary_nodes)
8157 if remote_node is not None:
8158 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8159 assert self.remote_node_info is not None, \
8160 "Cannot retrieve locked node %s" % remote_node
8162 self.remote_node_info = None
8164 if remote_node == self.instance.primary_node:
8165 raise errors.OpPrereqError("The specified node is the primary node of"
8166 " the instance.", errors.ECODE_INVAL)
8168 if remote_node == secondary_node:
8169 raise errors.OpPrereqError("The specified node is already the"
8170 " secondary node of the instance.",
8173 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8174 constants.REPLACE_DISK_CHG):
8175 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8176 errors.ECODE_INVAL)
8178 if self.mode == constants.REPLACE_DISK_AUTO:
8179 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8180 faulty_secondary = self._FindFaultyDisks(secondary_node)
8182 if faulty_primary and faulty_secondary:
8183 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8184 " one node and can not be repaired"
8185 " automatically" % self.instance_name,
8189 self.disks = faulty_primary
8190 self.target_node = instance.primary_node
8191 self.other_node = secondary_node
8192 check_nodes = [self.target_node, self.other_node]
8193 elif faulty_secondary:
8194 self.disks = faulty_secondary
8195 self.target_node = secondary_node
8196 self.other_node = instance.primary_node
8197 check_nodes = [self.target_node, self.other_node]
8198 else:
8199 self.disks = []
8200 check_nodes = []
8202 else:
8203 # Non-automatic modes
8204 if self.mode == constants.REPLACE_DISK_PRI:
8205 self.target_node = instance.primary_node
8206 self.other_node = secondary_node
8207 check_nodes = [self.target_node, self.other_node]
8209 elif self.mode == constants.REPLACE_DISK_SEC:
8210 self.target_node = secondary_node
8211 self.other_node = instance.primary_node
8212 check_nodes = [self.target_node, self.other_node]
8214 elif self.mode == constants.REPLACE_DISK_CHG:
8215 self.new_node = remote_node
8216 self.other_node = instance.primary_node
8217 self.target_node = secondary_node
8218 check_nodes = [self.new_node, self.other_node]
8220 _CheckNodeNotDrained(self.lu, remote_node)
8221 _CheckNodeVmCapable(self.lu, remote_node)
8223 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8224 assert old_node_info is not None
8225 if old_node_info.offline and not self.early_release:
8226 # doesn't make sense to delay the release
8227 self.early_release = True
8228 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8229 " early-release mode", secondary_node)
8232 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8235 # If not specified all disks should be replaced
8236 if not self.disks:
8237 self.disks = range(len(self.instance.disks))
8239 for node in check_nodes:
8240 _CheckNodeOnline(self.lu, node)
8242 # Check whether disks are valid
8243 for disk_idx in self.disks:
8244 instance.FindDisk(disk_idx)
8246 # Get secondary node IP addresses
8247 node_2nd_ip = {}
8249 for node_name in [self.target_node, self.other_node, self.new_node]:
8250 if node_name is not None:
8251 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8253 self.node_secondary_ip = node_2nd_ip
8255 def Exec(self, feedback_fn):
8256 """Execute disk replacement.
8258 This dispatches the disk replacement to the appropriate handler.
8260 """
8261 if self.delay_iallocator:
8262 self._CheckPrereq2()
8264 if not self.disks:
8265 feedback_fn("No disks need replacement")
8266 return
8268 feedback_fn("Replacing disk(s) %s for %s" %
8269 (utils.CommaJoin(self.disks), self.instance.name))
8271 activate_disks = (not self.instance.admin_up)
8273 # Activate the instance disks if we're replacing them on a down instance
8274 if activate_disks:
8275 _StartInstanceDisks(self.lu, self.instance, True)
8277 try:
8278 # Should we replace the secondary node?
8279 if self.new_node is not None:
8280 fn = self._ExecDrbd8Secondary
8281 else:
8282 fn = self._ExecDrbd8DiskOnly
8284 return fn(feedback_fn)
8285 finally:
8287 # Deactivate the instance disks if we're replacing them on a
8288 # down instance
8289 if activate_disks:
8290 _SafeShutdownInstanceDisks(self.lu, self.instance)
8292 def _CheckVolumeGroup(self, nodes):
8293 self.lu.LogInfo("Checking volume groups")
8295 vgname = self.cfg.GetVGName()
8297 # Make sure volume group exists on all involved nodes
8298 results = self.rpc.call_vg_list(nodes)
8299 if not results:
8300 raise errors.OpExecError("Can't list volume groups on the nodes")
8302 for node in nodes:
8303 res = results[node]
8304 res.Raise("Error checking node %s" % node)
8305 if vgname not in res.payload:
8306 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8307 (vgname, node))
8309 def _CheckDisksExistence(self, nodes):
8310 # Check disk existence
8311 for idx, dev in enumerate(self.instance.disks):
8312 if idx not in self.disks:
8313 continue
8315 for node in nodes:
8316 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8317 self.cfg.SetDiskID(dev, node)
8319 result = self.rpc.call_blockdev_find(node, dev)
8321 msg = result.fail_msg
8322 if msg or not result.payload:
8323 if not msg:
8324 msg = "disk not found"
8325 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8326 (idx, node, msg))
8328 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8329 for idx, dev in enumerate(self.instance.disks):
8330 if idx not in self.disks:
8331 continue
8333 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8334 (idx, node_name))
8336 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8337 ldisk=ldisk):
8338 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8339 " replace disks for instance %s" %
8340 (node_name, self.instance.name))
8342 def _CreateNewStorage(self, node_name):
8343 vgname = self.cfg.GetVGName()
8345 iv_names = {}
8346 for idx, dev in enumerate(self.instance.disks):
8347 if idx not in self.disks:
8348 continue
8350 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8352 self.cfg.SetDiskID(dev, node_name)
8354 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8355 names = _GenerateUniqueNames(self.lu, lv_names)
8357 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8358 logical_id=(vgname, names[0]))
8359 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8360 logical_id=(vgname, names[1]))
8362 new_lvs = [lv_data, lv_meta]
8363 old_lvs = dev.children
8364 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8366 # we pass force_create=True to force the LVM creation
8367 for new_lv in new_lvs:
8368 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8369 _GetInstanceInfoText(self.instance), False)
8371 return iv_names
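# Shape note (illustrative): the returned iv_names maps each DRBD device's
# iv_name to a (drbd_dev, old_lvs, new_lvs) triple, e.g.
#   {"disk/0": (<DRBD8 disk>, [old data LV, old meta LV],
#               [new data LV, new meta LV])}
# and later drives the detach/rename/attach sequence as well as the final
# removal of the old storage.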
8373 def _CheckDevices(self, node_name, iv_names):
8374 for name, (dev, _, _) in iv_names.iteritems():
8375 self.cfg.SetDiskID(dev, node_name)
8377 result = self.rpc.call_blockdev_find(node_name, dev)
8379 msg = result.fail_msg
8380 if msg or not result.payload:
8381 if not msg:
8382 msg = "disk not found"
8383 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8384 (name, msg))
8386 if result.payload.is_degraded:
8387 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8389 def _RemoveOldStorage(self, node_name, iv_names):
8390 for name, (_, old_lvs, _) in iv_names.iteritems():
8391 self.lu.LogInfo("Remove logical volumes for %s" % name)
8394 self.cfg.SetDiskID(lv, node_name)
8396 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8398 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8399 hint="remove unused LVs manually")
8401 def _ReleaseNodeLock(self, node_name):
8402 """Releases the lock for a given node."""
8403 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8405 def _ExecDrbd8DiskOnly(self, feedback_fn):
8406 """Replace a disk on the primary or secondary for DRBD 8.
8408 The algorithm for replace is quite complicated:
8410 1. for each disk to be replaced:
8412 1. create new LVs on the target node with unique names
8413 1. detach old LVs from the drbd device
8414 1. rename old LVs to name_replaced.<time_t>
8415 1. rename new LVs to old LVs
8416 1. attach the new LVs (with the old names now) to the drbd device
8418 1. wait for sync across all devices
8420 1. for each modified disk:
8422 1. remove old LVs (which have the name name_replaced.<time_t>)
8424 Failures are not very well handled.
8426 """
8427 steps_total = 6
8429 # Step: check device activation
8430 self.lu.LogStep(1, steps_total, "Check device existence")
8431 self._CheckDisksExistence([self.other_node, self.target_node])
8432 self._CheckVolumeGroup([self.target_node, self.other_node])
8434 # Step: check other node consistency
8435 self.lu.LogStep(2, steps_total, "Check peer consistency")
8436 self._CheckDisksConsistency(self.other_node,
8437 self.other_node == self.instance.primary_node,
8438 False)
8440 # Step: create new storage
8441 self.lu.LogStep(3, steps_total, "Allocate new storage")
8442 iv_names = self._CreateNewStorage(self.target_node)
8444 # Step: for each lv, detach+rename*2+attach
8445 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8446 for dev, old_lvs, new_lvs in iv_names.itervalues():
8447 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8449 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8450 old_lvs)
8451 result.Raise("Can't detach drbd from local storage on node"
8452 " %s for device %s" % (self.target_node, dev.iv_name))
8454 #cfg.Update(instance)
8456 # ok, we created the new LVs, so now we know we have the needed
8457 # storage; as such, we proceed on the target node to rename
8458 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8459 # using the assumption that logical_id == physical_id (which in
8460 # turn is the unique_id on that node)
8462 # FIXME(iustin): use a better name for the replaced LVs
8463 temp_suffix = int(time.time())
8464 ren_fn = lambda d, suff: (d.physical_id[0],
8465 d.physical_id[1] + "_replaced-%s" % suff)
8467 # Build the rename list based on what LVs exist on the node
8468 rename_old_to_new = []
8469 for to_ren in old_lvs:
8470 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8471 if not result.fail_msg and result.payload:
8473 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8475 self.lu.LogInfo("Renaming the old LVs on the target node")
8476 result = self.rpc.call_blockdev_rename(self.target_node,
8477 rename_old_to_new)
8478 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8480 # Now we rename the new LVs to the old LVs
8481 self.lu.LogInfo("Renaming the new LVs on the target node")
8482 rename_new_to_old = [(new, old.physical_id)
8483 for old, new in zip(old_lvs, new_lvs)]
8484 result = self.rpc.call_blockdev_rename(self.target_node,
8485 rename_new_to_old)
8486 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8488 for old, new in zip(old_lvs, new_lvs):
8489 new.logical_id = old.logical_id
8490 self.cfg.SetDiskID(new, self.target_node)
8492 for disk in old_lvs:
8493 disk.logical_id = ren_fn(disk, temp_suffix)
8494 self.cfg.SetDiskID(disk, self.target_node)
8496 # Now that the new lvs have the old name, we can add them to the device
8497 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8498 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8499 new_lvs)
8500 msg = result.fail_msg
8501 if msg:
8502 for new_lv in new_lvs:
8503 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8504 new_lv).fail_msg
8505 if msg2:
8506 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8507 hint=("cleanup manually the unused logical"
8508 " volumes"))
8509 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8511 dev.children = new_lvs
8513 self.cfg.Update(self.instance, feedback_fn)
8515 cstep = 5
8516 if self.early_release:
8517 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8518 cstep += 1
8519 self._RemoveOldStorage(self.target_node, iv_names)
8520 # WARNING: we release both node locks here, do not do other RPCs
8521 # than WaitForSync to the primary node
8522 self._ReleaseNodeLock([self.target_node, self.other_node])
8525 # This can fail as the old devices are degraded and _WaitForSync
8526 # does a combined result over all disks, so we don't check its return value
8527 self.lu.LogStep(cstep, steps_total, "Sync devices")
8528 cstep += 1
8529 _WaitForSync(self.lu, self.instance)
8531 # Check all devices manually
8532 self._CheckDevices(self.instance.primary_node, iv_names)
8534 # Step: remove old storage
8535 if not self.early_release:
8536 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8537 cstep += 1
8538 self._RemoveOldStorage(self.target_node, iv_names)
8540 def _ExecDrbd8Secondary(self, feedback_fn):
8541 """Replace the secondary node for DRBD 8.
8543 The algorithm for replace is quite complicated:
8544 - for all disks of the instance:
8545 - create new LVs on the new node with same names
8546 - shutdown the drbd device on the old secondary
8547 - disconnect the drbd network on the primary
8548 - create the drbd device on the new secondary
8549 - network attach the drbd on the primary, using an artifice:
8550 the drbd code for Attach() will connect to the network if it
8551 finds a device which is connected to the good local disks but
8552 not network enabled
8553 - wait for sync across all devices
8554 - remove all disks from the old secondary
8556 Failures are not very well handled.
8558 """
8559 steps_total = 6
8561 # Step: check device activation
8562 self.lu.LogStep(1, steps_total, "Check device existence")
8563 self._CheckDisksExistence([self.instance.primary_node])
8564 self._CheckVolumeGroup([self.instance.primary_node])
8566 # Step: check other node consistency
8567 self.lu.LogStep(2, steps_total, "Check peer consistency")
8568 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8570 # Step: create new storage
8571 self.lu.LogStep(3, steps_total, "Allocate new storage")
8572 for idx, dev in enumerate(self.instance.disks):
8573 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8574 (self.new_node, idx))
8575 # we pass force_create=True to force LVM creation
8576 for new_lv in dev.children:
8577 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8578 _GetInstanceInfoText(self.instance), False)
8580 # Step 4: drbd minors and drbd setup changes
8581 # after this, we must manually remove the drbd minors on both the
8582 # error and the success paths
8583 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8584 minors = self.cfg.AllocateDRBDMinor([self.new_node
8585 for dev in self.instance.disks],
8586 self.instance.name)
8587 logging.debug("Allocated minors %r", minors)
8589 iv_names = {}
8590 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8591 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8592 (self.new_node, idx))
8593 # create new devices on new_node; note that we create two IDs:
8594 # one without port, so the drbd will be activated without
8595 # networking information on the new node at this stage, and one
8596 # with network, for the latter activation in step 4
8597 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8598 if self.instance.primary_node == o_node1:
8599 p_minor = o_minor1
8600 else:
8601 assert self.instance.primary_node == o_node2, "Three-node instance?"
8602 p_minor = o_minor2
8604 new_alone_id = (self.instance.primary_node, self.new_node, None,
8605 p_minor, new_minor, o_secret)
8606 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8607 p_minor, new_minor, o_secret)
8609 iv_names[idx] = (dev, dev.children, new_net_id)
8610 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8612 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8613 logical_id=new_alone_id,
8614 children=dev.children,
8617 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8618 _GetInstanceInfoText(self.instance), False)
8619 except errors.GenericError:
8620 self.cfg.ReleaseDRBDMinors(self.instance.name)
8623 # We have new devices, shutdown the drbd on the old secondary
8624 for idx, dev in enumerate(self.instance.disks):
8625 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8626 self.cfg.SetDiskID(dev, self.target_node)
8627 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8628 if msg:
8629 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8630 " node: %s" % (idx, msg),
8631 hint=("Please cleanup this device manually as"
8632 " soon as possible"))
8634 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8635 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8636 self.node_secondary_ip,
8637 self.instance.disks)\
8638 [self.instance.primary_node]
8640 msg = result.fail_msg
8641 if msg:
8642 # detaches didn't succeed (unlikely)
8643 self.cfg.ReleaseDRBDMinors(self.instance.name)
8644 raise errors.OpExecError("Can't detach the disks from the network on"
8645 " old node: %s" % (msg,))
8647 # if we managed to detach at least one, we update all the disks of
8648 # the instance to point to the new secondary
8649 self.lu.LogInfo("Updating instance configuration")
8650 for dev, _, new_logical_id in iv_names.itervalues():
8651 dev.logical_id = new_logical_id
8652 self.cfg.SetDiskID(dev, self.instance.primary_node)
8654 self.cfg.Update(self.instance, feedback_fn)
8656 # and now perform the drbd attach
8657 self.lu.LogInfo("Attaching primary drbds to new secondary"
8658 " (standalone => connected)")
8659 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8660 self.new_node],
8661 self.node_secondary_ip,
8662 self.instance.disks,
8663 self.instance.name,
8664 False)
8665 for to_node, to_result in result.items():
8666 msg = to_result.fail_msg
8667 if msg:
8668 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8669 to_node, msg,
8670 hint=("please do a gnt-instance info to see the"
8671 " status of disks"))
8672 cstep = 5
8673 if self.early_release:
8674 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8675 cstep += 1
8676 self._RemoveOldStorage(self.target_node, iv_names)
8677 # WARNING: we release all node locks here, do not do other RPCs
8678 # than WaitForSync to the primary node
8679 self._ReleaseNodeLock([self.instance.primary_node,
8680 self.target_node,
8681 self.new_node])
8684 # This can fail as the old devices are degraded and _WaitForSync
8685 # does a combined result over all disks, so we don't check its return value
8686 self.lu.LogStep(cstep, steps_total, "Sync devices")
8687 cstep += 1
8688 _WaitForSync(self.lu, self.instance)
8690 # Check all devices manually
8691 self._CheckDevices(self.instance.primary_node, iv_names)
8693 # Step: remove old storage
8694 if not self.early_release:
8695 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8696 self._RemoveOldStorage(self.target_node, iv_names)
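# CLI sketch (assuming the standard gnt-instance front-end): this tasklet
# backs invocations such as
#   gnt-instance replace-disks -p inst1        # replace on the primary
#   gnt-instance replace-disks -s inst1        # replace on the secondary
#   gnt-instance replace-disks -n node3 inst1  # change the secondary node
#   gnt-instance replace-disks -I hail inst1   # let an iallocator choose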
8699 class LURepairNodeStorage(NoHooksLU):
8700 """Repairs the volume group on a node.
8705 ("storage_type", ht.NoDefault, _CheckStorageType),
8706 ("name", ht.NoDefault, ht.TNonEmptyString),
8707 ("ignore_consistency", False, ht.TBool),
8711 def CheckArguments(self):
8712 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8714 storage_type = self.op.storage_type
8716 if (constants.SO_FIX_CONSISTENCY not in
8717 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8718 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8719 " repaired" % storage_type,
8722 def ExpandNames(self):
8723 self.needed_locks = {
8724 locking.LEVEL_NODE: [self.op.node_name],
8725 }
8727 def _CheckFaultyDisks(self, instance, node_name):
8728 """Ensure faulty disks abort the opcode or at least warn."""
8730 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8732 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8733 " node '%s'" % (instance.name, node_name),
8735 except errors.OpPrereqError, err:
8736 if self.op.ignore_consistency:
8737 self.proc.LogWarning(str(err.args[0]))
8741 def CheckPrereq(self):
8742 """Check prerequisites.
8745 # Check whether any instance on this node has faulty disks
8746 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8747 if not inst.admin_up:
8748 continue
8749 check_nodes = set(inst.all_nodes)
8750 check_nodes.discard(self.op.node_name)
8751 for inst_node_name in check_nodes:
8752 self._CheckFaultyDisks(inst, inst_node_name)
8754 def Exec(self, feedback_fn):
8755 feedback_fn("Repairing storage unit '%s' on %s ..." %
8756 (self.op.name, self.op.node_name))
8758 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8759 result = self.rpc.call_storage_execute(self.op.node_name,
8760 self.op.storage_type, st_args,
8761 self.op.name,
8762 constants.SO_FIX_CONSISTENCY)
8763 result.Raise("Failed to repair storage unit '%s' on %s" %
8764 (self.op.name, self.op.node_name))
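# CLI sketch (assuming the standard gnt-node front-end): this LU backs e.g.
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# and only storage types that list SO_FIX_CONSISTENCY among their valid
# operations can be repaired, as enforced in CheckArguments above.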
8767 class LUNodeEvacuationStrategy(NoHooksLU):
8768 """Computes the node evacuation strategy.
8772 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8773 ("remote_node", None, ht.TMaybeString),
8774 ("iallocator", None, ht.TMaybeString),
8778 def CheckArguments(self):
8779 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8781 def ExpandNames(self):
8782 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8783 self.needed_locks = locks = {}
8784 if self.op.remote_node is None:
8785 locks[locking.LEVEL_NODE] = locking.ALL_SET
8786 else:
8787 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8788 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8790 def Exec(self, feedback_fn):
8791 if self.op.remote_node is not None:
8792 instances = []
8793 for node in self.op.nodes:
8794 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8795 result = []
8796 for i in instances:
8797 if i.primary_node == self.op.remote_node:
8798 raise errors.OpPrereqError("Node %s is the primary node of"
8799 " instance %s, cannot use it as"
8800 " secondary" %
8801 (self.op.remote_node, i.name),
8802 errors.ECODE_INVAL)
8803 result.append([i.name, self.op.remote_node])
8804 else:
8805 ial = IAllocator(self.cfg, self.rpc,
8806 mode=constants.IALLOCATOR_MODE_MEVAC,
8807 evac_nodes=self.op.nodes)
8808 ial.Run(self.op.iallocator, validate=True)
8809 if not ial.success:
8810 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8811 errors.ECODE_NORES)
8812 result = ial.result
8814 return result
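# Result shape (illustrative; the exact layout follows the iallocator
# protocol): the remote-node branch returns pairs such as
#   [["inst1.example.com", "node4.example.com"],
#    ["inst2.example.com", "node4.example.com"]]
# while the iallocator branch passes through ial.result from the MEVAC run.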
8816 class LUGrowDisk(LogicalUnit):
8817 """Grow a disk of an instance.
8821 HTYPE = constants.HTYPE_INSTANCE
8824 ("disk", ht.NoDefault, ht.TInt),
8825 ("amount", ht.NoDefault, ht.TInt),
8826 ("wait_for_sync", True, ht.TBool),
8830 def ExpandNames(self):
8831 self._ExpandAndLockInstance()
8832 self.needed_locks[locking.LEVEL_NODE] = []
8833 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8835 def DeclareLocks(self, level):
8836 if level == locking.LEVEL_NODE:
8837 self._LockInstancesNodes()
8839 def BuildHooksEnv(self):
8840 """Build hooks env.
8842 This runs on the master, the primary and all the secondaries.
8844 """
8845 env = {
8846 "DISK": self.op.disk,
8847 "AMOUNT": self.op.amount,
8848 }
8849 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8850 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8851 return env, nl, nl
8853 def CheckPrereq(self):
8854 """Check prerequisites.
8856 This checks that the instance is in the cluster.
8858 """
8859 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8860 assert instance is not None, \
8861 "Cannot retrieve locked instance %s" % self.op.instance_name
8862 nodenames = list(instance.all_nodes)
8863 for node in nodenames:
8864 _CheckNodeOnline(self, node)
8866 self.instance = instance
8868 if instance.disk_template not in constants.DTS_GROWABLE:
8869 raise errors.OpPrereqError("Instance's disk layout does not support"
8870 " growing.", errors.ECODE_INVAL)
8872 self.disk = instance.FindDisk(self.op.disk)
8874 if instance.disk_template != constants.DT_FILE:
8875 # TODO: check the free disk space for file, when that feature
8876 # will be supported
8877 _CheckNodesFreeDiskPerVG(self, nodenames,
8878 {self.disk.physical_id[0]: self.op.amount})
8880 def Exec(self, feedback_fn):
8881 """Execute disk grow.
8884 instance = self.instance
8887 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8889 raise errors.OpExecError("Cannot activate block device to grow")
8891 for node in instance.all_nodes:
8892 self.cfg.SetDiskID(disk, node)
8893 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8894 result.Raise("Grow request failed to node %s" % node)
8896 # TODO: Rewrite code to work properly
8897 # DRBD goes into sync mode for a short amount of time after executing the
8898 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8899 # calling "resize" in sync mode fails. Sleeping for a short amount of
8900 # time is a work-around.
8901 time.sleep(5)
8903 disk.RecordGrow(self.op.amount)
8904 self.cfg.Update(instance, feedback_fn)
8905 if self.op.wait_for_sync:
8906 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8907 if disk_abort:
8908 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8909 " status.\nPlease check the instance.")
8910 if not instance.admin_up:
8911 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8912 elif not instance.admin_up:
8913 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8914 " not supposed to be running because no wait for"
8915 " sync mode was requested.")
8918 class LUQueryInstanceData(NoHooksLU):
8919 """Query runtime instance data.
8923 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8924 ("static", False, ht.TBool),
8928 def ExpandNames(self):
8929 self.needed_locks = {}
8930 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8932 if self.op.instances:
8933 self.wanted_names = []
8934 for name in self.op.instances:
8935 full_name = _ExpandInstanceName(self.cfg, name)
8936 self.wanted_names.append(full_name)
8937 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8938 else:
8939 self.wanted_names = None
8940 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8942 self.needed_locks[locking.LEVEL_NODE] = []
8943 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8945 def DeclareLocks(self, level):
8946 if level == locking.LEVEL_NODE:
8947 self._LockInstancesNodes()
8949 def CheckPrereq(self):
8950 """Check prerequisites.
8952 This only checks the optional instance list against the existing names.
8954 """
8955 if self.wanted_names is None:
8956 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8958 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8959 in self.wanted_names]
8961 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8962 """Returns the status of a block device
8965 if self.op.static or not node:
8968 self.cfg.SetDiskID(dev, node)
8970 result = self.rpc.call_blockdev_find(node, dev)
8974 result.Raise("Can't compute disk status for %s" % instance_name)
8976 status = result.payload
8980 return (status.dev_path, status.major, status.minor,
8981 status.sync_percent, status.estimated_time,
8982 status.is_degraded, status.ldisk_status)
8984 def _ComputeDiskStatus(self, instance, snode, dev):
8985 """Compute block device status.
8988 if dev.dev_type in constants.LDS_DRBD:
8989 # we change the snode then (otherwise we use the one passed in)
8990 if dev.logical_id[0] == instance.primary_node:
8991 snode = dev.logical_id[1]
8992 else:
8993 snode = dev.logical_id[0]
8995 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8996 instance.name, dev)
8997 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8999 if dev.children:
9000 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9001 for child in dev.children]
9002 else:
9003 dev_children = []
9006 "iv_name": dev.iv_name,
9007 "dev_type": dev.dev_type,
9008 "logical_id": dev.logical_id,
9009 "physical_id": dev.physical_id,
9010 "pstatus": dev_pstatus,
9011 "sstatus": dev_sstatus,
9012 "children": dev_children,
9019 def Exec(self, feedback_fn):
9020 """Gather and return data"""
9023 cluster = self.cfg.GetClusterInfo()
9025 for instance in self.wanted_instances:
9026 if not self.op.static:
9027 remote_info = self.rpc.call_instance_info(instance.primary_node,
9028 instance.name,
9029 instance.hypervisor)
9030 remote_info.Raise("Error checking node %s" % instance.primary_node)
9031 remote_info = remote_info.payload
9032 if remote_info and "state" in remote_info:
9033 remote_state = "up"
9034 else:
9035 remote_state = "down"
9036 else:
9037 remote_state = None
9038 if instance.admin_up:
9039 config_state = "up"
9040 else:
9041 config_state = "down"
9043 disks = [self._ComputeDiskStatus(instance, None, device)
9044 for device in instance.disks]
9047 "name": instance.name,
9048 "config_state": config_state,
9049 "run_state": remote_state,
9050 "pnode": instance.primary_node,
9051 "snodes": instance.secondary_nodes,
9053 # this happens to be the same format used for hooks
9054 "nics": _NICListToTuple(self, instance.nics),
9055 "disk_template": instance.disk_template,
9057 "hypervisor": instance.hypervisor,
9058 "network_port": instance.network_port,
9059 "hv_instance": instance.hvparams,
9060 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9061 "be_instance": instance.beparams,
9062 "be_actual": cluster.FillBE(instance),
9063 "os_instance": instance.osparams,
9064 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9065 "serial_no": instance.serial_no,
9066 "mtime": instance.mtime,
9067 "ctime": instance.ctime,
9068 "uuid": instance.uuid,
9071 result[instance.name] = idict
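# Shape note (illustrative, abridged): the returned mapping is keyed by
# instance name, e.g.
#   {"inst1.example.com": {"name": "inst1.example.com",
#                          "config_state": "up", "run_state": "up",
#                          "pnode": "node1.example.com", "disks": [...],
#                          "nics": [...], ...}}
# with run_state set to None when only static information was requested.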
9076 class LUSetInstanceParams(LogicalUnit):
9077 """Modifies an instances's parameters.
9080 HPATH = "instance-modify"
9081 HTYPE = constants.HTYPE_INSTANCE
9082 _OP_PARAMS = [
9083 _PInstanceName,
9084 ("nics", ht.EmptyList, ht.TList),
9085 ("disks", ht.EmptyList, ht.TList),
9086 ("beparams", ht.EmptyDict, ht.TDict),
9087 ("hvparams", ht.EmptyDict, ht.TDict),
9088 ("disk_template", None, ht.TMaybeString),
9089 ("remote_node", None, ht.TMaybeString),
9090 ("os_name", None, ht.TMaybeString),
9091 ("force_variant", False, ht.TBool),
9092 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9097 def CheckArguments(self):
9098 if not (self.op.nics or self.op.disks or self.op.disk_template or
9099 self.op.hvparams or self.op.beparams or self.op.os_name):
9100 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9102 if self.op.hvparams:
9103 _CheckGlobalHvParams(self.op.hvparams)
9105 # Disk validation
9106 disk_addremove = 0
9107 for disk_op, disk_dict in self.op.disks:
9108 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9109 if disk_op == constants.DDM_REMOVE:
9110 disk_addremove += 1
9111 continue
9112 elif disk_op == constants.DDM_ADD:
9113 disk_addremove += 1
9114 else:
9115 if not isinstance(disk_op, int):
9116 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9117 if not isinstance(disk_dict, dict):
9118 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9119 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9121 if disk_op == constants.DDM_ADD:
9122 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9123 if mode not in constants.DISK_ACCESS_SET:
9124 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9126 size = disk_dict.get('size', None)
9127 if size is None:
9128 raise errors.OpPrereqError("Required disk parameter size missing",
9129 errors.ECODE_INVAL)
9130 try:
9131 size = int(size)
9132 except (TypeError, ValueError), err:
9133 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9134 str(err), errors.ECODE_INVAL)
9135 disk_dict['size'] = size
9136 else:
9137 # modification of disk
9138 if 'size' in disk_dict:
9139 raise errors.OpPrereqError("Disk size change not possible, use"
9140 " grow-disk", errors.ECODE_INVAL)
9142 if disk_addremove > 1:
9143 raise errors.OpPrereqError("Only one disk add or remove operation"
9144 " supported at a time", errors.ECODE_INVAL)
9146 if self.op.disks and self.op.disk_template is not None:
9147 raise errors.OpPrereqError("Disk template conversion and other disk"
9148 " changes not supported at the same time",
9151 if self.op.disk_template:
9152 _CheckDiskTemplate(self.op.disk_template)
9153 if (self.op.disk_template in constants.DTS_NET_MIRROR and
9154 self.op.remote_node is None):
9155 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9156 " one requires specifying a secondary node",
9161 for nic_op, nic_dict in self.op.nics:
9162 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9163 if nic_op == constants.DDM_REMOVE:
9164 nic_addremove += 1
9165 continue
9166 elif nic_op == constants.DDM_ADD:
9167 nic_addremove += 1
9168 else:
9169 if not isinstance(nic_op, int):
9170 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9171 if not isinstance(nic_dict, dict):
9172 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9173 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9175 # nic_dict should be a dict
9176 nic_ip = nic_dict.get('ip', None)
9177 if nic_ip is not None:
9178 if nic_ip.lower() == constants.VALUE_NONE:
9179 nic_dict['ip'] = None
9180 else:
9181 if not netutils.IPAddress.IsValid(nic_ip):
9182 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9183 errors.ECODE_INVAL)
9185 nic_bridge = nic_dict.get('bridge', None)
9186 nic_link = nic_dict.get('link', None)
9187 if nic_bridge and nic_link:
9188 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9189 " at the same time", errors.ECODE_INVAL)
9190 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9191 nic_dict['bridge'] = None
9192 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9193 nic_dict['link'] = None
9195 if nic_op == constants.DDM_ADD:
9196 nic_mac = nic_dict.get('mac', None)
9197 if nic_mac is None:
9198 nic_dict['mac'] = constants.VALUE_AUTO
9200 if 'mac' in nic_dict:
9201 nic_mac = nic_dict['mac']
9202 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9203 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9205 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9206 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9207 " modifying an existing nic",
9210 if nic_addremove > 1:
9211 raise errors.OpPrereqError("Only one NIC add or remove operation"
9212 " supported at a time", errors.ECODE_INVAL)
9214 def ExpandNames(self):
9215 self._ExpandAndLockInstance()
9216 self.needed_locks[locking.LEVEL_NODE] = []
9217 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9219 def DeclareLocks(self, level):
9220 if level == locking.LEVEL_NODE:
9221 self._LockInstancesNodes()
9222 if self.op.disk_template and self.op.remote_node:
9223 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9224 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9226 def BuildHooksEnv(self):
9227 """Build hooks env.
9229 This runs on the master, primary and secondaries.
9231 """
9232 args = dict()
9233 if constants.BE_MEMORY in self.be_new:
9234 args['memory'] = self.be_new[constants.BE_MEMORY]
9235 if constants.BE_VCPUS in self.be_new:
9236 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9237 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9238 # information at all.
9239 if self.op.nics:
9240 args['nics'] = []
9241 nic_override = dict(self.op.nics)
9242 for idx, nic in enumerate(self.instance.nics):
9243 if idx in nic_override:
9244 this_nic_override = nic_override[idx]
9246 this_nic_override = {}
9247 if 'ip' in this_nic_override:
9248 ip = this_nic_override['ip']
9249 else:
9250 ip = nic.ip
9251 if 'mac' in this_nic_override:
9252 mac = this_nic_override['mac']
9253 else:
9254 mac = nic.mac
9255 if idx in self.nic_pnew:
9256 nicparams = self.nic_pnew[idx]
9257 else:
9258 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9259 mode = nicparams[constants.NIC_MODE]
9260 link = nicparams[constants.NIC_LINK]
9261 args['nics'].append((ip, mac, mode, link))
9262 if constants.DDM_ADD in nic_override:
9263 ip = nic_override[constants.DDM_ADD].get('ip', None)
9264 mac = nic_override[constants.DDM_ADD]['mac']
9265 nicparams = self.nic_pnew[constants.DDM_ADD]
9266 mode = nicparams[constants.NIC_MODE]
9267 link = nicparams[constants.NIC_LINK]
9268 args['nics'].append((ip, mac, mode, link))
9269 elif constants.DDM_REMOVE in nic_override:
9270 del args['nics'][-1]
9272 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9273 if self.op.disk_template:
9274 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9275 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9276 return env, nl, nl
9278 def CheckPrereq(self):
9279 """Check prerequisites.
9281 This only checks the instance list against the existing names.
9283 """
9284 # checking the new params on the primary/secondary nodes
9286 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9287 cluster = self.cluster = self.cfg.GetClusterInfo()
9288 assert self.instance is not None, \
9289 "Cannot retrieve locked instance %s" % self.op.instance_name
9290 pnode = instance.primary_node
9291 nodelist = list(instance.all_nodes)
9294 if self.op.os_name and not self.op.force:
9295 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9296 self.op.force_variant)
9297 instance_os = self.op.os_name
9298 else:
9299 instance_os = instance.os
9301 if self.op.disk_template:
9302 if instance.disk_template == self.op.disk_template:
9303 raise errors.OpPrereqError("Instance already has disk template %s" %
9304 instance.disk_template, errors.ECODE_INVAL)
9306 if (instance.disk_template,
9307 self.op.disk_template) not in self._DISK_CONVERSIONS:
9308 raise errors.OpPrereqError("Unsupported disk template conversion from"
9309 " %s to %s" % (instance.disk_template,
9310 self.op.disk_template),
9311 errors.ECODE_INVAL)
9312 _CheckInstanceDown(self, instance, "cannot change disk template")
9313 if self.op.disk_template in constants.DTS_NET_MIRROR:
9314 if self.op.remote_node == pnode:
9315 raise errors.OpPrereqError("Given new secondary node %s is the same"
9316 " as the primary node of the instance" %
9317 self.op.remote_node, errors.ECODE_STATE)
9318 _CheckNodeOnline(self, self.op.remote_node)
9319 _CheckNodeNotDrained(self, self.op.remote_node)
9320 # FIXME: here we assume that the old instance type is DT_PLAIN
9321 assert instance.disk_template == constants.DT_PLAIN
9322 disks = [{"size": d.size, "vg": d.logical_id[0]}
9323 for d in instance.disks]
9324 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9325 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9327 # hvparams processing
9328 if self.op.hvparams:
9329 hv_type = instance.hypervisor
9330 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9331 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9332 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9335 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9336 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9337 self.hv_new = hv_new # the new actual values
9338 self.hv_inst = i_hvdict # the new dict (without defaults)
9339 else:
9340 self.hv_new = self.hv_inst = {}
9342 # beparams processing
9343 if self.op.beparams:
9344 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9345 use_none=True)
9346 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9347 be_new = cluster.SimpleFillBE(i_bedict)
9348 self.be_new = be_new # the new actual values
9349 self.be_inst = i_bedict # the new dict (without defaults)
9350 else:
9351 self.be_new = self.be_inst = {}
9353 # osparams processing
9354 if self.op.osparams:
9355 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9356 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9357 self.os_inst = i_osdict # the new dict (without defaults)
9358 else:
9359 self.os_inst = {}
9361 self.warn = []
9363 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9364 mem_check_list = [pnode]
9365 if be_new[constants.BE_AUTO_BALANCE]:
9366 # either we changed auto_balance to yes or it was from before
9367 mem_check_list.extend(instance.secondary_nodes)
9368 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9369 instance.hypervisor)
9370 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9371 instance.hypervisor)
9372 pninfo = nodeinfo[pnode]
9373 msg = pninfo.fail_msg
9374 if msg:
9375 # Assume the primary node is unreachable and go ahead
9376 self.warn.append("Can't get info from primary node %s: %s" %
9377 (pnode, msg))
9378 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9379 self.warn.append("Node data from primary node %s doesn't contain"
9380 " free memory information" % pnode)
9381 elif instance_info.fail_msg:
9382 self.warn.append("Can't get instance runtime information: %s" %
9383 instance_info.fail_msg)
9384 else:
9385 if instance_info.payload:
9386 current_mem = int(instance_info.payload['memory'])
9387 else:
9388 # Assume instance not running
9389 # (there is a slight race condition here, but it's not very probable,
9390 # and we have no other way to check)
9391 current_mem = 0
9392 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9393 pninfo.payload['memory_free'])
9394 if miss_mem > 0:
9395 raise errors.OpPrereqError("This change will prevent the instance"
9396 " from starting, due to %d MB of memory"
9397 " missing on its primary node" % miss_mem,
9400 if be_new[constants.BE_AUTO_BALANCE]:
9401 for node, nres in nodeinfo.items():
9402 if node not in instance.secondary_nodes:
9403 continue
9404 msg = nres.fail_msg
9405 if msg:
9406 self.warn.append("Can't get info from secondary node %s: %s" %
9407 (node, msg))
9408 elif not isinstance(nres.payload.get('memory_free', None), int):
9409 self.warn.append("Secondary node %s didn't return free"
9410 " memory information" % node)
9411 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9412 self.warn.append("Not enough memory to failover instance to"
9413 " secondary node %s" % node)
9415 # NIC processing
9416 self.nic_pnew = {}
9417 self.nic_pinst = {}
9418 for nic_op, nic_dict in self.op.nics:
9419 if nic_op == constants.DDM_REMOVE:
9420 if not instance.nics:
9421 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9424 if nic_op != constants.DDM_ADD:
9426 if not instance.nics:
9427 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9428 " no NICs" % nic_op,
9430 if nic_op < 0 or nic_op >= len(instance.nics):
9431 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9433 (nic_op, len(instance.nics) - 1),
9435 old_nic_params = instance.nics[nic_op].nicparams
9436 old_nic_ip = instance.nics[nic_op].ip
9441 update_params_dict = dict([(key, nic_dict[key])
9442 for key in constants.NICS_PARAMETERS
9443 if key in nic_dict])
9445 if 'bridge' in nic_dict:
9446 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9448 new_nic_params = _GetUpdatedParams(old_nic_params,
9449 update_params_dict)
9450 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9451 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9452 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9453 self.nic_pinst[nic_op] = new_nic_params
9454 self.nic_pnew[nic_op] = new_filled_nic_params
9455 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9457 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9458 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9459 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9460 if msg:
9461 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9462 if self.op.force:
9463 self.warn.append(msg)
9464 else:
9465 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9466 if new_nic_mode == constants.NIC_MODE_ROUTED:
9467 if 'ip' in nic_dict:
9468 nic_ip = nic_dict['ip']
9469 else:
9470 nic_ip = old_nic_ip
9471 if nic_ip is None:
9472 raise errors.OpPrereqError('Cannot set the nic ip to None'
9473 ' on a routed nic', errors.ECODE_INVAL)
9474 if 'mac' in nic_dict:
9475 nic_mac = nic_dict['mac']
9476 if nic_mac is None:
9477 raise errors.OpPrereqError('Cannot set the nic mac to None',
9478 errors.ECODE_INVAL)
9479 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9480 # otherwise generate the mac
9481 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9482 else:
9483 # or validate/reserve the current one
9484 try:
9485 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9486 except errors.ReservationError:
9487 raise errors.OpPrereqError("MAC address %s already in use"
9488 " in cluster" % nic_mac,
9489 errors.ECODE_NOTUNIQUE)
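# For reference, the (nic_op, nic_dict) pairs handled above come in three
# shapes (a sketch; the exact payload is assembled by the client tools):
# (constants.DDM_ADD, {'mac': 'auto', 'ip': None}) - append a new NIC
# (constants.DDM_REMOVE, {}) - drop the last NIC
# (0, {'mac': '00:11:22:33:44:55'}) - modify NIC index 0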
9492 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9493 raise errors.OpPrereqError("Disk operations not supported for"
9494 " diskless instances",
9496 for disk_op, _ in self.op.disks:
9497 if disk_op == constants.DDM_REMOVE:
9498 if len(instance.disks) == 1:
9499 raise errors.OpPrereqError("Cannot remove the last disk of"
9500 " an instance", errors.ECODE_INVAL)
9501 _CheckInstanceDown(self, instance, "cannot remove disks")
9503 if (disk_op == constants.DDM_ADD and
9504 len(instance.disks) >= constants.MAX_DISKS):
9505 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9506 " add more" % constants.MAX_DISKS,
9508 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9510 if disk_op < 0 or disk_op >= len(instance.disks):
9511 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9513 (disk_op, len(instance.disks)),
9518 def _ConvertPlainToDrbd(self, feedback_fn):
9519 """Converts an instance from plain to drbd.
9522 feedback_fn("Converting template to drbd")
9523 instance = self.instance
9524 pnode = instance.primary_node
9525 snode = self.op.remote_node
9527 # create a fake disk info for _GenerateDiskTemplate
9528 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9529 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9530 instance.name, pnode, [snode],
9531 disk_info, None, None, 0, feedback_fn)
9532 info = _GetInstanceInfoText(instance)
9533 feedback_fn("Creating aditional volumes...")
9534 # first, create the missing data and meta devices
9535 for disk in new_disks:
9536 # unfortunately this is... not too nice
9537 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9538 info, True)
9539 for child in disk.children:
9540 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9541 # at this stage, all new LVs have been created, we can rename the
9542 # old ones
9543 feedback_fn("Renaming original volumes...")
9544 rename_list = [(o, n.children[0].logical_id)
9545 for (o, n) in zip(instance.disks, new_disks)]
9546 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9547 result.Raise("Failed to rename original LVs")
9549 feedback_fn("Initializing DRBD devices...")
9550 # all child devices are in place, we can now create the DRBD devices
9551 for disk in new_disks:
9552 for node in [pnode, snode]:
9553 f_create = node == pnode
9554 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9556 # at this point, the instance has been modified
9557 instance.disk_template = constants.DT_DRBD8
9558 instance.disks = new_disks
9559 self.cfg.Update(instance, feedback_fn)
9561 # disks are created, waiting for sync
9562 disk_abort = not _WaitForSync(self, instance)
9564 raise errors.OpExecError("There are some degraded disks for"
9565 " this instance, please cleanup manually")
9567 def _ConvertDrbdToPlain(self, feedback_fn):
9568 """Converts an instance from drbd to plain.
9571 instance = self.instance
9572 assert len(instance.secondary_nodes) == 1
9573 pnode = instance.primary_node
9574 snode = instance.secondary_nodes[0]
9575 feedback_fn("Converting template to plain")
9577 old_disks = instance.disks
9578 new_disks = [d.children[0] for d in old_disks]
9580 # copy over size and mode
9581 for parent, child in zip(old_disks, new_disks):
9582 child.size = parent.size
9583 child.mode = parent.mode
9585 # update instance structure
9586 instance.disks = new_disks
9587 instance.disk_template = constants.DT_PLAIN
9588 self.cfg.Update(instance, feedback_fn)
9590 feedback_fn("Removing volumes on the secondary node...")
9591 for disk in old_disks:
9592 self.cfg.SetDiskID(disk, snode)
9593 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9595 self.LogWarning("Could not remove block device %s on node %s,"
9596 " continuing anyway: %s", disk.iv_name, snode, msg)
9598 feedback_fn("Removing unneeded volumes on the primary node...")
9599 for idx, disk in enumerate(old_disks):
9600 meta = disk.children[1]
9601 self.cfg.SetDiskID(meta, pnode)
9602 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9604 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9605 " continuing anyway: %s", idx, pnode, msg)
9607 def Exec(self, feedback_fn):
9608 """Modifies an instance.
9610 All parameters take effect only at the next restart of the instance.
9612 """
9613 # Process here the warnings from CheckPrereq, as we don't have a
9614 # feedback_fn there.
9615 for warn in self.warn:
9616 feedback_fn("WARNING: %s" % warn)
9619 instance = self.instance
9621 for disk_op, disk_dict in self.op.disks:
9622 if disk_op == constants.DDM_REMOVE:
9623 # remove the last disk
9624 device = instance.disks.pop()
9625 device_idx = len(instance.disks)
9626 for node, disk in device.ComputeNodeTree(instance.primary_node):
9627 self.cfg.SetDiskID(disk, node)
9628 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9630 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9631 " continuing anyway", device_idx, node, msg)
9632 result.append(("disk/%d" % device_idx, "remove"))
9633 elif disk_op == constants.DDM_ADD:
9635 if instance.disk_template == constants.DT_FILE:
9636 file_driver, file_path = instance.disks[0].logical_id
9637 file_path = os.path.dirname(file_path)
9638 else:
9639 file_driver = file_path = None
9640 disk_idx_base = len(instance.disks)
9641 new_disk = _GenerateDiskTemplate(self,
9642 instance.disk_template,
9643 instance.name, instance.primary_node,
9644 instance.secondary_nodes,
9645 [disk_dict],
9646 file_path,
9647 file_driver,
9648 disk_idx_base, feedback_fn)[0]
9649 instance.disks.append(new_disk)
9650 info = _GetInstanceInfoText(instance)
9652 logging.info("Creating volume %s for instance %s",
9653 new_disk.iv_name, instance.name)
9654 # Note: this needs to be kept in sync with _CreateDisks
9656 for node in instance.all_nodes:
9657 f_create = node == instance.primary_node
9658 try:
9659 _CreateBlockDev(self, node, instance, new_disk,
9660 f_create, info, f_create)
9661 except errors.OpExecError, err:
9662 self.LogWarning("Failed to create volume %s (%s) on"
9663 " node %s: %s",
9664 new_disk.iv_name, new_disk, node, err)
9665 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9666 (new_disk.size, new_disk.mode)))
9667 else:
9668 # change a given disk
9669 instance.disks[disk_op].mode = disk_dict['mode']
9670 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9672 if self.op.disk_template:
9673 r_shut = _ShutdownInstanceDisks(self, instance)
9675 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9676 " proceed with disk template conversion")
9677 mode = (instance.disk_template, self.op.disk_template)
9678 try:
9679 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9680 except:
9681 self.cfg.ReleaseDRBDMinors(instance.name)
9682 raise
9683 result.append(("disk_template", self.op.disk_template))
9686 for nic_op, nic_dict in self.op.nics:
9687 if nic_op == constants.DDM_REMOVE:
9688 # remove the last nic
9689 del instance.nics[-1]
9690 result.append(("nic.%d" % len(instance.nics), "remove"))
9691 elif nic_op == constants.DDM_ADD:
9692 # mac and bridge should be set, by now
9693 mac = nic_dict['mac']
9694 ip = nic_dict.get('ip', None)
9695 nicparams = self.nic_pinst[constants.DDM_ADD]
9696 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9697 instance.nics.append(new_nic)
9698 result.append(("nic.%d" % (len(instance.nics) - 1),
9699 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9700 (new_nic.mac, new_nic.ip,
9701 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9702 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9703 )))
9704 else:
9705 for key in 'mac', 'ip':
9706 if key in nic_dict:
9707 setattr(instance.nics[nic_op], key, nic_dict[key])
9708 if nic_op in self.nic_pinst:
9709 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9710 for key, val in nic_dict.iteritems():
9711 result.append(("nic.%s/%d" % (key, nic_op), val))
9714 if self.op.hvparams:
9715 instance.hvparams = self.hv_inst
9716 for key, val in self.op.hvparams.iteritems():
9717 result.append(("hv/%s" % key, val))
9720 if self.op.beparams:
9721 instance.beparams = self.be_inst
9722 for key, val in self.op.beparams.iteritems():
9723 result.append(("be/%s" % key, val))
9725 # OS change
9726 if self.op.os_name:
9727 instance.os = self.op.os_name
9730 if self.op.osparams:
9731 instance.osparams = self.os_inst
9732 for key, val in self.op.osparams.iteritems():
9733 result.append(("os/%s" % key, val))
9735 self.cfg.Update(instance, feedback_fn)
9737 return result
9739 _DISK_CONVERSIONS = {
9740 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9741 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9742 }
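# The dispatch table above is keyed on (old_template, new_template) and the
# handlers are plain functions, so Exec calls them explicitly with self,
# e.g. (sketch): self._DISK_CONVERSIONS[(constants.DT_PLAIN,
# constants.DT_DRBD8)](self, feedback_fn)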
9745 class LUQueryExports(NoHooksLU):
9746 """Query the exports list
9750 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9751 ("use_locking", False, ht.TBool),
9755 def ExpandNames(self):
9756 self.needed_locks = {}
9757 self.share_locks[locking.LEVEL_NODE] = 1
9758 if not self.op.nodes:
9759 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9760 else:
9761 self.needed_locks[locking.LEVEL_NODE] = \
9762 _GetWantedNodes(self, self.op.nodes)
9764 def Exec(self, feedback_fn):
9765 """Compute the list of all the exported system images.
9767 @rtype: dict
9768 @return: a dictionary with the structure node->(export-list)
9769 where export-list is a list of the instances exported on
9770 that node.
9772 """
9773 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9774 rpcresult = self.rpc.call_export_list(self.nodes)
9775 result = {}
9776 for node in rpcresult:
9777 if rpcresult[node].fail_msg:
9778 result[node] = False
9779 else:
9780 result[node] = rpcresult[node].payload
9782 return result
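# Example of the structure Exec returns (hypothetical names); nodes whose
# RPC failed map to False instead of an export list:
# {"node1.example.com": ["inst1.example.com"],
# "node2.example.com": False}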
9785 class LUPrepareExport(NoHooksLU):
9786 """Prepares an instance for an export and returns useful information.
9791 ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9795 def ExpandNames(self):
9796 self._ExpandAndLockInstance()
9798 def CheckPrereq(self):
9799 """Check prerequisites.
9802 instance_name = self.op.instance_name
9804 self.instance = self.cfg.GetInstanceInfo(instance_name)
9805 assert self.instance is not None, \
9806 "Cannot retrieve locked instance %s" % self.op.instance_name
9807 _CheckNodeOnline(self, self.instance.primary_node)
9809 self._cds = _GetClusterDomainSecret()
9811 def Exec(self, feedback_fn):
9812 """Prepares an instance for an export.
9815 instance = self.instance
9817 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9818 salt = utils.GenerateSecret(8)
9820 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9821 result = self.rpc.call_x509_cert_create(instance.primary_node,
9822 constants.RIE_CERT_VALIDITY)
9823 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9825 (name, cert_pem) = result.payload
9827 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9828 cert_pem)
9830 return {
9831 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9832 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9834 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9840 class LUExportInstance(LogicalUnit):
9841 """Export an instance to an image in the cluster.
9844 HPATH = "instance-export"
9845 HTYPE = constants.HTYPE_INSTANCE
9848 ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9849 ("shutdown", True, ht.TBool),
9851 ("remove_instance", False, ht.TBool),
9852 ("ignore_remove_failures", False, ht.TBool),
9853 ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9854 ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9855 ("destination_x509_ca", None, ht.TMaybeString),
9859 def CheckArguments(self):
9860 """Check the arguments.
9863 self.x509_key_name = self.op.x509_key_name
9864 self.dest_x509_ca_pem = self.op.destination_x509_ca
9866 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9867 if not self.x509_key_name:
9868 raise errors.OpPrereqError("Missing X509 key name for encryption",
9871 if not self.dest_x509_ca_pem:
9872 raise errors.OpPrereqError("Missing destination X509 CA",
9875 def ExpandNames(self):
9876 self._ExpandAndLockInstance()
9878 # Lock all nodes for local exports
9879 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9880 # FIXME: lock only instance primary and destination node
9882 # Sad but true, for now we have do lock all nodes, as we don't know where
9883 # the previous export might be, and in this LU we search for it and
9884 # remove it from its current node. In the future we could fix this by:
9885 # - making a tasklet to search (share-lock all), then create the
9886 # new one, then one to remove, after
9887 # - removing the removal operation altogether
9888 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9890 def DeclareLocks(self, level):
9891 """Last minute lock declaration."""
9892 # All nodes are locked anyway, so nothing to do here.
9894 def BuildHooksEnv(self):
9895 """Build hooks env.
9897 This will run on the master, primary node and target node.
9899 """
9900 env = {
9901 "EXPORT_MODE": self.op.mode,
9902 "EXPORT_NODE": self.op.target_node,
9903 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9904 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9905 # TODO: Generic function for boolean env variables
9906 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9909 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9911 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9913 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9914 nl.append(self.op.target_node)
9916 return env, nl, nl
9918 def CheckPrereq(self):
9919 """Check prerequisites.
9921 This checks that the instance and node names are valid.
9923 """
9924 instance_name = self.op.instance_name
9926 self.instance = self.cfg.GetInstanceInfo(instance_name)
9927 assert self.instance is not None, \
9928 "Cannot retrieve locked instance %s" % self.op.instance_name
9929 _CheckNodeOnline(self, self.instance.primary_node)
9931 if (self.op.remove_instance and self.instance.admin_up and
9932 not self.op.shutdown):
9933 raise errors.OpPrereqError("Can not remove instance without shutting it"
9936 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9937 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9938 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9939 assert self.dst_node is not None
9941 _CheckNodeOnline(self, self.dst_node.name)
9942 _CheckNodeNotDrained(self, self.dst_node.name)
9944 self._cds = None
9945 self.dest_disk_info = None
9946 self.dest_x509_ca = None
9948 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9949 self.dst_node = None
9951 if len(self.op.target_node) != len(self.instance.disks):
9952 raise errors.OpPrereqError(("Received destination information for %s"
9953 " disks, but instance %s has %s disks") %
9954 (len(self.op.target_node), instance_name,
9955 len(self.instance.disks)),
9956 errors.ECODE_INVAL)
9958 cds = _GetClusterDomainSecret()
9960 # Check X509 key name
9961 try:
9962 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9963 except (TypeError, ValueError), err:
9964 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9966 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9967 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9970 # Load and verify CA
9971 try:
9972 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9973 except OpenSSL.crypto.Error, err:
9974 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9975 (err, ), errors.ECODE_INVAL)
9977 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9978 if errcode is not None:
9979 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9980 (msg, ), errors.ECODE_INVAL)
9982 self.dest_x509_ca = cert
9984 # Verify target information
9985 disk_info = []
9986 for idx, disk_data in enumerate(self.op.target_node):
9987 try:
9988 (host, port, magic) = \
9989 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9990 except errors.GenericError, err:
9991 raise errors.OpPrereqError("Target info for disk %s: %s" %
9992 (idx, err), errors.ECODE_INVAL)
9994 disk_info.append((host, port, magic))
9996 assert len(disk_info) == len(self.op.target_node)
9997 self.dest_disk_info = disk_info
10000 raise errors.ProgrammerError("Unhandled export mode %r" %
10003 # instance disk type verification
10004 # TODO: Implement export support for file-based disks
10005 for disk in self.instance.disks:
10006 if disk.dev_type == constants.LD_FILE:
10007 raise errors.OpPrereqError("Export not supported for instances with"
10008 " file-based disks", errors.ECODE_INVAL)
10010 def _CleanupExports(self, feedback_fn):
10011 """Removes exports of current instance from all other nodes.
10013 If an instance in a cluster with nodes A..D was exported to node C, its
10014 exports will be removed from the nodes A, B and D.
10016 """
10017 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10019 nodelist = self.cfg.GetNodeList()
10020 nodelist.remove(self.dst_node.name)
10022 # on one-node clusters nodelist will be empty after the removal
10023 # if we proceed the backup would be removed because OpQueryExports
10024 # substitutes an empty list with the full cluster node list.
10025 iname = self.instance.name
10027 feedback_fn("Removing old exports for instance %s" % iname)
10028 exportlist = self.rpc.call_export_list(nodelist)
10029 for node in exportlist:
10030 if exportlist[node].fail_msg:
10031 continue
10032 if iname in exportlist[node].payload:
10033 msg = self.rpc.call_export_remove(node, iname).fail_msg
10035 self.LogWarning("Could not remove older export for instance %s"
10036 " on node %s: %s", iname, node, msg)
10038 def Exec(self, feedback_fn):
10039 """Export an instance to an image in the cluster.
10042 assert self.op.mode in constants.EXPORT_MODES
10044 instance = self.instance
10045 src_node = instance.primary_node
10047 if self.op.shutdown:
10048 # shutdown the instance, but not the disks
10049 feedback_fn("Shutting down instance %s" % instance.name)
10050 result = self.rpc.call_instance_shutdown(src_node, instance,
10051 self.op.shutdown_timeout)
10052 # TODO: Maybe ignore failures if ignore_remove_failures is set
10053 result.Raise("Could not shutdown instance %s on"
10054 " node %s" % (instance.name, src_node))
10056 # set the disks ID correctly since call_instance_start needs the
10057 # correct drbd minor to create the symlinks
10058 for disk in instance.disks:
10059 self.cfg.SetDiskID(disk, src_node)
10061 activate_disks = (not instance.admin_up)
10063 if activate_disks:
10064 # Activate the instance disks if we're exporting a stopped instance
10065 feedback_fn("Activating disks for %s" % instance.name)
10066 _StartInstanceDisks(self, instance, None)
10068 try:
10069 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10070 instance)
10072 helper.CreateSnapshots()
10073 try:
10074 if (self.op.shutdown and instance.admin_up and
10075 not self.op.remove_instance):
10076 assert not activate_disks
10077 feedback_fn("Starting instance %s" % instance.name)
10078 result = self.rpc.call_instance_start(src_node, instance, None, None)
10079 msg = result.fail_msg
10081 feedback_fn("Failed to start instance: %s" % msg)
10082 _ShutdownInstanceDisks(self, instance)
10083 raise errors.OpExecError("Could not start instance: %s" % msg)
10085 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10086 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10087 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10088 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10089 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10091 (key_name, _, _) = self.x509_key_name
10093 dest_ca_pem = \
10094 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10095 self.dest_x509_ca)
10097 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10098 key_name, dest_ca_pem,
10099 timeouts)
10100 finally:
10101 helper.Cleanup()
10103 # Check for backwards compatibility
10104 assert len(dresults) == len(instance.disks)
10105 assert compat.all(isinstance(i, bool) for i in dresults), \
10106 "Not all results are boolean: %r" % dresults
10110 feedback_fn("Deactivating disks for %s" % instance.name)
10111 _ShutdownInstanceDisks(self, instance)
10113 if not (compat.all(dresults) and fin_resu):
10114 failures = []
10115 if not fin_resu:
10116 failures.append("export finalization")
10117 if not compat.all(dresults):
10118 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10119 if not dsk)
10120 failures.append("disk export: disk(s) %s" % fdsk)
10122 raise errors.OpExecError("Export failed, errors in %s" %
10123 utils.CommaJoin(failures))
10125 # At this point, the export was successful, we can cleanup/finish
10127 # Remove instance if requested
10128 if self.op.remove_instance:
10129 feedback_fn("Removing instance %s" % instance.name)
10130 _RemoveInstance(self, feedback_fn, instance,
10131 self.op.ignore_remove_failures)
10133 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10134 self._CleanupExports(feedback_fn)
10136 return fin_resu, dresults
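# A sketch of interpreting the return value: a fully successful two-disk
# export yields (True, [True, True]); any False entry in dresults marks a
# disk whose snapshot transfer failed, and fin_resu reports finalization.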
10139 class LURemoveExport(NoHooksLU):
10140 """Remove exports related to the named instance.
10148 def ExpandNames(self):
10149 self.needed_locks = {}
10150 # We need all nodes to be locked in order for RemoveExport to work, but we
10151 # don't need to lock the instance itself, as nothing will happen to it (and
10152 # we can remove exports also for a removed instance)
10153 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10155 def Exec(self, feedback_fn):
10156 """Remove any export.
10159 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10160 # If the instance was not found we'll try with the name that was passed in.
10161 # This will only work if it was an FQDN, though.
10163 if not instance_name:
10165 instance_name = self.op.instance_name
10167 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10168 exportlist = self.rpc.call_export_list(locked_nodes)
10169 found = False
10170 for node in exportlist:
10171 msg = exportlist[node].fail_msg
10172 if msg:
10173 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10174 continue
10175 if instance_name in exportlist[node].payload:
10176 found = True
10177 result = self.rpc.call_export_remove(node, instance_name)
10178 msg = result.fail_msg
10179 if msg:
10180 logging.error("Could not remove export for instance %s"
10181 " on node %s: %s", instance_name, node, msg)
10183 if fqdn_warn and not found:
10184 feedback_fn("Export not found. If trying to remove an export belonging"
10185 " to a deleted instance please use its Fully Qualified"
10189 class LUAddGroup(LogicalUnit):
10190 """Logical unit for creating node groups.
10193 HPATH = "group-add"
10194 HTYPE = constants.HTYPE_GROUP
10198 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10199 ("alloc_policy", None, ht.TOr(ht.TNone,
10200 ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10205 def ExpandNames(self):
10206 # We need the new group's UUID here so that we can create and acquire the
10207 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10208 # that it should not check whether the UUID exists in the configuration.
10209 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10210 self.needed_locks = {}
10211 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10213 def CheckPrereq(self):
10214 """Check prerequisites.
10216 This checks that the given group name is not an existing node group
10217 already.
10219 """
10220 try:
10221 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10222 except errors.OpPrereqError:
10223 pass
10224 else:
10225 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10226 " node group (UUID: %s)" %
10227 (self.op.group_name, existing_uuid),
10228 errors.ECODE_EXISTS)
10230 if self.op.ndparams:
10231 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10233 def BuildHooksEnv(self):
10234 """Build hooks env.
10238 "GROUP_NAME": self.op.group_name,
10240 mn = self.cfg.GetMasterNode()
10241 return env, [mn], [mn]
10243 def Exec(self, feedback_fn):
10244 """Add the node group to the cluster.
10247 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10248 uuid=self.group_uuid,
10249 alloc_policy=self.op.alloc_policy,
10250 ndparams=self.op.ndparams)
10252 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10253 del self.remove_locks[locking.LEVEL_NODEGROUP]
10256 class LUQueryGroups(NoHooksLU):
10257 """Logical unit for querying node groups.
10260 # pylint: disable-msg=W0142
10263 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10268 _FIELDS_DYNAMIC = utils.FieldSet()
10270 _SIMPLE_FIELDS = ["name", "uuid", "alloc_policy",
10271 "ctime", "mtime", "serial_no"]
10273 _FIELDS_STATIC = utils.FieldSet(
10274 "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10276 def CheckArguments(self):
10277 _CheckOutputFields(static=self._FIELDS_STATIC,
10278 dynamic=self._FIELDS_DYNAMIC,
10279 selected=self.op.output_fields)
10281 def ExpandNames(self):
10282 self.needed_locks = {}
10284 def Exec(self, feedback_fn):
10285 """Computes the list of groups and their attributes.
10288 all_groups = self.cfg.GetAllNodeGroupsInfo()
10289 name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10291 if not self.op.names:
10292 sorted_names = utils.NiceSort(name_to_uuid.keys())
10293 my_groups = [name_to_uuid[n] for n in sorted_names]
10294 else:
10295 # Accept names to be either names or UUIDs.
10296 all_uuid = frozenset(all_groups.keys())
10297 my_groups = []
10298 missing = []
10300 for name in self.op.names:
10301 if name in all_uuid:
10302 my_groups.append(name)
10303 elif name in name_to_uuid:
10304 my_groups.append(name_to_uuid[name])
10305 else:
10306 missing.append(name)
10308 if missing:
10309 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10310 errors.ECODE_NOENT)
10312 do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10313 intersection(self.op.output_fields))
10315 do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10316 intersection(self.op.output_fields))
10318 # We need to map group->[nodes], and group->[instances]. The former is
10319 # directly attainable, but the latter we have to do through instance->node,
10320 # hence we need to process nodes even if we only need instance information.
10321 if do_nodes or do_instances:
10322 all_nodes = self.cfg.GetAllNodesInfo()
10323 group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10324 node_to_group = {}
10326 for node in all_nodes.values():
10327 if node.group in group_to_nodes:
10328 group_to_nodes[node.group].append(node.name)
10329 node_to_group[node.name] = node.group
10331 if do_instances:
10332 all_instances = self.cfg.GetAllInstancesInfo()
10333 group_to_instances = dict((all_groups[name].uuid, [])
10334 for name in my_groups)
10335 for instance in all_instances.values():
10336 node = instance.primary_node
10337 if node in node_to_group:
10338 group_to_instances[node_to_group[node]].append(instance.name)
10340 output = []
10342 for uuid in my_groups:
10343 group = all_groups[uuid]
10344 group_output = []
10346 for field in self.op.output_fields:
10347 if field in self._SIMPLE_FIELDS:
10348 val = getattr(group, field)
10349 elif field == "node_list":
10350 val = utils.NiceSort(group_to_nodes[group.uuid])
10351 elif field == "node_cnt":
10352 val = len(group_to_nodes[group.uuid])
10353 elif field == "pinst_list":
10354 val = utils.NiceSort(group_to_instances[group.uuid])
10355 elif field == "pinst_cnt":
10356 val = len(group_to_instances[group.uuid])
10357 else:
10358 raise errors.ParameterError(field)
10359 group_output.append(val)
10360 output.append(group_output)
10362 return output
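# Example (hypothetical cluster): with output_fields ["name", "node_cnt"]
# the rows returned above could look like [["default", 3], ["rack1", 2]],
# one inner list per requested group, in the computed order.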
10365 class LUSetGroupParams(LogicalUnit):
10366 """Modifies the parameters of a node group.
10369 HPATH = "group-modify"
10370 HTYPE = constants.HTYPE_GROUP
10374 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10375 ("alloc_policy", None, ht.TOr(ht.TNone,
10376 ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10381 def CheckArguments(self):
10382 all_changes = [
10383 self.op.ndparams,
10384 self.op.alloc_policy,
10385 ]
10387 if all_changes.count(None) == len(all_changes):
10388 raise errors.OpPrereqError("Please pass at least one modification",
10389 errors.ECODE_INVAL)
10391 def ExpandNames(self):
10392 # This raises errors.OpPrereqError on its own:
10393 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10395 self.needed_locks = {
10396 locking.LEVEL_NODEGROUP: [self.group_uuid],
10399 def CheckPrereq(self):
10400 """Check prerequisites.
10403 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10405 if self.group is None:
10406 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10407 (self.op.group_name, self.group_uuid))
10409 if self.op.ndparams:
10410 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10411 self.new_ndparams = self.group.SimpleFillND(self.op.ndparams)
10413 def BuildHooksEnv(self):
10414 """Build hooks env.
10418 "GROUP_NAME": self.op.group_name,
10419 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10421 mn = self.cfg.GetMasterNode()
10422 return env, [mn], [mn]
10424 def Exec(self, feedback_fn):
10425 """Modifies the node group.
10430 if self.op.ndparams:
10431 self.group.ndparams = self.new_ndparams
10432 result.append(("ndparams", str(self.group.ndparams)))
10434 if self.op.alloc_policy:
10435 self.group.alloc_policy = self.op.alloc_policy
10437 self.cfg.Update(self.group, feedback_fn)
10439 return result
10442 class LURemoveGroup(LogicalUnit):
10443 HPATH = "group-remove"
10444 HTYPE = constants.HTYPE_GROUP
10446 _OP_PARAMS = [
10447 _PGroupName,
10448 ]
10449 REQ_BGL = False
10452 def ExpandNames(self):
10453 # This raises errors.OpPrereqError on its own:
10454 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10455 self.needed_locks = {
10456 locking.LEVEL_NODEGROUP: [self.group_uuid],
10459 def CheckPrereq(self):
10460 """Check prerequisites.
10462 This checks that the given group name exists as a node group, that is
10463 empty (i.e., contains no nodes), and that is not the last group of the
10464 cluster.
10466 """
10467 # Verify that the group is empty.
10468 group_nodes = [node.name
10469 for node in self.cfg.GetAllNodesInfo().values()
10470 if node.group == self.group_uuid]
10473 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10475 (self.op.group_name,
10476 utils.CommaJoin(utils.NiceSort(group_nodes))),
10477 errors.ECODE_STATE)
10479 # Verify the cluster would not be left group-less.
10480 if len(self.cfg.GetNodeGroupList()) == 1:
10481 raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10482 " which cannot be left without at least one"
10483 " group" % self.op.group_name,
10484 errors.ECODE_STATE)
10486 def BuildHooksEnv(self):
10487 """Build hooks env.
10491 "GROUP_NAME": self.op.group_name,
10493 mn = self.cfg.GetMasterNode()
10494 return env, [mn], [mn]
10496 def Exec(self, feedback_fn):
10497 """Remove the node group.
10501 self.cfg.RemoveNodeGroup(self.group_uuid)
10502 except errors.ConfigurationError:
10503 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10504 (self.op.group_name, self.group_uuid))
10506 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10509 class LURenameGroup(LogicalUnit):
10510 HPATH = "group-rename"
10511 HTYPE = constants.HTYPE_GROUP
10513 _OP_PARAMS = [
10514 ("old_name", ht.NoDefault, ht.TNonEmptyString),
10515 ("new_name", ht.NoDefault, ht.TNonEmptyString),
10516 ]
10518 REQ_BGL = False
10520 def ExpandNames(self):
10521 # This raises errors.OpPrereqError on its own:
10522 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10524 self.needed_locks = {
10525 locking.LEVEL_NODEGROUP: [self.group_uuid],
10528 def CheckPrereq(self):
10529 """Check prerequisites.
10531 This checks that the given old_name exists as a node group, and that
10532 new_name is not already in use.
10534 """
10535 try:
10536 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10537 except errors.OpPrereqError:
10538 pass
10539 else:
10540 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10541 " node group (UUID: %s)" %
10542 (self.op.new_name, new_name_uuid),
10543 errors.ECODE_EXISTS)
10545 def BuildHooksEnv(self):
10546 """Build hooks env.
10550 "OLD_NAME": self.op.old_name,
10551 "NEW_NAME": self.op.new_name,
10554 mn = self.cfg.GetMasterNode()
10555 all_nodes = self.cfg.GetAllNodesInfo()
10556 run_nodes = [mn]
10557 all_nodes.pop(mn, None)
10559 for node in all_nodes.values():
10560 if node.group == self.group_uuid:
10561 run_nodes.append(node.name)
10563 return env, run_nodes, run_nodes
10565 def Exec(self, feedback_fn):
10566 """Rename the node group.
10569 group = self.cfg.GetNodeGroup(self.group_uuid)
10572 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10573 (self.op.old_name, self.group_uuid))
10575 group.name = self.op.new_name
10576 self.cfg.Update(group, feedback_fn)
10578 return self.op.new_name
10581 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10582 """Generic tags LU.
10584 This is an abstract class which is the parent of all the other tags LUs.
10586 """
10588 def ExpandNames(self):
10589 self.needed_locks = {}
10590 if self.op.kind == constants.TAG_NODE:
10591 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10592 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10593 elif self.op.kind == constants.TAG_INSTANCE:
10594 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10595 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10597 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10598 # not possible to acquire the BGL based on opcode parameters)
10600 def CheckPrereq(self):
10601 """Check prerequisites.
10604 if self.op.kind == constants.TAG_CLUSTER:
10605 self.target = self.cfg.GetClusterInfo()
10606 elif self.op.kind == constants.TAG_NODE:
10607 self.target = self.cfg.GetNodeInfo(self.op.name)
10608 elif self.op.kind == constants.TAG_INSTANCE:
10609 self.target = self.cfg.GetInstanceInfo(self.op.name)
10611 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10612 str(self.op.kind), errors.ECODE_INVAL)
10615 class LUGetTags(TagsLU):
10616 """Returns the tags of a given object.
10620 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10621 # Name is only meaningful for nodes and instances
10622 ("name", ht.NoDefault, ht.TMaybeString),
10626 def ExpandNames(self):
10627 TagsLU.ExpandNames(self)
10629 # Share locks as this is only a read operation
10630 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10632 def Exec(self, feedback_fn):
10633 """Returns the tag list.
10636 return list(self.target.GetTags())
10639 class LUSearchTags(NoHooksLU):
10640 """Searches the tags for a given pattern.
10644 ("pattern", ht.NoDefault, ht.TNonEmptyString),
10648 def ExpandNames(self):
10649 self.needed_locks = {}
10651 def CheckPrereq(self):
10652 """Check prerequisites.
10654 This checks the pattern passed for validity by compiling it.
10656 """
10657 try:
10658 self.re = re.compile(self.op.pattern)
10659 except re.error, err:
10660 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10661 (self.op.pattern, err), errors.ECODE_INVAL)
10663 def Exec(self, feedback_fn):
10664 """Returns the tag list.
10668 tgts = [("/cluster", cfg.GetClusterInfo())]
10669 ilist = cfg.GetAllInstancesInfo().values()
10670 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10671 nlist = cfg.GetAllNodesInfo().values()
10672 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10673 results = []
10674 for path, target in tgts:
10675 for tag in target.GetTags():
10676 if self.re.search(tag):
10677 results.append((path, tag))
10679 return results
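# Example (hypothetical tags): searching for the pattern "^db" could
# return [("/instances/inst1.example.com", "dbserver"),
# ("/nodes/node1.example.com", "dbnet")], i.e. (path, tag) tuples for
# every object whose tags match the regular expression.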
10681 class LUAddTags(TagsLU):
10682 """Sets a tag on a given object.
10686 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10687 # Name is only meaningful for nodes and instances
10688 ("name", ht.NoDefault, ht.TMaybeString),
10689 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10693 def CheckPrereq(self):
10694 """Check prerequisites.
10696 This checks the type and length of the tag name and value.
10698 """
10699 TagsLU.CheckPrereq(self)
10700 for tag in self.op.tags:
10701 objects.TaggableObject.ValidateTag(tag)
10703 def Exec(self, feedback_fn):
10704 """Sets the tag.
10706 """
10707 try:
10708 for tag in self.op.tags:
10709 self.target.AddTag(tag)
10710 except errors.TagError, err:
10711 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10712 self.cfg.Update(self.target, feedback_fn)
10715 class LUDelTags(TagsLU):
10716 """Delete a list of tags from a given object.
10720 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10721 # Name is only meaningful for nodes and instances
10722 ("name", ht.NoDefault, ht.TMaybeString),
10723 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10727 def CheckPrereq(self):
10728 """Check prerequisites.
10730 This checks that we have the given tag.
10732 """
10733 TagsLU.CheckPrereq(self)
10734 for tag in self.op.tags:
10735 objects.TaggableObject.ValidateTag(tag)
10736 del_tags = frozenset(self.op.tags)
10737 cur_tags = self.target.GetTags()
10739 diff_tags = del_tags - cur_tags
10740 if diff_tags:
10741 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10742 raise errors.OpPrereqError("Tag(s) %s not found" %
10743 (utils.CommaJoin(diff_names), ),
10744 errors.ECODE_NOENT)
10746 def Exec(self, feedback_fn):
10747 """Remove the tag from the object.
10750 for tag in self.op.tags:
10751 self.target.RemoveTag(tag)
10752 self.cfg.Update(self.target, feedback_fn)
10755 class LUTestDelay(NoHooksLU):
10756 """Sleep for a specified amount of time.
10758 This LU sleeps on the master and/or nodes for a specified amount of
10759 time.
10761 """
10762 _OP_PARAMS = [
10763 ("duration", ht.NoDefault, ht.TFloat),
10764 ("on_master", True, ht.TBool),
10765 ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10766 ("repeat", 0, ht.TPositiveInt)
10770 def ExpandNames(self):
10771 """Expand names and set required locks.
10773 This expands the node list, if any.
10775 """
10776 self.needed_locks = {}
10777 if self.op.on_nodes:
10778 # _GetWantedNodes can be used here, but is not always appropriate to use
10779 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10780 # more information.
10781 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10782 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10784 def _TestDelay(self):
10785 """Do the actual sleep.
10788 if self.op.on_master:
10789 if not utils.TestDelay(self.op.duration):
10790 raise errors.OpExecError("Error during master delay test")
10791 if self.op.on_nodes:
10792 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10793 for node, node_result in result.items():
10794 node_result.Raise("Failure during rpc call to node %s" % node)
10796 def Exec(self, feedback_fn):
10797 """Execute the test delay opcode, with the wanted repetitions.
10800 if self.op.repeat == 0:
10803 top_value = self.op.repeat - 1
10804 for i in range(self.op.repeat):
10805 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10809 class LUTestJobqueue(NoHooksLU):
10810 """Utility LU to test some aspects of the job queue.
10814 ("notify_waitlock", False, ht.TBool),
10815 ("notify_exec", False, ht.TBool),
10816 ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10817 ("fail", False, ht.TBool),
10821 # Must be lower than default timeout for WaitForJobChange to see whether it
10822 # notices changed jobs
10823 _CLIENT_CONNECT_TIMEOUT = 20.0
10824 _CLIENT_CONFIRM_TIMEOUT = 60.0
10826 @classmethod
10827 def _NotifyUsingSocket(cls, cb, errcls):
10828 """Opens a Unix socket and waits for another program to connect.
10830 @type cb: callable
10831 @param cb: Callback to send socket name to client
10832 @type errcls: class
10833 @param errcls: Exception class to use for errors
10835 """
10836 # Using a temporary directory as there's no easy way to create temporary
10837 # sockets without writing a custom loop around tempfile.mktemp and
10838 # socket.bind
10839 tmpdir = tempfile.mkdtemp()
10840 try:
10841 tmpsock = utils.PathJoin(tmpdir, "sock")
10843 logging.debug("Creating temporary socket at %s", tmpsock)
10844 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10845 try:
10846 sock.bind(tmpsock)
10847 sock.listen(1)
10849 # Send details to client
10850 cb(tmpsock)
10852 # Wait for client to connect before continuing
10853 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10854 try:
10855 (conn, _) = sock.accept()
10856 except socket.error, err:
10857 raise errcls("Client didn't connect in time (%s)" % err)
10858 finally:
10859 sock.close()
10860 finally:
10861 # Remove as soon as client is connected
10862 shutil.rmtree(tmpdir)
10864 # Wait for client to close
10865 try:
10866 try:
10867 # pylint: disable-msg=E1101
10868 # Instance of '_socketobject' has no ... member
10869 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10870 conn.recv(1)
10871 except socket.error, err:
10872 raise errcls("Client failed to confirm notification (%s)" % err)
10873 finally:
10874 conn.close()
10876 def _SendNotification(self, test, arg, sockname):
10877 """Sends a notification to the client.
10879 @type test: string
10880 @param test: Test name
10881 @param arg: Test argument (depends on test)
10882 @type sockname: string
10883 @param sockname: Socket path
10885 """
10886 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10888 def _Notify(self, prereq, test, arg):
10889 """Notifies the client of a test.
10892 @param prereq: Whether this is a prereq-phase test
10894 @param test: Test name
10895 @param arg: Test argument (depends on test)
10899 errcls = errors.OpPrereqError
10901 errcls = errors.OpExecError
10903 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10907 def CheckArguments(self):
10908 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10909 self.expandnames_calls = 0
10911 def ExpandNames(self):
10912 checkargs_calls = getattr(self, "checkargs_calls", 0)
10913 if checkargs_calls < 1:
10914 raise errors.ProgrammerError("CheckArguments was not called")
10916 self.expandnames_calls += 1
10918 if self.op.notify_waitlock:
10919 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10921 self.LogInfo("Expanding names")
10923 # Get lock on master node (just to get a lock, not for a particular reason)
10924 self.needed_locks = {
10925 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10926 }
10928 def Exec(self, feedback_fn):
10929 if self.expandnames_calls < 1:
10930 raise errors.ProgrammerError("ExpandNames was not called")
10932 if self.op.notify_exec:
10933 self._Notify(False, constants.JQT_EXEC, None)
10935 self.LogInfo("Executing")
10937 if self.op.log_messages:
10938 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10939 for idx, msg in enumerate(self.op.log_messages):
10940 self.LogInfo("Sending log message %s", idx + 1)
10941 feedback_fn(constants.JQT_MSGPREFIX + msg)
10942 # Report how many test messages have been sent
10943 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10946 raise errors.OpExecError("Opcode failure was requested")
10951 class IAllocator(object):
10952 """IAllocator framework.
10954 An IAllocator instance has four sets of attributes:
10955 - cfg that is needed to query the cluster
10956 - input data (all members of the _KEYS class attribute are required)
10957 - four buffer attributes (in|out_data|text), that represent the
10958 input (to the external script) in text and data structure format,
10959 and the output from it, again in two formats
10960 - the result variables from the script (success, info, nodes) for
10961 easy usage
10963 """
10964 # pylint: disable-msg=R0902
10965 # lots of instance attributes
10967 "name", "mem_size", "disks", "disk_template",
10968 "os", "tags", "nics", "vcpus", "hypervisor",
10971 "name", "relocate_from",
10977 def __init__(self, cfg, rpc, mode, **kwargs):
10978 self.cfg = cfg
10979 self.rpc = rpc
10980 # init buffer variables
10981 self.in_text = self.out_text = self.in_data = self.out_data = None
10982 # init all input fields so that pylint is happy
10983 self.mode = mode
10984 self.mem_size = self.disks = self.disk_template = None
10985 self.os = self.tags = self.nics = self.vcpus = None
10986 self.hypervisor = None
10987 self.relocate_from = None
10988 self.name = None
10989 self.evac_nodes = None
10990 # computed fields
10991 self.required_nodes = None
10992 # init result fields
10993 self.success = self.info = self.result = None
10994 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10995 keyset = self._ALLO_KEYS
10996 fn = self._AddNewInstance
10997 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10998 keyset = self._RELO_KEYS
10999 fn = self._AddRelocateInstance
11000 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11001 keyset = self._EVAC_KEYS
11002 fn = self._AddEvacuateNodes
11004 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11005 " IAllocator" % self.mode)
11006 for key in kwargs:
11007 if key not in keyset:
11008 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11009 " IAllocator" % key)
11010 setattr(self, key, kwargs[key])
11012 for key in keyset:
11013 if key not in kwargs:
11014 raise errors.ProgrammerError("Missing input parameter '%s' to"
11015 " IAllocator" % key)
11016 self._BuildInputData(fn)
11018 def _ComputeClusterData(self):
11019 """Compute the generic allocator input data.
11021 This is the data that is independent of the actual operation.
11023 """
11024 cfg = self.cfg
11025 cluster_info = cfg.GetClusterInfo()
11028 "version": constants.IALLOCATOR_VERSION,
11029 "cluster_name": cfg.GetClusterName(),
11030 "cluster_tags": list(cluster_info.GetTags()),
11031 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11032 # we don't have job IDs
11034 iinfo = cfg.GetAllInstancesInfo().values()
11035 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11038 node_list = cfg.GetNodeList()
11040 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11041 hypervisor_name = self.hypervisor
11042 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11043 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11044 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11045 hypervisor_name = cluster_info.enabled_hypervisors[0]
11047 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11048 hypervisor_name)
11049 node_iinfo = \
11050 self.rpc.call_all_instances_info(node_list,
11051 cluster_info.enabled_hypervisors)
11053 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11055 data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
11057 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11059 self.in_data = data
11061 @staticmethod
11062 def _ComputeNodeGroupData(cfg):
11063 """Compute node groups data.
11065 """
11066 ng = {}
11067 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11068 ng[guuid] = {
11069 "name": gdata.name,
11070 "alloc_policy": gdata.alloc_policy,
11071 }
11072 return ng
11074 @staticmethod
11075 def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
11076 """Compute global node data.
11078 """
11079 node_results = {}
11080 for nname, nresult in node_data.items():
11081 # first fill in static (config-based) values
11082 ninfo = cfg.GetNodeInfo(nname)
11083 pnr = {
11084 "tags": list(ninfo.GetTags()),
11085 "primary_ip": ninfo.primary_ip,
11086 "secondary_ip": ninfo.secondary_ip,
11087 "offline": ninfo.offline,
11088 "drained": ninfo.drained,
11089 "master_candidate": ninfo.master_candidate,
11090 "group": ninfo.group,
11091 "master_capable": ninfo.master_capable,
11092 "vm_capable": ninfo.vm_capable,
11093 }
11095 if not (ninfo.offline or ninfo.drained):
11096 nresult.Raise("Can't get data for node %s" % nname)
11097 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11098 nname)
11099 remote_info = nresult.payload
11101 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11102 'vg_size', 'vg_free', 'cpu_total']:
11103 if attr not in remote_info:
11104 raise errors.OpExecError("Node '%s' didn't return attribute"
11105 " '%s'" % (nname, attr))
11106 if not isinstance(remote_info[attr], int):
11107 raise errors.OpExecError("Node '%s' returned invalid value"
11108 " for '%s': %s" %
11109 (nname, attr, remote_info[attr]))
11110 # compute memory used by primary instances
11111 i_p_mem = i_p_up_mem = 0
11112 for iinfo, beinfo in i_list:
11113 if iinfo.primary_node == nname:
11114 i_p_mem += beinfo[constants.BE_MEMORY]
11115 if iinfo.name not in node_iinfo[nname].payload:
11116 i_used_mem = 0
11117 else:
11118 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11119 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11120 remote_info['memory_free'] -= max(0, i_mem_diff)
11122 if iinfo.admin_up:
11123 i_p_up_mem += beinfo[constants.BE_MEMORY]
11125 # compute memory used by instances
11127 "total_memory": remote_info['memory_total'],
11128 "reserved_memory": remote_info['memory_dom0'],
11129 "free_memory": remote_info['memory_free'],
11130 "total_disk": remote_info['vg_size'],
11131 "free_disk": remote_info['vg_free'],
11132 "total_cpus": remote_info['cpu_total'],
11133 "i_pri_memory": i_p_mem,
11134 "i_pri_up_memory": i_p_up_mem,
11136 pnr.update(pnr_dyn)
11138 node_results[nname] = pnr
11140 return node_results
11142 @staticmethod
11143 def _ComputeInstanceData(cluster_info, i_list):
11144 """Compute global instance data.
11146 """
11147 instance_data = {}
11148 for iinfo, beinfo in i_list:
11149 nic_data = []
11150 for nic in iinfo.nics:
11151 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11152 nic_dict = {"mac": nic.mac,
11153 "ip": nic.ip,
11154 "mode": filled_params[constants.NIC_MODE],
11155 "link": filled_params[constants.NIC_LINK],
11156 }
11157 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11158 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11159 nic_data.append(nic_dict)
11161 "tags": list(iinfo.GetTags()),
11162 "admin_up": iinfo.admin_up,
11163 "vcpus": beinfo[constants.BE_VCPUS],
11164 "memory": beinfo[constants.BE_MEMORY],
11166 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11168 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11169 "disk_template": iinfo.disk_template,
11170 "hypervisor": iinfo.hypervisor,
11172 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11174 instance_data[iinfo.name] = pir
11176 return instance_data
11178 def _AddNewInstance(self):
11179 """Add new instance data to allocator structure.
11181 This in combination with _AllocatorGetClusterData will create the
11182 correct structure needed as input for the allocator.
11184 The checks for the completeness of the opcode must have already been
11185 done.
11187 """
11188 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11190 if self.disk_template in constants.DTS_NET_MIRROR:
11191 self.required_nodes = 2
11192 else:
11193 self.required_nodes = 1
11196 "disk_template": self.disk_template,
11199 "vcpus": self.vcpus,
11200 "memory": self.mem_size,
11201 "disks": self.disks,
11202 "disk_space_total": disk_space,
11204 "required_nodes": self.required_nodes,
11208 def _AddRelocateInstance(self):
11209 """Add relocate instance data to allocator structure.
11211 This in combination with _IAllocatorGetClusterData will create the
11212 correct structure needed as input for the allocator.
11214 The checks for the completeness of the opcode must have already been
11215 done.
11217 """
11218 instance = self.cfg.GetInstanceInfo(self.name)
11219 if instance is None:
11220 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11221 " IAllocator" % self.name)
11223 if instance.disk_template not in constants.DTS_NET_MIRROR:
11224 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11225 errors.ECODE_INVAL)
11227 if len(instance.secondary_nodes) != 1:
11228 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11229 errors.ECODE_STATE)
11231 self.required_nodes = 1
11232 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11233 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11237 "disk_space_total": disk_space,
11238 "required_nodes": self.required_nodes,
11239 "relocate_from": self.relocate_from,
11243 def _AddEvacuateNodes(self):
11244 """Add evacuate nodes data to allocator structure.
11248 "evac_nodes": self.evac_nodes
11252 def _BuildInputData(self, fn):
11253 """Build input data structures.
11256 self._ComputeClusterData()
11259 request["type"] = self.mode
11260 self.in_data["request"] = request
11262 self.in_text = serializer.Dump(self.in_data)
11264 def Run(self, name, validate=True, call_fn=None):
11265 """Run an instance allocator and return the results.
11268 if call_fn is None:
11269 call_fn = self.rpc.call_iallocator_runner
11271 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11272 result.Raise("Failure while running the iallocator script")
11274 self.out_text = result.payload
11275 if validate:
11276 self._ValidateResult()
11278 def _ValidateResult(self):
11279 """Process the allocator results.
11281 This will process and if successful save the result in
11282 self.out_data and the other parameters.
11284 """
11285 try:
11286 rdict = serializer.Load(self.out_text)
11287 except Exception, err:
11288 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11290 if not isinstance(rdict, dict):
11291 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11293 # TODO: remove backwards compatibility in later versions
11294 if "nodes" in rdict and "result" not in rdict:
11295 rdict["result"] = rdict["nodes"]
11296 del rdict["nodes"]
11298 for key in "success", "info", "result":
11299 if key not in rdict:
11300 raise errors.OpExecError("Can't parse iallocator results:"
11301 " missing key '%s'" % key)
11302 setattr(self, key, rdict[key])
11304 if not isinstance(rdict["result"], list):
11305 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11306 " is not a list")
11307 self.out_data = rdict
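# A well-formed allocator reply, as validated above, looks like this
# sketch (hypothetical nodes):
# {"success": true, "info": "allocation successful",
# "result": ["node1.example.com", "node2.example.com"]}
# "result" used to be called "nodes"; the fallback above keeps replies
# from older scripts working.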
11310 class LUTestAllocator(NoHooksLU):
11311 """Run allocator tests.
11313 This LU runs the allocator tests
11315 """
11316 _OP_PARAMS = [
11317 ("direction", ht.NoDefault,
11318 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
11319 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
11320 ("name", ht.NoDefault, ht.TNonEmptyString),
11321 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
11322 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
11323 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
11324 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
11325 ("hypervisor", None, ht.TMaybeString),
11326 ("allocator", None, ht.TMaybeString),
11327 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
11328 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
11329 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
11330 ("os", None, ht.TMaybeString),
11331 ("disk_template", None, ht.TMaybeString),
11332 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
11335 def CheckPrereq(self):
11336 """Check prerequisites.
11338 This checks the opcode parameters depending on the direction and mode test.
11340 """
11341 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11342 for attr in ["mem_size", "disks", "disk_template",
11343 "os", "tags", "nics", "vcpus"]:
11344 if not hasattr(self.op, attr):
11345 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11346 attr, errors.ECODE_INVAL)
11347 iname = self.cfg.ExpandInstanceName(self.op.name)
11348 if iname is not None:
11349 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11350 iname, errors.ECODE_EXISTS)
11351 if not isinstance(self.op.nics, list):
11352 raise errors.OpPrereqError("Invalid parameter 'nics'",
11353 errors.ECODE_INVAL)
11354 if not isinstance(self.op.disks, list):
11355 raise errors.OpPrereqError("Invalid parameter 'disks'",
11356 errors.ECODE_INVAL)
11357 for row in self.op.disks:
11358 if (not isinstance(row, dict) or
11359 "size" not in row or
11360 not isinstance(row["size"], int) or
11361 "mode" not in row or
11362 row["mode"] not in ['r', 'w']):
11363 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11364 " parameter", errors.ECODE_INVAL)
11365 if self.op.hypervisor is None:
11366 self.op.hypervisor = self.cfg.GetHypervisorType()
11367 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11368 fname = _ExpandInstanceName(self.cfg, self.op.name)
11369 self.op.name = fname
11370 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11371 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11372 if not hasattr(self.op, "evac_nodes"):
11373 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11374 " opcode input", errors.ECODE_INVAL)
11376 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11377 self.op.mode, errors.ECODE_INVAL)
11379 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11380 if self.op.allocator is None:
11381 raise errors.OpPrereqError("Missing allocator name",
11382 errors.ECODE_INVAL)
11383 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11384 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11385 self.op.direction, errors.ECODE_INVAL)
11387 def Exec(self, feedback_fn):
11388 """Run the allocator test.
11391 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11392 ial = IAllocator(self.cfg, self.rpc,
11393 mode=self.op.mode,
11394 name=self.op.name,
11395 mem_size=self.op.mem_size,
11396 disks=self.op.disks,
11397 disk_template=self.op.disk_template,
11398 os=self.op.os,
11399 tags=self.op.tags,
11400 nics=self.op.nics,
11401 vcpus=self.op.vcpus,
11402 hypervisor=self.op.hypervisor,
11403 )
11404 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11405 ial = IAllocator(self.cfg, self.rpc,
11406 mode=self.op.mode,
11407 name=self.op.name,
11408 relocate_from=list(self.relocate_from),
11409 )
11410 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11411 ial = IAllocator(self.cfg, self.rpc,
11412 mode=self.op.mode,
11413 evac_nodes=self.op.evac_nodes)
11415 raise errors.ProgrammerError("Uncatched mode %s in"
11416 " LUTestAllocator.Exec", self.op.mode)
11418 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11419 result = ial.in_text
11420 else:
11421 ial.Run(self.op.allocator, validate=False)
11422 result = ial.out_text
11423 return result