4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
60 import ganeti.masterd.instance # pylint: disable-msg=W0611
62 # Common opcode attributes
64 #: output fields for a query operation
65 _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
68 #: the shutdown timeout
69 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
72 #: the force parameter
73 _PForce = ("force", False, ht.TBool)
75 #: a required instance name (for single-instance LUs)
76 _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
78 #: Whether to ignore offline nodes
79 _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
81 #: a required node name (for single-node LUs)
82 _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
84 #: a required node group name (for single-group LUs)
85 _PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)
87 #: the migration type (live/non-live)
88 _PMigrationMode = ("mode", None,
89 ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
91 #: the obsolete 'live' mode (boolean)
92 _PMigrationLive = ("live", None, ht.TMaybeBool)
96 class LogicalUnit(object):
97 """Logical Unit base class.
99 Subclasses must follow these rules:
100 - implement ExpandNames
101 - implement CheckPrereq (except when tasklets are used)
102 - implement Exec (except when tasklets are used)
103 - implement BuildHooksEnv
104 - redefine HPATH and HTYPE
105 - optionally redefine their run requirements:
106 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
108 Note that all commands require root permissions.
110 @ivar dry_run_result: the value (if any) that will be returned to the caller
111 in dry-run mode (signalled by opcode dry_run parameter)
112 @cvar _OP_PARAMS: a list of opcode attributes, the default values
113 they should get if not already defined, and types they must match
121 def __init__(self, processor, op, context, rpc):
122 """Constructor for LogicalUnit.
124 This needs to be overridden in derived classes in order to check op
128 self.proc = processor
130 self.cfg = context.cfg
131 self.context = context
133 # Dicts used to declare locking needs to mcpu
134 self.needed_locks = None
135 self.acquired_locks = {}
136 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
138 self.remove_locks = {}
139 # Used to force good behavior when calling helper functions
140 self.recalculate_locks = {}
143 self.Log = processor.Log # pylint: disable-msg=C0103
144 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
145 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
146 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
147 # support for dry-run
148 self.dry_run_result = None
149 # support for generic debug attribute
150 if (not hasattr(self.op, "debug_level") or
151 not isinstance(self.op.debug_level, int)):
152 self.op.debug_level = 0
157 # The new kind-of-type-system
158 op_id = self.op.OP_ID
159 for attr_name, aval, test in self._OP_PARAMS:
160 if not hasattr(op, attr_name):
161 if aval == ht.NoDefault:
162 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
163 (op_id, attr_name), errors.ECODE_INVAL)
169 setattr(self.op, attr_name, dval)
170 attr_val = getattr(op, attr_name)
171 if test == ht.NoType:
174 if not callable(test):
175 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
176 " given type is not a proper type (%s)" %
177 (op_id, attr_name, test))
178 if not test(attr_val):
179 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
180 self.op.OP_ID, attr_name, type(attr_val), attr_val)
181 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
182 (op_id, attr_name), errors.ECODE_INVAL)
184 self.CheckArguments()
187 """Returns the SshRunner object
191 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
194 ssh = property(fget=__GetSSH)
196 def CheckArguments(self):
197 """Check syntactic validity for the opcode arguments.
199 This method is for doing a simple syntactic check and ensure
200 validity of opcode parameters, without any cluster-related
201 checks. While the same can be accomplished in ExpandNames and/or
202 CheckPrereq, doing these separate is better because:
204 - ExpandNames is left as as purely a lock-related function
205 - CheckPrereq is run after we have acquired locks (and possible
208 The function is allowed to change the self.op attribute so that
209 later methods can no longer worry about missing parameters.
214 def ExpandNames(self):
215 """Expand names for this LU.
217 This method is called before starting to execute the opcode, and it should
218 update all the parameters of the opcode to their canonical form (e.g. a
219 short node name must be fully expanded after this method has successfully
220 completed). This way locking, hooks, logging, etc. can work correctly.
222 LUs which implement this method must also populate the self.needed_locks
223 member, as a dict with lock levels as keys, and a list of needed lock names
226 - use an empty dict if you don't need any lock
227 - if you don't need any lock at a particular level omit that level
228 - don't put anything for the BGL level
229 - if you want all locks at a level use locking.ALL_SET as a value
231 If you need to share locks (rather than acquire them exclusively) at one
232 level you can modify self.share_locks, setting a true value (usually 1) for
233 that level. By default locks are not shared.
235 This function can also define a list of tasklets, which then will be
236 executed in order instead of the usual LU-level CheckPrereq and Exec
237 functions, if those are not defined by the LU.
241 # Acquire all nodes and one instance
242 self.needed_locks = {
243 locking.LEVEL_NODE: locking.ALL_SET,
244 locking.LEVEL_INSTANCE: ['instance1.example.com'],
246 # Acquire just two nodes
247 self.needed_locks = {
248 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
251 self.needed_locks = {} # No, you can't leave it to the default value None
254 # The implementation of this method is mandatory only if the new LU is
255 # concurrent, so that old LUs don't need to be changed all at the same
258 self.needed_locks = {} # Exclusive LUs don't need locks.
260 raise NotImplementedError
262 def DeclareLocks(self, level):
263 """Declare LU locking needs for a level
265 While most LUs can just declare their locking needs at ExpandNames time,
266 sometimes there's the need to calculate some locks after having acquired
267 the ones before. This function is called just before acquiring locks at a
268 particular level, but after acquiring the ones at lower levels, and permits
269 such calculations. It can be used to modify self.needed_locks, and by
270 default it does nothing.
272 This function is only called if you have something already set in
273 self.needed_locks for the level.
275 @param level: Locking level which is going to be locked
276 @type level: member of ganeti.locking.LEVELS
280 def CheckPrereq(self):
281 """Check prerequisites for this LU.
283 This method should check that the prerequisites for the execution
284 of this LU are fulfilled. It can do internode communication, but
285 it should be idempotent - no cluster or system changes are
288 The method should raise errors.OpPrereqError in case something is
289 not fulfilled. Its return value is ignored.
291 This method should also update all the parameters of the opcode to
292 their canonical form if it hasn't been done by ExpandNames before.
295 if self.tasklets is not None:
296 for (idx, tl) in enumerate(self.tasklets):
297 logging.debug("Checking prerequisites for tasklet %s/%s",
298 idx + 1, len(self.tasklets))
303 def Exec(self, feedback_fn):
306 This method should implement the actual work. It should raise
307 errors.OpExecError for failures that are somewhat dealt with in
311 if self.tasklets is not None:
312 for (idx, tl) in enumerate(self.tasklets):
313 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
316 raise NotImplementedError
318 def BuildHooksEnv(self):
319 """Build hooks environment for this LU.
321 This method should return a three-node tuple consisting of: a dict
322 containing the environment that will be used for running the
323 specific hook for this LU, a list of node names on which the hook
324 should run before the execution, and a list of node names on which
325 the hook should run after the execution.
327 The keys of the dict must not have 'GANETI_' prefixed as this will
328 be handled in the hooks runner. Also note additional keys will be
329 added by the hooks runner. If the LU doesn't define any
330 environment, an empty dict (and not None) should be returned.
332 No nodes should be returned as an empty list (and not None).
334 Note that if the HPATH for a LU class is None, this function will
338 raise NotImplementedError
340 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
341 """Notify the LU about the results of its hooks.
343 This method is called every time a hooks phase is executed, and notifies
344 the Logical Unit about the hooks' result. The LU can then use it to alter
345 its result based on the hooks. By default the method does nothing and the
346 previous result is passed back unchanged but any LU can define it if it
347 wants to use the local cluster hook-scripts somehow.
349 @param phase: one of L{constants.HOOKS_PHASE_POST} or
350 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
351 @param hook_results: the results of the multi-node hooks rpc call
352 @param feedback_fn: function used send feedback back to the caller
353 @param lu_result: the previous Exec result this LU had, or None
355 @return: the new Exec result, based on the previous result
359 # API must be kept, thus we ignore the unused argument and could
360 # be a function warnings
361 # pylint: disable-msg=W0613,R0201
364 def _ExpandAndLockInstance(self):
365 """Helper function to expand and lock an instance.
367 Many LUs that work on an instance take its name in self.op.instance_name
368 and need to expand it and then declare the expanded name for locking. This
369 function does it, and then updates self.op.instance_name to the expanded
370 name. It also initializes needed_locks as a dict, if this hasn't been done
374 if self.needed_locks is None:
375 self.needed_locks = {}
377 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
378 "_ExpandAndLockInstance called with instance-level locks set"
379 self.op.instance_name = _ExpandInstanceName(self.cfg,
380 self.op.instance_name)
381 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
383 def _LockInstancesNodes(self, primary_only=False):
384 """Helper function to declare instances' nodes for locking.
386 This function should be called after locking one or more instances to lock
387 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
388 with all primary or secondary nodes for instances already locked and
389 present in self.needed_locks[locking.LEVEL_INSTANCE].
391 It should be called from DeclareLocks, and for safety only works if
392 self.recalculate_locks[locking.LEVEL_NODE] is set.
394 In the future it may grow parameters to just lock some instance's nodes, or
395 to just lock primaries or secondary nodes, if needed.
397 If should be called in DeclareLocks in a way similar to::
399 if level == locking.LEVEL_NODE:
400 self._LockInstancesNodes()
402 @type primary_only: boolean
403 @param primary_only: only lock primary nodes of locked instances
406 assert locking.LEVEL_NODE in self.recalculate_locks, \
407 "_LockInstancesNodes helper function called with no nodes to recalculate"
409 # TODO: check if we're really been called with the instance locks held
411 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
412 # future we might want to have different behaviors depending on the value
413 # of self.recalculate_locks[locking.LEVEL_NODE]
415 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
416 instance = self.context.cfg.GetInstanceInfo(instance_name)
417 wanted_nodes.append(instance.primary_node)
419 wanted_nodes.extend(instance.secondary_nodes)
421 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
422 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
423 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
424 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
426 del self.recalculate_locks[locking.LEVEL_NODE]
429 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
430 """Simple LU which runs no hooks.
432 This LU is intended as a parent for other LogicalUnits which will
433 run no hooks, in order to reduce duplicate code.
439 def BuildHooksEnv(self):
440 """Empty BuildHooksEnv for NoHooksLu.
442 This just raises an error.
445 assert False, "BuildHooksEnv called for NoHooksLUs"
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
501 def __init__(self, names, fields, use_locking):
502 """Initializes this class.
506 self.use_locking = use_locking
508 self.query = query.Query(self.FIELDS, fields)
509 self.requested_data = self.query.RequestedData()
512 def FieldsQuery(cls, fields):
513 """Returns list of available fields.
515 @return: List of L{objects.QueryFieldDefinition}
519 # Client requests all fields
520 fdefs = query.GetAllFields(cls.FIELDS.values())
522 fdefs = query.Query(cls.FIELDS, fields).GetFields()
525 "fields": [fdef.ToDict() for fdef in fdefs],
528 def ExpandNames(self, lu):
529 """Expand names for this query.
531 See L{LogicalUnit.ExpandNames}.
534 raise NotImplementedError()
536 def DeclareLocks(self, level):
537 """Declare locks for this query.
539 See L{LogicalUnit.DeclareLocks}.
542 raise NotImplementedError()
544 def _GetQueryData(self, lu):
545 """Collects all data for this query.
547 @return: Query data object
550 raise NotImplementedError()
552 def NewStyleQuery(self, lu):
553 """Collect data and execute query.
556 data = self._GetQueryData(lu)
559 "data": self.query.Query(data),
560 "fields": [fdef.ToDict()
561 for fdef in self.query.GetFields()],
564 def OldStyleQuery(self, lu):
565 """Collect data and execute query.
568 return self.query.OldStyleQuery(self._GetQueryData(lu))
571 def _GetWantedNodes(lu, nodes):
572 """Returns list of checked and expanded node names.
574 @type lu: L{LogicalUnit}
575 @param lu: the logical unit on whose behalf we execute
577 @param nodes: list of node names or None for all nodes
579 @return: the list of nodes, sorted
580 @raise errors.ProgrammerError: if the nodes parameter is wrong type
584 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
586 return utils.NiceSort(lu.cfg.GetNodeList())
589 def _GetWantedInstances(lu, instances):
590 """Returns list of checked and expanded instance names.
592 @type lu: L{LogicalUnit}
593 @param lu: the logical unit on whose behalf we execute
594 @type instances: list
595 @param instances: list of instance names or None for all instances
597 @return: the list of instances, sorted
598 @raise errors.OpPrereqError: if the instances parameter is wrong type
599 @raise errors.OpPrereqError: if any of the passed instances is not found
603 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
605 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
609 def _GetUpdatedParams(old_params, update_dict,
610 use_default=True, use_none=False):
611 """Return the new version of a parameter dictionary.
613 @type old_params: dict
614 @param old_params: old parameters
615 @type update_dict: dict
616 @param update_dict: dict containing new parameter values, or
617 constants.VALUE_DEFAULT to reset the parameter to its default
619 @param use_default: boolean
620 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
621 values as 'to be deleted' values
622 @param use_none: boolean
623 @type use_none: whether to recognise C{None} values as 'to be
626 @return: the new parameter dictionary
629 params_copy = copy.deepcopy(old_params)
630 for key, val in update_dict.iteritems():
631 if ((use_default and val == constants.VALUE_DEFAULT) or
632 (use_none and val is None)):
638 params_copy[key] = val
642 def _CheckOutputFields(static, dynamic, selected):
643 """Checks whether all selected fields are valid.
645 @type static: L{utils.FieldSet}
646 @param static: static fields set
647 @type dynamic: L{utils.FieldSet}
648 @param dynamic: dynamic fields set
655 delta = f.NonMatching(selected)
657 raise errors.OpPrereqError("Unknown output fields selected: %s"
658 % ",".join(delta), errors.ECODE_INVAL)
661 def _CheckGlobalHvParams(params):
662 """Validates that given hypervisor params are not global ones.
664 This will ensure that instances don't get customised versions of
668 used_globals = constants.HVC_GLOBALS.intersection(params)
670 msg = ("The following hypervisor parameters are global and cannot"
671 " be customized at instance level, please modify them at"
672 " cluster level: %s" % utils.CommaJoin(used_globals))
673 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
676 def _CheckNodeOnline(lu, node, msg=None):
677 """Ensure that a given node is online.
679 @param lu: the LU on behalf of which we make the check
680 @param node: the node to check
681 @param msg: if passed, should be a message to replace the default one
682 @raise errors.OpPrereqError: if the node is offline
686 msg = "Can't use offline node"
687 if lu.cfg.GetNodeInfo(node).offline:
688 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
691 def _CheckNodeNotDrained(lu, node):
692 """Ensure that a given node is not drained.
694 @param lu: the LU on behalf of which we make the check
695 @param node: the node to check
696 @raise errors.OpPrereqError: if the node is drained
699 if lu.cfg.GetNodeInfo(node).drained:
700 raise errors.OpPrereqError("Can't use drained node %s" % node,
704 def _CheckNodeVmCapable(lu, node):
705 """Ensure that a given node is vm capable.
707 @param lu: the LU on behalf of which we make the check
708 @param node: the node to check
709 @raise errors.OpPrereqError: if the node is not vm capable
712 if not lu.cfg.GetNodeInfo(node).vm_capable:
713 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
717 def _CheckNodeHasOS(lu, node, os_name, force_variant):
718 """Ensure that a node supports a given OS.
720 @param lu: the LU on behalf of which we make the check
721 @param node: the node to check
722 @param os_name: the OS to query about
723 @param force_variant: whether to ignore variant errors
724 @raise errors.OpPrereqError: if the node is not supporting the OS
727 result = lu.rpc.call_os_get(node, os_name)
728 result.Raise("OS '%s' not in supported OS list for node %s" %
730 prereq=True, ecode=errors.ECODE_INVAL)
731 if not force_variant:
732 _CheckOSVariant(result.payload, os_name)
735 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
736 """Ensure that a node has the given secondary ip.
738 @type lu: L{LogicalUnit}
739 @param lu: the LU on behalf of which we make the check
741 @param node: the node to check
742 @type secondary_ip: string
743 @param secondary_ip: the ip to check
744 @type prereq: boolean
745 @param prereq: whether to throw a prerequisite or an execute error
746 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
747 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
750 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
751 result.Raise("Failure checking secondary ip on node %s" % node,
752 prereq=prereq, ecode=errors.ECODE_ENVIRON)
753 if not result.payload:
754 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
755 " please fix and re-run this command" % secondary_ip)
757 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
759 raise errors.OpExecError(msg)
762 def _RequireFileStorage():
763 """Checks that file storage is enabled.
765 @raise errors.OpPrereqError: when file storage is disabled
768 if not constants.ENABLE_FILE_STORAGE:
769 raise errors.OpPrereqError("File storage disabled at configure time",
773 def _CheckDiskTemplate(template):
774 """Ensure a given disk template is valid.
777 if template not in constants.DISK_TEMPLATES:
778 msg = ("Invalid disk template name '%s', valid templates are: %s" %
779 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
780 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
781 if template == constants.DT_FILE:
782 _RequireFileStorage()
786 def _CheckStorageType(storage_type):
787 """Ensure a given storage type is valid.
790 if storage_type not in constants.VALID_STORAGE_TYPES:
791 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
793 if storage_type == constants.ST_FILE:
794 _RequireFileStorage()
798 def _GetClusterDomainSecret():
799 """Reads the cluster domain secret.
802 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
806 def _CheckInstanceDown(lu, instance, reason):
807 """Ensure that an instance is not running."""
808 if instance.admin_up:
809 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
810 (instance.name, reason), errors.ECODE_STATE)
812 pnode = instance.primary_node
813 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
814 ins_l.Raise("Can't contact node %s for instance information" % pnode,
815 prereq=True, ecode=errors.ECODE_ENVIRON)
817 if instance.name in ins_l.payload:
818 raise errors.OpPrereqError("Instance %s is running, %s" %
819 (instance.name, reason), errors.ECODE_STATE)
822 def _ExpandItemName(fn, name, kind):
823 """Expand an item name.
825 @param fn: the function to use for expansion
826 @param name: requested item name
827 @param kind: text description ('Node' or 'Instance')
828 @return: the resolved (full) name
829 @raise errors.OpPrereqError: if the item is not found
833 if full_name is None:
834 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
839 def _ExpandNodeName(cfg, name):
840 """Wrapper over L{_ExpandItemName} for nodes."""
841 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
844 def _ExpandInstanceName(cfg, name):
845 """Wrapper over L{_ExpandItemName} for instance."""
846 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
849 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
850 memory, vcpus, nics, disk_template, disks,
851 bep, hvp, hypervisor_name):
852 """Builds instance related env variables for hooks
854 This builds the hook environment from individual variables.
857 @param name: the name of the instance
858 @type primary_node: string
859 @param primary_node: the name of the instance's primary node
860 @type secondary_nodes: list
861 @param secondary_nodes: list of secondary nodes as strings
862 @type os_type: string
863 @param os_type: the name of the instance's OS
864 @type status: boolean
865 @param status: the should_run status of the instance
867 @param memory: the memory size of the instance
869 @param vcpus: the count of VCPUs the instance has
871 @param nics: list of tuples (ip, mac, mode, link) representing
872 the NICs the instance has
873 @type disk_template: string
874 @param disk_template: the disk template of the instance
876 @param disks: the list of (size, mode) pairs
878 @param bep: the backend parameters for the instance
880 @param hvp: the hypervisor parameters for the instance
881 @type hypervisor_name: string
882 @param hypervisor_name: the hypervisor for the instance
884 @return: the hook environment for this instance
893 "INSTANCE_NAME": name,
894 "INSTANCE_PRIMARY": primary_node,
895 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
896 "INSTANCE_OS_TYPE": os_type,
897 "INSTANCE_STATUS": str_status,
898 "INSTANCE_MEMORY": memory,
899 "INSTANCE_VCPUS": vcpus,
900 "INSTANCE_DISK_TEMPLATE": disk_template,
901 "INSTANCE_HYPERVISOR": hypervisor_name,
905 nic_count = len(nics)
906 for idx, (ip, mac, mode, link) in enumerate(nics):
909 env["INSTANCE_NIC%d_IP" % idx] = ip
910 env["INSTANCE_NIC%d_MAC" % idx] = mac
911 env["INSTANCE_NIC%d_MODE" % idx] = mode
912 env["INSTANCE_NIC%d_LINK" % idx] = link
913 if mode == constants.NIC_MODE_BRIDGED:
914 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
918 env["INSTANCE_NIC_COUNT"] = nic_count
921 disk_count = len(disks)
922 for idx, (size, mode) in enumerate(disks):
923 env["INSTANCE_DISK%d_SIZE" % idx] = size
924 env["INSTANCE_DISK%d_MODE" % idx] = mode
928 env["INSTANCE_DISK_COUNT"] = disk_count
930 for source, kind in [(bep, "BE"), (hvp, "HV")]:
931 for key, value in source.items():
932 env["INSTANCE_%s_%s" % (kind, key)] = value
937 def _NICListToTuple(lu, nics):
938 """Build a list of nic information tuples.
940 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
941 value in LUQueryInstanceData.
943 @type lu: L{LogicalUnit}
944 @param lu: the logical unit on whose behalf we execute
945 @type nics: list of L{objects.NIC}
946 @param nics: list of nics to convert to hooks tuples
950 cluster = lu.cfg.GetClusterInfo()
954 filled_params = cluster.SimpleFillNIC(nic.nicparams)
955 mode = filled_params[constants.NIC_MODE]
956 link = filled_params[constants.NIC_LINK]
957 hooks_nics.append((ip, mac, mode, link))
961 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
962 """Builds instance related env variables for hooks from an object.
964 @type lu: L{LogicalUnit}
965 @param lu: the logical unit on whose behalf we execute
966 @type instance: L{objects.Instance}
967 @param instance: the instance for which we should build the
970 @param override: dictionary with key/values that will override
973 @return: the hook environment dictionary
976 cluster = lu.cfg.GetClusterInfo()
977 bep = cluster.FillBE(instance)
978 hvp = cluster.FillHV(instance)
980 'name': instance.name,
981 'primary_node': instance.primary_node,
982 'secondary_nodes': instance.secondary_nodes,
983 'os_type': instance.os,
984 'status': instance.admin_up,
985 'memory': bep[constants.BE_MEMORY],
986 'vcpus': bep[constants.BE_VCPUS],
987 'nics': _NICListToTuple(lu, instance.nics),
988 'disk_template': instance.disk_template,
989 'disks': [(disk.size, disk.mode) for disk in instance.disks],
992 'hypervisor_name': instance.hypervisor,
995 args.update(override)
996 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
999 def _AdjustCandidatePool(lu, exceptions):
1000 """Adjust the candidate pool after node operations.
1003 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1005 lu.LogInfo("Promoted nodes to master candidate role: %s",
1006 utils.CommaJoin(node.name for node in mod_list))
1007 for name in mod_list:
1008 lu.context.ReaddNode(name)
1009 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1011 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1015 def _DecideSelfPromotion(lu, exceptions=None):
1016 """Decide whether I should promote myself as a master candidate.
1019 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1020 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1021 # the new node will increase mc_max with one, so:
1022 mc_should = min(mc_should + 1, cp_size)
1023 return mc_now < mc_should
1026 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1027 """Check that the brigdes needed by a list of nics exist.
1030 cluster = lu.cfg.GetClusterInfo()
1031 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1032 brlist = [params[constants.NIC_LINK] for params in paramslist
1033 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1035 result = lu.rpc.call_bridges_exist(target_node, brlist)
1036 result.Raise("Error checking bridges on destination node '%s'" %
1037 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1040 def _CheckInstanceBridgesExist(lu, instance, node=None):
1041 """Check that the brigdes needed by an instance exist.
1045 node = instance.primary_node
1046 _CheckNicsBridgesExist(lu, instance.nics, node)
1049 def _CheckOSVariant(os_obj, name):
1050 """Check whether an OS name conforms to the os variants specification.
1052 @type os_obj: L{objects.OS}
1053 @param os_obj: OS object to check
1055 @param name: OS name passed by the user, to check for validity
1058 if not os_obj.supported_variants:
1060 variant = objects.OS.GetVariant(name)
1062 raise errors.OpPrereqError("OS name must include a variant",
1065 if variant not in os_obj.supported_variants:
1066 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1069 def _GetNodeInstancesInner(cfg, fn):
1070 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1073 def _GetNodeInstances(cfg, node_name):
1074 """Returns a list of all primary and secondary instances on a node.
1078 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1081 def _GetNodePrimaryInstances(cfg, node_name):
1082 """Returns primary instances on a node.
1085 return _GetNodeInstancesInner(cfg,
1086 lambda inst: node_name == inst.primary_node)
1089 def _GetNodeSecondaryInstances(cfg, node_name):
1090 """Returns secondary instances on a node.
1093 return _GetNodeInstancesInner(cfg,
1094 lambda inst: node_name in inst.secondary_nodes)
1097 def _GetStorageTypeArgs(cfg, storage_type):
1098 """Returns the arguments for a storage type.
1101 # Special case for file storage
1102 if storage_type == constants.ST_FILE:
1103 # storage.FileStorage wants a list of storage directories
1104 return [[cfg.GetFileStorageDir()]]
1109 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1112 for dev in instance.disks:
1113 cfg.SetDiskID(dev, node_name)
1115 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1116 result.Raise("Failed to get disk status from node %s" % node_name,
1117 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1119 for idx, bdev_status in enumerate(result.payload):
1120 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1126 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1127 """Check the sanity of iallocator and node arguments and use the
1128 cluster-wide iallocator if appropriate.
1130 Check that at most one of (iallocator, node) is specified. If none is
1131 specified, then the LU's opcode's iallocator slot is filled with the
1132 cluster-wide default iallocator.
1134 @type iallocator_slot: string
1135 @param iallocator_slot: the name of the opcode iallocator slot
1136 @type node_slot: string
1137 @param node_slot: the name of the opcode target node slot
1140 node = getattr(lu.op, node_slot, None)
1141 iallocator = getattr(lu.op, iallocator_slot, None)
1143 if node is not None and iallocator is not None:
1144 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1146 elif node is None and iallocator is None:
1147 default_iallocator = lu.cfg.GetDefaultIAllocator()
1148 if default_iallocator:
1149 setattr(lu.op, iallocator_slot, default_iallocator)
1151 raise errors.OpPrereqError("No iallocator or node given and no"
1152 " cluster-wide default iallocator found."
1153 " Please specify either an iallocator or a"
1154 " node, or set a cluster-wide default"
1158 class LUPostInitCluster(LogicalUnit):
1159 """Logical unit for running hooks after cluster initialization.
1162 HPATH = "cluster-init"
1163 HTYPE = constants.HTYPE_CLUSTER
1165 def BuildHooksEnv(self):
1169 env = {"OP_TARGET": self.cfg.GetClusterName()}
1170 mn = self.cfg.GetMasterNode()
1171 return env, [], [mn]
1173 def Exec(self, feedback_fn):
1180 class LUDestroyCluster(LogicalUnit):
1181 """Logical unit for destroying the cluster.
1184 HPATH = "cluster-destroy"
1185 HTYPE = constants.HTYPE_CLUSTER
1187 def BuildHooksEnv(self):
1191 env = {"OP_TARGET": self.cfg.GetClusterName()}
1194 def CheckPrereq(self):
1195 """Check prerequisites.
1197 This checks whether the cluster is empty.
1199 Any errors are signaled by raising errors.OpPrereqError.
1202 master = self.cfg.GetMasterNode()
1204 nodelist = self.cfg.GetNodeList()
1205 if len(nodelist) != 1 or nodelist[0] != master:
1206 raise errors.OpPrereqError("There are still %d node(s) in"
1207 " this cluster." % (len(nodelist) - 1),
1209 instancelist = self.cfg.GetInstanceList()
1211 raise errors.OpPrereqError("There are still %d instance(s) in"
1212 " this cluster." % len(instancelist),
1215 def Exec(self, feedback_fn):
1216 """Destroys the cluster.
1219 master = self.cfg.GetMasterNode()
1221 # Run post hooks on master node before it's removed
1222 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1224 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1226 # pylint: disable-msg=W0702
1227 self.LogWarning("Errors occurred running hooks on %s" % master)
1229 result = self.rpc.call_node_stop_master(master, False)
1230 result.Raise("Could not disable the master role")
1235 def _VerifyCertificate(filename):
1236 """Verifies a certificate for LUVerifyCluster.
1238 @type filename: string
1239 @param filename: Path to PEM file
1243 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1244 utils.ReadFile(filename))
1245 except Exception, err: # pylint: disable-msg=W0703
1246 return (LUVerifyCluster.ETYPE_ERROR,
1247 "Failed to load X509 certificate %s: %s" % (filename, err))
1250 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1251 constants.SSL_CERT_EXPIRATION_ERROR)
1254 fnamemsg = "While verifying %s: %s" % (filename, msg)
1259 return (None, fnamemsg)
1260 elif errcode == utils.CERT_WARNING:
1261 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1262 elif errcode == utils.CERT_ERROR:
1263 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1265 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1268 class LUVerifyCluster(LogicalUnit):
1269 """Verifies the cluster status.
1272 HPATH = "cluster-verify"
1273 HTYPE = constants.HTYPE_CLUSTER
1275 ("skip_checks", ht.EmptyList,
1276 ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1277 ("verbose", False, ht.TBool),
1278 ("error_codes", False, ht.TBool),
1279 ("debug_simulate_errors", False, ht.TBool),
1283 TCLUSTER = "cluster"
1285 TINSTANCE = "instance"
1287 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1288 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1289 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1290 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1291 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1292 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1293 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1294 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1295 ENODEDRBD = (TNODE, "ENODEDRBD")
1296 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1297 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1298 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1299 ENODEHV = (TNODE, "ENODEHV")
1300 ENODELVM = (TNODE, "ENODELVM")
1301 ENODEN1 = (TNODE, "ENODEN1")
1302 ENODENET = (TNODE, "ENODENET")
1303 ENODEOS = (TNODE, "ENODEOS")
1304 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1305 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1306 ENODERPC = (TNODE, "ENODERPC")
1307 ENODESSH = (TNODE, "ENODESSH")
1308 ENODEVERSION = (TNODE, "ENODEVERSION")
1309 ENODESETUP = (TNODE, "ENODESETUP")
1310 ENODETIME = (TNODE, "ENODETIME")
1312 ETYPE_FIELD = "code"
1313 ETYPE_ERROR = "ERROR"
1314 ETYPE_WARNING = "WARNING"
1316 _HOOKS_INDENT_RE = re.compile("^", re.M)
1318 class NodeImage(object):
1319 """A class representing the logical and physical status of a node.
1322 @ivar name: the node name to which this object refers
1323 @ivar volumes: a structure as returned from
1324 L{ganeti.backend.GetVolumeList} (runtime)
1325 @ivar instances: a list of running instances (runtime)
1326 @ivar pinst: list of configured primary instances (config)
1327 @ivar sinst: list of configured secondary instances (config)
1328 @ivar sbp: diction of {secondary-node: list of instances} of all peers
1329 of this node (config)
1330 @ivar mfree: free memory, as reported by hypervisor (runtime)
1331 @ivar dfree: free disk, as reported by the node (runtime)
1332 @ivar offline: the offline status (config)
1333 @type rpc_fail: boolean
1334 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1335 not whether the individual keys were correct) (runtime)
1336 @type lvm_fail: boolean
1337 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1338 @type hyp_fail: boolean
1339 @ivar hyp_fail: whether the RPC call didn't return the instance list
1340 @type ghost: boolean
1341 @ivar ghost: whether this is a known node or not (config)
1342 @type os_fail: boolean
1343 @ivar os_fail: whether the RPC call didn't return valid OS data
1345 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1346 @type vm_capable: boolean
1347 @ivar vm_capable: whether the node can host instances
1350 def __init__(self, offline=False, name=None, vm_capable=True):
1359 self.offline = offline
1360 self.vm_capable = vm_capable
1361 self.rpc_fail = False
1362 self.lvm_fail = False
1363 self.hyp_fail = False
1365 self.os_fail = False
1368 def ExpandNames(self):
1369 self.needed_locks = {
1370 locking.LEVEL_NODE: locking.ALL_SET,
1371 locking.LEVEL_INSTANCE: locking.ALL_SET,
1373 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1375 def _Error(self, ecode, item, msg, *args, **kwargs):
1376 """Format an error message.
1378 Based on the opcode's error_codes parameter, either format a
1379 parseable error code, or a simpler error string.
1381 This must be called only from Exec and functions called from Exec.
1384 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1386 # first complete the msg
1389 # then format the whole message
1390 if self.op.error_codes:
1391 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1397 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1398 # and finally report it via the feedback_fn
1399 self._feedback_fn(" - %s" % msg)
1401 def _ErrorIf(self, cond, *args, **kwargs):
1402 """Log an error message if the passed condition is True.
1405 cond = bool(cond) or self.op.debug_simulate_errors
1407 self._Error(*args, **kwargs)
1408 # do not mark the operation as failed for WARN cases only
1409 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410 self.bad = self.bad or cond
1412 def _VerifyNode(self, ninfo, nresult):
1413 """Perform some basic validation on data returned from a node.
1415 - check the result data structure is well formed and has all the
1417 - check ganeti version
1419 @type ninfo: L{objects.Node}
1420 @param ninfo: the node to check
1421 @param nresult: the results from the node
1423 @return: whether overall this call was successful (and we can expect
1424 reasonable values in the respose)
1428 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1430 # main result, nresult should be a non-empty dict
1431 test = not nresult or not isinstance(nresult, dict)
1432 _ErrorIf(test, self.ENODERPC, node,
1433 "unable to verify node: no data returned")
1437 # compares ganeti version
1438 local_version = constants.PROTOCOL_VERSION
1439 remote_version = nresult.get("version", None)
1440 test = not (remote_version and
1441 isinstance(remote_version, (list, tuple)) and
1442 len(remote_version) == 2)
1443 _ErrorIf(test, self.ENODERPC, node,
1444 "connection to node returned invalid data")
1448 test = local_version != remote_version[0]
1449 _ErrorIf(test, self.ENODEVERSION, node,
1450 "incompatible protocol versions: master %s,"
1451 " node %s", local_version, remote_version[0])
1455 # node seems compatible, we can actually try to look into its results
1457 # full package version
1458 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1459 self.ENODEVERSION, node,
1460 "software version mismatch: master %s, node %s",
1461 constants.RELEASE_VERSION, remote_version[1],
1462 code=self.ETYPE_WARNING)
1464 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1465 if ninfo.vm_capable and isinstance(hyp_result, dict):
1466 for hv_name, hv_result in hyp_result.iteritems():
1467 test = hv_result is not None
1468 _ErrorIf(test, self.ENODEHV, node,
1469 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1471 test = nresult.get(constants.NV_NODESETUP,
1472 ["Missing NODESETUP results"])
1473 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1478 def _VerifyNodeTime(self, ninfo, nresult,
1479 nvinfo_starttime, nvinfo_endtime):
1480 """Check the node time.
1482 @type ninfo: L{objects.Node}
1483 @param ninfo: the node to check
1484 @param nresult: the remote results for the node
1485 @param nvinfo_starttime: the start time of the RPC call
1486 @param nvinfo_endtime: the end time of the RPC call
1490 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1492 ntime = nresult.get(constants.NV_TIME, None)
1494 ntime_merged = utils.MergeTime(ntime)
1495 except (ValueError, TypeError):
1496 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1499 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1500 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1501 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1502 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1506 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1507 "Node time diverges by at least %s from master node time",
1510 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1511 """Check the node time.
1513 @type ninfo: L{objects.Node}
1514 @param ninfo: the node to check
1515 @param nresult: the remote results for the node
1516 @param vg_name: the configured VG name
1523 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1525 # checks vg existence and size > 20G
1526 vglist = nresult.get(constants.NV_VGLIST, None)
1528 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1530 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1531 constants.MIN_VG_SIZE)
1532 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1535 pvlist = nresult.get(constants.NV_PVLIST, None)
1536 test = pvlist is None
1537 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1539 # check that ':' is not present in PV names, since it's a
1540 # special character for lvcreate (denotes the range of PEs to
1542 for _, pvname, owner_vg in pvlist:
1543 test = ":" in pvname
1544 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1545 " '%s' of VG '%s'", pvname, owner_vg)
1547 def _VerifyNodeNetwork(self, ninfo, nresult):
1548 """Check the node time.
1550 @type ninfo: L{objects.Node}
1551 @param ninfo: the node to check
1552 @param nresult: the remote results for the node
1556 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1558 test = constants.NV_NODELIST not in nresult
1559 _ErrorIf(test, self.ENODESSH, node,
1560 "node hasn't returned node ssh connectivity data")
1562 if nresult[constants.NV_NODELIST]:
1563 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1564 _ErrorIf(True, self.ENODESSH, node,
1565 "ssh communication with node '%s': %s", a_node, a_msg)
1567 test = constants.NV_NODENETTEST not in nresult
1568 _ErrorIf(test, self.ENODENET, node,
1569 "node hasn't returned node tcp connectivity data")
1571 if nresult[constants.NV_NODENETTEST]:
1572 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1574 _ErrorIf(True, self.ENODENET, node,
1575 "tcp communication with node '%s': %s",
1576 anode, nresult[constants.NV_NODENETTEST][anode])
1578 test = constants.NV_MASTERIP not in nresult
1579 _ErrorIf(test, self.ENODENET, node,
1580 "node hasn't returned node master IP reachability data")
1582 if not nresult[constants.NV_MASTERIP]:
1583 if node == self.master_node:
1584 msg = "the master node cannot reach the master IP (not configured?)"
1586 msg = "cannot reach the master IP"
1587 _ErrorIf(True, self.ENODENET, node, msg)
1589 def _VerifyInstance(self, instance, instanceconfig, node_image,
1591 """Verify an instance.
1593 This function checks to see if the required block devices are
1594 available on the instance's node.
1597 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1598 node_current = instanceconfig.primary_node
1600 node_vol_should = {}
1601 instanceconfig.MapLVsByNode(node_vol_should)
1603 for node in node_vol_should:
1604 n_img = node_image[node]
1605 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1606 # ignore missing volumes on offline or broken nodes
1608 for volume in node_vol_should[node]:
1609 test = volume not in n_img.volumes
1610 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1611 "volume %s missing on node %s", volume, node)
1613 if instanceconfig.admin_up:
1614 pri_img = node_image[node_current]
1615 test = instance not in pri_img.instances and not pri_img.offline
1616 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1617 "instance not running on its primary node %s",
1620 for node, n_img in node_image.items():
1621 if (not node == node_current):
1622 test = instance in n_img.instances
1623 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1624 "instance should not run on node %s", node)
1626 diskdata = [(nname, success, status, idx)
1627 for (nname, disks) in diskstatus.items()
1628 for idx, (success, status) in enumerate(disks)]
1630 for nname, success, bdev_status, idx in diskdata:
1631 _ErrorIf(instanceconfig.admin_up and not success,
1632 self.EINSTANCEFAULTYDISK, instance,
1633 "couldn't retrieve status for disk/%s on %s: %s",
1634 idx, nname, bdev_status)
1635 _ErrorIf((instanceconfig.admin_up and success and
1636 bdev_status.ldisk_status == constants.LDS_FAULTY),
1637 self.EINSTANCEFAULTYDISK, instance,
1638 "disk/%s on %s is faulty", idx, nname)
1640 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1641 """Verify if there are any unknown volumes in the cluster.
1643 The .os, .swap and backup volumes are ignored. All other volumes are
1644 reported as unknown.
1646 @type reserved: L{ganeti.utils.FieldSet}
1647 @param reserved: a FieldSet of reserved volume names
1650 for node, n_img in node_image.items():
1651 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1652 # skip non-healthy nodes
1654 for volume in n_img.volumes:
1655 test = ((node not in node_vol_should or
1656 volume not in node_vol_should[node]) and
1657 not reserved.Matches(volume))
1658 self._ErrorIf(test, self.ENODEORPHANLV, node,
1659 "volume %s is unknown", volume)
1661 def _VerifyOrphanInstances(self, instancelist, node_image):
1662 """Verify the list of running instances.
1664 This checks what instances are running but unknown to the cluster.
1667 for node, n_img in node_image.items():
1668 for o_inst in n_img.instances:
1669 test = o_inst not in instancelist
1670 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1671 "instance %s on node %s should not exist", o_inst, node)
1673 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1674 """Verify N+1 Memory Resilience.
1676 Check that if one single node dies we can still start all the
1677 instances it was primary for.
1680 for node, n_img in node_image.items():
1681 # This code checks that every node which is now listed as
1682 # secondary has enough memory to host all instances it is
1683 # supposed to should a single other node in the cluster fail.
1684 # FIXME: not ready for failover to an arbitrary node
1685 # FIXME: does not support file-backed instances
1686 # WARNING: we currently take into account down instances as well
1687 # as up ones, considering that even if they're down someone
1688 # might want to start them even in the event of a node failure.
1689 for prinode, instances in n_img.sbp.items():
1691 for instance in instances:
1692 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1693 if bep[constants.BE_AUTO_BALANCE]:
1694 needed_mem += bep[constants.BE_MEMORY]
1695 test = n_img.mfree < needed_mem
1696 self._ErrorIf(test, self.ENODEN1, node,
1697 "not enough memory on to accommodate"
1698 " failovers should peer node %s fail", prinode)
1700 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1702 """Verifies and computes the node required file checksums.
1704 @type ninfo: L{objects.Node}
1705 @param ninfo: the node to check
1706 @param nresult: the remote results for the node
1707 @param file_list: required list of files
1708 @param local_cksum: dictionary of local files and their checksums
1709 @param master_files: list of files that only masters should have
1713 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1715 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1716 test = not isinstance(remote_cksum, dict)
1717 _ErrorIf(test, self.ENODEFILECHECK, node,
1718 "node hasn't returned file checksum data")
1722 for file_name in file_list:
1723 node_is_mc = ninfo.master_candidate
1724 must_have = (file_name not in master_files) or node_is_mc
1726 test1 = file_name not in remote_cksum
1728 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1730 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1731 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1732 "file '%s' missing", file_name)
1733 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1734 "file '%s' has wrong checksum", file_name)
1735 # not candidate and this is not a must-have file
1736 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1737 "file '%s' should not exist on non master"
1738 " candidates (and the file is outdated)", file_name)
1739 # all good, except non-master/non-must have combination
1740 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1741 "file '%s' should not exist"
1742 " on non master candidates", file_name)
1744 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1746 """Verifies and the node DRBD status.
1748 @type ninfo: L{objects.Node}
1749 @param ninfo: the node to check
1750 @param nresult: the remote results for the node
1751 @param instanceinfo: the dict of instances
1752 @param drbd_helper: the configured DRBD usermode helper
1753 @param drbd_map: the DRBD map as returned by
1754 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1758 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1761 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1762 test = (helper_result == None)
1763 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1764 "no drbd usermode helper returned")
1766 status, payload = helper_result
1768 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1769 "drbd usermode helper check unsuccessful: %s", payload)
1770 test = status and (payload != drbd_helper)
1771 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1772 "wrong drbd usermode helper: %s", payload)
1774 # compute the DRBD minors
1776 for minor, instance in drbd_map[node].items():
1777 test = instance not in instanceinfo
1778 _ErrorIf(test, self.ECLUSTERCFG, None,
1779 "ghost instance '%s' in temporary DRBD map", instance)
1780 # ghost instance should not be running, but otherwise we
1781 # don't give double warnings (both ghost instance and
1782 # unallocated minor in use)
1784 node_drbd[minor] = (instance, False)
1786 instance = instanceinfo[instance]
1787 node_drbd[minor] = (instance.name, instance.admin_up)
1789 # and now check them
1790 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1791 test = not isinstance(used_minors, (tuple, list))
1792 _ErrorIf(test, self.ENODEDRBD, node,
1793 "cannot parse drbd status file: %s", str(used_minors))
1795 # we cannot check drbd status
1798 for minor, (iname, must_exist) in node_drbd.items():
1799 test = minor not in used_minors and must_exist
1800 _ErrorIf(test, self.ENODEDRBD, node,
1801 "drbd minor %d of instance %s is not active", minor, iname)
1802 for minor in used_minors:
1803 test = minor not in node_drbd
1804 _ErrorIf(test, self.ENODEDRBD, node,
1805 "unallocated drbd minor %d is in use", minor)
1807 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1808 """Builds the node OS structures.
1810 @type ninfo: L{objects.Node}
1811 @param ninfo: the node to check
1812 @param nresult: the remote results for the node
1813 @param nimg: the node image object
1817 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1819 remote_os = nresult.get(constants.NV_OSLIST, None)
1820 test = (not isinstance(remote_os, list) or
1821 not compat.all(isinstance(v, list) and len(v) == 7
1822 for v in remote_os))
1824 _ErrorIf(test, self.ENODEOS, node,
1825 "node hasn't returned valid OS data")
1834 for (name, os_path, status, diagnose,
1835 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1837 if name not in os_dict:
1840 # parameters is a list of lists instead of list of tuples due to
1841 # JSON lacking a real tuple type, fix it:
1842 parameters = [tuple(v) for v in parameters]
1843 os_dict[name].append((os_path, status, diagnose,
1844 set(variants), set(parameters), set(api_ver)))
1846 nimg.oslist = os_dict
1848 def _VerifyNodeOS(self, ninfo, nimg, base):
1849 """Verifies the node OS list.
1851 @type ninfo: L{objects.Node}
1852 @param ninfo: the node to check
1853 @param nimg: the node image object
1854 @param base: the 'template' node we match against (e.g. from the master)
1858 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1860 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1862 for os_name, os_data in nimg.oslist.items():
1863 assert os_data, "Empty OS status for OS %s?!" % os_name
1864 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1865 _ErrorIf(not f_status, self.ENODEOS, node,
1866 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1867 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1868 "OS '%s' has multiple entries (first one shadows the rest): %s",
1869 os_name, utils.CommaJoin([v[0] for v in os_data]))
1870 # this will catched in backend too
1871 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1872 and not f_var, self.ENODEOS, node,
1873 "OS %s with API at least %d does not declare any variant",
1874 os_name, constants.OS_API_V15)
1875 # comparisons with the 'base' image
1876 test = os_name not in base.oslist
1877 _ErrorIf(test, self.ENODEOS, node,
1878 "Extra OS %s not present on reference node (%s)",
1882 assert base.oslist[os_name], "Base node has empty OS status?"
1883 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1885 # base OS is invalid, skipping
1887 for kind, a, b in [("API version", f_api, b_api),
1888 ("variants list", f_var, b_var),
1889 ("parameters", f_param, b_param)]:
1890 _ErrorIf(a != b, self.ENODEOS, node,
1891 "OS %s %s differs from reference node %s: %s vs. %s",
1892 kind, os_name, base.name,
1893 utils.CommaJoin(a), utils.CommaJoin(b))
1895 # check any missing OSes
1896 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1897 _ErrorIf(missing, self.ENODEOS, node,
1898 "OSes present on reference node %s but missing on this node: %s",
1899 base.name, utils.CommaJoin(missing))
1901 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1902 """Verifies and updates the node volume data.
1904 This function will update a L{NodeImage}'s internal structures
1905 with data from the remote call.
1907 @type ninfo: L{objects.Node}
1908 @param ninfo: the node to check
1909 @param nresult: the remote results for the node
1910 @param nimg: the node image object
1911 @param vg_name: the configured VG name
1915 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1917 nimg.lvm_fail = True
1918 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1921 elif isinstance(lvdata, basestring):
1922 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1923 utils.SafeEncode(lvdata))
1924 elif not isinstance(lvdata, dict):
1925 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1927 nimg.volumes = lvdata
1928 nimg.lvm_fail = False
1930 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1931 """Verifies and updates the node instance list.
1933 If the listing was successful, then updates this node's instance
1934 list. Otherwise, it marks the RPC call as failed for the instance
1937 @type ninfo: L{objects.Node}
1938 @param ninfo: the node to check
1939 @param nresult: the remote results for the node
1940 @param nimg: the node image object
1943 idata = nresult.get(constants.NV_INSTANCELIST, None)
1944 test = not isinstance(idata, list)
1945 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1946 " (instancelist): %s", utils.SafeEncode(str(idata)))
1948 nimg.hyp_fail = True
1950 nimg.instances = idata
1952 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1953 """Verifies and computes a node information map
1955 @type ninfo: L{objects.Node}
1956 @param ninfo: the node to check
1957 @param nresult: the remote results for the node
1958 @param nimg: the node image object
1959 @param vg_name: the configured VG name
1963 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1965 # try to read free memory (from the hypervisor)
1966 hv_info = nresult.get(constants.NV_HVINFO, None)
1967 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1968 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1971 nimg.mfree = int(hv_info["memory_free"])
1972 except (ValueError, TypeError):
1973 _ErrorIf(True, self.ENODERPC, node,
1974 "node returned invalid nodeinfo, check hypervisor")
1976 # FIXME: devise a free space model for file based instances as well
1977 if vg_name is not None:
1978 test = (constants.NV_VGLIST not in nresult or
1979 vg_name not in nresult[constants.NV_VGLIST])
1980 _ErrorIf(test, self.ENODELVM, node,
1981 "node didn't return data for the volume group '%s'"
1982 " - it is either missing or broken", vg_name)
1985 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1986 except (ValueError, TypeError):
1987 _ErrorIf(True, self.ENODERPC, node,
1988 "node returned invalid LVM info, check LVM status")
1990 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1991 """Gets per-disk status information for all instances.
1993 @type nodelist: list of strings
1994 @param nodelist: Node names
1995 @type node_image: dict of (name, L{objects.Node})
1996 @param node_image: Node objects
1997 @type instanceinfo: dict of (name, L{objects.Instance})
1998 @param instanceinfo: Instance objects
2001 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2004 node_disks_devonly = {}
2006 for nname in nodelist:
2007 disks = [(inst, disk)
2008 for instlist in [node_image[nname].pinst,
2009 node_image[nname].sinst]
2010 for inst in instlist
2011 for disk in instanceinfo[inst].disks]
2014 # No need to collect data
2017 node_disks[nname] = disks
2019 # Creating copies as SetDiskID below will modify the objects and that can
2020 # lead to incorrect data returned from nodes
2021 devonly = [dev.Copy() for (_, dev) in disks]
2024 self.cfg.SetDiskID(dev, nname)
2026 node_disks_devonly[nname] = devonly
2028 assert len(node_disks) == len(node_disks_devonly)
2030 # Collect data from all nodes with disks
2031 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2034 assert len(result) == len(node_disks)
2038 for (nname, nres) in result.items():
2040 # Ignore offline node
2043 disks = node_disks[nname]
2046 _ErrorIf(msg, self.ENODERPC, nname,
2047 "while getting disk information: %s", nres.fail_msg)
2049 # No data from this node
2050 data = len(disks) * [None]
2054 for ((inst, _), status) in zip(disks, data):
2055 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2057 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2058 len(nnames) <= len(instanceinfo[inst].all_nodes)
2059 for inst, nnames in instdisk.items()
2060 for nname, statuses in nnames.items())
2064 def BuildHooksEnv(self):
2067 Cluster-Verify hooks just ran in the post phase and their failure makes
2068 the output be logged in the verify output and the verification to fail.
2071 all_nodes = self.cfg.GetNodeList()
2073 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2075 for node in self.cfg.GetAllNodesInfo().values():
2076 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2078 return env, [], all_nodes
2080 def Exec(self, feedback_fn):
2081 """Verify integrity of cluster, performing various test on nodes.
2085 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2086 verbose = self.op.verbose
2087 self._feedback_fn = feedback_fn
2088 feedback_fn("* Verifying global settings")
2089 for msg in self.cfg.VerifyConfig():
2090 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2092 # Check the cluster certificates
2093 for cert_filename in constants.ALL_CERT_FILES:
2094 (errcode, msg) = _VerifyCertificate(cert_filename)
2095 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2097 vg_name = self.cfg.GetVGName()
2098 drbd_helper = self.cfg.GetDRBDHelper()
2099 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2100 cluster = self.cfg.GetClusterInfo()
2101 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2102 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2103 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2104 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2105 for iname in instancelist)
2106 i_non_redundant = [] # Non redundant instances
2107 i_non_a_balanced = [] # Non auto-balanced instances
2108 n_offline = 0 # Count of offline nodes
2109 n_drained = 0 # Count of nodes being drained
2110 node_vol_should = {}
2112 # FIXME: verify OS list
2113 # do local checksums
2114 master_files = [constants.CLUSTER_CONF_FILE]
2115 master_node = self.master_node = self.cfg.GetMasterNode()
2116 master_ip = self.cfg.GetMasterIP()
2118 file_names = ssconf.SimpleStore().GetFileList()
2119 file_names.extend(constants.ALL_CERT_FILES)
2120 file_names.extend(master_files)
2121 if cluster.modify_etc_hosts:
2122 file_names.append(constants.ETC_HOSTS)
2124 local_checksums = utils.FingerprintFiles(file_names)
2126 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2127 node_verify_param = {
2128 constants.NV_FILELIST: file_names,
2129 constants.NV_NODELIST: [node.name for node in nodeinfo
2130 if not node.offline],
2131 constants.NV_HYPERVISOR: hypervisors,
2132 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2133 node.secondary_ip) for node in nodeinfo
2134 if not node.offline],
2135 constants.NV_INSTANCELIST: hypervisors,
2136 constants.NV_VERSION: None,
2137 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2138 constants.NV_NODESETUP: None,
2139 constants.NV_TIME: None,
2140 constants.NV_MASTERIP: (master_node, master_ip),
2141 constants.NV_OSLIST: None,
2142 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2145 if vg_name is not None:
2146 node_verify_param[constants.NV_VGLIST] = None
2147 node_verify_param[constants.NV_LVLIST] = vg_name
2148 node_verify_param[constants.NV_PVLIST] = [vg_name]
2149 node_verify_param[constants.NV_DRBDLIST] = None
2152 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2154 # Build our expected cluster state
2155 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2157 vm_capable=node.vm_capable))
2158 for node in nodeinfo)
2160 for instance in instancelist:
2161 inst_config = instanceinfo[instance]
2163 for nname in inst_config.all_nodes:
2164 if nname not in node_image:
2166 gnode = self.NodeImage(name=nname)
2168 node_image[nname] = gnode
2170 inst_config.MapLVsByNode(node_vol_should)
2172 pnode = inst_config.primary_node
2173 node_image[pnode].pinst.append(instance)
2175 for snode in inst_config.secondary_nodes:
2176 nimg = node_image[snode]
2177 nimg.sinst.append(instance)
2178 if pnode not in nimg.sbp:
2179 nimg.sbp[pnode] = []
2180 nimg.sbp[pnode].append(instance)
2182 # At this point, we have the in-memory data structures complete,
2183 # except for the runtime information, which we'll gather next
2185 # Due to the way our RPC system works, exact response times cannot be
2186 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2187 # time before and after executing the request, we can at least have a time
2189 nvinfo_starttime = time.time()
2190 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2191 self.cfg.GetClusterName())
2192 nvinfo_endtime = time.time()
2194 all_drbd_map = self.cfg.ComputeDRBDMap()
2196 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2197 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2199 feedback_fn("* Verifying node status")
2203 for node_i in nodeinfo:
2205 nimg = node_image[node]
2209 feedback_fn("* Skipping offline node %s" % (node,))
2213 if node == master_node:
2215 elif node_i.master_candidate:
2216 ntype = "master candidate"
2217 elif node_i.drained:
2223 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2225 msg = all_nvinfo[node].fail_msg
2226 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2228 nimg.rpc_fail = True
2231 nresult = all_nvinfo[node].payload
2233 nimg.call_ok = self._VerifyNode(node_i, nresult)
2234 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2235 self._VerifyNodeNetwork(node_i, nresult)
2236 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2240 self._VerifyNodeLVM(node_i, nresult, vg_name)
2241 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2244 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2245 self._UpdateNodeInstances(node_i, nresult, nimg)
2246 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2247 self._UpdateNodeOS(node_i, nresult, nimg)
2248 if not nimg.os_fail:
2249 if refos_img is None:
2251 self._VerifyNodeOS(node_i, nimg, refos_img)
2253 feedback_fn("* Verifying instance status")
2254 for instance in instancelist:
2256 feedback_fn("* Verifying instance %s" % instance)
2257 inst_config = instanceinfo[instance]
2258 self._VerifyInstance(instance, inst_config, node_image,
2260 inst_nodes_offline = []
2262 pnode = inst_config.primary_node
2263 pnode_img = node_image[pnode]
2264 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2265 self.ENODERPC, pnode, "instance %s, connection to"
2266 " primary node failed", instance)
2268 if pnode_img.offline:
2269 inst_nodes_offline.append(pnode)
2271 # If the instance is non-redundant we cannot survive losing its primary
2272 # node, so we are not N+1 compliant. On the other hand we have no disk
2273 # templates with more than one secondary so that situation is not well
2275 # FIXME: does not support file-backed instances
2276 if not inst_config.secondary_nodes:
2277 i_non_redundant.append(instance)
2278 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2279 instance, "instance has multiple secondary nodes: %s",
2280 utils.CommaJoin(inst_config.secondary_nodes),
2281 code=self.ETYPE_WARNING)
2283 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2284 i_non_a_balanced.append(instance)
2286 for snode in inst_config.secondary_nodes:
2287 s_img = node_image[snode]
2288 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2289 "instance %s, connection to secondary node failed", instance)
2292 inst_nodes_offline.append(snode)
2294 # warn that the instance lives on offline nodes
2295 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2296 "instance lives on offline node(s) %s",
2297 utils.CommaJoin(inst_nodes_offline))
2298 # ... or ghost/non-vm_capable nodes
2299 for node in inst_config.all_nodes:
2300 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2301 "instance lives on ghost node %s", node)
2302 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2303 instance, "instance lives on non-vm_capable node %s", node)
2305 feedback_fn("* Verifying orphan volumes")
2306 reserved = utils.FieldSet(*cluster.reserved_lvs)
2307 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2309 feedback_fn("* Verifying orphan instances")
2310 self._VerifyOrphanInstances(instancelist, node_image)
2312 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2313 feedback_fn("* Verifying N+1 Memory redundancy")
2314 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2316 feedback_fn("* Other Notes")
2318 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2319 % len(i_non_redundant))
2321 if i_non_a_balanced:
2322 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2323 % len(i_non_a_balanced))
2326 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2329 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2333 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2334 """Analyze the post-hooks' result
2336 This method analyses the hook result, handles it, and sends some
2337 nicely-formatted feedback back to the user.
2339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2341 @param hooks_results: the results of the multi-node hooks rpc call
2342 @param feedback_fn: function used send feedback back to the caller
2343 @param lu_result: previous Exec result
2344 @return: the new Exec result, based on the previous result
2348 # We only really run POST phase hooks, and are only interested in
2350 if phase == constants.HOOKS_PHASE_POST:
2351 # Used to change hooks' output to proper indentation
2352 feedback_fn("* Hooks Results")
2353 assert hooks_results, "invalid result from hooks"
2355 for node_name in hooks_results:
2356 res = hooks_results[node_name]
2358 test = msg and not res.offline
2359 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2360 "Communication failure in hooks execution: %s", msg)
2361 if res.offline or msg:
2362 # No need to investigate payload if node is offline or gave an error.
2363 # override manually lu_result here as _ErrorIf only
2364 # overrides self.bad
2367 for script, hkr, output in res.payload:
2368 test = hkr == constants.HKR_FAIL
2369 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2370 "Script %s failed, output:", script)
2372 output = self._HOOKS_INDENT_RE.sub(' ', output)
2373 feedback_fn("%s" % output)
2379 class LUVerifyDisks(NoHooksLU):
2380 """Verifies the cluster disks status.
2385 def ExpandNames(self):
2386 self.needed_locks = {
2387 locking.LEVEL_NODE: locking.ALL_SET,
2388 locking.LEVEL_INSTANCE: locking.ALL_SET,
2390 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2392 def Exec(self, feedback_fn):
2393 """Verify integrity of cluster disks.
2395 @rtype: tuple of three items
2396 @return: a tuple of (dict of node-to-node_error, list of instances
2397 which need activate-disks, dict of instance: (node, volume) for
2401 result = res_nodes, res_instances, res_missing = {}, [], {}
2403 nodes = utils.NiceSort(self.cfg.GetNodeList())
2404 instances = [self.cfg.GetInstanceInfo(name)
2405 for name in self.cfg.GetInstanceList()]
2408 for inst in instances:
2410 if (not inst.admin_up or
2411 inst.disk_template not in constants.DTS_NET_MIRROR):
2413 inst.MapLVsByNode(inst_lvs)
2414 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2415 for node, vol_list in inst_lvs.iteritems():
2416 for vol in vol_list:
2417 nv_dict[(node, vol)] = inst
2422 vg_names = self.rpc.call_vg_list(nodes)
2423 vg_names.Raise("Cannot get list of VGs")
2427 node_res = self.rpc.call_lv_list([node],
2428 vg_names[node].payload.keys())[node]
2429 if node_res.offline:
2431 msg = node_res.fail_msg
2433 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2434 res_nodes[node] = msg
2437 lvs = node_res.payload
2438 for lv_name, (_, _, lv_online) in lvs.items():
2439 inst = nv_dict.pop((node, lv_name), None)
2440 if (not lv_online and inst is not None
2441 and inst.name not in res_instances):
2442 res_instances.append(inst.name)
2444 # any leftover items in nv_dict are missing LVs, let's arrange the
2446 for key, inst in nv_dict.iteritems():
2447 if inst.name not in res_missing:
2448 res_missing[inst.name] = []
2449 res_missing[inst.name].append(key)
2454 class LURepairDiskSizes(NoHooksLU):
2455 """Verifies the cluster disks sizes.
2458 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2461 def ExpandNames(self):
2462 if self.op.instances:
2463 self.wanted_names = []
2464 for name in self.op.instances:
2465 full_name = _ExpandInstanceName(self.cfg, name)
2466 self.wanted_names.append(full_name)
2467 self.needed_locks = {
2468 locking.LEVEL_NODE: [],
2469 locking.LEVEL_INSTANCE: self.wanted_names,
2471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2473 self.wanted_names = None
2474 self.needed_locks = {
2475 locking.LEVEL_NODE: locking.ALL_SET,
2476 locking.LEVEL_INSTANCE: locking.ALL_SET,
2478 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2480 def DeclareLocks(self, level):
2481 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2482 self._LockInstancesNodes(primary_only=True)
2484 def CheckPrereq(self):
2485 """Check prerequisites.
2487 This only checks the optional instance list against the existing names.
2490 if self.wanted_names is None:
2491 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2493 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2494 in self.wanted_names]
2496 def _EnsureChildSizes(self, disk):
2497 """Ensure children of the disk have the needed disk size.
2499 This is valid mainly for DRBD8 and fixes an issue where the
2500 children have smaller disk size.
2502 @param disk: an L{ganeti.objects.Disk} object
2505 if disk.dev_type == constants.LD_DRBD8:
2506 assert disk.children, "Empty children for DRBD8?"
2507 fchild = disk.children[0]
2508 mismatch = fchild.size < disk.size
2510 self.LogInfo("Child disk has size %d, parent %d, fixing",
2511 fchild.size, disk.size)
2512 fchild.size = disk.size
2514 # and we recurse on this child only, not on the metadev
2515 return self._EnsureChildSizes(fchild) or mismatch
2519 def Exec(self, feedback_fn):
2520 """Verify the size of cluster disks.
2523 # TODO: check child disks too
2524 # TODO: check differences in size between primary/secondary nodes
2526 for instance in self.wanted_instances:
2527 pnode = instance.primary_node
2528 if pnode not in per_node_disks:
2529 per_node_disks[pnode] = []
2530 for idx, disk in enumerate(instance.disks):
2531 per_node_disks[pnode].append((instance, idx, disk))
2534 for node, dskl in per_node_disks.items():
2535 newl = [v[2].Copy() for v in dskl]
2537 self.cfg.SetDiskID(dsk, node)
2538 result = self.rpc.call_blockdev_getsizes(node, newl)
2540 self.LogWarning("Failure in blockdev_getsizes call to node"
2541 " %s, ignoring", node)
2543 if len(result.data) != len(dskl):
2544 self.LogWarning("Invalid result from node %s, ignoring node results",
2547 for ((instance, idx, disk), size) in zip(dskl, result.data):
2549 self.LogWarning("Disk %d of instance %s did not return size"
2550 " information, ignoring", idx, instance.name)
2552 if not isinstance(size, (int, long)):
2553 self.LogWarning("Disk %d of instance %s did not return valid"
2554 " size information, ignoring", idx, instance.name)
2557 if size != disk.size:
2558 self.LogInfo("Disk %d of instance %s has mismatched size,"
2559 " correcting: recorded %d, actual %d", idx,
2560 instance.name, disk.size, size)
2562 self.cfg.Update(instance, feedback_fn)
2563 changed.append((instance.name, idx, size))
2564 if self._EnsureChildSizes(disk):
2565 self.cfg.Update(instance, feedback_fn)
2566 changed.append((instance.name, idx, disk.size))
2570 class LURenameCluster(LogicalUnit):
2571 """Rename the cluster.
2574 HPATH = "cluster-rename"
2575 HTYPE = constants.HTYPE_CLUSTER
2576 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2578 def BuildHooksEnv(self):
2583 "OP_TARGET": self.cfg.GetClusterName(),
2584 "NEW_NAME": self.op.name,
2586 mn = self.cfg.GetMasterNode()
2587 all_nodes = self.cfg.GetNodeList()
2588 return env, [mn], all_nodes
2590 def CheckPrereq(self):
2591 """Verify that the passed name is a valid one.
2594 hostname = netutils.GetHostname(name=self.op.name,
2595 family=self.cfg.GetPrimaryIPFamily())
2597 new_name = hostname.name
2598 self.ip = new_ip = hostname.ip
2599 old_name = self.cfg.GetClusterName()
2600 old_ip = self.cfg.GetMasterIP()
2601 if new_name == old_name and new_ip == old_ip:
2602 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2603 " cluster has changed",
2605 if new_ip != old_ip:
2606 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2607 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2608 " reachable on the network" %
2609 new_ip, errors.ECODE_NOTUNIQUE)
2611 self.op.name = new_name
2613 def Exec(self, feedback_fn):
2614 """Rename the cluster.
2617 clustername = self.op.name
2620 # shutdown the master IP
2621 master = self.cfg.GetMasterNode()
2622 result = self.rpc.call_node_stop_master(master, False)
2623 result.Raise("Could not disable the master role")
2626 cluster = self.cfg.GetClusterInfo()
2627 cluster.cluster_name = clustername
2628 cluster.master_ip = ip
2629 self.cfg.Update(cluster, feedback_fn)
2631 # update the known hosts file
2632 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2633 node_list = self.cfg.GetOnlineNodeList()
2635 node_list.remove(master)
2638 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2640 result = self.rpc.call_node_start_master(master, False, False)
2641 msg = result.fail_msg
2643 self.LogWarning("Could not re-enable the master role on"
2644 " the master, please restart manually: %s", msg)
2649 class LUSetClusterParams(LogicalUnit):
2650 """Change the parameters of the cluster.
2653 HPATH = "cluster-modify"
2654 HTYPE = constants.HTYPE_CLUSTER
2656 ("vg_name", None, ht.TMaybeString),
2657 ("enabled_hypervisors", None,
2658 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2660 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2662 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2663 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2665 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2667 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2668 ("uid_pool", None, ht.NoType),
2669 ("add_uids", None, ht.NoType),
2670 ("remove_uids", None, ht.NoType),
2671 ("maintain_node_health", None, ht.TMaybeBool),
2672 ("prealloc_wipe_disks", None, ht.TMaybeBool),
2673 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2674 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2675 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2676 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2677 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2678 ("hidden_os", None, ht.TOr(ht.TListOf(\
2681 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2683 ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2686 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2691 def CheckArguments(self):
2695 if self.op.uid_pool:
2696 uidpool.CheckUidPool(self.op.uid_pool)
2698 if self.op.add_uids:
2699 uidpool.CheckUidPool(self.op.add_uids)
2701 if self.op.remove_uids:
2702 uidpool.CheckUidPool(self.op.remove_uids)
2704 def ExpandNames(self):
2705 # FIXME: in the future maybe other cluster params won't require checking on
2706 # all nodes to be modified.
2707 self.needed_locks = {
2708 locking.LEVEL_NODE: locking.ALL_SET,
2710 self.share_locks[locking.LEVEL_NODE] = 1
2712 def BuildHooksEnv(self):
2717 "OP_TARGET": self.cfg.GetClusterName(),
2718 "NEW_VG_NAME": self.op.vg_name,
2720 mn = self.cfg.GetMasterNode()
2721 return env, [mn], [mn]
2723 def CheckPrereq(self):
2724 """Check prerequisites.
2726 This checks whether the given params don't conflict and
2727 if the given volume group is valid.
2730 if self.op.vg_name is not None and not self.op.vg_name:
2731 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2732 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2733 " instances exist", errors.ECODE_INVAL)
2735 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2736 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2737 raise errors.OpPrereqError("Cannot disable drbd helper while"
2738 " drbd-based instances exist",
2741 node_list = self.acquired_locks[locking.LEVEL_NODE]
2743 # if vg_name not None, checks given volume group on all nodes
2745 vglist = self.rpc.call_vg_list(node_list)
2746 for node in node_list:
2747 msg = vglist[node].fail_msg
2749 # ignoring down node
2750 self.LogWarning("Error while gathering data on node %s"
2751 " (ignoring node): %s", node, msg)
2753 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2755 constants.MIN_VG_SIZE)
2757 raise errors.OpPrereqError("Error on node '%s': %s" %
2758 (node, vgstatus), errors.ECODE_ENVIRON)
2760 if self.op.drbd_helper:
2761 # checks given drbd helper on all nodes
2762 helpers = self.rpc.call_drbd_helper(node_list)
2763 for node in node_list:
2764 ninfo = self.cfg.GetNodeInfo(node)
2766 self.LogInfo("Not checking drbd helper on offline node %s", node)
2768 msg = helpers[node].fail_msg
2770 raise errors.OpPrereqError("Error checking drbd helper on node"
2771 " '%s': %s" % (node, msg),
2772 errors.ECODE_ENVIRON)
2773 node_helper = helpers[node].payload
2774 if node_helper != self.op.drbd_helper:
2775 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2776 (node, node_helper), errors.ECODE_ENVIRON)
2778 self.cluster = cluster = self.cfg.GetClusterInfo()
2779 # validate params changes
2780 if self.op.beparams:
2781 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2782 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2784 if self.op.ndparams:
2785 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2786 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2788 if self.op.nicparams:
2789 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2790 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2791 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2794 # check all instances for consistency
2795 for instance in self.cfg.GetAllInstancesInfo().values():
2796 for nic_idx, nic in enumerate(instance.nics):
2797 params_copy = copy.deepcopy(nic.nicparams)
2798 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2800 # check parameter syntax
2802 objects.NIC.CheckParameterSyntax(params_filled)
2803 except errors.ConfigurationError, err:
2804 nic_errors.append("Instance %s, nic/%d: %s" %
2805 (instance.name, nic_idx, err))
2807 # if we're moving instances to routed, check that they have an ip
2808 target_mode = params_filled[constants.NIC_MODE]
2809 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2810 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2811 (instance.name, nic_idx))
2813 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2814 "\n".join(nic_errors))
2816 # hypervisor list/parameters
2817 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2818 if self.op.hvparams:
2819 for hv_name, hv_dict in self.op.hvparams.items():
2820 if hv_name not in self.new_hvparams:
2821 self.new_hvparams[hv_name] = hv_dict
2823 self.new_hvparams[hv_name].update(hv_dict)
2825 # os hypervisor parameters
2826 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2828 for os_name, hvs in self.op.os_hvp.items():
2829 if os_name not in self.new_os_hvp:
2830 self.new_os_hvp[os_name] = hvs
2832 for hv_name, hv_dict in hvs.items():
2833 if hv_name not in self.new_os_hvp[os_name]:
2834 self.new_os_hvp[os_name][hv_name] = hv_dict
2836 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2839 self.new_osp = objects.FillDict(cluster.osparams, {})
2840 if self.op.osparams:
2841 for os_name, osp in self.op.osparams.items():
2842 if os_name not in self.new_osp:
2843 self.new_osp[os_name] = {}
2845 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2848 if not self.new_osp[os_name]:
2849 # we removed all parameters
2850 del self.new_osp[os_name]
2852 # check the parameter validity (remote check)
2853 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2854 os_name, self.new_osp[os_name])
2856 # changes to the hypervisor list
2857 if self.op.enabled_hypervisors is not None:
2858 self.hv_list = self.op.enabled_hypervisors
2859 for hv in self.hv_list:
2860 # if the hypervisor doesn't already exist in the cluster
2861 # hvparams, we initialize it to empty, and then (in both
2862 # cases) we make sure to fill the defaults, as we might not
2863 # have a complete defaults list if the hypervisor wasn't
2865 if hv not in new_hvp:
2867 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2868 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2870 self.hv_list = cluster.enabled_hypervisors
2872 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2873 # either the enabled list has changed, or the parameters have, validate
2874 for hv_name, hv_params in self.new_hvparams.items():
2875 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2876 (self.op.enabled_hypervisors and
2877 hv_name in self.op.enabled_hypervisors)):
2878 # either this is a new hypervisor, or its parameters have changed
2879 hv_class = hypervisor.GetHypervisor(hv_name)
2880 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2881 hv_class.CheckParameterSyntax(hv_params)
2882 _CheckHVParams(self, node_list, hv_name, hv_params)
2885 # no need to check any newly-enabled hypervisors, since the
2886 # defaults have already been checked in the above code-block
2887 for os_name, os_hvp in self.new_os_hvp.items():
2888 for hv_name, hv_params in os_hvp.items():
2889 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2890 # we need to fill in the new os_hvp on top of the actual hv_p
2891 cluster_defaults = self.new_hvparams.get(hv_name, {})
2892 new_osp = objects.FillDict(cluster_defaults, hv_params)
2893 hv_class = hypervisor.GetHypervisor(hv_name)
2894 hv_class.CheckParameterSyntax(new_osp)
2895 _CheckHVParams(self, node_list, hv_name, new_osp)
2897 if self.op.default_iallocator:
2898 alloc_script = utils.FindFile(self.op.default_iallocator,
2899 constants.IALLOCATOR_SEARCH_PATH,
2901 if alloc_script is None:
2902 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2903 " specified" % self.op.default_iallocator,
2906 def Exec(self, feedback_fn):
2907 """Change the parameters of the cluster.
2910 if self.op.vg_name is not None:
2911 new_volume = self.op.vg_name
2914 if new_volume != self.cfg.GetVGName():
2915 self.cfg.SetVGName(new_volume)
2917 feedback_fn("Cluster LVM configuration already in desired"
2918 " state, not changing")
2919 if self.op.drbd_helper is not None:
2920 new_helper = self.op.drbd_helper
2923 if new_helper != self.cfg.GetDRBDHelper():
2924 self.cfg.SetDRBDHelper(new_helper)
2926 feedback_fn("Cluster DRBD helper already in desired state,"
2928 if self.op.hvparams:
2929 self.cluster.hvparams = self.new_hvparams
2931 self.cluster.os_hvp = self.new_os_hvp
2932 if self.op.enabled_hypervisors is not None:
2933 self.cluster.hvparams = self.new_hvparams
2934 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2935 if self.op.beparams:
2936 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2937 if self.op.nicparams:
2938 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2939 if self.op.osparams:
2940 self.cluster.osparams = self.new_osp
2941 if self.op.ndparams:
2942 self.cluster.ndparams = self.new_ndparams
2944 if self.op.candidate_pool_size is not None:
2945 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2946 # we need to update the pool size here, otherwise the save will fail
2947 _AdjustCandidatePool(self, [])
2949 if self.op.maintain_node_health is not None:
2950 self.cluster.maintain_node_health = self.op.maintain_node_health
2952 if self.op.prealloc_wipe_disks is not None:
2953 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2955 if self.op.add_uids is not None:
2956 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2958 if self.op.remove_uids is not None:
2959 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2961 if self.op.uid_pool is not None:
2962 self.cluster.uid_pool = self.op.uid_pool
2964 if self.op.default_iallocator is not None:
2965 self.cluster.default_iallocator = self.op.default_iallocator
2967 if self.op.reserved_lvs is not None:
2968 self.cluster.reserved_lvs = self.op.reserved_lvs
2970 def helper_os(aname, mods, desc):
2972 lst = getattr(self.cluster, aname)
2973 for key, val in mods:
2974 if key == constants.DDM_ADD:
2976 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2979 elif key == constants.DDM_REMOVE:
2983 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2985 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2987 if self.op.hidden_os:
2988 helper_os("hidden_os", self.op.hidden_os, "hidden")
2990 if self.op.blacklisted_os:
2991 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2993 self.cfg.Update(self.cluster, feedback_fn)
2996 def _UploadHelper(lu, nodes, fname):
2997 """Helper for uploading a file and showing warnings.
3000 if os.path.exists(fname):
3001 result = lu.rpc.call_upload_file(nodes, fname)
3002 for to_node, to_result in result.items():
3003 msg = to_result.fail_msg
3005 msg = ("Copy of file %s to node %s failed: %s" %
3006 (fname, to_node, msg))
3007 lu.proc.LogWarning(msg)
3010 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3011 """Distribute additional files which are part of the cluster configuration.
3013 ConfigWriter takes care of distributing the config and ssconf files, but
3014 there are more files which should be distributed to all nodes. This function
3015 makes sure those are copied.
3017 @param lu: calling logical unit
3018 @param additional_nodes: list of nodes not in the config to distribute to
3019 @type additional_vm: boolean
3020 @param additional_vm: whether the additional nodes are vm-capable or not
3023 # 1. Gather target nodes
3024 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3025 dist_nodes = lu.cfg.GetOnlineNodeList()
3026 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3027 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3028 if additional_nodes is not None:
3029 dist_nodes.extend(additional_nodes)
3031 vm_nodes.extend(additional_nodes)
3032 if myself.name in dist_nodes:
3033 dist_nodes.remove(myself.name)
3034 if myself.name in vm_nodes:
3035 vm_nodes.remove(myself.name)
3037 # 2. Gather files to distribute
3038 dist_files = set([constants.ETC_HOSTS,
3039 constants.SSH_KNOWN_HOSTS_FILE,
3040 constants.RAPI_CERT_FILE,
3041 constants.RAPI_USERS_FILE,
3042 constants.CONFD_HMAC_KEY,
3043 constants.CLUSTER_DOMAIN_SECRET_FILE,
3047 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3048 for hv_name in enabled_hypervisors:
3049 hv_class = hypervisor.GetHypervisor(hv_name)
3050 vm_files.update(hv_class.GetAncillaryFiles())
3052 # 3. Perform the files upload
3053 for fname in dist_files:
3054 _UploadHelper(lu, dist_nodes, fname)
3055 for fname in vm_files:
3056 _UploadHelper(lu, vm_nodes, fname)
3059 class LURedistributeConfig(NoHooksLU):
3060 """Force the redistribution of cluster configuration.
3062 This is a very simple LU.
3067 def ExpandNames(self):
3068 self.needed_locks = {
3069 locking.LEVEL_NODE: locking.ALL_SET,
3071 self.share_locks[locking.LEVEL_NODE] = 1
3073 def Exec(self, feedback_fn):
3074 """Redistribute the configuration.
3077 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3078 _RedistributeAncillaryFiles(self)
3081 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3082 """Sleep and poll for an instance's disk to sync.
3085 if not instance.disks or disks is not None and not disks:
3088 disks = _ExpandCheckDisks(instance, disks)
3091 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3093 node = instance.primary_node
3096 lu.cfg.SetDiskID(dev, node)
3098 # TODO: Convert to utils.Retry
3101 degr_retries = 10 # in seconds, as we sleep 1 second each time
3105 cumul_degraded = False
3106 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3107 msg = rstats.fail_msg
3109 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3112 raise errors.RemoteError("Can't contact node %s for mirror data,"
3113 " aborting." % node)
3116 rstats = rstats.payload
3118 for i, mstat in enumerate(rstats):
3120 lu.LogWarning("Can't compute data for node %s/%s",
3121 node, disks[i].iv_name)
3124 cumul_degraded = (cumul_degraded or
3125 (mstat.is_degraded and mstat.sync_percent is None))
3126 if mstat.sync_percent is not None:
3128 if mstat.estimated_time is not None:
3129 rem_time = ("%s remaining (estimated)" %
3130 utils.FormatSeconds(mstat.estimated_time))
3131 max_time = mstat.estimated_time
3133 rem_time = "no time estimate"
3134 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3135 (disks[i].iv_name, mstat.sync_percent, rem_time))
3137 # if we're done but degraded, let's do a few small retries, to
3138 # make sure we see a stable and not transient situation; therefore
3139 # we force restart of the loop
3140 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3141 logging.info("Degraded disks found, %d retries left", degr_retries)
3149 time.sleep(min(60, max_time))
3152 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3153 return not cumul_degraded
3156 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3157 """Check that mirrors are not degraded.
3159 The ldisk parameter, if True, will change the test from the
3160 is_degraded attribute (which represents overall non-ok status for
3161 the device(s)) to the ldisk (representing the local storage status).
3164 lu.cfg.SetDiskID(dev, node)
3168 if on_primary or dev.AssembleOnSecondary():
3169 rstats = lu.rpc.call_blockdev_find(node, dev)
3170 msg = rstats.fail_msg
3172 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3174 elif not rstats.payload:
3175 lu.LogWarning("Can't find disk on node %s", node)
3179 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3181 result = result and not rstats.payload.is_degraded
3184 for child in dev.children:
3185 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3190 class LUOutOfBand(NoHooksLU):
3191 """Logical unit for OOB handling.
3196 ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
3197 ("timeout", constants.OOB_TIMEOUT, ht.TInt),
3201 def CheckPrereq(self):
3202 """Check prerequisites.
3205 - the node exists in the configuration
3208 Any errors are signaled by raising errors.OpPrereqError.
3211 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3212 node = self.cfg.GetNodeInfo(self.op.node_name)
3215 raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3217 self.oob_program = self.cfg.GetOobProgram(node)
3219 if not self.oob_program:
3220 raise errors.OpPrereqError("OOB is not supported for node %s" %
3223 self.op.node_name = node.name
3226 def ExpandNames(self):
3227 """Gather locks we need.
3230 self.needed_locks = {
3231 locking.LEVEL_NODE: [self.op.node_name],
3234 def Exec(self, feedback_fn):
3235 """Execute OOB and return result if we expect any.
3238 master_node = self.cfg.GetMasterNode()
3240 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3241 self.op.command, self.oob_program, self.op.node_name)
3242 result = self.rpc.call_run_oob(master_node, self.oob_program,
3243 self.op.command, self.op.node_name,
3246 result.Raise("An error occurred on execution of OOB helper")
3248 if self.op.command == constants.OOB_HEALTH:
3249 # For health we should log important events
3250 for item, status in result.payload:
3251 if status in [constants.OOB_STATUS_WARNING,
3252 constants.OOB_STATUS_CRITICAL]:
3253 logging.warning("On node '%s' item '%s' has status '%s'",
3254 self.op.node_name, item, status)
3256 return result.payload
3259 class LUDiagnoseOS(NoHooksLU):
3260 """Logical unit for OS diagnose/query.
3265 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3269 _BLK = "blacklisted"
3271 _FIELDS_STATIC = utils.FieldSet()
3272 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3273 "parameters", "api_versions", _HID, _BLK)
3275 def CheckArguments(self):
3277 raise errors.OpPrereqError("Selective OS query not supported",
3280 _CheckOutputFields(static=self._FIELDS_STATIC,
3281 dynamic=self._FIELDS_DYNAMIC,
3282 selected=self.op.output_fields)
3284 def ExpandNames(self):
3285 # Lock all nodes, in shared mode
3286 # Temporary removal of locks, should be reverted later
3287 # TODO: reintroduce locks when they are lighter-weight
3288 self.needed_locks = {}
3289 #self.share_locks[locking.LEVEL_NODE] = 1
3290 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3293 def _DiagnoseByOS(rlist):
3294 """Remaps a per-node return list into an a per-os per-node dictionary
3296 @param rlist: a map with node names as keys and OS objects as values
3299 @return: a dictionary with osnames as keys and as value another
3300 map, with nodes as keys and tuples of (path, status, diagnose,
3301 variants, parameters, api_versions) as values, eg::
3303 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3304 (/srv/..., False, "invalid api")],
3305 "node2": [(/srv/..., True, "", [], [])]}
3310 # we build here the list of nodes that didn't fail the RPC (at RPC
3311 # level), so that nodes with a non-responding node daemon don't
3312 # make all OSes invalid
3313 good_nodes = [node_name for node_name in rlist
3314 if not rlist[node_name].fail_msg]
3315 for node_name, nr in rlist.items():
3316 if nr.fail_msg or not nr.payload:
3318 for (name, path, status, diagnose, variants,
3319 params, api_versions) in nr.payload:
3320 if name not in all_os:
3321 # build a list of nodes for this os containing empty lists
3322 # for each node in node_list
3324 for nname in good_nodes:
3325 all_os[name][nname] = []
3326 # convert params from [name, help] to (name, help)
3327 params = [tuple(v) for v in params]
3328 all_os[name][node_name].append((path, status, diagnose,
3329 variants, params, api_versions))
3332 def Exec(self, feedback_fn):
3333 """Compute the list of OSes.
3336 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3337 node_data = self.rpc.call_os_diagnose(valid_nodes)
3338 pol = self._DiagnoseByOS(node_data)
3340 cluster = self.cfg.GetClusterInfo()
3342 for os_name in utils.NiceSort(pol.keys()):
3343 os_data = pol[os_name]
3346 (variants, params, api_versions) = null_state = (set(), set(), set())
3347 for idx, osl in enumerate(os_data.values()):
3348 valid = bool(valid and osl and osl[0][1])
3350 (variants, params, api_versions) = null_state
3352 node_variants, node_params, node_api = osl[0][3:6]
3353 if idx == 0: # first entry
3354 variants = set(node_variants)
3355 params = set(node_params)
3356 api_versions = set(node_api)
3357 else: # keep consistency
3358 variants.intersection_update(node_variants)
3359 params.intersection_update(node_params)
3360 api_versions.intersection_update(node_api)
3362 is_hid = os_name in cluster.hidden_os
3363 is_blk = os_name in cluster.blacklisted_os
3364 if ((self._HID not in self.op.output_fields and is_hid) or
3365 (self._BLK not in self.op.output_fields and is_blk) or
3366 (self._VLD not in self.op.output_fields and not valid)):
3369 for field in self.op.output_fields:
3372 elif field == self._VLD:
3374 elif field == "node_status":
3375 # this is just a copy of the dict
3377 for node_name, nos_list in os_data.items():
3378 val[node_name] = nos_list
3379 elif field == "variants":
3380 val = utils.NiceSort(list(variants))
3381 elif field == "parameters":
3383 elif field == "api_versions":
3384 val = list(api_versions)
3385 elif field == self._HID:
3387 elif field == self._BLK:
3390 raise errors.ParameterError(field)
3397 class LURemoveNode(LogicalUnit):
3398 """Logical unit for removing a node.
3401 HPATH = "node-remove"
3402 HTYPE = constants.HTYPE_NODE
3407 def BuildHooksEnv(self):
3410 This doesn't run on the target node in the pre phase as a failed
3411 node would then be impossible to remove.
3415 "OP_TARGET": self.op.node_name,
3416 "NODE_NAME": self.op.node_name,
3418 all_nodes = self.cfg.GetNodeList()
3420 all_nodes.remove(self.op.node_name)
3422 logging.warning("Node %s which is about to be removed not found"
3423 " in the all nodes list", self.op.node_name)
3424 return env, all_nodes, all_nodes
3426 def CheckPrereq(self):
3427 """Check prerequisites.
3430 - the node exists in the configuration
3431 - it does not have primary or secondary instances
3432 - it's not the master
3434 Any errors are signaled by raising errors.OpPrereqError.
3437 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3438 node = self.cfg.GetNodeInfo(self.op.node_name)
3439 assert node is not None
3441 instance_list = self.cfg.GetInstanceList()
3443 masternode = self.cfg.GetMasterNode()
3444 if node.name == masternode:
3445 raise errors.OpPrereqError("Node is the master node,"
3446 " you need to failover first.",
3449 for instance_name in instance_list:
3450 instance = self.cfg.GetInstanceInfo(instance_name)
3451 if node.name in instance.all_nodes:
3452 raise errors.OpPrereqError("Instance %s is still running on the node,"
3453 " please remove first." % instance_name,
3455 self.op.node_name = node.name
3458 def Exec(self, feedback_fn):
3459 """Removes the node from the cluster.
3463 logging.info("Stopping the node daemon and removing configs from node %s",
3466 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3468 # Promote nodes to master candidate as needed
3469 _AdjustCandidatePool(self, exceptions=[node.name])
3470 self.context.RemoveNode(node.name)
3472 # Run post hooks on the node before it's removed
3473 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3475 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3477 # pylint: disable-msg=W0702
3478 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3480 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3481 msg = result.fail_msg
3483 self.LogWarning("Errors encountered on the remote node while leaving"
3484 " the cluster: %s", msg)
3486 # Remove node from our /etc/hosts
3487 if self.cfg.GetClusterInfo().modify_etc_hosts:
3488 master_node = self.cfg.GetMasterNode()
3489 result = self.rpc.call_etc_hosts_modify(master_node,
3490 constants.ETC_HOSTS_REMOVE,
3492 result.Raise("Can't update hosts file with new host data")
3493 _RedistributeAncillaryFiles(self)
3496 class _NodeQuery(_QueryBase):
3497 FIELDS = query.NODE_FIELDS
3499 def ExpandNames(self, lu):
3500 lu.needed_locks = {}
3501 lu.share_locks[locking.LEVEL_NODE] = 1
3504 self.wanted = _GetWantedNodes(lu, self.names)
3506 self.wanted = locking.ALL_SET
3508 self.do_locking = (self.use_locking and
3509 query.NQ_LIVE in self.requested_data)
3512 # if we don't request only static fields, we need to lock the nodes
3513 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3515 def DeclareLocks(self, _):
3518 def _GetQueryData(self, lu):
3519 """Computes the list of nodes and their attributes.
3522 all_info = lu.cfg.GetAllNodesInfo()
3525 nodenames = lu.acquired_locks[locking.LEVEL_NODE]
3526 elif self.wanted != locking.ALL_SET:
3527 nodenames = self.wanted
3528 missing = set(nodenames).difference(all_info.keys())
3530 raise errors.OpExecError("Some nodes were removed before retrieving"
3531 " their data: %s" % missing)
3533 nodenames = all_info.keys()
3535 nodenames = utils.NiceSort(nodenames)
3537 # Gather data as requested
3538 if query.NQ_LIVE in self.requested_data:
3539 node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3540 lu.cfg.GetHypervisorType())
3541 live_data = dict((name, nresult.payload)
3542 for (name, nresult) in node_data.items()
3543 if not nresult.fail_msg and nresult.payload)
3547 if query.NQ_INST in self.requested_data:
3548 node_to_primary = dict([(name, set()) for name in nodenames])
3549 node_to_secondary = dict([(name, set()) for name in nodenames])
3551 inst_data = lu.cfg.GetAllInstancesInfo()
3553 for inst in inst_data.values():
3554 if inst.primary_node in node_to_primary:
3555 node_to_primary[inst.primary_node].add(inst.name)
3556 for secnode in inst.secondary_nodes:
3557 if secnode in node_to_secondary:
3558 node_to_secondary[secnode].add(inst.name)
3560 node_to_primary = None
3561 node_to_secondary = None
3563 if query.NQ_GROUP in self.requested_data:
3564 groups = lu.cfg.GetAllNodeGroupsInfo()
3568 return query.NodeQueryData([all_info[name] for name in nodenames],
3569 live_data, lu.cfg.GetMasterNode(),
3570 node_to_primary, node_to_secondary, groups)
3573 class LUQueryNodes(NoHooksLU):
3574 """Logical unit for querying nodes.
3577 # pylint: disable-msg=W0142
3580 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3581 ("use_locking", False, ht.TBool),
3585 def CheckArguments(self):
3586 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3587 self.op.use_locking)
3589 def ExpandNames(self):
3590 self.nq.ExpandNames(self)
3592 def Exec(self, feedback_fn):
3593 return self.nq.OldStyleQuery(self)
3596 class LUQueryNodeVolumes(NoHooksLU):
3597 """Logical unit for getting volumes on node(s).
3602 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3605 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3606 _FIELDS_STATIC = utils.FieldSet("node")
3608 def CheckArguments(self):
3609 _CheckOutputFields(static=self._FIELDS_STATIC,
3610 dynamic=self._FIELDS_DYNAMIC,
3611 selected=self.op.output_fields)
3613 def ExpandNames(self):
3614 self.needed_locks = {}
3615 self.share_locks[locking.LEVEL_NODE] = 1
3616 if not self.op.nodes:
3617 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3619 self.needed_locks[locking.LEVEL_NODE] = \
3620 _GetWantedNodes(self, self.op.nodes)
3622 def Exec(self, feedback_fn):
3623 """Computes the list of nodes and their attributes.
3626 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3627 volumes = self.rpc.call_node_volumes(nodenames)
3629 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3630 in self.cfg.GetInstanceList()]
3632 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3635 for node in nodenames:
3636 nresult = volumes[node]
3639 msg = nresult.fail_msg
3641 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3644 node_vols = nresult.payload[:]
3645 node_vols.sort(key=lambda vol: vol['dev'])
3647 for vol in node_vols:
3649 for field in self.op.output_fields:
3652 elif field == "phys":
3656 elif field == "name":
3658 elif field == "size":
3659 val = int(float(vol['size']))
3660 elif field == "instance":
3662 if node not in lv_by_node[inst]:
3664 if vol['name'] in lv_by_node[inst][node]:
3670 raise errors.ParameterError(field)
3671 node_output.append(str(val))
3673 output.append(node_output)
3678 class LUQueryNodeStorage(NoHooksLU):
3679 """Logical unit for getting information on storage units on node(s).
3682 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3685 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3686 ("storage_type", ht.NoDefault, _CheckStorageType),
3687 ("name", None, ht.TMaybeString),
3691 def CheckArguments(self):
3692 _CheckOutputFields(static=self._FIELDS_STATIC,
3693 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3694 selected=self.op.output_fields)
3696 def ExpandNames(self):
3697 self.needed_locks = {}
3698 self.share_locks[locking.LEVEL_NODE] = 1
3701 self.needed_locks[locking.LEVEL_NODE] = \
3702 _GetWantedNodes(self, self.op.nodes)
3704 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3706 def Exec(self, feedback_fn):
3707 """Computes the list of nodes and their attributes.
3710 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3712 # Always get name to sort by
3713 if constants.SF_NAME in self.op.output_fields:
3714 fields = self.op.output_fields[:]
3716 fields = [constants.SF_NAME] + self.op.output_fields
3718 # Never ask for node or type as it's only known to the LU
3719 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3720 while extra in fields:
3721 fields.remove(extra)
3723 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3724 name_idx = field_idx[constants.SF_NAME]
3726 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3727 data = self.rpc.call_storage_list(self.nodes,
3728 self.op.storage_type, st_args,
3729 self.op.name, fields)
3733 for node in utils.NiceSort(self.nodes):
3734 nresult = data[node]
3738 msg = nresult.fail_msg
3740 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3743 rows = dict([(row[name_idx], row) for row in nresult.payload])
3745 for name in utils.NiceSort(rows.keys()):
3750 for field in self.op.output_fields:
3751 if field == constants.SF_NODE:
3753 elif field == constants.SF_TYPE:
3754 val = self.op.storage_type
3755 elif field in field_idx:
3756 val = row[field_idx[field]]
3758 raise errors.ParameterError(field)
3767 def _InstanceQuery(*args): # pylint: disable-msg=W0613
3768 """Dummy until instance queries have been converted to query2.
3771 raise NotImplementedError
3774 #: Query type implementations
3776 constants.QR_INSTANCE: _InstanceQuery,
3777 constants.QR_NODE: _NodeQuery,
3781 def _GetQueryImplementation(name):
3782 """Returns the implemtnation for a query type.
3784 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3788 return _QUERY_IMPL[name]
3790 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3794 class LUQuery(NoHooksLU):
3795 """Query for resources/items of a certain kind.
3798 # pylint: disable-msg=W0142
3800 ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3801 ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3802 ("filter", None, ht.TOr(ht.TNone,
3803 ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3807 def CheckArguments(self):
3808 qcls = _GetQueryImplementation(self.op.what)
3809 names = qlang.ReadSimpleFilter("name", self.op.filter)
3811 self.impl = qcls(names, self.op.fields, False)
3813 def ExpandNames(self):
3814 self.impl.ExpandNames(self)
3816 def DeclareLocks(self, level):
3817 self.impl.DeclareLocks(self, level)
3819 def Exec(self, feedback_fn):
3820 return self.impl.NewStyleQuery(self)
3823 class LUQueryFields(NoHooksLU):
3824 """Query for resources/items of a certain kind.
3827 # pylint: disable-msg=W0142
3829 ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3830 ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3834 def CheckArguments(self):
3835 self.qcls = _GetQueryImplementation(self.op.what)
3837 def ExpandNames(self):
3838 self.needed_locks = {}
3840 def Exec(self, feedback_fn):
3841 return self.qcls.FieldsQuery(self.op.fields)
3844 class LUModifyNodeStorage(NoHooksLU):
3845 """Logical unit for modifying a storage volume on a node.
3850 ("storage_type", ht.NoDefault, _CheckStorageType),
3851 ("name", ht.NoDefault, ht.TNonEmptyString),
3852 ("changes", ht.NoDefault, ht.TDict),
3856 def CheckArguments(self):
3857 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3859 storage_type = self.op.storage_type
3862 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3864 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3865 " modified" % storage_type,
3868 diff = set(self.op.changes.keys()) - modifiable
3870 raise errors.OpPrereqError("The following fields can not be modified for"
3871 " storage units of type '%s': %r" %
3872 (storage_type, list(diff)),
3875 def ExpandNames(self):
3876 self.needed_locks = {
3877 locking.LEVEL_NODE: self.op.node_name,
3880 def Exec(self, feedback_fn):
3881 """Computes the list of nodes and their attributes.
3884 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3885 result = self.rpc.call_storage_modify(self.op.node_name,
3886 self.op.storage_type, st_args,
3887 self.op.name, self.op.changes)
3888 result.Raise("Failed to modify storage unit '%s' on %s" %
3889 (self.op.name, self.op.node_name))
3892 class LUAddNode(LogicalUnit):
3893 """Logical unit for adding node to the cluster.
3897 HTYPE = constants.HTYPE_NODE
3900 ("primary_ip", None, ht.NoType),
3901 ("secondary_ip", None, ht.TMaybeString),
3902 ("readd", False, ht.TBool),
3903 ("group", None, ht.TMaybeString),
3904 ("master_capable", None, ht.TMaybeBool),
3905 ("vm_capable", None, ht.TMaybeBool),
3906 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
3908 _NFLAGS = ["master_capable", "vm_capable"]
3910 def CheckArguments(self):
3911 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3912 # validate/normalize the node name
3913 self.hostname = netutils.GetHostname(name=self.op.node_name,
3914 family=self.primary_ip_family)
3915 self.op.node_name = self.hostname.name
3916 if self.op.readd and self.op.group:
3917 raise errors.OpPrereqError("Cannot pass a node group when a node is"
3918 " being readded", errors.ECODE_INVAL)
3920 def BuildHooksEnv(self):
3923 This will run on all nodes before, and on all nodes + the new node after.
3927 "OP_TARGET": self.op.node_name,
3928 "NODE_NAME": self.op.node_name,
3929 "NODE_PIP": self.op.primary_ip,
3930 "NODE_SIP": self.op.secondary_ip,
3931 "MASTER_CAPABLE": str(self.op.master_capable),
3932 "VM_CAPABLE": str(self.op.vm_capable),
3934 nodes_0 = self.cfg.GetNodeList()
3935 nodes_1 = nodes_0 + [self.op.node_name, ]
3936 return env, nodes_0, nodes_1
3938 def CheckPrereq(self):
3939 """Check prerequisites.
3942 - the new node is not already in the config
3944 - its parameters (single/dual homed) matches the cluster
3946 Any errors are signaled by raising errors.OpPrereqError.
3950 hostname = self.hostname
3951 node = hostname.name
3952 primary_ip = self.op.primary_ip = hostname.ip
3953 if self.op.secondary_ip is None:
3954 if self.primary_ip_family == netutils.IP6Address.family:
3955 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3956 " IPv4 address must be given as secondary",
3958 self.op.secondary_ip = primary_ip
3960 secondary_ip = self.op.secondary_ip
3961 if not netutils.IP4Address.IsValid(secondary_ip):
3962 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3963 " address" % secondary_ip, errors.ECODE_INVAL)
3965 node_list = cfg.GetNodeList()
3966 if not self.op.readd and node in node_list:
3967 raise errors.OpPrereqError("Node %s is already in the configuration" %
3968 node, errors.ECODE_EXISTS)
3969 elif self.op.readd and node not in node_list:
3970 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3973 self.changed_primary_ip = False
3975 for existing_node_name in node_list:
3976 existing_node = cfg.GetNodeInfo(existing_node_name)
3978 if self.op.readd and node == existing_node_name:
3979 if existing_node.secondary_ip != secondary_ip:
3980 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3981 " address configuration as before",
3983 if existing_node.primary_ip != primary_ip:
3984 self.changed_primary_ip = True
3988 if (existing_node.primary_ip == primary_ip or
3989 existing_node.secondary_ip == primary_ip or
3990 existing_node.primary_ip == secondary_ip or
3991 existing_node.secondary_ip == secondary_ip):
3992 raise errors.OpPrereqError("New node ip address(es) conflict with"
3993 " existing node %s" % existing_node.name,
3994 errors.ECODE_NOTUNIQUE)
3996 # After this 'if' block, None is no longer a valid value for the
3997 # _capable op attributes
3999 old_node = self.cfg.GetNodeInfo(node)
4000 assert old_node is not None, "Can't retrieve locked node %s" % node
4001 for attr in self._NFLAGS:
4002 if getattr(self.op, attr) is None:
4003 setattr(self.op, attr, getattr(old_node, attr))
4005 for attr in self._NFLAGS:
4006 if getattr(self.op, attr) is None:
4007 setattr(self.op, attr, True)
4009 if self.op.readd and not self.op.vm_capable:
4010 pri, sec = cfg.GetNodeInstances(node)
4012 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4013 " flag set to false, but it already holds"
4014 " instances" % node,
4017 # check that the type of the node (single versus dual homed) is the
4018 # same as for the master
4019 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4020 master_singlehomed = myself.secondary_ip == myself.primary_ip
4021 newbie_singlehomed = secondary_ip == primary_ip
4022 if master_singlehomed != newbie_singlehomed:
4023 if master_singlehomed:
4024 raise errors.OpPrereqError("The master has no secondary ip but the"
4025 " new node has one",
4028 raise errors.OpPrereqError("The master has a secondary ip but the"
4029 " new node doesn't have one",
4032 # checks reachability
4033 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4034 raise errors.OpPrereqError("Node not reachable by ping",
4035 errors.ECODE_ENVIRON)
4037 if not newbie_singlehomed:
4038 # check reachability from my secondary ip to newbie's secondary ip
4039 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4040 source=myself.secondary_ip):
4041 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4042 " based ping to node daemon port",
4043 errors.ECODE_ENVIRON)
4050 if self.op.master_capable:
4051 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4053 self.master_candidate = False
4056 self.new_node = old_node
4058 node_group = cfg.LookupNodeGroup(self.op.group)
4059 self.new_node = objects.Node(name=node,
4060 primary_ip=primary_ip,
4061 secondary_ip=secondary_ip,
4062 master_candidate=self.master_candidate,
4063 offline=False, drained=False,
4066 if self.op.ndparams:
4067 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4069 def Exec(self, feedback_fn):
4070 """Adds the new node to the cluster.
4073 new_node = self.new_node
4074 node = new_node.name
4076 # for re-adds, reset the offline/drained/master-candidate flags;
4077 # we need to reset here, otherwise offline would prevent RPC calls
4078 # later in the procedure; this also means that if the re-add
4079 # fails, we are left with a non-offlined, broken node
4081 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4082 self.LogInfo("Readding a node, the offline/drained flags were reset")
4083 # if we demote the node, we do cleanup later in the procedure
4084 new_node.master_candidate = self.master_candidate
4085 if self.changed_primary_ip:
4086 new_node.primary_ip = self.op.primary_ip
4088 # copy the master/vm_capable flags
4089 for attr in self._NFLAGS:
4090 setattr(new_node, attr, getattr(self.op, attr))
4092 # notify the user about any possible mc promotion
4093 if new_node.master_candidate:
4094 self.LogInfo("Node will be a master candidate")
4096 if self.op.ndparams:
4097 new_node.ndparams = self.op.ndparams
4099 # check connectivity
4100 result = self.rpc.call_version([node])[node]
4101 result.Raise("Can't get version information from node %s" % node)
4102 if constants.PROTOCOL_VERSION == result.payload:
4103 logging.info("Communication to node %s fine, sw version %s match",
4104 node, result.payload)
4106 raise errors.OpExecError("Version mismatch master version %s,"
4107 " node version %s" %
4108 (constants.PROTOCOL_VERSION, result.payload))
4110 # Add node to our /etc/hosts, and add key to known_hosts
4111 if self.cfg.GetClusterInfo().modify_etc_hosts:
4112 master_node = self.cfg.GetMasterNode()
4113 result = self.rpc.call_etc_hosts_modify(master_node,
4114 constants.ETC_HOSTS_ADD,
4117 result.Raise("Can't update hosts file with new host data")
4119 if new_node.secondary_ip != new_node.primary_ip:
4120 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4123 node_verify_list = [self.cfg.GetMasterNode()]
4124 node_verify_param = {
4125 constants.NV_NODELIST: [node],
4126 # TODO: do a node-net-test as well?
4129 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4130 self.cfg.GetClusterName())
4131 for verifier in node_verify_list:
4132 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4133 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4135 for failed in nl_payload:
4136 feedback_fn("ssh/hostname verification failed"
4137 " (checking from %s): %s" %
4138 (verifier, nl_payload[failed]))
4139 raise errors.OpExecError("ssh/hostname verification failed.")
4142 _RedistributeAncillaryFiles(self)
4143 self.context.ReaddNode(new_node)
4144 # make sure we redistribute the config
4145 self.cfg.Update(new_node, feedback_fn)
4146 # and make sure the new node will not have old files around
4147 if not new_node.master_candidate:
4148 result = self.rpc.call_node_demote_from_mc(new_node.name)
4149 msg = result.fail_msg
4151 self.LogWarning("Node failed to demote itself from master"
4152 " candidate status: %s" % msg)
4154 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4155 additional_vm=self.op.vm_capable)
4156 self.context.AddNode(new_node, self.proc.GetECId())
4159 class LUSetNodeParams(LogicalUnit):
4160 """Modifies the parameters of a node.
4162 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4163 to the node role (as _ROLE_*)
4164 @cvar _R2F: a dictionary from node role to tuples of flags
4165 @cvar _FLAGS: a list of attribute names corresponding to the flags
4168 HPATH = "node-modify"
4169 HTYPE = constants.HTYPE_NODE
4172 ("master_candidate", None, ht.TMaybeBool),
4173 ("offline", None, ht.TMaybeBool),
4174 ("drained", None, ht.TMaybeBool),
4175 ("auto_promote", False, ht.TBool),
4176 ("master_capable", None, ht.TMaybeBool),
4177 ("vm_capable", None, ht.TMaybeBool),
4178 ("secondary_ip", None, ht.TMaybeString),
4179 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4183 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4185 (True, False, False): _ROLE_CANDIDATE,
4186 (False, True, False): _ROLE_DRAINED,
4187 (False, False, True): _ROLE_OFFLINE,
4188 (False, False, False): _ROLE_REGULAR,
4190 _R2F = dict((v, k) for k, v in _F2R.items())
4191 _FLAGS = ["master_candidate", "drained", "offline"]
4193 def CheckArguments(self):
4194 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4195 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4196 self.op.master_capable, self.op.vm_capable,
4197 self.op.secondary_ip, self.op.ndparams]
4198 if all_mods.count(None) == len(all_mods):
4199 raise errors.OpPrereqError("Please pass at least one modification",
4201 if all_mods.count(True) > 1:
4202 raise errors.OpPrereqError("Can't set the node into more than one"
4203 " state at the same time",
4206 # Boolean value that tells us whether we might be demoting from MC
4207 self.might_demote = (self.op.master_candidate == False or
4208 self.op.offline == True or
4209 self.op.drained == True or
4210 self.op.master_capable == False)
4212 if self.op.secondary_ip:
4213 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4214 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4215 " address" % self.op.secondary_ip,
4218 self.lock_all = self.op.auto_promote and self.might_demote
4219 self.lock_instances = self.op.secondary_ip is not None
4221 def ExpandNames(self):
4223 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4225 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4227 if self.lock_instances:
4228 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4230 def DeclareLocks(self, level):
4231 # If we have locked all instances, before waiting to lock nodes, release
4232 # all the ones living on nodes unrelated to the current operation.
4233 if level == locking.LEVEL_NODE and self.lock_instances:
4234 instances_release = []
4236 self.affected_instances = []
4237 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4238 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4239 instance = self.context.cfg.GetInstanceInfo(instance_name)
4240 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4241 if i_mirrored and self.op.node_name in instance.all_nodes:
4242 instances_keep.append(instance_name)
4243 self.affected_instances.append(instance)
4245 instances_release.append(instance_name)
4246 if instances_release:
4247 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4248 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4250 def BuildHooksEnv(self):
4253 This runs on the master node.
4257 "OP_TARGET": self.op.node_name,
4258 "MASTER_CANDIDATE": str(self.op.master_candidate),
4259 "OFFLINE": str(self.op.offline),
4260 "DRAINED": str(self.op.drained),
4261 "MASTER_CAPABLE": str(self.op.master_capable),
4262 "VM_CAPABLE": str(self.op.vm_capable),
4264 nl = [self.cfg.GetMasterNode(),
4268 def CheckPrereq(self):
4269 """Check prerequisites.
4271 This only checks the instance list against the existing names.
4274 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4276 if (self.op.master_candidate is not None or
4277 self.op.drained is not None or
4278 self.op.offline is not None):
4279 # we can't change the master's node flags
4280 if self.op.node_name == self.cfg.GetMasterNode():
4281 raise errors.OpPrereqError("The master role can be changed"
4282 " only via master-failover",
4285 if self.op.master_candidate and not node.master_capable:
4286 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4287 " it a master candidate" % node.name,
4290 if self.op.vm_capable == False:
4291 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4293 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4294 " the vm_capable flag" % node.name,
4297 if node.master_candidate and self.might_demote and not self.lock_all:
4298 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4299 # check if after removing the current node, we're missing master
4301 (mc_remaining, mc_should, _) = \
4302 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4303 if mc_remaining < mc_should:
4304 raise errors.OpPrereqError("Not enough master candidates, please"
4305 " pass auto_promote to allow promotion",
4308 self.old_flags = old_flags = (node.master_candidate,
4309 node.drained, node.offline)
4310 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4311 self.old_role = old_role = self._F2R[old_flags]
4313 # Check for ineffective changes
4314 for attr in self._FLAGS:
4315 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4316 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4317 setattr(self.op, attr, None)
4319 # Past this point, any flag change to False means a transition
4320 # away from the respective state, as only real changes are kept
4322 # If we're being deofflined/drained, we'll MC ourself if needed
4323 if (self.op.drained == False or self.op.offline == False or
4324 (self.op.master_capable and not node.master_capable)):
4325 if _DecideSelfPromotion(self):
4326 self.op.master_candidate = True
4327 self.LogInfo("Auto-promoting node to master candidate")
4329 # If we're no longer master capable, we'll demote ourselves from MC
4330 if self.op.master_capable == False and node.master_candidate:
4331 self.LogInfo("Demoting from master candidate")
4332 self.op.master_candidate = False
4335 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4336 if self.op.master_candidate:
4337 new_role = self._ROLE_CANDIDATE
4338 elif self.op.drained:
4339 new_role = self._ROLE_DRAINED
4340 elif self.op.offline:
4341 new_role = self._ROLE_OFFLINE
4342 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4343 # False is still in new flags, which means we're un-setting (the
4345 new_role = self._ROLE_REGULAR
4346 else: # no new flags, nothing, keep old role
4349 self.new_role = new_role
4351 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4352 # Trying to transition out of offline status
4353 result = self.rpc.call_version([node.name])[node.name]
4355 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4356 " to report its version: %s" %
4357 (node.name, result.fail_msg),
4360 self.LogWarning("Transitioning node from offline to online state"
4361 " without using re-add. Please make sure the node"
4364 if self.op.secondary_ip:
4365 # Ok even without locking, because this can't be changed by any LU
4366 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4367 master_singlehomed = master.secondary_ip == master.primary_ip
4368 if master_singlehomed and self.op.secondary_ip:
4369 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4370 " homed cluster", errors.ECODE_INVAL)
4373 if self.affected_instances:
4374 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4375 " node has instances (%s) configured"
4376 " to use it" % self.affected_instances)
4378 # On online nodes, check that no instances are running, and that
4379 # the node has the new ip and we can reach it.
4380 for instance in self.affected_instances:
4381 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4383 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4384 if master.name != node.name:
4385 # check reachability from master secondary ip to new secondary ip
4386 if not netutils.TcpPing(self.op.secondary_ip,
4387 constants.DEFAULT_NODED_PORT,
4388 source=master.secondary_ip):
4389 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4390 " based ping to node daemon port",
4391 errors.ECODE_ENVIRON)
4393 if self.op.ndparams:
4394 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4395 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4396 self.new_ndparams = new_ndparams
4398 def Exec(self, feedback_fn):
4403 old_role = self.old_role
4404 new_role = self.new_role
4408 if self.op.ndparams:
4409 node.ndparams = self.new_ndparams
4411 for attr in ["master_capable", "vm_capable"]:
4412 val = getattr(self.op, attr)
4414 setattr(node, attr, val)
4415 result.append((attr, str(val)))
4417 if new_role != old_role:
4418 # Tell the node to demote itself, if no longer MC and not offline
4419 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4420 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4422 self.LogWarning("Node failed to demote itself: %s", msg)
4424 new_flags = self._R2F[new_role]
4425 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4427 result.append((desc, str(nf)))
4428 (node.master_candidate, node.drained, node.offline) = new_flags
4430 # we locked all nodes, we adjust the CP before updating this node
4432 _AdjustCandidatePool(self, [node.name])
4434 if self.op.secondary_ip:
4435 node.secondary_ip = self.op.secondary_ip
4436 result.append(("secondary_ip", self.op.secondary_ip))
4438 # this will trigger configuration file update, if needed
4439 self.cfg.Update(node, feedback_fn)
4441 # this will trigger job queue propagation or cleanup if the mc
4443 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4444 self.context.ReaddNode(node)
4449 class LUPowercycleNode(NoHooksLU):
4450 """Powercycles a node.
4459 def CheckArguments(self):
4460 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4461 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4462 raise errors.OpPrereqError("The node is the master and the force"
4463 " parameter was not set",
4466 def ExpandNames(self):
4467 """Locking for PowercycleNode.
4469 This is a last-resort option and shouldn't block on other
4470 jobs. Therefore, we grab no locks.
4473 self.needed_locks = {}
4475 def Exec(self, feedback_fn):
4479 result = self.rpc.call_node_powercycle(self.op.node_name,
4480 self.cfg.GetHypervisorType())
4481 result.Raise("Failed to schedule the reboot")
4482 return result.payload
4485 class LUQueryClusterInfo(NoHooksLU):
4486 """Query cluster configuration.
4491 def ExpandNames(self):
4492 self.needed_locks = {}
4494 def Exec(self, feedback_fn):
4495 """Return cluster config.
4498 cluster = self.cfg.GetClusterInfo()
4501 # Filter just for enabled hypervisors
4502 for os_name, hv_dict in cluster.os_hvp.items():
4503 os_hvp[os_name] = {}
4504 for hv_name, hv_params in hv_dict.items():
4505 if hv_name in cluster.enabled_hypervisors:
4506 os_hvp[os_name][hv_name] = hv_params
4508 # Convert ip_family to ip_version
4509 primary_ip_version = constants.IP4_VERSION
4510 if cluster.primary_ip_family == netutils.IP6Address.family:
4511 primary_ip_version = constants.IP6_VERSION
4514 "software_version": constants.RELEASE_VERSION,
4515 "protocol_version": constants.PROTOCOL_VERSION,
4516 "config_version": constants.CONFIG_VERSION,
4517 "os_api_version": max(constants.OS_API_VERSIONS),
4518 "export_version": constants.EXPORT_VERSION,
4519 "architecture": (platform.architecture()[0], platform.machine()),
4520 "name": cluster.cluster_name,
4521 "master": cluster.master_node,
4522 "default_hypervisor": cluster.enabled_hypervisors[0],
4523 "enabled_hypervisors": cluster.enabled_hypervisors,
4524 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4525 for hypervisor_name in cluster.enabled_hypervisors]),
4527 "beparams": cluster.beparams,
4528 "osparams": cluster.osparams,
4529 "nicparams": cluster.nicparams,
4530 "candidate_pool_size": cluster.candidate_pool_size,
4531 "master_netdev": cluster.master_netdev,
4532 "volume_group_name": cluster.volume_group_name,
4533 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4534 "file_storage_dir": cluster.file_storage_dir,
4535 "maintain_node_health": cluster.maintain_node_health,
4536 "ctime": cluster.ctime,
4537 "mtime": cluster.mtime,
4538 "uuid": cluster.uuid,
4539 "tags": list(cluster.GetTags()),
4540 "uid_pool": cluster.uid_pool,
4541 "default_iallocator": cluster.default_iallocator,
4542 "reserved_lvs": cluster.reserved_lvs,
4543 "primary_ip_version": primary_ip_version,
4544 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4550 class LUQueryConfigValues(NoHooksLU):
4551 """Return configuration values.
4554 _OP_PARAMS = [_POutputFields]
4556 _FIELDS_DYNAMIC = utils.FieldSet()
4557 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4558 "watcher_pause", "volume_group_name")
4560 def CheckArguments(self):
4561 _CheckOutputFields(static=self._FIELDS_STATIC,
4562 dynamic=self._FIELDS_DYNAMIC,
4563 selected=self.op.output_fields)
4565 def ExpandNames(self):
4566 self.needed_locks = {}
4568 def Exec(self, feedback_fn):
4569 """Dump a representation of the cluster config to the standard output.
4573 for field in self.op.output_fields:
4574 if field == "cluster_name":
4575 entry = self.cfg.GetClusterName()
4576 elif field == "master_node":
4577 entry = self.cfg.GetMasterNode()
4578 elif field == "drain_flag":
4579 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4580 elif field == "watcher_pause":
4581 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4582 elif field == "volume_group_name":
4583 entry = self.cfg.GetVGName()
4585 raise errors.ParameterError(field)
4586 values.append(entry)
4590 class LUActivateInstanceDisks(NoHooksLU):
4591 """Bring up an instance's disks.
4596 ("ignore_size", False, ht.TBool),
4600 def ExpandNames(self):
4601 self._ExpandAndLockInstance()
4602 self.needed_locks[locking.LEVEL_NODE] = []
4603 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4605 def DeclareLocks(self, level):
4606 if level == locking.LEVEL_NODE:
4607 self._LockInstancesNodes()
4609 def CheckPrereq(self):
4610 """Check prerequisites.
4612 This checks that the instance is in the cluster.
4615 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4616 assert self.instance is not None, \
4617 "Cannot retrieve locked instance %s" % self.op.instance_name
4618 _CheckNodeOnline(self, self.instance.primary_node)
4620 def Exec(self, feedback_fn):
4621 """Activate the disks.
4624 disks_ok, disks_info = \
4625 _AssembleInstanceDisks(self, self.instance,
4626 ignore_size=self.op.ignore_size)
4628 raise errors.OpExecError("Cannot activate block devices")
4633 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4635 """Prepare the block devices for an instance.
4637 This sets up the block devices on all nodes.
4639 @type lu: L{LogicalUnit}
4640 @param lu: the logical unit on whose behalf we execute
4641 @type instance: L{objects.Instance}
4642 @param instance: the instance for whose disks we assemble
4643 @type disks: list of L{objects.Disk} or None
4644 @param disks: which disks to assemble (or all, if None)
4645 @type ignore_secondaries: boolean
4646 @param ignore_secondaries: if true, errors on secondary nodes
4647 won't result in an error return from the function
4648 @type ignore_size: boolean
4649 @param ignore_size: if true, the current known size of the disk
4650 will not be used during the disk activation, useful for cases
4651 when the size is wrong
4652 @return: False if the operation failed, otherwise a list of
4653 (host, instance_visible_name, node_visible_name)
4654 with the mapping from node devices to instance devices
4659 iname = instance.name
4660 disks = _ExpandCheckDisks(instance, disks)
4662 # With the two passes mechanism we try to reduce the window of
4663 # opportunity for the race condition of switching DRBD to primary
4664 # before handshaking occured, but we do not eliminate it
4666 # The proper fix would be to wait (with some limits) until the
4667 # connection has been made and drbd transitions from WFConnection
4668 # into any other network-connected state (Connected, SyncTarget,
4671 # 1st pass, assemble on all nodes in secondary mode
4672 for inst_disk in disks:
4673 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4675 node_disk = node_disk.Copy()
4676 node_disk.UnsetSize()
4677 lu.cfg.SetDiskID(node_disk, node)
4678 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4679 msg = result.fail_msg
4681 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4682 " (is_primary=False, pass=1): %s",
4683 inst_disk.iv_name, node, msg)
4684 if not ignore_secondaries:
4687 # FIXME: race condition on drbd migration to primary
4689 # 2nd pass, do only the primary node
4690 for inst_disk in disks:
4693 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4694 if node != instance.primary_node:
4697 node_disk = node_disk.Copy()
4698 node_disk.UnsetSize()
4699 lu.cfg.SetDiskID(node_disk, node)
4700 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4701 msg = result.fail_msg
4703 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4704 " (is_primary=True, pass=2): %s",
4705 inst_disk.iv_name, node, msg)
4708 dev_path = result.payload
4710 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4712 # leave the disks configured for the primary node
4713 # this is a workaround that would be fixed better by
4714 # improving the logical/physical id handling
4716 lu.cfg.SetDiskID(disk, instance.primary_node)
4718 return disks_ok, device_info
4721 def _StartInstanceDisks(lu, instance, force):
4722 """Start the disks of an instance.
4725 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4726 ignore_secondaries=force)
4728 _ShutdownInstanceDisks(lu, instance)
4729 if force is not None and not force:
4730 lu.proc.LogWarning("", hint="If the message above refers to a"
4732 " you can retry the operation using '--force'.")
4733 raise errors.OpExecError("Disk consistency error")
4736 class LUDeactivateInstanceDisks(NoHooksLU):
4737 """Shutdown an instance's disks.
4745 def ExpandNames(self):
4746 self._ExpandAndLockInstance()
4747 self.needed_locks[locking.LEVEL_NODE] = []
4748 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4750 def DeclareLocks(self, level):
4751 if level == locking.LEVEL_NODE:
4752 self._LockInstancesNodes()
4754 def CheckPrereq(self):
4755 """Check prerequisites.
4757 This checks that the instance is in the cluster.
4760 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4761 assert self.instance is not None, \
4762 "Cannot retrieve locked instance %s" % self.op.instance_name
4764 def Exec(self, feedback_fn):
4765 """Deactivate the disks
4768 instance = self.instance
4769 _SafeShutdownInstanceDisks(self, instance)
4772 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4773 """Shutdown block devices of an instance.
4775 This function checks if an instance is running, before calling
4776 _ShutdownInstanceDisks.
4779 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4780 _ShutdownInstanceDisks(lu, instance, disks=disks)
4783 def _ExpandCheckDisks(instance, disks):
4784 """Return the instance disks selected by the disks list
4786 @type disks: list of L{objects.Disk} or None
4787 @param disks: selected disks
4788 @rtype: list of L{objects.Disk}
4789 @return: selected instance disks to act on
4793 return instance.disks
4795 if not set(disks).issubset(instance.disks):
4796 raise errors.ProgrammerError("Can only act on disks belonging to the"
4801 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4802 """Shutdown block devices of an instance.
4804 This does the shutdown on all nodes of the instance.
4806 If the ignore_primary is false, errors on the primary node are
4811 disks = _ExpandCheckDisks(instance, disks)
4814 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4815 lu.cfg.SetDiskID(top_disk, node)
4816 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4817 msg = result.fail_msg
4819 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4820 disk.iv_name, node, msg)
4821 if not ignore_primary or node != instance.primary_node:
4826 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4827 """Checks if a node has enough free memory.
4829 This function check if a given node has the needed amount of free
4830 memory. In case the node has less memory or we cannot get the
4831 information from the node, this function raise an OpPrereqError
4834 @type lu: C{LogicalUnit}
4835 @param lu: a logical unit from which we get configuration data
4837 @param node: the node to check
4838 @type reason: C{str}
4839 @param reason: string to use in the error message
4840 @type requested: C{int}
4841 @param requested: the amount of memory in MiB to check for
4842 @type hypervisor_name: C{str}
4843 @param hypervisor_name: the hypervisor to ask for memory stats
4844 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4845 we cannot check the node
4848 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4849 nodeinfo[node].Raise("Can't get data from node %s" % node,
4850 prereq=True, ecode=errors.ECODE_ENVIRON)
4851 free_mem = nodeinfo[node].payload.get('memory_free', None)
4852 if not isinstance(free_mem, int):
4853 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4854 " was '%s'" % (node, free_mem),
4855 errors.ECODE_ENVIRON)
4856 if requested > free_mem:
4857 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4858 " needed %s MiB, available %s MiB" %
4859 (node, reason, requested, free_mem),
4863 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4864 """Checks if nodes have enough free disk space in the all VGs.
4866 This function check if all given nodes have the needed amount of
4867 free disk. In case any node has less disk or we cannot get the
4868 information from the node, this function raise an OpPrereqError
4871 @type lu: C{LogicalUnit}
4872 @param lu: a logical unit from which we get configuration data
4873 @type nodenames: C{list}
4874 @param nodenames: the list of node names to check
4875 @type req_sizes: C{dict}
4876 @param req_sizes: the hash of vg and corresponding amount of disk in
4878 @raise errors.OpPrereqError: if the node doesn't have enough disk,
4879 or we cannot check the node
4882 if req_sizes is not None:
4883 for vg, req_size in req_sizes.iteritems():
4884 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4887 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4888 """Checks if nodes have enough free disk space in the specified VG.
4890 This function check if all given nodes have the needed amount of
4891 free disk. In case any node has less disk or we cannot get the
4892 information from the node, this function raise an OpPrereqError
4895 @type lu: C{LogicalUnit}
4896 @param lu: a logical unit from which we get configuration data
4897 @type nodenames: C{list}
4898 @param nodenames: the list of node names to check
4900 @param vg: the volume group to check
4901 @type requested: C{int}
4902 @param requested: the amount of disk in MiB to check for
4903 @raise errors.OpPrereqError: if the node doesn't have enough disk,
4904 or we cannot check the node
4907 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4908 for node in nodenames:
4909 info = nodeinfo[node]
4910 info.Raise("Cannot get current information from node %s" % node,
4911 prereq=True, ecode=errors.ECODE_ENVIRON)
4912 vg_free = info.payload.get("vg_free", None)
4913 if not isinstance(vg_free, int):
4914 raise errors.OpPrereqError("Can't compute free disk space on node"
4915 " %s for vg %s, result was '%s'" %
4916 (node, vg, vg_free), errors.ECODE_ENVIRON)
4917 if requested > vg_free:
4918 raise errors.OpPrereqError("Not enough disk space on target node %s"
4919 " vg %s: required %d MiB, available %d MiB" %
4920 (node, vg, requested, vg_free),
4924 class LUStartupInstance(LogicalUnit):
4925 """Starts an instance.
4928 HPATH = "instance-start"
4929 HTYPE = constants.HTYPE_INSTANCE
4933 _PIgnoreOfflineNodes,
4934 ("hvparams", ht.EmptyDict, ht.TDict),
4935 ("beparams", ht.EmptyDict, ht.TDict),
4939 def CheckArguments(self):
4941 if self.op.beparams:
4942 # fill the beparams dict
4943 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4945 def ExpandNames(self):
4946 self._ExpandAndLockInstance()
4948 def BuildHooksEnv(self):
4951 This runs on master, primary and secondary nodes of the instance.
4955 "FORCE": self.op.force,
4957 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4958 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4961 def CheckPrereq(self):
4962 """Check prerequisites.
4964 This checks that the instance is in the cluster.
4967 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4968 assert self.instance is not None, \
4969 "Cannot retrieve locked instance %s" % self.op.instance_name
4972 if self.op.hvparams:
4973 # check hypervisor parameter syntax (locally)
4974 cluster = self.cfg.GetClusterInfo()
4975 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4976 filled_hvp = cluster.FillHV(instance)
4977 filled_hvp.update(self.op.hvparams)
4978 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4979 hv_type.CheckParameterSyntax(filled_hvp)
4980 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4982 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4984 if self.primary_offline and self.op.ignore_offline_nodes:
4985 self.proc.LogWarning("Ignoring offline primary node")
4987 if self.op.hvparams or self.op.beparams:
4988 self.proc.LogWarning("Overridden parameters are ignored")
4990 _CheckNodeOnline(self, instance.primary_node)
4992 bep = self.cfg.GetClusterInfo().FillBE(instance)
4994 # check bridges existence
4995 _CheckInstanceBridgesExist(self, instance)
4997 remote_info = self.rpc.call_instance_info(instance.primary_node,
4999 instance.hypervisor)
5000 remote_info.Raise("Error checking node %s" % instance.primary_node,
5001 prereq=True, ecode=errors.ECODE_ENVIRON)
5002 if not remote_info.payload: # not running already
5003 _CheckNodeFreeMemory(self, instance.primary_node,
5004 "starting instance %s" % instance.name,
5005 bep[constants.BE_MEMORY], instance.hypervisor)
5007 def Exec(self, feedback_fn):
5008 """Start the instance.
5011 instance = self.instance
5012 force = self.op.force
5014 self.cfg.MarkInstanceUp(instance.name)
5016 if self.primary_offline:
5017 assert self.op.ignore_offline_nodes
5018 self.proc.LogInfo("Primary node offline, marked instance as started")
5020 node_current = instance.primary_node
5022 _StartInstanceDisks(self, instance, force)
5024 result = self.rpc.call_instance_start(node_current, instance,
5025 self.op.hvparams, self.op.beparams)
5026 msg = result.fail_msg
5028 _ShutdownInstanceDisks(self, instance)
5029 raise errors.OpExecError("Could not start instance: %s" % msg)
5032 class LURebootInstance(LogicalUnit):
5033 """Reboot an instance.
5036 HPATH = "instance-reboot"
5037 HTYPE = constants.HTYPE_INSTANCE
5040 ("ignore_secondaries", False, ht.TBool),
5041 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
5046 def ExpandNames(self):
5047 self._ExpandAndLockInstance()
5049 def BuildHooksEnv(self):
5052 This runs on master, primary and secondary nodes of the instance.
5056 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5057 "REBOOT_TYPE": self.op.reboot_type,
5058 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5060 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5061 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5064 def CheckPrereq(self):
5065 """Check prerequisites.
5067 This checks that the instance is in the cluster.
5070 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5071 assert self.instance is not None, \
5072 "Cannot retrieve locked instance %s" % self.op.instance_name
5074 _CheckNodeOnline(self, instance.primary_node)
5076 # check bridges existence
5077 _CheckInstanceBridgesExist(self, instance)
5079 def Exec(self, feedback_fn):
5080 """Reboot the instance.
5083 instance = self.instance
5084 ignore_secondaries = self.op.ignore_secondaries
5085 reboot_type = self.op.reboot_type
5087 node_current = instance.primary_node
5089 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5090 constants.INSTANCE_REBOOT_HARD]:
5091 for disk in instance.disks:
5092 self.cfg.SetDiskID(disk, node_current)
5093 result = self.rpc.call_instance_reboot(node_current, instance,
5095 self.op.shutdown_timeout)
5096 result.Raise("Could not reboot instance")
5098 result = self.rpc.call_instance_shutdown(node_current, instance,
5099 self.op.shutdown_timeout)
5100 result.Raise("Could not shutdown instance for full reboot")
5101 _ShutdownInstanceDisks(self, instance)
5102 _StartInstanceDisks(self, instance, ignore_secondaries)
5103 result = self.rpc.call_instance_start(node_current, instance, None, None)
5104 msg = result.fail_msg
5106 _ShutdownInstanceDisks(self, instance)
5107 raise errors.OpExecError("Could not start instance for"
5108 " full reboot: %s" % msg)
5110 self.cfg.MarkInstanceUp(instance.name)
5113 class LUShutdownInstance(LogicalUnit):
5114 """Shutdown an instance.
5117 HPATH = "instance-stop"
5118 HTYPE = constants.HTYPE_INSTANCE
5121 _PIgnoreOfflineNodes,
5122 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
5126 def ExpandNames(self):
5127 self._ExpandAndLockInstance()
5129 def BuildHooksEnv(self):
5132 This runs on master, primary and secondary nodes of the instance.
5135 env = _BuildInstanceHookEnvByObject(self, self.instance)
5136 env["TIMEOUT"] = self.op.timeout
5137 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5140 def CheckPrereq(self):
5141 """Check prerequisites.
5143 This checks that the instance is in the cluster.
5146 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5147 assert self.instance is not None, \
5148 "Cannot retrieve locked instance %s" % self.op.instance_name
5150 self.primary_offline = \
5151 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5153 if self.primary_offline and self.op.ignore_offline_nodes:
5154 self.proc.LogWarning("Ignoring offline primary node")
5156 _CheckNodeOnline(self, self.instance.primary_node)
5158 def Exec(self, feedback_fn):
5159 """Shutdown the instance.
5162 instance = self.instance
5163 node_current = instance.primary_node
5164 timeout = self.op.timeout
5166 self.cfg.MarkInstanceDown(instance.name)
5168 if self.primary_offline:
5169 assert self.op.ignore_offline_nodes
5170 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5172 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5173 msg = result.fail_msg
5175 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5177 _ShutdownInstanceDisks(self, instance)
5180 class LUReinstallInstance(LogicalUnit):
5181 """Reinstall an instance.
5184 HPATH = "instance-reinstall"
5185 HTYPE = constants.HTYPE_INSTANCE
5188 ("os_type", None, ht.TMaybeString),
5189 ("force_variant", False, ht.TBool),
5190 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5194 def ExpandNames(self):
5195 self._ExpandAndLockInstance()
5197 def BuildHooksEnv(self):
5200 This runs on master, primary and secondary nodes of the instance.
5203 env = _BuildInstanceHookEnvByObject(self, self.instance)
5204 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5207 def CheckPrereq(self):
5208 """Check prerequisites.
5210 This checks that the instance is in the cluster and is not running.
5213 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5214 assert instance is not None, \
5215 "Cannot retrieve locked instance %s" % self.op.instance_name
5216 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5217 " offline, cannot reinstall")
5218 for node in instance.secondary_nodes:
5219 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5220 " cannot reinstall")
5222 if instance.disk_template == constants.DT_DISKLESS:
5223 raise errors.OpPrereqError("Instance '%s' has no disks" %
5224 self.op.instance_name,
5226 _CheckInstanceDown(self, instance, "cannot reinstall")
5228 if self.op.os_type is not None:
5230 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5231 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5232 instance_os = self.op.os_type
5234 instance_os = instance.os
5236 nodelist = list(instance.all_nodes)
5238 if self.op.osparams:
5239 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5240 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5241 self.os_inst = i_osdict # the new dict (without defaults)
5245 self.instance = instance
5247 def Exec(self, feedback_fn):
5248 """Reinstall the instance.
5251 inst = self.instance
5253 if self.op.os_type is not None:
5254 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5255 inst.os = self.op.os_type
5256 # Write to configuration
5257 self.cfg.Update(inst, feedback_fn)
5259 _StartInstanceDisks(self, inst, None)
5261 feedback_fn("Running the instance OS create scripts...")
5262 # FIXME: pass debug option from opcode to backend
5263 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5264 self.op.debug_level,
5265 osparams=self.os_inst)
5266 result.Raise("Could not install OS for instance %s on node %s" %
5267 (inst.name, inst.primary_node))
5269 _ShutdownInstanceDisks(self, inst)
5272 class LURecreateInstanceDisks(LogicalUnit):
5273 """Recreate an instance's missing disks.
5276 HPATH = "instance-recreate-disks"
5277 HTYPE = constants.HTYPE_INSTANCE
5280 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5284 def ExpandNames(self):
5285 self._ExpandAndLockInstance()
5287 def BuildHooksEnv(self):
5290 This runs on master, primary and secondary nodes of the instance.
5293 env = _BuildInstanceHookEnvByObject(self, self.instance)
5294 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5297 def CheckPrereq(self):
5298 """Check prerequisites.
5300 This checks that the instance is in the cluster and is not running.
5303 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5304 assert instance is not None, \
5305 "Cannot retrieve locked instance %s" % self.op.instance_name
5306 _CheckNodeOnline(self, instance.primary_node)
5308 if instance.disk_template == constants.DT_DISKLESS:
5309 raise errors.OpPrereqError("Instance '%s' has no disks" %
5310 self.op.instance_name, errors.ECODE_INVAL)
5311 _CheckInstanceDown(self, instance, "cannot recreate disks")
5313 if not self.op.disks:
5314 self.op.disks = range(len(instance.disks))
5316 for idx in self.op.disks:
5317 if idx >= len(instance.disks):
5318 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5321 self.instance = instance
5323 def Exec(self, feedback_fn):
5324 """Recreate the disks.
5328 for idx, _ in enumerate(self.instance.disks):
5329 if idx not in self.op.disks: # disk idx has not been passed in
5333 _CreateDisks(self, self.instance, to_skip=to_skip)
5336 class LURenameInstance(LogicalUnit):
5337 """Rename an instance.
5340 HPATH = "instance-rename"
5341 HTYPE = constants.HTYPE_INSTANCE
5344 ("new_name", ht.NoDefault, ht.TNonEmptyString),
5345 ("ip_check", False, ht.TBool),
5346 ("name_check", True, ht.TBool),
5349 def CheckArguments(self):
5353 if self.op.ip_check and not self.op.name_check:
5354 # TODO: make the ip check more flexible and not depend on the name check
5355 raise errors.OpPrereqError("Cannot do ip check without a name check",
5358 def BuildHooksEnv(self):
5361 This runs on master, primary and secondary nodes of the instance.
5364 env = _BuildInstanceHookEnvByObject(self, self.instance)
5365 env["INSTANCE_NEW_NAME"] = self.op.new_name
5366 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5369 def CheckPrereq(self):
5370 """Check prerequisites.
5372 This checks that the instance is in the cluster and is not running.
5375 self.op.instance_name = _ExpandInstanceName(self.cfg,
5376 self.op.instance_name)
5377 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5378 assert instance is not None
5379 _CheckNodeOnline(self, instance.primary_node)
5380 _CheckInstanceDown(self, instance, "cannot rename")
5381 self.instance = instance
5383 new_name = self.op.new_name
5384 if self.op.name_check:
5385 hostname = netutils.GetHostname(name=new_name)
5386 new_name = self.op.new_name = hostname.name
5387 if (self.op.ip_check and
5388 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5389 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5390 (hostname.ip, new_name),
5391 errors.ECODE_NOTUNIQUE)
5393 instance_list = self.cfg.GetInstanceList()
5394 if new_name in instance_list:
5395 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5396 new_name, errors.ECODE_EXISTS)
5398 def Exec(self, feedback_fn):
5399 """Reinstall the instance.
5402 inst = self.instance
5403 old_name = inst.name
5405 if inst.disk_template == constants.DT_FILE:
5406 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5408 self.cfg.RenameInstance(inst.name, self.op.new_name)
5409 # Change the instance lock. This is definitely safe while we hold the BGL
5410 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5411 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5413 # re-read the instance from the configuration after rename
5414 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5416 if inst.disk_template == constants.DT_FILE:
5417 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5418 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5419 old_file_storage_dir,
5420 new_file_storage_dir)
5421 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5422 " (but the instance has been renamed in Ganeti)" %
5423 (inst.primary_node, old_file_storage_dir,
5424 new_file_storage_dir))
5426 _StartInstanceDisks(self, inst, None)
5428 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5429 old_name, self.op.debug_level)
5430 msg = result.fail_msg
5432 msg = ("Could not run OS rename script for instance %s on node %s"
5433 " (but the instance has been renamed in Ganeti): %s" %
5434 (inst.name, inst.primary_node, msg))
5435 self.proc.LogWarning(msg)
5437 _ShutdownInstanceDisks(self, inst)
5442 class LURemoveInstance(LogicalUnit):
5443 """Remove an instance.
5446 HPATH = "instance-remove"
5447 HTYPE = constants.HTYPE_INSTANCE
5450 ("ignore_failures", False, ht.TBool),
5455 def ExpandNames(self):
5456 self._ExpandAndLockInstance()
5457 self.needed_locks[locking.LEVEL_NODE] = []
5458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5460 def DeclareLocks(self, level):
5461 if level == locking.LEVEL_NODE:
5462 self._LockInstancesNodes()
5464 def BuildHooksEnv(self):
5467 This runs on master, primary and secondary nodes of the instance.
5470 env = _BuildInstanceHookEnvByObject(self, self.instance)
5471 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5472 nl = [self.cfg.GetMasterNode()]
5473 nl_post = list(self.instance.all_nodes) + nl
5474 return env, nl, nl_post
5476 def CheckPrereq(self):
5477 """Check prerequisites.
5479 This checks that the instance is in the cluster.
5482 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5483 assert self.instance is not None, \
5484 "Cannot retrieve locked instance %s" % self.op.instance_name
5486 def Exec(self, feedback_fn):
5487 """Remove the instance.
5490 instance = self.instance
5491 logging.info("Shutting down instance %s on node %s",
5492 instance.name, instance.primary_node)
5494 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5495 self.op.shutdown_timeout)
5496 msg = result.fail_msg
5498 if self.op.ignore_failures:
5499 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5501 raise errors.OpExecError("Could not shutdown instance %s on"
5503 (instance.name, instance.primary_node, msg))
5505 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5508 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5509 """Utility function to remove an instance.
5512 logging.info("Removing block devices for instance %s", instance.name)
5514 if not _RemoveDisks(lu, instance):
5515 if not ignore_failures:
5516 raise errors.OpExecError("Can't remove instance's disks")
5517 feedback_fn("Warning: can't remove instance's disks")
5519 logging.info("Removing instance %s out of cluster config", instance.name)
5521 lu.cfg.RemoveInstance(instance.name)
5523 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5524 "Instance lock removal conflict"
5526 # Remove lock for the instance
5527 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5530 class LUQueryInstances(NoHooksLU):
5531 """Logical unit for querying instances.
5534 # pylint: disable-msg=W0142
5537 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5538 ("use_locking", False, ht.TBool),
5541 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5542 "serial_no", "ctime", "mtime", "uuid"]
5543 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5545 "disk_template", "ip", "mac", "bridge",
5546 "nic_mode", "nic_link",
5547 "sda_size", "sdb_size", "vcpus", "tags",
5548 "network_port", "beparams",
5549 r"(disk)\.(size)/([0-9]+)",
5550 r"(disk)\.(sizes)", "disk_usage",
5551 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5552 r"(nic)\.(bridge)/([0-9]+)",
5553 r"(nic)\.(macs|ips|modes|links|bridges)",
5554 r"(disk|nic)\.(count)",
5555 "hvparams", "custom_hvparams",
5556 "custom_beparams", "custom_nicparams",
5557 ] + _SIMPLE_FIELDS +
5559 for name in constants.HVS_PARAMETERS
5560 if name not in constants.HVC_GLOBALS] +
5562 for name in constants.BES_PARAMETERS])
5563 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5569 def CheckArguments(self):
5570 _CheckOutputFields(static=self._FIELDS_STATIC,
5571 dynamic=self._FIELDS_DYNAMIC,
5572 selected=self.op.output_fields)
5574 def ExpandNames(self):
5575 self.needed_locks = {}
5576 self.share_locks[locking.LEVEL_INSTANCE] = 1
5577 self.share_locks[locking.LEVEL_NODE] = 1
5580 self.wanted = _GetWantedInstances(self, self.op.names)
5582 self.wanted = locking.ALL_SET
5584 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5585 self.do_locking = self.do_node_query and self.op.use_locking
5587 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5588 self.needed_locks[locking.LEVEL_NODE] = []
5589 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5591 def DeclareLocks(self, level):
5592 if level == locking.LEVEL_NODE and self.do_locking:
5593 self._LockInstancesNodes()
5595 def Exec(self, feedback_fn):
5596 """Computes the list of nodes and their attributes.
5599 # pylint: disable-msg=R0912
5600 # way too many branches here
5601 all_info = self.cfg.GetAllInstancesInfo()
5602 if self.wanted == locking.ALL_SET:
5603 # caller didn't specify instance names, so ordering is not important
5605 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5607 instance_names = all_info.keys()
5608 instance_names = utils.NiceSort(instance_names)
5610 # caller did specify names, so we must keep the ordering
5612 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5614 tgt_set = all_info.keys()
5615 missing = set(self.wanted).difference(tgt_set)
5617 raise errors.OpExecError("Some instances were removed before"
5618 " retrieving their data: %s" % missing)
5619 instance_names = self.wanted
5621 instance_list = [all_info[iname] for iname in instance_names]
5623 # begin data gathering
5625 nodes = frozenset([inst.primary_node for inst in instance_list])
5626 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5630 if self.do_node_query:
5632 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5634 result = node_data[name]
5636 # offline nodes will be in both lists
5637 off_nodes.append(name)
5639 bad_nodes.append(name)
5642 live_data.update(result.payload)
5643 # else no instance is alive
5645 live_data = dict([(name, {}) for name in instance_names])
5647 # end data gathering
5652 cluster = self.cfg.GetClusterInfo()
5653 for instance in instance_list:
5655 i_hv = cluster.FillHV(instance, skip_globals=True)
5656 i_be = cluster.FillBE(instance)
5657 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5658 for field in self.op.output_fields:
5659 st_match = self._FIELDS_STATIC.Matches(field)
5660 if field in self._SIMPLE_FIELDS:
5661 val = getattr(instance, field)
5662 elif field == "pnode":
5663 val = instance.primary_node
5664 elif field == "snodes":
5665 val = list(instance.secondary_nodes)
5666 elif field == "admin_state":
5667 val = instance.admin_up
5668 elif field == "oper_state":
5669 if instance.primary_node in bad_nodes:
5672 val = bool(live_data.get(instance.name))
5673 elif field == "status":
5674 if instance.primary_node in off_nodes:
5675 val = "ERROR_nodeoffline"
5676 elif instance.primary_node in bad_nodes:
5677 val = "ERROR_nodedown"
5679 running = bool(live_data.get(instance.name))
5681 if instance.admin_up:
5686 if instance.admin_up:
5690 elif field == "oper_ram":
5691 if instance.primary_node in bad_nodes:
5693 elif instance.name in live_data:
5694 val = live_data[instance.name].get("memory", "?")
5697 elif field == "oper_vcpus":
5698 if instance.primary_node in bad_nodes:
5700 elif instance.name in live_data:
5701 val = live_data[instance.name].get("vcpus", "?")
5704 elif field == "vcpus":
5705 val = i_be[constants.BE_VCPUS]
5706 elif field == "disk_template":
5707 val = instance.disk_template
5710 val = instance.nics[0].ip
5713 elif field == "nic_mode":
5715 val = i_nicp[0][constants.NIC_MODE]
5718 elif field == "nic_link":
5720 val = i_nicp[0][constants.NIC_LINK]
5723 elif field == "bridge":
5724 if (instance.nics and
5725 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5726 val = i_nicp[0][constants.NIC_LINK]
5729 elif field == "mac":
5731 val = instance.nics[0].mac
5734 elif field == "custom_nicparams":
5735 val = [nic.nicparams for nic in instance.nics]
5736 elif field == "sda_size" or field == "sdb_size":
5737 idx = ord(field[2]) - ord('a')
5739 val = instance.FindDisk(idx).size
5740 except errors.OpPrereqError:
5742 elif field == "disk_usage": # total disk usage per node
5743 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5744 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5745 elif field == "tags":
5746 val = list(instance.GetTags())
5747 elif field == "custom_hvparams":
5748 val = instance.hvparams # not filled!
5749 elif field == "hvparams":
5751 elif (field.startswith(HVPREFIX) and
5752 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5753 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5754 val = i_hv.get(field[len(HVPREFIX):], None)
5755 elif field == "custom_beparams":
5756 val = instance.beparams
5757 elif field == "beparams":
5759 elif (field.startswith(BEPREFIX) and
5760 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5761 val = i_be.get(field[len(BEPREFIX):], None)
5762 elif st_match and st_match.groups():
5763 # matches a variable list
5764 st_groups = st_match.groups()
5765 if st_groups and st_groups[0] == "disk":
5766 if st_groups[1] == "count":
5767 val = len(instance.disks)
5768 elif st_groups[1] == "sizes":
5769 val = [disk.size for disk in instance.disks]
5770 elif st_groups[1] == "size":
5772 val = instance.FindDisk(st_groups[2]).size
5773 except errors.OpPrereqError:
5776 assert False, "Unhandled disk parameter"
5777 elif st_groups[0] == "nic":
5778 if st_groups[1] == "count":
5779 val = len(instance.nics)
5780 elif st_groups[1] == "macs":
5781 val = [nic.mac for nic in instance.nics]
5782 elif st_groups[1] == "ips":
5783 val = [nic.ip for nic in instance.nics]
5784 elif st_groups[1] == "modes":
5785 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5786 elif st_groups[1] == "links":
5787 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5788 elif st_groups[1] == "bridges":
5791 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5792 val.append(nicp[constants.NIC_LINK])
5797 nic_idx = int(st_groups[2])
5798 if nic_idx >= len(instance.nics):
5801 if st_groups[1] == "mac":
5802 val = instance.nics[nic_idx].mac
5803 elif st_groups[1] == "ip":
5804 val = instance.nics[nic_idx].ip
5805 elif st_groups[1] == "mode":
5806 val = i_nicp[nic_idx][constants.NIC_MODE]
5807 elif st_groups[1] == "link":
5808 val = i_nicp[nic_idx][constants.NIC_LINK]
5809 elif st_groups[1] == "bridge":
5810 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5811 if nic_mode == constants.NIC_MODE_BRIDGED:
5812 val = i_nicp[nic_idx][constants.NIC_LINK]
5816 assert False, "Unhandled NIC parameter"
5818 assert False, ("Declared but unhandled variable parameter '%s'" %
5821 assert False, "Declared but unhandled parameter '%s'" % field
5828 class LUFailoverInstance(LogicalUnit):
5829 """Failover an instance.
5832 HPATH = "instance-failover"
5833 HTYPE = constants.HTYPE_INSTANCE
5836 ("ignore_consistency", False, ht.TBool),
5841 def ExpandNames(self):
5842 self._ExpandAndLockInstance()
5843 self.needed_locks[locking.LEVEL_NODE] = []
5844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5846 def DeclareLocks(self, level):
5847 if level == locking.LEVEL_NODE:
5848 self._LockInstancesNodes()
5850 def BuildHooksEnv(self):
5853 This runs on master, primary and secondary nodes of the instance.
5856 instance = self.instance
5857 source_node = instance.primary_node
5858 target_node = instance.secondary_nodes[0]
5860 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5861 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5862 "OLD_PRIMARY": source_node,
5863 "OLD_SECONDARY": target_node,
5864 "NEW_PRIMARY": target_node,
5865 "NEW_SECONDARY": source_node,
5867 env.update(_BuildInstanceHookEnvByObject(self, instance))
5868 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5870 nl_post.append(source_node)
5871 return env, nl, nl_post
5873 def CheckPrereq(self):
5874 """Check prerequisites.
5876 This checks that the instance is in the cluster.
5879 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5880 assert self.instance is not None, \
5881 "Cannot retrieve locked instance %s" % self.op.instance_name
5883 bep = self.cfg.GetClusterInfo().FillBE(instance)
5884 if instance.disk_template not in constants.DTS_NET_MIRROR:
5885 raise errors.OpPrereqError("Instance's disk layout is not"
5886 " network mirrored, cannot failover.",
5889 secondary_nodes = instance.secondary_nodes
5890 if not secondary_nodes:
5891 raise errors.ProgrammerError("no secondary node but using "
5892 "a mirrored disk template")
5894 target_node = secondary_nodes[0]
5895 _CheckNodeOnline(self, target_node)
5896 _CheckNodeNotDrained(self, target_node)
5897 if instance.admin_up:
5898 # check memory requirements on the secondary node
5899 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5900 instance.name, bep[constants.BE_MEMORY],
5901 instance.hypervisor)
5903 self.LogInfo("Not checking memory on the secondary node as"
5904 " instance will not be started")
5906 # check bridge existance
5907 _CheckInstanceBridgesExist(self, instance, node=target_node)
5909 def Exec(self, feedback_fn):
5910 """Failover an instance.
5912 The failover is done by shutting it down on its present node and
5913 starting it on the secondary.
5916 instance = self.instance
5917 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5919 source_node = instance.primary_node
5920 target_node = instance.secondary_nodes[0]
5922 if instance.admin_up:
5923 feedback_fn("* checking disk consistency between source and target")
5924 for dev in instance.disks:
5925 # for drbd, these are drbd over lvm
5926 if not _CheckDiskConsistency(self, dev, target_node, False):
5927 if not self.op.ignore_consistency:
5928 raise errors.OpExecError("Disk %s is degraded on target node,"
5929 " aborting failover." % dev.iv_name)
5931 feedback_fn("* not checking disk consistency as instance is not running")
5933 feedback_fn("* shutting down instance on source node")
5934 logging.info("Shutting down instance %s on node %s",
5935 instance.name, source_node)
5937 result = self.rpc.call_instance_shutdown(source_node, instance,
5938 self.op.shutdown_timeout)
5939 msg = result.fail_msg
5941 if self.op.ignore_consistency or primary_node.offline:
5942 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5943 " Proceeding anyway. Please make sure node"
5944 " %s is down. Error details: %s",
5945 instance.name, source_node, source_node, msg)
5947 raise errors.OpExecError("Could not shutdown instance %s on"
5949 (instance.name, source_node, msg))
5951 feedback_fn("* deactivating the instance's disks on source node")
5952 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5953 raise errors.OpExecError("Can't shut down the instance's disks.")
5955 instance.primary_node = target_node
5956 # distribute new instance config to the other nodes
5957 self.cfg.Update(instance, feedback_fn)
5959 # Only start the instance if it's marked as up
5960 if instance.admin_up:
5961 feedback_fn("* activating the instance's disks on target node")
5962 logging.info("Starting instance %s on node %s",
5963 instance.name, target_node)
5965 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5966 ignore_secondaries=True)
5968 _ShutdownInstanceDisks(self, instance)
5969 raise errors.OpExecError("Can't activate the instance's disks")
5971 feedback_fn("* starting the instance on the target node")
5972 result = self.rpc.call_instance_start(target_node, instance, None, None)
5973 msg = result.fail_msg
5975 _ShutdownInstanceDisks(self, instance)
5976 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5977 (instance.name, target_node, msg))
5980 class LUMigrateInstance(LogicalUnit):
5981 """Migrate an instance.
5983 This is migration without shutting down, compared to the failover,
5984 which is done with shutdown.
5987 HPATH = "instance-migrate"
5988 HTYPE = constants.HTYPE_INSTANCE
5993 ("cleanup", False, ht.TBool),
5998 def ExpandNames(self):
5999 self._ExpandAndLockInstance()
6001 self.needed_locks[locking.LEVEL_NODE] = []
6002 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6004 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6006 self.tasklets = [self._migrater]
6008 def DeclareLocks(self, level):
6009 if level == locking.LEVEL_NODE:
6010 self._LockInstancesNodes()
6012 def BuildHooksEnv(self):
6015 This runs on master, primary and secondary nodes of the instance.
6018 instance = self._migrater.instance
6019 source_node = instance.primary_node
6020 target_node = instance.secondary_nodes[0]
6021 env = _BuildInstanceHookEnvByObject(self, instance)
6022 env["MIGRATE_LIVE"] = self._migrater.live
6023 env["MIGRATE_CLEANUP"] = self.op.cleanup
6025 "OLD_PRIMARY": source_node,
6026 "OLD_SECONDARY": target_node,
6027 "NEW_PRIMARY": target_node,
6028 "NEW_SECONDARY": source_node,
6030 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6032 nl_post.append(source_node)
6033 return env, nl, nl_post
6036 class LUMoveInstance(LogicalUnit):
6037 """Move an instance by data-copying.
6040 HPATH = "instance-move"
6041 HTYPE = constants.HTYPE_INSTANCE
6044 ("target_node", ht.NoDefault, ht.TNonEmptyString),
6049 def ExpandNames(self):
6050 self._ExpandAndLockInstance()
6051 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6052 self.op.target_node = target_node
6053 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6056 def DeclareLocks(self, level):
6057 if level == locking.LEVEL_NODE:
6058 self._LockInstancesNodes(primary_only=True)
6060 def BuildHooksEnv(self):
6063 This runs on master, primary and secondary nodes of the instance.
6067 "TARGET_NODE": self.op.target_node,
6068 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6070 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6071 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6072 self.op.target_node]
6075 def CheckPrereq(self):
6076 """Check prerequisites.
6078 This checks that the instance is in the cluster.
6081 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6082 assert self.instance is not None, \
6083 "Cannot retrieve locked instance %s" % self.op.instance_name
6085 node = self.cfg.GetNodeInfo(self.op.target_node)
6086 assert node is not None, \
6087 "Cannot retrieve locked node %s" % self.op.target_node
6089 self.target_node = target_node = node.name
6091 if target_node == instance.primary_node:
6092 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6093 (instance.name, target_node),
6096 bep = self.cfg.GetClusterInfo().FillBE(instance)
6098 for idx, dsk in enumerate(instance.disks):
6099 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6100 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6101 " cannot copy" % idx, errors.ECODE_STATE)
6103 _CheckNodeOnline(self, target_node)
6104 _CheckNodeNotDrained(self, target_node)
6105 _CheckNodeVmCapable(self, target_node)
6107 if instance.admin_up:
6108 # check memory requirements on the secondary node
6109 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6110 instance.name, bep[constants.BE_MEMORY],
6111 instance.hypervisor)
6113 self.LogInfo("Not checking memory on the secondary node as"
6114 " instance will not be started")
6116 # check bridge existance
6117 _CheckInstanceBridgesExist(self, instance, node=target_node)
6119 def Exec(self, feedback_fn):
6120 """Move an instance.
6122 The move is done by shutting it down on its present node, copying
6123 the data over (slow) and starting it on the new node.
6126 instance = self.instance
6128 source_node = instance.primary_node
6129 target_node = self.target_node
6131 self.LogInfo("Shutting down instance %s on source node %s",
6132 instance.name, source_node)
6134 result = self.rpc.call_instance_shutdown(source_node, instance,
6135 self.op.shutdown_timeout)
6136 msg = result.fail_msg
6138 if self.op.ignore_consistency:
6139 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6140 " Proceeding anyway. Please make sure node"
6141 " %s is down. Error details: %s",
6142 instance.name, source_node, source_node, msg)
6144 raise errors.OpExecError("Could not shutdown instance %s on"
6146 (instance.name, source_node, msg))
6148 # create the target disks
6150 _CreateDisks(self, instance, target_node=target_node)
6151 except errors.OpExecError:
6152 self.LogWarning("Device creation failed, reverting...")
6154 _RemoveDisks(self, instance, target_node=target_node)
6156 self.cfg.ReleaseDRBDMinors(instance.name)
6159 cluster_name = self.cfg.GetClusterInfo().cluster_name
6162 # activate, get path, copy the data over
6163 for idx, disk in enumerate(instance.disks):
6164 self.LogInfo("Copying data for disk %d", idx)
6165 result = self.rpc.call_blockdev_assemble(target_node, disk,
6166 instance.name, True)
6168 self.LogWarning("Can't assemble newly created disk %d: %s",
6169 idx, result.fail_msg)
6170 errs.append(result.fail_msg)
6172 dev_path = result.payload
6173 result = self.rpc.call_blockdev_export(source_node, disk,
6174 target_node, dev_path,
6177 self.LogWarning("Can't copy data over for disk %d: %s",
6178 idx, result.fail_msg)
6179 errs.append(result.fail_msg)
6183 self.LogWarning("Some disks failed to copy, aborting")
6185 _RemoveDisks(self, instance, target_node=target_node)
6187 self.cfg.ReleaseDRBDMinors(instance.name)
6188 raise errors.OpExecError("Errors during disk copy: %s" %
6191 instance.primary_node = target_node
6192 self.cfg.Update(instance, feedback_fn)
6194 self.LogInfo("Removing the disks on the original node")
6195 _RemoveDisks(self, instance, target_node=source_node)
6197 # Only start the instance if it's marked as up
6198 if instance.admin_up:
6199 self.LogInfo("Starting instance %s on node %s",
6200 instance.name, target_node)
6202 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6203 ignore_secondaries=True)
6205 _ShutdownInstanceDisks(self, instance)
6206 raise errors.OpExecError("Can't activate the instance's disks")
6208 result = self.rpc.call_instance_start(target_node, instance, None, None)
6209 msg = result.fail_msg
6211 _ShutdownInstanceDisks(self, instance)
6212 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6213 (instance.name, target_node, msg))
6216 class LUMigrateNode(LogicalUnit):
6217 """Migrate all instances from a node.
6220 HPATH = "node-migrate"
6221 HTYPE = constants.HTYPE_NODE
6229 def ExpandNames(self):
6230 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6232 self.needed_locks = {
6233 locking.LEVEL_NODE: [self.op.node_name],
6236 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6238 # Create tasklets for migrating instances for all instances on this node
6242 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6243 logging.debug("Migrating instance %s", inst.name)
6244 names.append(inst.name)
6246 tasklets.append(TLMigrateInstance(self, inst.name, False))
6248 self.tasklets = tasklets
6250 # Declare instance locks
6251 self.needed_locks[locking.LEVEL_INSTANCE] = names
6253 def DeclareLocks(self, level):
6254 if level == locking.LEVEL_NODE:
6255 self._LockInstancesNodes()
6257 def BuildHooksEnv(self):
6260 This runs on the master, the primary and all the secondaries.
6264 "NODE_NAME": self.op.node_name,
6267 nl = [self.cfg.GetMasterNode()]
6269 return (env, nl, nl)
6272 class TLMigrateInstance(Tasklet):
6273 """Tasklet class for instance migration.
6276 @ivar live: whether the migration will be done live or non-live;
6277 this variable is initalized only after CheckPrereq has run
6280 def __init__(self, lu, instance_name, cleanup):
6281 """Initializes this class.
6284 Tasklet.__init__(self, lu)
6287 self.instance_name = instance_name
6288 self.cleanup = cleanup
6289 self.live = False # will be overridden later
6291 def CheckPrereq(self):
6292 """Check prerequisites.
6294 This checks that the instance is in the cluster.
6297 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6298 instance = self.cfg.GetInstanceInfo(instance_name)
6299 assert instance is not None
6301 if instance.disk_template != constants.DT_DRBD8:
6302 raise errors.OpPrereqError("Instance's disk layout is not"
6303 " drbd8, cannot migrate.", errors.ECODE_STATE)
6305 secondary_nodes = instance.secondary_nodes
6306 if not secondary_nodes:
6307 raise errors.ConfigurationError("No secondary node but using"
6308 " drbd8 disk template")
6310 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6312 target_node = secondary_nodes[0]
6313 # check memory requirements on the secondary node
6314 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6315 instance.name, i_be[constants.BE_MEMORY],
6316 instance.hypervisor)
6318 # check bridge existance
6319 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6321 if not self.cleanup:
6322 _CheckNodeNotDrained(self.lu, target_node)
6323 result = self.rpc.call_instance_migratable(instance.primary_node,
6325 result.Raise("Can't migrate, please use failover",
6326 prereq=True, ecode=errors.ECODE_STATE)
6328 self.instance = instance
6330 if self.lu.op.live is not None and self.lu.op.mode is not None:
6331 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6332 " parameters are accepted",
6334 if self.lu.op.live is not None:
6336 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6338 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6339 # reset the 'live' parameter to None so that repeated
6340 # invocations of CheckPrereq do not raise an exception
6341 self.lu.op.live = None
6342 elif self.lu.op.mode is None:
6343 # read the default value from the hypervisor
6344 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6345 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6347 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6349 def _WaitUntilSync(self):
6350 """Poll with custom rpc for disk sync.
6352 This uses our own step-based rpc call.
6355 self.feedback_fn("* wait until resync is done")
6359 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6361 self.instance.disks)
6363 for node, nres in result.items():
6364 nres.Raise("Cannot resync disks on node %s" % node)
6365 node_done, node_percent = nres.payload
6366 all_done = all_done and node_done
6367 if node_percent is not None:
6368 min_percent = min(min_percent, node_percent)
6370 if min_percent < 100:
6371 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6374 def _EnsureSecondary(self, node):
6375 """Demote a node to secondary.
6378 self.feedback_fn("* switching node %s to secondary mode" % node)
6380 for dev in self.instance.disks:
6381 self.cfg.SetDiskID(dev, node)
6383 result = self.rpc.call_blockdev_close(node, self.instance.name,
6384 self.instance.disks)
6385 result.Raise("Cannot change disk to secondary on node %s" % node)
6387 def _GoStandalone(self):
6388 """Disconnect from the network.
6391 self.feedback_fn("* changing into standalone mode")
6392 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6393 self.instance.disks)
6394 for node, nres in result.items():
6395 nres.Raise("Cannot disconnect disks node %s" % node)
6397 def _GoReconnect(self, multimaster):
6398 """Reconnect to the network.
6404 msg = "single-master"
6405 self.feedback_fn("* changing disks into %s mode" % msg)
6406 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6407 self.instance.disks,
6408 self.instance.name, multimaster)
6409 for node, nres in result.items():
6410 nres.Raise("Cannot change disks config on node %s" % node)
6412 def _ExecCleanup(self):
6413 """Try to cleanup after a failed migration.
6415 The cleanup is done by:
6416 - check that the instance is running only on one node
6417 (and update the config if needed)
6418 - change disks on its secondary node to secondary
6419 - wait until disks are fully synchronized
6420 - disconnect from the network
6421 - change disks into single-master mode
6422 - wait again until disks are fully synchronized
6425 instance = self.instance
6426 target_node = self.target_node
6427 source_node = self.source_node
6429 # check running on only one node
6430 self.feedback_fn("* checking where the instance actually runs"
6431 " (if this hangs, the hypervisor might be in"
6433 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6434 for node, result in ins_l.items():
6435 result.Raise("Can't contact node %s" % node)
6437 runningon_source = instance.name in ins_l[source_node].payload
6438 runningon_target = instance.name in ins_l[target_node].payload
6440 if runningon_source and runningon_target:
6441 raise errors.OpExecError("Instance seems to be running on two nodes,"
6442 " or the hypervisor is confused. You will have"
6443 " to ensure manually that it runs only on one"
6444 " and restart this operation.")
6446 if not (runningon_source or runningon_target):
6447 raise errors.OpExecError("Instance does not seem to be running at all."
6448 " In this case, it's safer to repair by"
6449 " running 'gnt-instance stop' to ensure disk"
6450 " shutdown, and then restarting it.")
6452 if runningon_target:
6453 # the migration has actually succeeded, we need to update the config
6454 self.feedback_fn("* instance running on secondary node (%s),"
6455 " updating config" % target_node)
6456 instance.primary_node = target_node
6457 self.cfg.Update(instance, self.feedback_fn)
6458 demoted_node = source_node
6460 self.feedback_fn("* instance confirmed to be running on its"
6461 " primary node (%s)" % source_node)
6462 demoted_node = target_node
6464 self._EnsureSecondary(demoted_node)
6466 self._WaitUntilSync()
6467 except errors.OpExecError:
6468 # we ignore here errors, since if the device is standalone, it
6469 # won't be able to sync
6471 self._GoStandalone()
6472 self._GoReconnect(False)
6473 self._WaitUntilSync()
6475 self.feedback_fn("* done")
6477 def _RevertDiskStatus(self):
6478 """Try to revert the disk status after a failed migration.
6481 target_node = self.target_node
6483 self._EnsureSecondary(target_node)
6484 self._GoStandalone()
6485 self._GoReconnect(False)
6486 self._WaitUntilSync()
6487 except errors.OpExecError, err:
6488 self.lu.LogWarning("Migration failed and I can't reconnect the"
6489 " drives: error '%s'\n"
6490 "Please look and recover the instance status" %
6493 def _AbortMigration(self):
6494 """Call the hypervisor code to abort a started migration.
6497 instance = self.instance
6498 target_node = self.target_node
6499 migration_info = self.migration_info
6501 abort_result = self.rpc.call_finalize_migration(target_node,
6505 abort_msg = abort_result.fail_msg
6507 logging.error("Aborting migration failed on target node %s: %s",
6508 target_node, abort_msg)
6509 # Don't raise an exception here, as we stil have to try to revert the
6510 # disk status, even if this step failed.
6512 def _ExecMigration(self):
6513 """Migrate an instance.
6515 The migrate is done by:
6516 - change the disks into dual-master mode
6517 - wait until disks are fully synchronized again
6518 - migrate the instance
6519 - change disks on the new secondary node (the old primary) to secondary
6520 - wait until disks are fully synchronized
6521 - change disks into single-master mode
6524 instance = self.instance
6525 target_node = self.target_node
6526 source_node = self.source_node
6528 self.feedback_fn("* checking disk consistency between source and target")
6529 for dev in instance.disks:
6530 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6531 raise errors.OpExecError("Disk %s is degraded or not fully"
6532 " synchronized on target node,"
6533 " aborting migrate." % dev.iv_name)
6535 # First get the migration information from the remote node
6536 result = self.rpc.call_migration_info(source_node, instance)
6537 msg = result.fail_msg
6539 log_err = ("Failed fetching source migration information from %s: %s" %
6541 logging.error(log_err)
6542 raise errors.OpExecError(log_err)
6544 self.migration_info = migration_info = result.payload
6546 # Then switch the disks to master/master mode
6547 self._EnsureSecondary(target_node)
6548 self._GoStandalone()
6549 self._GoReconnect(True)
6550 self._WaitUntilSync()
6552 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6553 result = self.rpc.call_accept_instance(target_node,
6556 self.nodes_ip[target_node])
6558 msg = result.fail_msg
6560 logging.error("Instance pre-migration failed, trying to revert"
6561 " disk status: %s", msg)
6562 self.feedback_fn("Pre-migration failed, aborting")
6563 self._AbortMigration()
6564 self._RevertDiskStatus()
6565 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6566 (instance.name, msg))
6568 self.feedback_fn("* migrating instance to %s" % target_node)
6570 result = self.rpc.call_instance_migrate(source_node, instance,
6571 self.nodes_ip[target_node],
6573 msg = result.fail_msg
6575 logging.error("Instance migration failed, trying to revert"
6576 " disk status: %s", msg)
6577 self.feedback_fn("Migration failed, aborting")
6578 self._AbortMigration()
6579 self._RevertDiskStatus()
6580 raise errors.OpExecError("Could not migrate instance %s: %s" %
6581 (instance.name, msg))
6584 instance.primary_node = target_node
6585 # distribute new instance config to the other nodes
6586 self.cfg.Update(instance, self.feedback_fn)
6588 result = self.rpc.call_finalize_migration(target_node,
6592 msg = result.fail_msg
6594 logging.error("Instance migration succeeded, but finalization failed:"
6596 raise errors.OpExecError("Could not finalize instance migration: %s" %
6599 self._EnsureSecondary(source_node)
6600 self._WaitUntilSync()
6601 self._GoStandalone()
6602 self._GoReconnect(False)
6603 self._WaitUntilSync()
6605 self.feedback_fn("* done")
6607 def Exec(self, feedback_fn):
6608 """Perform the migration.
6611 feedback_fn("Migrating instance %s" % self.instance.name)
6613 self.feedback_fn = feedback_fn
6615 self.source_node = self.instance.primary_node
6616 self.target_node = self.instance.secondary_nodes[0]
6617 self.all_nodes = [self.source_node, self.target_node]
6619 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6620 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6624 return self._ExecCleanup()
6626 return self._ExecMigration()
6629 def _CreateBlockDev(lu, node, instance, device, force_create,
6631 """Create a tree of block devices on a given node.
6633 If this device type has to be created on secondaries, create it and
6636 If not, just recurse to children keeping the same 'force' value.
6638 @param lu: the lu on whose behalf we execute
6639 @param node: the node on which to create the device
6640 @type instance: L{objects.Instance}
6641 @param instance: the instance which owns the device
6642 @type device: L{objects.Disk}
6643 @param device: the device to create
6644 @type force_create: boolean
6645 @param force_create: whether to force creation of this device; this
6646 will be change to True whenever we find a device which has
6647 CreateOnSecondary() attribute
6648 @param info: the extra 'metadata' we should attach to the device
6649 (this will be represented as a LVM tag)
6650 @type force_open: boolean
6651 @param force_open: this parameter will be passes to the
6652 L{backend.BlockdevCreate} function where it specifies
6653 whether we run on primary or not, and it affects both
6654 the child assembly and the device own Open() execution
6657 if device.CreateOnSecondary():
6661 for child in device.children:
6662 _CreateBlockDev(lu, node, instance, child, force_create,
6665 if not force_create:
6668 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6671 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6672 """Create a single block device on a given node.
6674 This will not recurse over children of the device, so they must be
6677 @param lu: the lu on whose behalf we execute
6678 @param node: the node on which to create the device
6679 @type instance: L{objects.Instance}
6680 @param instance: the instance which owns the device
6681 @type device: L{objects.Disk}
6682 @param device: the device to create
6683 @param info: the extra 'metadata' we should attach to the device
6684 (this will be represented as a LVM tag)
6685 @type force_open: boolean
6686 @param force_open: this parameter will be passes to the
6687 L{backend.BlockdevCreate} function where it specifies
6688 whether we run on primary or not, and it affects both
6689 the child assembly and the device own Open() execution
6692 lu.cfg.SetDiskID(device, node)
6693 result = lu.rpc.call_blockdev_create(node, device, device.size,
6694 instance.name, force_open, info)
6695 result.Raise("Can't create block device %s on"
6696 " node %s for instance %s" % (device, node, instance.name))
6697 if device.physical_id is None:
6698 device.physical_id = result.payload
6701 def _GenerateUniqueNames(lu, exts):
6702 """Generate a suitable LV name.
6704 This will generate a logical volume name for the given instance.
6709 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6710 results.append("%s%s" % (new_id, val))
6714 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6716 """Generate a drbd8 device complete with its children.
6719 port = lu.cfg.AllocatePort()
6720 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6721 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6722 logical_id=(vgname, names[0]))
6723 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6724 logical_id=(vgname, names[1]))
6725 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6726 logical_id=(primary, secondary, port,
6729 children=[dev_data, dev_meta],
6734 def _GenerateDiskTemplate(lu, template_name,
6735 instance_name, primary_node,
6736 secondary_nodes, disk_info,
6737 file_storage_dir, file_driver,
6738 base_index, feedback_fn):
6739 """Generate the entire disk layout for a given template type.
6742 #TODO: compute space requirements
6744 vgname = lu.cfg.GetVGName()
6745 disk_count = len(disk_info)
6747 if template_name == constants.DT_DISKLESS:
6749 elif template_name == constants.DT_PLAIN:
6750 if len(secondary_nodes) != 0:
6751 raise errors.ProgrammerError("Wrong template configuration")
6753 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6754 for i in range(disk_count)])
6755 for idx, disk in enumerate(disk_info):
6756 disk_index = idx + base_index
6757 vg = disk.get("vg", vgname)
6758 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6759 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6760 logical_id=(vg, names[idx]),
6761 iv_name="disk/%d" % disk_index,
6763 disks.append(disk_dev)
6764 elif template_name == constants.DT_DRBD8:
6765 if len(secondary_nodes) != 1:
6766 raise errors.ProgrammerError("Wrong template configuration")
6767 remote_node = secondary_nodes[0]
6768 minors = lu.cfg.AllocateDRBDMinor(
6769 [primary_node, remote_node] * len(disk_info), instance_name)
6772 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6773 for i in range(disk_count)]):
6774 names.append(lv_prefix + "_data")
6775 names.append(lv_prefix + "_meta")
6776 for idx, disk in enumerate(disk_info):
6777 disk_index = idx + base_index
6778 vg = disk.get("vg", vgname)
6779 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6780 disk["size"], vg, names[idx*2:idx*2+2],
6781 "disk/%d" % disk_index,
6782 minors[idx*2], minors[idx*2+1])
6783 disk_dev.mode = disk["mode"]
6784 disks.append(disk_dev)
6785 elif template_name == constants.DT_FILE:
6786 if len(secondary_nodes) != 0:
6787 raise errors.ProgrammerError("Wrong template configuration")
6789 _RequireFileStorage()
6791 for idx, disk in enumerate(disk_info):
6792 disk_index = idx + base_index
6793 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6794 iv_name="disk/%d" % disk_index,
6795 logical_id=(file_driver,
6796 "%s/disk%d" % (file_storage_dir,
6799 disks.append(disk_dev)
6801 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6805 def _GetInstanceInfoText(instance):
6806 """Compute that text that should be added to the disk's metadata.
6809 return "originstname+%s" % instance.name
6812 def _CalcEta(time_taken, written, total_size):
6813 """Calculates the ETA based on size written and total size.
6815 @param time_taken: The time taken so far
6816 @param written: amount written so far
6817 @param total_size: The total size of data to be written
6818 @return: The remaining time in seconds
6821 avg_time = time_taken / float(written)
6822 return (total_size - written) * avg_time
6825 def _WipeDisks(lu, instance):
6826 """Wipes instance disks.
6828 @type lu: L{LogicalUnit}
6829 @param lu: the logical unit on whose behalf we execute
6830 @type instance: L{objects.Instance}
6831 @param instance: the instance whose disks we should create
6832 @return: the success of the wipe
6835 node = instance.primary_node
6836 for idx, device in enumerate(instance.disks):
6837 lu.LogInfo("* Wiping disk %d", idx)
6838 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6840 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6841 # MAX_WIPE_CHUNK at max
6842 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6843 constants.MIN_WIPE_CHUNK_PERCENT)
6848 start_time = time.time()
6850 while offset < size:
6851 wipe_size = min(wipe_chunk_size, size - offset)
6852 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6853 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6854 (idx, offset, wipe_size))
6857 if now - last_output >= 60:
6858 eta = _CalcEta(now - start_time, offset, size)
6859 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6860 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6864 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6865 """Create all disks for an instance.
6867 This abstracts away some work from AddInstance.
6869 @type lu: L{LogicalUnit}
6870 @param lu: the logical unit on whose behalf we execute
6871 @type instance: L{objects.Instance}
6872 @param instance: the instance whose disks we should create
6874 @param to_skip: list of indices to skip
6875 @type target_node: string
6876 @param target_node: if passed, overrides the target node for creation
6878 @return: the success of the creation
6881 info = _GetInstanceInfoText(instance)
6882 if target_node is None:
6883 pnode = instance.primary_node
6884 all_nodes = instance.all_nodes
6889 if instance.disk_template == constants.DT_FILE:
6890 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6891 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6893 result.Raise("Failed to create directory '%s' on"
6894 " node %s" % (file_storage_dir, pnode))
6896 # Note: this needs to be kept in sync with adding of disks in
6897 # LUSetInstanceParams
6898 for idx, device in enumerate(instance.disks):
6899 if to_skip and idx in to_skip:
6901 logging.info("Creating volume %s for instance %s",
6902 device.iv_name, instance.name)
6904 for node in all_nodes:
6905 f_create = node == pnode
6906 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6909 def _RemoveDisks(lu, instance, target_node=None):
6910 """Remove all disks for an instance.
6912 This abstracts away some work from `AddInstance()` and
6913 `RemoveInstance()`. Note that in case some of the devices couldn't
6914 be removed, the removal will continue with the other ones (compare
6915 with `_CreateDisks()`).
6917 @type lu: L{LogicalUnit}
6918 @param lu: the logical unit on whose behalf we execute
6919 @type instance: L{objects.Instance}
6920 @param instance: the instance whose disks we should remove
6921 @type target_node: string
6922 @param target_node: used to override the node on which to remove the disks
6924 @return: the success of the removal
6927 logging.info("Removing block devices for instance %s", instance.name)
6930 for device in instance.disks:
6932 edata = [(target_node, device)]
6934 edata = device.ComputeNodeTree(instance.primary_node)
6935 for node, disk in edata:
6936 lu.cfg.SetDiskID(disk, node)
6937 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6939 lu.LogWarning("Could not remove block device %s on node %s,"
6940 " continuing anyway: %s", device.iv_name, node, msg)
6943 if instance.disk_template == constants.DT_FILE:
6944 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6948 tgt = instance.primary_node
6949 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6951 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6952 file_storage_dir, instance.primary_node, result.fail_msg)
6958 def _ComputeDiskSizePerVG(disk_template, disks):
6959 """Compute disk size requirements in the volume group
6962 def _compute(disks, payload):
6963 """Universal algorithm
6968 vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6972 # Required free disk space as a function of disk and swap space
6974 constants.DT_DISKLESS: None,
6975 constants.DT_PLAIN: _compute(disks, 0),
6976 # 128 MB are added for drbd metadata for each disk
6977 constants.DT_DRBD8: _compute(disks, 128),
6978 constants.DT_FILE: None,
6981 if disk_template not in req_size_dict:
6982 raise errors.ProgrammerError("Disk template '%s' size requirement"
6983 " is unknown" % disk_template)
6985 return req_size_dict[disk_template]
6987 def _ComputeDiskSize(disk_template, disks):
6988 """Compute disk size requirements in the volume group
6991 # Required free disk space as a function of disk and swap space
6993 constants.DT_DISKLESS: None,
6994 constants.DT_PLAIN: sum(d["size"] for d in disks),
6995 # 128 MB are added for drbd metadata for each disk
6996 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6997 constants.DT_FILE: None,
7000 if disk_template not in req_size_dict:
7001 raise errors.ProgrammerError("Disk template '%s' size requirement"
7002 " is unknown" % disk_template)
7004 return req_size_dict[disk_template]
7007 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7008 """Hypervisor parameter validation.
7010 This function abstract the hypervisor parameter validation to be
7011 used in both instance create and instance modify.
7013 @type lu: L{LogicalUnit}
7014 @param lu: the logical unit for which we check
7015 @type nodenames: list
7016 @param nodenames: the list of nodes on which we should check
7017 @type hvname: string
7018 @param hvname: the name of the hypervisor we should use
7019 @type hvparams: dict
7020 @param hvparams: the parameters which we need to check
7021 @raise errors.OpPrereqError: if the parameters are not valid
7024 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7027 for node in nodenames:
7031 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7034 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7035 """OS parameters validation.
7037 @type lu: L{LogicalUnit}
7038 @param lu: the logical unit for which we check
7039 @type required: boolean
7040 @param required: whether the validation should fail if the OS is not
7042 @type nodenames: list
7043 @param nodenames: the list of nodes on which we should check
7044 @type osname: string
7045 @param osname: the name of the hypervisor we should use
7046 @type osparams: dict
7047 @param osparams: the parameters which we need to check
7048 @raise errors.OpPrereqError: if the parameters are not valid
7051 result = lu.rpc.call_os_validate(required, nodenames, osname,
7052 [constants.OS_VALIDATE_PARAMETERS],
7054 for node, nres in result.items():
7055 # we don't check for offline cases since this should be run only
7056 # against the master node and/or an instance's nodes
7057 nres.Raise("OS Parameters validation failed on node %s" % node)
7058 if not nres.payload:
7059 lu.LogInfo("OS %s not found on node %s, validation skipped",
7063 class LUCreateInstance(LogicalUnit):
7064 """Create an instance.
7067 HPATH = "instance-add"
7068 HTYPE = constants.HTYPE_INSTANCE
7071 ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
7072 ("start", True, ht.TBool),
7073 ("wait_for_sync", True, ht.TBool),
7074 ("ip_check", True, ht.TBool),
7075 ("name_check", True, ht.TBool),
7076 ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
7077 ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
7078 ("hvparams", ht.EmptyDict, ht.TDict),
7079 ("beparams", ht.EmptyDict, ht.TDict),
7080 ("osparams", ht.EmptyDict, ht.TDict),
7081 ("no_install", None, ht.TMaybeBool),
7082 ("os_type", None, ht.TMaybeString),
7083 ("force_variant", False, ht.TBool),
7084 ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
7085 ("source_x509_ca", None, ht.TMaybeString),
7086 ("source_instance_name", None, ht.TMaybeString),
7087 ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
7089 ("src_node", None, ht.TMaybeString),
7090 ("src_path", None, ht.TMaybeString),
7091 ("pnode", None, ht.TMaybeString),
7092 ("snode", None, ht.TMaybeString),
7093 ("iallocator", None, ht.TMaybeString),
7094 ("hypervisor", None, ht.TMaybeString),
7095 ("disk_template", ht.NoDefault, _CheckDiskTemplate),
7096 ("identify_defaults", False, ht.TBool),
7097 ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
7098 ("file_storage_dir", None, ht.TMaybeString),
7102 def CheckArguments(self):
7106 # do not require name_check to ease forward/backward compatibility
7108 if self.op.no_install and self.op.start:
7109 self.LogInfo("No-installation mode selected, disabling startup")
7110 self.op.start = False
7111 # validate/normalize the instance name
7112 self.op.instance_name = \
7113 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7115 if self.op.ip_check and not self.op.name_check:
7116 # TODO: make the ip check more flexible and not depend on the name check
7117 raise errors.OpPrereqError("Cannot do ip check without a name check",
7120 # check nics' parameter names
7121 for nic in self.op.nics:
7122 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7124 # check disks. parameter names and consistent adopt/no-adopt strategy
7125 has_adopt = has_no_adopt = False
7126 for disk in self.op.disks:
7127 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7132 if has_adopt and has_no_adopt:
7133 raise errors.OpPrereqError("Either all disks are adopted or none is",
7136 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7137 raise errors.OpPrereqError("Disk adoption is not supported for the"
7138 " '%s' disk template" %
7139 self.op.disk_template,
7141 if self.op.iallocator is not None:
7142 raise errors.OpPrereqError("Disk adoption not allowed with an"
7143 " iallocator script", errors.ECODE_INVAL)
7144 if self.op.mode == constants.INSTANCE_IMPORT:
7145 raise errors.OpPrereqError("Disk adoption not allowed for"
7146 " instance import", errors.ECODE_INVAL)
7148 self.adopt_disks = has_adopt
7150 # instance name verification
7151 if self.op.name_check:
7152 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7153 self.op.instance_name = self.hostname1.name
7154 # used in CheckPrereq for ip ping check
7155 self.check_ip = self.hostname1.ip
7157 self.check_ip = None
7159 # file storage checks
7160 if (self.op.file_driver and
7161 not self.op.file_driver in constants.FILE_DRIVER):
7162 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7163 self.op.file_driver, errors.ECODE_INVAL)
7165 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7166 raise errors.OpPrereqError("File storage directory path not absolute",
7169 ### Node/iallocator related checks
7170 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7172 if self.op.pnode is not None:
7173 if self.op.disk_template in constants.DTS_NET_MIRROR:
7174 if self.op.snode is None:
7175 raise errors.OpPrereqError("The networked disk templates need"
7176 " a mirror node", errors.ECODE_INVAL)
7178 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7180 self.op.snode = None
7182 self._cds = _GetClusterDomainSecret()
7184 if self.op.mode == constants.INSTANCE_IMPORT:
7185 # On import force_variant must be True, because if we forced it at
7186 # initial install, our only chance when importing it back is that it
7188 self.op.force_variant = True
7190 if self.op.no_install:
7191 self.LogInfo("No-installation mode has no effect during import")
7193 elif self.op.mode == constants.INSTANCE_CREATE:
7194 if self.op.os_type is None:
7195 raise errors.OpPrereqError("No guest OS specified",
7197 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7198 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7199 " installation" % self.op.os_type,
7201 if self.op.disk_template is None:
7202 raise errors.OpPrereqError("No disk template specified",
7205 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7206 # Check handshake to ensure both clusters have the same domain secret
7207 src_handshake = self.op.source_handshake
7208 if not src_handshake:
7209 raise errors.OpPrereqError("Missing source handshake",
7212 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7215 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7218 # Load and check source CA
7219 self.source_x509_ca_pem = self.op.source_x509_ca
7220 if not self.source_x509_ca_pem:
7221 raise errors.OpPrereqError("Missing source X509 CA",
7225 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7227 except OpenSSL.crypto.Error, err:
7228 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7229 (err, ), errors.ECODE_INVAL)
7231 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7232 if errcode is not None:
7233 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7236 self.source_x509_ca = cert
7238 src_instance_name = self.op.source_instance_name
7239 if not src_instance_name:
7240 raise errors.OpPrereqError("Missing source instance name",
7243 self.source_instance_name = \
7244 netutils.GetHostname(name=src_instance_name).name
7247 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7248 self.op.mode, errors.ECODE_INVAL)
7250 def ExpandNames(self):
7251 """ExpandNames for CreateInstance.
7253 Figure out the right locks for instance creation.
7256 self.needed_locks = {}
7258 instance_name = self.op.instance_name
7259 # this is just a preventive check, but someone might still add this
7260 # instance in the meantime, and creation will fail at lock-add time
7261 if instance_name in self.cfg.GetInstanceList():
7262 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7263 instance_name, errors.ECODE_EXISTS)
7265 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7267 if self.op.iallocator:
7268 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7270 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7271 nodelist = [self.op.pnode]
7272 if self.op.snode is not None:
7273 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7274 nodelist.append(self.op.snode)
7275 self.needed_locks[locking.LEVEL_NODE] = nodelist
7277 # in case of import lock the source node too
7278 if self.op.mode == constants.INSTANCE_IMPORT:
7279 src_node = self.op.src_node
7280 src_path = self.op.src_path
7282 if src_path is None:
7283 self.op.src_path = src_path = self.op.instance_name
7285 if src_node is None:
7286 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7287 self.op.src_node = None
7288 if os.path.isabs(src_path):
7289 raise errors.OpPrereqError("Importing an instance from an absolute"
7290 " path requires a source node option.",
7293 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7294 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7295 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7296 if not os.path.isabs(src_path):
7297 self.op.src_path = src_path = \
7298 utils.PathJoin(constants.EXPORT_DIR, src_path)
7300 def _RunAllocator(self):
7301 """Run the allocator based on input opcode.
7304 nics = [n.ToDict() for n in self.nics]
7305 ial = IAllocator(self.cfg, self.rpc,
7306 mode=constants.IALLOCATOR_MODE_ALLOC,
7307 name=self.op.instance_name,
7308 disk_template=self.op.disk_template,
7311 vcpus=self.be_full[constants.BE_VCPUS],
7312 mem_size=self.be_full[constants.BE_MEMORY],
7315 hypervisor=self.op.hypervisor,
7318 ial.Run(self.op.iallocator)
7321 raise errors.OpPrereqError("Can't compute nodes using"
7322 " iallocator '%s': %s" %
7323 (self.op.iallocator, ial.info),
7325 if len(ial.result) != ial.required_nodes:
7326 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7327 " of nodes (%s), required %s" %
7328 (self.op.iallocator, len(ial.result),
7329 ial.required_nodes), errors.ECODE_FAULT)
7330 self.op.pnode = ial.result[0]
7331 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7332 self.op.instance_name, self.op.iallocator,
7333 utils.CommaJoin(ial.result))
7334 if ial.required_nodes == 2:
7335 self.op.snode = ial.result[1]
7337 def BuildHooksEnv(self):
7340 This runs on master, primary and secondary nodes of the instance.
7344 "ADD_MODE": self.op.mode,
7346 if self.op.mode == constants.INSTANCE_IMPORT:
7347 env["SRC_NODE"] = self.op.src_node
7348 env["SRC_PATH"] = self.op.src_path
7349 env["SRC_IMAGES"] = self.src_images
7351 env.update(_BuildInstanceHookEnv(
7352 name=self.op.instance_name,
7353 primary_node=self.op.pnode,
7354 secondary_nodes=self.secondaries,
7355 status=self.op.start,
7356 os_type=self.op.os_type,
7357 memory=self.be_full[constants.BE_MEMORY],
7358 vcpus=self.be_full[constants.BE_VCPUS],
7359 nics=_NICListToTuple(self, self.nics),
7360 disk_template=self.op.disk_template,
7361 disks=[(d["size"], d["mode"]) for d in self.disks],
7364 hypervisor_name=self.op.hypervisor,
7367 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7371 def _ReadExportInfo(self):
7372 """Reads the export information from disk.
7374 It will override the opcode source node and path with the actual
7375 information, if these two were not specified before.
7377 @return: the export information
7380 assert self.op.mode == constants.INSTANCE_IMPORT
7382 src_node = self.op.src_node
7383 src_path = self.op.src_path
7385 if src_node is None:
7386 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7387 exp_list = self.rpc.call_export_list(locked_nodes)
7389 for node in exp_list:
7390 if exp_list[node].fail_msg:
7392 if src_path in exp_list[node].payload:
7394 self.op.src_node = src_node = node
7395 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7399 raise errors.OpPrereqError("No export found for relative path %s" %
7400 src_path, errors.ECODE_INVAL)
7402 _CheckNodeOnline(self, src_node)
7403 result = self.rpc.call_export_info(src_node, src_path)
7404 result.Raise("No export or invalid export found in dir %s" % src_path)
7406 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7407 if not export_info.has_section(constants.INISECT_EXP):
7408 raise errors.ProgrammerError("Corrupted export config",
7409 errors.ECODE_ENVIRON)
7411 ei_version = export_info.get(constants.INISECT_EXP, "version")
7412 if (int(ei_version) != constants.EXPORT_VERSION):
7413 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7414 (ei_version, constants.EXPORT_VERSION),
7415 errors.ECODE_ENVIRON)
7418 def _ReadExportParams(self, einfo):
7419 """Use export parameters as defaults.
7421 In case the opcode doesn't specify (as in override) some instance
7422 parameters, then try to use them from the export information, if
7426 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7428 if self.op.disk_template is None:
7429 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7430 self.op.disk_template = einfo.get(constants.INISECT_INS,
7433 raise errors.OpPrereqError("No disk template specified and the export"
7434 " is missing the disk_template information",
7437 if not self.op.disks:
7438 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7440 # TODO: import the disk iv_name too
7441 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7442 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7443 disks.append({"size": disk_sz})
7444 self.op.disks = disks
7446 raise errors.OpPrereqError("No disk info specified and the export"
7447 " is missing the disk information",
7450 if (not self.op.nics and
7451 einfo.has_option(constants.INISECT_INS, "nic_count")):
7453 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7455 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7456 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7461 if (self.op.hypervisor is None and
7462 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7463 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7464 if einfo.has_section(constants.INISECT_HYP):
7465 # use the export parameters but do not override the ones
7466 # specified by the user
7467 for name, value in einfo.items(constants.INISECT_HYP):
7468 if name not in self.op.hvparams:
7469 self.op.hvparams[name] = value
7471 if einfo.has_section(constants.INISECT_BEP):
7472 # use the parameters, without overriding
7473 for name, value in einfo.items(constants.INISECT_BEP):
7474 if name not in self.op.beparams:
7475 self.op.beparams[name] = value
7477 # try to read the parameters old style, from the main section
7478 for name in constants.BES_PARAMETERS:
7479 if (name not in self.op.beparams and
7480 einfo.has_option(constants.INISECT_INS, name)):
7481 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7483 if einfo.has_section(constants.INISECT_OSP):
7484 # use the parameters, without overriding
7485 for name, value in einfo.items(constants.INISECT_OSP):
7486 if name not in self.op.osparams:
7487 self.op.osparams[name] = value
7489 def _RevertToDefaults(self, cluster):
7490 """Revert the instance parameters to the default values.
7494 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7495 for name in self.op.hvparams.keys():
7496 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7497 del self.op.hvparams[name]
7499 be_defs = cluster.SimpleFillBE({})
7500 for name in self.op.beparams.keys():
7501 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7502 del self.op.beparams[name]
7504 nic_defs = cluster.SimpleFillNIC({})
7505 for nic in self.op.nics:
7506 for name in constants.NICS_PARAMETERS:
7507 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7510 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7511 for name in self.op.osparams.keys():
7512 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7513 del self.op.osparams[name]
7515 def CheckPrereq(self):
7516 """Check prerequisites.
7519 if self.op.mode == constants.INSTANCE_IMPORT:
7520 export_info = self._ReadExportInfo()
7521 self._ReadExportParams(export_info)
7523 _CheckDiskTemplate(self.op.disk_template)
7525 if (not self.cfg.GetVGName() and
7526 self.op.disk_template not in constants.DTS_NOT_LVM):
7527 raise errors.OpPrereqError("Cluster does not support lvm-based"
7528 " instances", errors.ECODE_STATE)
7530 if self.op.hypervisor is None:
7531 self.op.hypervisor = self.cfg.GetHypervisorType()
7533 cluster = self.cfg.GetClusterInfo()
7534 enabled_hvs = cluster.enabled_hypervisors
7535 if self.op.hypervisor not in enabled_hvs:
7536 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7537 " cluster (%s)" % (self.op.hypervisor,
7538 ",".join(enabled_hvs)),
7541 # check hypervisor parameter syntax (locally)
7542 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7543 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7545 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7546 hv_type.CheckParameterSyntax(filled_hvp)
7547 self.hv_full = filled_hvp
7548 # check that we don't specify global parameters on an instance
7549 _CheckGlobalHvParams(self.op.hvparams)
7551 # fill and remember the beparams dict
7552 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7553 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7555 # build os parameters
7556 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7558 # now that hvp/bep are in final format, let's reset to defaults,
7560 if self.op.identify_defaults:
7561 self._RevertToDefaults(cluster)
7565 for idx, nic in enumerate(self.op.nics):
7566 nic_mode_req = nic.get("mode", None)
7567 nic_mode = nic_mode_req
7568 if nic_mode is None:
7569 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7571 # in routed mode, for the first nic, the default ip is 'auto'
7572 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7573 default_ip_mode = constants.VALUE_AUTO
7575 default_ip_mode = constants.VALUE_NONE
7577 # ip validity checks
7578 ip = nic.get("ip", default_ip_mode)
7579 if ip is None or ip.lower() == constants.VALUE_NONE:
7581 elif ip.lower() == constants.VALUE_AUTO:
7582 if not self.op.name_check:
7583 raise errors.OpPrereqError("IP address set to auto but name checks"
7584 " have been skipped",
7586 nic_ip = self.hostname1.ip
7588 if not netutils.IPAddress.IsValid(ip):
7589 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7593 # TODO: check the ip address for uniqueness
7594 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7595 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7598 # MAC address verification
7599 mac = nic.get("mac", constants.VALUE_AUTO)
7600 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7601 mac = utils.NormalizeAndValidateMac(mac)
7604 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7605 except errors.ReservationError:
7606 raise errors.OpPrereqError("MAC address %s already in use"
7607 " in cluster" % mac,
7608 errors.ECODE_NOTUNIQUE)
7610 # bridge verification
7611 bridge = nic.get("bridge", None)
7612 link = nic.get("link", None)
7614 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7615 " at the same time", errors.ECODE_INVAL)
7616 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7617 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7624 nicparams[constants.NIC_MODE] = nic_mode_req
7626 nicparams[constants.NIC_LINK] = link
7628 check_params = cluster.SimpleFillNIC(nicparams)
7629 objects.NIC.CheckParameterSyntax(check_params)
7630 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7632 # disk checks/pre-build
7634 for disk in self.op.disks:
7635 mode = disk.get("mode", constants.DISK_RDWR)
7636 if mode not in constants.DISK_ACCESS_SET:
7637 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7638 mode, errors.ECODE_INVAL)
7639 size = disk.get("size", None)
7641 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7644 except (TypeError, ValueError):
7645 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7647 vg = disk.get("vg", self.cfg.GetVGName())
7648 new_disk = {"size": size, "mode": mode, "vg": vg}
7650 new_disk["adopt"] = disk["adopt"]
7651 self.disks.append(new_disk)
7653 if self.op.mode == constants.INSTANCE_IMPORT:
7655 # Check that the new instance doesn't have less disks than the export
7656 instance_disks = len(self.disks)
7657 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7658 if instance_disks < export_disks:
7659 raise errors.OpPrereqError("Not enough disks to import."
7660 " (instance: %d, export: %d)" %
7661 (instance_disks, export_disks),
7665 for idx in range(export_disks):
7666 option = 'disk%d_dump' % idx
7667 if export_info.has_option(constants.INISECT_INS, option):
7668 # FIXME: are the old os-es, disk sizes, etc. useful?
7669 export_name = export_info.get(constants.INISECT_INS, option)
7670 image = utils.PathJoin(self.op.src_path, export_name)
7671 disk_images.append(image)
7673 disk_images.append(False)
7675 self.src_images = disk_images
7677 old_name = export_info.get(constants.INISECT_INS, 'name')
7679 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7680 except (TypeError, ValueError), err:
7681 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7682 " an integer: %s" % str(err),
7684 if self.op.instance_name == old_name:
7685 for idx, nic in enumerate(self.nics):
7686 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7687 nic_mac_ini = 'nic%d_mac' % idx
7688 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7690 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7692 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7693 if self.op.ip_check:
7694 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7695 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7696 (self.check_ip, self.op.instance_name),
7697 errors.ECODE_NOTUNIQUE)
7699 #### mac address generation
7700 # By generating here the mac address both the allocator and the hooks get
7701 # the real final mac address rather than the 'auto' or 'generate' value.
7702 # There is a race condition between the generation and the instance object
7703 # creation, which means that we know the mac is valid now, but we're not
7704 # sure it will be when we actually add the instance. If things go bad
7705 # adding the instance will abort because of a duplicate mac, and the
7706 # creation job will fail.
7707 for nic in self.nics:
7708 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7709 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7713 if self.op.iallocator is not None:
7714 self._RunAllocator()
7716 #### node related checks
7718 # check primary node
7719 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7720 assert self.pnode is not None, \
7721 "Cannot retrieve locked node %s" % self.op.pnode
7723 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7724 pnode.name, errors.ECODE_STATE)
7726 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7727 pnode.name, errors.ECODE_STATE)
7728 if not pnode.vm_capable:
7729 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7730 " '%s'" % pnode.name, errors.ECODE_STATE)
7732 self.secondaries = []
7734 # mirror node verification
7735 if self.op.disk_template in constants.DTS_NET_MIRROR:
7736 if self.op.snode == pnode.name:
7737 raise errors.OpPrereqError("The secondary node cannot be the"
7738 " primary node.", errors.ECODE_INVAL)
7739 _CheckNodeOnline(self, self.op.snode)
7740 _CheckNodeNotDrained(self, self.op.snode)
7741 _CheckNodeVmCapable(self, self.op.snode)
7742 self.secondaries.append(self.op.snode)
7744 nodenames = [pnode.name] + self.secondaries
7746 if not self.adopt_disks:
7747 # Check lv size requirements, if not adopting
7748 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7749 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7751 else: # instead, we must check the adoption data
7752 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7753 if len(all_lvs) != len(self.disks):
7754 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7756 for lv_name in all_lvs:
7758 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7759 # to ReserveLV uses the same syntax
7760 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7761 except errors.ReservationError:
7762 raise errors.OpPrereqError("LV named %s used by another instance" %
7763 lv_name, errors.ECODE_NOTUNIQUE)
7765 vg_names = self.rpc.call_vg_list([pnode.name])
7766 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7768 node_lvs = self.rpc.call_lv_list([pnode.name],
7769 vg_names[pnode.name].payload.keys()
7771 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7772 node_lvs = node_lvs.payload
7774 delta = all_lvs.difference(node_lvs.keys())
7776 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7777 utils.CommaJoin(delta),
7779 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7781 raise errors.OpPrereqError("Online logical volumes found, cannot"
7782 " adopt: %s" % utils.CommaJoin(online_lvs),
7784 # update the size of disk based on what is found
7785 for dsk in self.disks:
7786 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7788 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7790 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7791 # check OS parameters (remotely)
7792 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7794 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7796 # memory check on primary node
7798 _CheckNodeFreeMemory(self, self.pnode.name,
7799 "creating instance %s" % self.op.instance_name,
7800 self.be_full[constants.BE_MEMORY],
7803 self.dry_run_result = list(nodenames)
7805 def Exec(self, feedback_fn):
7806 """Create and add the instance to the cluster.
7809 instance = self.op.instance_name
7810 pnode_name = self.pnode.name
7812 ht_kind = self.op.hypervisor
7813 if ht_kind in constants.HTS_REQ_PORT:
7814 network_port = self.cfg.AllocatePort()
7818 if constants.ENABLE_FILE_STORAGE:
7819 # this is needed because os.path.join does not accept None arguments
7820 if self.op.file_storage_dir is None:
7821 string_file_storage_dir = ""
7823 string_file_storage_dir = self.op.file_storage_dir
7825 # build the full file storage dir path
7826 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7827 string_file_storage_dir, instance)
7829 file_storage_dir = ""
7831 disks = _GenerateDiskTemplate(self,
7832 self.op.disk_template,
7833 instance, pnode_name,
7837 self.op.file_driver,
7841 iobj = objects.Instance(name=instance, os=self.op.os_type,
7842 primary_node=pnode_name,
7843 nics=self.nics, disks=disks,
7844 disk_template=self.op.disk_template,
7846 network_port=network_port,
7847 beparams=self.op.beparams,
7848 hvparams=self.op.hvparams,
7849 hypervisor=self.op.hypervisor,
7850 osparams=self.op.osparams,
7853 if self.adopt_disks:
7854 # rename LVs to the newly-generated names; we need to construct
7855 # 'fake' LV disks with the old data, plus the new unique_id
7856 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7858 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7859 rename_to.append(t_dsk.logical_id)
7860 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7861 self.cfg.SetDiskID(t_dsk, pnode_name)
7862 result = self.rpc.call_blockdev_rename(pnode_name,
7863 zip(tmp_disks, rename_to))
7864 result.Raise("Failed to rename adoped LVs")
7866 feedback_fn("* creating instance disks...")
7868 _CreateDisks(self, iobj)
7869 except errors.OpExecError:
7870 self.LogWarning("Device creation failed, reverting...")
7872 _RemoveDisks(self, iobj)
7874 self.cfg.ReleaseDRBDMinors(instance)
7877 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7878 feedback_fn("* wiping instance disks...")
7880 _WipeDisks(self, iobj)
7881 except errors.OpExecError:
7882 self.LogWarning("Device wiping failed, reverting...")
7884 _RemoveDisks(self, iobj)
7886 self.cfg.ReleaseDRBDMinors(instance)
7889 feedback_fn("adding instance %s to cluster config" % instance)
7891 self.cfg.AddInstance(iobj, self.proc.GetECId())
7893 # Declare that we don't want to remove the instance lock anymore, as we've
7894 # added the instance to the config
7895 del self.remove_locks[locking.LEVEL_INSTANCE]
7896 # Unlock all the nodes
7897 if self.op.mode == constants.INSTANCE_IMPORT:
7898 nodes_keep = [self.op.src_node]
7899 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7900 if node != self.op.src_node]
7901 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7902 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7904 self.context.glm.release(locking.LEVEL_NODE)
7905 del self.acquired_locks[locking.LEVEL_NODE]
7907 if self.op.wait_for_sync:
7908 disk_abort = not _WaitForSync(self, iobj)
7909 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7910 # make sure the disks are not degraded (still sync-ing is ok)
7912 feedback_fn("* checking mirrors status")
7913 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7918 _RemoveDisks(self, iobj)
7919 self.cfg.RemoveInstance(iobj.name)
7920 # Make sure the instance lock gets removed
7921 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7922 raise errors.OpExecError("There are some degraded disks for"
7925 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7926 if self.op.mode == constants.INSTANCE_CREATE:
7927 if not self.op.no_install:
7928 feedback_fn("* running the instance OS create scripts...")
7929 # FIXME: pass debug option from opcode to backend
7930 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7931 self.op.debug_level)
7932 result.Raise("Could not add os for instance %s"
7933 " on node %s" % (instance, pnode_name))
7935 elif self.op.mode == constants.INSTANCE_IMPORT:
7936 feedback_fn("* running the instance OS import scripts...")
7940 for idx, image in enumerate(self.src_images):
7944 # FIXME: pass debug option from opcode to backend
7945 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7946 constants.IEIO_FILE, (image, ),
7947 constants.IEIO_SCRIPT,
7948 (iobj.disks[idx], idx),
7950 transfers.append(dt)
7953 masterd.instance.TransferInstanceData(self, feedback_fn,
7954 self.op.src_node, pnode_name,
7955 self.pnode.secondary_ip,
7957 if not compat.all(import_result):
7958 self.LogWarning("Some disks for instance %s on node %s were not"
7959 " imported successfully" % (instance, pnode_name))
7961 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7962 feedback_fn("* preparing remote import...")
7963 # The source cluster will stop the instance before attempting to make a
7964 # connection. In some cases stopping an instance can take a long time,
7965 # hence the shutdown timeout is added to the connection timeout.
7966 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7967 self.op.source_shutdown_timeout)
7968 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7970 assert iobj.primary_node == self.pnode.name
7972 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7973 self.source_x509_ca,
7974 self._cds, timeouts)
7975 if not compat.all(disk_results):
7976 # TODO: Should the instance still be started, even if some disks
7977 # failed to import (valid for local imports, too)?
7978 self.LogWarning("Some disks for instance %s on node %s were not"
7979 " imported successfully" % (instance, pnode_name))
7981 # Run rename script on newly imported instance
7982 assert iobj.name == instance
7983 feedback_fn("Running rename script for %s" % instance)
7984 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7985 self.source_instance_name,
7986 self.op.debug_level)
7988 self.LogWarning("Failed to run rename script for %s on node"
7989 " %s: %s" % (instance, pnode_name, result.fail_msg))
7992 # also checked in the prereq part
7993 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7997 iobj.admin_up = True
7998 self.cfg.Update(iobj, feedback_fn)
7999 logging.info("Starting instance %s on node %s", instance, pnode_name)
8000 feedback_fn("* starting instance...")
8001 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8002 result.Raise("Could not start instance")
8004 return list(iobj.all_nodes)
8007 class LUConnectConsole(NoHooksLU):
8008 """Connect to an instance's console.
8010 This is somewhat special in that it returns the command line that
8011 you need to run on the master node in order to connect to the
8020 def ExpandNames(self):
8021 self._ExpandAndLockInstance()
8023 def CheckPrereq(self):
8024 """Check prerequisites.
8026 This checks that the instance is in the cluster.
8029 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8030 assert self.instance is not None, \
8031 "Cannot retrieve locked instance %s" % self.op.instance_name
8032 _CheckNodeOnline(self, self.instance.primary_node)
8034 def Exec(self, feedback_fn):
8035 """Connect to the console of an instance
8038 instance = self.instance
8039 node = instance.primary_node
8041 node_insts = self.rpc.call_instance_list([node],
8042 [instance.hypervisor])[node]
8043 node_insts.Raise("Can't get node information from %s" % node)
8045 if instance.name not in node_insts.payload:
8046 if instance.admin_up:
8047 state = "ERROR_down"
8049 state = "ADMIN_down"
8050 raise errors.OpExecError("Instance %s is not running (state %s)" %
8051 (instance.name, state))
8053 logging.debug("Connecting to console of %s on %s", instance.name, node)
8055 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8056 cluster = self.cfg.GetClusterInfo()
8057 # beparams and hvparams are passed separately, to avoid editing the
8058 # instance and then saving the defaults in the instance itself.
8059 hvparams = cluster.FillHV(instance)
8060 beparams = cluster.FillBE(instance)
8061 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
8064 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
8067 class LUReplaceDisks(LogicalUnit):
8068 """Replace the disks of an instance.
8071 HPATH = "mirrors-replace"
8072 HTYPE = constants.HTYPE_INSTANCE
8075 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
8076 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
8077 ("remote_node", None, ht.TMaybeString),
8078 ("iallocator", None, ht.TMaybeString),
8079 ("early_release", False, ht.TBool),
8083 def CheckArguments(self):
8084 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8087 def ExpandNames(self):
8088 self._ExpandAndLockInstance()
8090 if self.op.iallocator is not None:
8091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8093 elif self.op.remote_node is not None:
8094 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8095 self.op.remote_node = remote_node
8097 # Warning: do not remove the locking of the new secondary here
8098 # unless DRBD8.AddChildren is changed to work in parallel;
8099 # currently it doesn't since parallel invocations of
8100 # FindUnusedMinor will conflict
8101 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8102 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8105 self.needed_locks[locking.LEVEL_NODE] = []
8106 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8108 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8109 self.op.iallocator, self.op.remote_node,
8110 self.op.disks, False, self.op.early_release)
8112 self.tasklets = [self.replacer]
8114 def DeclareLocks(self, level):
8115 # If we're not already locking all nodes in the set we have to declare the
8116 # instance's primary/secondary nodes.
8117 if (level == locking.LEVEL_NODE and
8118 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8119 self._LockInstancesNodes()
8121 def BuildHooksEnv(self):
8124 This runs on the master, the primary and all the secondaries.
8127 instance = self.replacer.instance
8129 "MODE": self.op.mode,
8130 "NEW_SECONDARY": self.op.remote_node,
8131 "OLD_SECONDARY": instance.secondary_nodes[0],
8133 env.update(_BuildInstanceHookEnvByObject(self, instance))
8135 self.cfg.GetMasterNode(),
8136 instance.primary_node,
8138 if self.op.remote_node is not None:
8139 nl.append(self.op.remote_node)
8143 class TLReplaceDisks(Tasklet):
8144 """Replaces disks for an instance.
8146 Note: Locking is not within the scope of this class.
8149 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8150 disks, delay_iallocator, early_release):
8151 """Initializes this class.
8154 Tasklet.__init__(self, lu)
8157 self.instance_name = instance_name
8159 self.iallocator_name = iallocator_name
8160 self.remote_node = remote_node
8162 self.delay_iallocator = delay_iallocator
8163 self.early_release = early_release
8166 self.instance = None
8167 self.new_node = None
8168 self.target_node = None
8169 self.other_node = None
8170 self.remote_node_info = None
8171 self.node_secondary_ip = None
8174 def CheckArguments(mode, remote_node, iallocator):
8175 """Helper function for users of this class.
8178 # check for valid parameter combination
8179 if mode == constants.REPLACE_DISK_CHG:
8180 if remote_node is None and iallocator is None:
8181 raise errors.OpPrereqError("When changing the secondary either an"
8182 " iallocator script must be used or the"
8183 " new node given", errors.ECODE_INVAL)
8185 if remote_node is not None and iallocator is not None:
8186 raise errors.OpPrereqError("Give either the iallocator or the new"
8187 " secondary, not both", errors.ECODE_INVAL)
8189 elif remote_node is not None or iallocator is not None:
8190 # Not replacing the secondary
8191 raise errors.OpPrereqError("The iallocator and new node options can"
8192 " only be used when changing the"
8193 " secondary node", errors.ECODE_INVAL)
8196 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8197 """Compute a new secondary node using an IAllocator.
8200 ial = IAllocator(lu.cfg, lu.rpc,
8201 mode=constants.IALLOCATOR_MODE_RELOC,
8203 relocate_from=relocate_from)
8205 ial.Run(iallocator_name)
8208 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8209 " %s" % (iallocator_name, ial.info),
8212 if len(ial.result) != ial.required_nodes:
8213 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8214 " of nodes (%s), required %s" %
8216 len(ial.result), ial.required_nodes),
8219 remote_node_name = ial.result[0]
8221 lu.LogInfo("Selected new secondary for instance '%s': %s",
8222 instance_name, remote_node_name)
8224 return remote_node_name
8226 def _FindFaultyDisks(self, node_name):
8227 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8230 def CheckPrereq(self):
8231 """Check prerequisites.
8233 This checks that the instance is in the cluster.
8236 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8237 assert instance is not None, \
8238 "Cannot retrieve locked instance %s" % self.instance_name
8240 if instance.disk_template != constants.DT_DRBD8:
8241 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8242 " instances", errors.ECODE_INVAL)
8244 if len(instance.secondary_nodes) != 1:
8245 raise errors.OpPrereqError("The instance has a strange layout,"
8246 " expected one secondary but found %d" %
8247 len(instance.secondary_nodes),
8250 if not self.delay_iallocator:
8251 self._CheckPrereq2()
8253 def _CheckPrereq2(self):
8254 """Check prerequisites, second part.
8256 This function should always be part of CheckPrereq. It was separated and is
8257 now called from Exec because during node evacuation iallocator was only
8258 called with an unmodified cluster model, not taking planned changes into
8262 instance = self.instance
8263 secondary_node = instance.secondary_nodes[0]
8265 if self.iallocator_name is None:
8266 remote_node = self.remote_node
8268 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8269 instance.name, instance.secondary_nodes)
8271 if remote_node is not None:
8272 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8273 assert self.remote_node_info is not None, \
8274 "Cannot retrieve locked node %s" % remote_node
8276 self.remote_node_info = None
8278 if remote_node == self.instance.primary_node:
8279 raise errors.OpPrereqError("The specified node is the primary node of"
8280 " the instance.", errors.ECODE_INVAL)
8282 if remote_node == secondary_node:
8283 raise errors.OpPrereqError("The specified node is already the"
8284 " secondary node of the instance.",
8287 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8288 constants.REPLACE_DISK_CHG):
8289 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8292 if self.mode == constants.REPLACE_DISK_AUTO:
8293 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8294 faulty_secondary = self._FindFaultyDisks(secondary_node)
8296 if faulty_primary and faulty_secondary:
8297 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8298 " one node and can not be repaired"
8299 " automatically" % self.instance_name,
8303 self.disks = faulty_primary
8304 self.target_node = instance.primary_node
8305 self.other_node = secondary_node
8306 check_nodes = [self.target_node, self.other_node]
8307 elif faulty_secondary:
8308 self.disks = faulty_secondary
8309 self.target_node = secondary_node
8310 self.other_node = instance.primary_node
8311 check_nodes = [self.target_node, self.other_node]
8317 # Non-automatic modes
8318 if self.mode == constants.REPLACE_DISK_PRI:
8319 self.target_node = instance.primary_node
8320 self.other_node = secondary_node
8321 check_nodes = [self.target_node, self.other_node]
8323 elif self.mode == constants.REPLACE_DISK_SEC:
8324 self.target_node = secondary_node
8325 self.other_node = instance.primary_node
8326 check_nodes = [self.target_node, self.other_node]
8328 elif self.mode == constants.REPLACE_DISK_CHG:
8329 self.new_node = remote_node
8330 self.other_node = instance.primary_node
8331 self.target_node = secondary_node
8332 check_nodes = [self.new_node, self.other_node]
8334 _CheckNodeNotDrained(self.lu, remote_node)
8335 _CheckNodeVmCapable(self.lu, remote_node)
8337 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8338 assert old_node_info is not None
8339 if old_node_info.offline and not self.early_release:
8340 # doesn't make sense to delay the release
8341 self.early_release = True
8342 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8343 " early-release mode", secondary_node)
8346 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8349 # If not specified all disks should be replaced
8351 self.disks = range(len(self.instance.disks))
8353 for node in check_nodes:
8354 _CheckNodeOnline(self.lu, node)
8356 # Check whether disks are valid
8357 for disk_idx in self.disks:
8358 instance.FindDisk(disk_idx)
8360 # Get secondary node IP addresses
8363 for node_name in [self.target_node, self.other_node, self.new_node]:
8364 if node_name is not None:
8365 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8367 self.node_secondary_ip = node_2nd_ip
8369 def Exec(self, feedback_fn):
8370 """Execute disk replacement.
8372 This dispatches the disk replacement to the appropriate handler.
8375 if self.delay_iallocator:
8376 self._CheckPrereq2()
8379 feedback_fn("No disks need replacement")
8382 feedback_fn("Replacing disk(s) %s for %s" %
8383 (utils.CommaJoin(self.disks), self.instance.name))
8385 activate_disks = (not self.instance.admin_up)
8387 # Activate the instance disks if we're replacing them on a down instance
8389 _StartInstanceDisks(self.lu, self.instance, True)
8392 # Should we replace the secondary node?
8393 if self.new_node is not None:
8394 fn = self._ExecDrbd8Secondary
8396 fn = self._ExecDrbd8DiskOnly
8398 return fn(feedback_fn)
8401 # Deactivate the instance disks if we're replacing them on a
8404 _SafeShutdownInstanceDisks(self.lu, self.instance)
8406 def _CheckVolumeGroup(self, nodes):
8407 self.lu.LogInfo("Checking volume groups")
8409 vgname = self.cfg.GetVGName()
8411 # Make sure volume group exists on all involved nodes
8412 results = self.rpc.call_vg_list(nodes)
8414 raise errors.OpExecError("Can't list volume groups on the nodes")
8418 res.Raise("Error checking node %s" % node)
8419 if vgname not in res.payload:
8420 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8423 def _CheckDisksExistence(self, nodes):
8424 # Check disk existence
8425 for idx, dev in enumerate(self.instance.disks):
8426 if idx not in self.disks:
8430 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8431 self.cfg.SetDiskID(dev, node)
8433 result = self.rpc.call_blockdev_find(node, dev)
8435 msg = result.fail_msg
8436 if msg or not result.payload:
8438 msg = "disk not found"
8439 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8442 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8443 for idx, dev in enumerate(self.instance.disks):
8444 if idx not in self.disks:
8447 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8450 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8452 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8453 " replace disks for instance %s" %
8454 (node_name, self.instance.name))
8456 def _CreateNewStorage(self, node_name):
8457 vgname = self.cfg.GetVGName()
8460 for idx, dev in enumerate(self.instance.disks):
8461 if idx not in self.disks:
8464 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8466 self.cfg.SetDiskID(dev, node_name)
8468 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8469 names = _GenerateUniqueNames(self.lu, lv_names)
8471 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8472 logical_id=(vgname, names[0]))
8473 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8474 logical_id=(vgname, names[1]))
8476 new_lvs = [lv_data, lv_meta]
8477 old_lvs = dev.children
8478 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8480 # we pass force_create=True to force the LVM creation
8481 for new_lv in new_lvs:
8482 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8483 _GetInstanceInfoText(self.instance), False)
8487 def _CheckDevices(self, node_name, iv_names):
8488 for name, (dev, _, _) in iv_names.iteritems():
8489 self.cfg.SetDiskID(dev, node_name)
8491 result = self.rpc.call_blockdev_find(node_name, dev)
8493 msg = result.fail_msg
8494 if msg or not result.payload:
8496 msg = "disk not found"
8497 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8500 if result.payload.is_degraded:
8501 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8503 def _RemoveOldStorage(self, node_name, iv_names):
8504 for name, (_, old_lvs, _) in iv_names.iteritems():
8505 self.lu.LogInfo("Remove logical volumes for %s" % name)
8508 self.cfg.SetDiskID(lv, node_name)
8510 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8512 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8513 hint="remove unused LVs manually")
8515 def _ReleaseNodeLock(self, node_name):
8516 """Releases the lock for a given node."""
8517 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8519 def _ExecDrbd8DiskOnly(self, feedback_fn):
8520 """Replace a disk on the primary or secondary for DRBD 8.
8522 The algorithm for replace is quite complicated:
8524 1. for each disk to be replaced:
8526 1. create new LVs on the target node with unique names
8527 1. detach old LVs from the drbd device
8528 1. rename old LVs to name_replaced.<time_t>
8529 1. rename new LVs to old LVs
8530 1. attach the new LVs (with the old names now) to the drbd device
8532 1. wait for sync across all devices
8534 1. for each modified disk:
8536 1. remove old LVs (which have the name name_replaces.<time_t>)
8538 Failures are not very well handled.
8543 # Step: check device activation
8544 self.lu.LogStep(1, steps_total, "Check device existence")
8545 self._CheckDisksExistence([self.other_node, self.target_node])
8546 self._CheckVolumeGroup([self.target_node, self.other_node])
8548 # Step: check other node consistency
8549 self.lu.LogStep(2, steps_total, "Check peer consistency")
8550 self._CheckDisksConsistency(self.other_node,
8551 self.other_node == self.instance.primary_node,
8554 # Step: create new storage
8555 self.lu.LogStep(3, steps_total, "Allocate new storage")
8556 iv_names = self._CreateNewStorage(self.target_node)
8558 # Step: for each lv, detach+rename*2+attach
8559 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8560 for dev, old_lvs, new_lvs in iv_names.itervalues():
8561 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8563 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8565 result.Raise("Can't detach drbd from local storage on node"
8566 " %s for device %s" % (self.target_node, dev.iv_name))
8568 #cfg.Update(instance)
8570 # ok, we created the new LVs, so now we know we have the needed
8571 # storage; as such, we proceed on the target node to rename
8572 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8573 # using the assumption that logical_id == physical_id (which in
8574 # turn is the unique_id on that node)
8576 # FIXME(iustin): use a better name for the replaced LVs
8577 temp_suffix = int(time.time())
8578 ren_fn = lambda d, suff: (d.physical_id[0],
8579 d.physical_id[1] + "_replaced-%s" % suff)
8581 # Build the rename list based on what LVs exist on the node
8582 rename_old_to_new = []
8583 for to_ren in old_lvs:
8584 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8585 if not result.fail_msg and result.payload:
8587 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8589 self.lu.LogInfo("Renaming the old LVs on the target node")
8590 result = self.rpc.call_blockdev_rename(self.target_node,
8592 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8594 # Now we rename the new LVs to the old LVs
8595 self.lu.LogInfo("Renaming the new LVs on the target node")
8596 rename_new_to_old = [(new, old.physical_id)
8597 for old, new in zip(old_lvs, new_lvs)]
8598 result = self.rpc.call_blockdev_rename(self.target_node,
8600 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8602 for old, new in zip(old_lvs, new_lvs):
8603 new.logical_id = old.logical_id
8604 self.cfg.SetDiskID(new, self.target_node)
8606 for disk in old_lvs:
8607 disk.logical_id = ren_fn(disk, temp_suffix)
8608 self.cfg.SetDiskID(disk, self.target_node)
8610 # Now that the new lvs have the old name, we can add them to the device
8611 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8612 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8614 msg = result.fail_msg
8616 for new_lv in new_lvs:
8617 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8620 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8621 hint=("cleanup manually the unused logical"
8623 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8625 dev.children = new_lvs
8627 self.cfg.Update(self.instance, feedback_fn)
8630 if self.early_release:
8631 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8633 self._RemoveOldStorage(self.target_node, iv_names)
8634 # WARNING: we release both node locks here, do not do other RPCs
8635 # than WaitForSync to the primary node
8636 self._ReleaseNodeLock([self.target_node, self.other_node])
8639 # This can fail as the old devices are degraded and _WaitForSync
8640 # does a combined result over all disks, so we don't check its return value
8641 self.lu.LogStep(cstep, steps_total, "Sync devices")
8643 _WaitForSync(self.lu, self.instance)
8645 # Check all devices manually
8646 self._CheckDevices(self.instance.primary_node, iv_names)
8648 # Step: remove old storage
8649 if not self.early_release:
8650 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8652 self._RemoveOldStorage(self.target_node, iv_names)
8654 def _ExecDrbd8Secondary(self, feedback_fn):
8655 """Replace the secondary node for DRBD 8.
8657 The algorithm for replace is quite complicated:
8658 - for all disks of the instance:
8659 - create new LVs on the new node with same names
8660 - shutdown the drbd device on the old secondary
8661 - disconnect the drbd network on the primary
8662 - create the drbd device on the new secondary
8663 - network attach the drbd on the primary, using an artifice:
8664 the drbd code for Attach() will connect to the network if it
8665 finds a device which is connected to the good local disks but
8667 - wait for sync across all devices
8668 - remove all disks from the old secondary
8670 Failures are not very well handled.
8675 # Step: check device activation
8676 self.lu.LogStep(1, steps_total, "Check device existence")
8677 self._CheckDisksExistence([self.instance.primary_node])
8678 self._CheckVolumeGroup([self.instance.primary_node])
8680 # Step: check other node consistency
8681 self.lu.LogStep(2, steps_total, "Check peer consistency")
8682 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8684 # Step: create new storage
8685 self.lu.LogStep(3, steps_total, "Allocate new storage")
8686 for idx, dev in enumerate(self.instance.disks):
8687 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8688 (self.new_node, idx))
8689 # we pass force_create=True to force LVM creation
8690 for new_lv in dev.children:
8691 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8692 _GetInstanceInfoText(self.instance), False)
8694 # Step 4: dbrd minors and drbd setups changes
8695 # after this, we must manually remove the drbd minors on both the
8696 # error and the success paths
8697 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8698 minors = self.cfg.AllocateDRBDMinor([self.new_node
8699 for dev in self.instance.disks],
8701 logging.debug("Allocated minors %r", minors)
8704 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8705 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8706 (self.new_node, idx))
8707 # create new devices on new_node; note that we create two IDs:
8708 # one without port, so the drbd will be activated without
8709 # networking information on the new node at this stage, and one
8710 # with network, for the latter activation in step 4
8711 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8712 if self.instance.primary_node == o_node1:
8715 assert self.instance.primary_node == o_node2, "Three-node instance?"
8718 new_alone_id = (self.instance.primary_node, self.new_node, None,
8719 p_minor, new_minor, o_secret)
8720 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8721 p_minor, new_minor, o_secret)
8723 iv_names[idx] = (dev, dev.children, new_net_id)
8724 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8726 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8727 logical_id=new_alone_id,
8728 children=dev.children,
8731 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8732 _GetInstanceInfoText(self.instance), False)
8733 except errors.GenericError:
8734 self.cfg.ReleaseDRBDMinors(self.instance.name)
8737 # We have new devices, shutdown the drbd on the old secondary
8738 for idx, dev in enumerate(self.instance.disks):
8739 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8740 self.cfg.SetDiskID(dev, self.target_node)
8741 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8743 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8744 "node: %s" % (idx, msg),
8745 hint=("Please cleanup this device manually as"
8746 " soon as possible"))
8748 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8749 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8750 self.node_secondary_ip,
8751 self.instance.disks)\
8752 [self.instance.primary_node]
8754 msg = result.fail_msg
8756 # detaches didn't succeed (unlikely)
8757 self.cfg.ReleaseDRBDMinors(self.instance.name)
8758 raise errors.OpExecError("Can't detach the disks from the network on"
8759 " old node: %s" % (msg,))
8761 # if we managed to detach at least one, we update all the disks of
8762 # the instance to point to the new secondary
8763 self.lu.LogInfo("Updating instance configuration")
8764 for dev, _, new_logical_id in iv_names.itervalues():
8765 dev.logical_id = new_logical_id
8766 self.cfg.SetDiskID(dev, self.instance.primary_node)
8768 self.cfg.Update(self.instance, feedback_fn)
8770 # and now perform the drbd attach
8771 self.lu.LogInfo("Attaching primary drbds to new secondary"
8772 " (standalone => connected)")
8773 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8775 self.node_secondary_ip,
8776 self.instance.disks,
8779 for to_node, to_result in result.items():
8780 msg = to_result.fail_msg
8782 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8784 hint=("please do a gnt-instance info to see the"
8785 " status of disks"))
8787 if self.early_release:
8788 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8790 self._RemoveOldStorage(self.target_node, iv_names)
8791 # WARNING: we release all node locks here, do not do other RPCs
8792 # than WaitForSync to the primary node
8793 self._ReleaseNodeLock([self.instance.primary_node,
8798 # This can fail as the old devices are degraded and _WaitForSync
8799 # does a combined result over all disks, so we don't check its return value
8800 self.lu.LogStep(cstep, steps_total, "Sync devices")
8802 _WaitForSync(self.lu, self.instance)
8804 # Check all devices manually
8805 self._CheckDevices(self.instance.primary_node, iv_names)
8807 # Step: remove old storage
8808 if not self.early_release:
8809 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8810 self._RemoveOldStorage(self.target_node, iv_names)
8813 class LURepairNodeStorage(NoHooksLU):
8814 """Repairs the volume group on a node.
8819 ("storage_type", ht.NoDefault, _CheckStorageType),
8820 ("name", ht.NoDefault, ht.TNonEmptyString),
8821 ("ignore_consistency", False, ht.TBool),
8825 def CheckArguments(self):
8826 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8828 storage_type = self.op.storage_type
8830 if (constants.SO_FIX_CONSISTENCY not in
8831 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8832 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8833 " repaired" % storage_type,
8836 def ExpandNames(self):
8837 self.needed_locks = {
8838 locking.LEVEL_NODE: [self.op.node_name],
8841 def _CheckFaultyDisks(self, instance, node_name):
8842 """Ensure faulty disks abort the opcode or at least warn."""
8844 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8846 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8847 " node '%s'" % (instance.name, node_name),
8849 except errors.OpPrereqError, err:
8850 if self.op.ignore_consistency:
8851 self.proc.LogWarning(str(err.args[0]))
8855 def CheckPrereq(self):
8856 """Check prerequisites.
8859 # Check whether any instance on this node has faulty disks
8860 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8861 if not inst.admin_up:
8863 check_nodes = set(inst.all_nodes)
8864 check_nodes.discard(self.op.node_name)
8865 for inst_node_name in check_nodes:
8866 self._CheckFaultyDisks(inst, inst_node_name)
8868 def Exec(self, feedback_fn):
8869 feedback_fn("Repairing storage unit '%s' on %s ..." %
8870 (self.op.name, self.op.node_name))
8872 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8873 result = self.rpc.call_storage_execute(self.op.node_name,
8874 self.op.storage_type, st_args,
8876 constants.SO_FIX_CONSISTENCY)
8877 result.Raise("Failed to repair storage unit '%s' on %s" %
8878 (self.op.name, self.op.node_name))
8881 class LUNodeEvacuationStrategy(NoHooksLU):
8882 """Computes the node evacuation strategy.
8886 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8887 ("remote_node", None, ht.TMaybeString),
8888 ("iallocator", None, ht.TMaybeString),
8892 def CheckArguments(self):
8893 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8895 def ExpandNames(self):
8896 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8897 self.needed_locks = locks = {}
8898 if self.op.remote_node is None:
8899 locks[locking.LEVEL_NODE] = locking.ALL_SET
8901 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8902 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8904 def Exec(self, feedback_fn):
8905 if self.op.remote_node is not None:
8907 for node in self.op.nodes:
8908 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8911 if i.primary_node == self.op.remote_node:
8912 raise errors.OpPrereqError("Node %s is the primary node of"
8913 " instance %s, cannot use it as"
8915 (self.op.remote_node, i.name),
8917 result.append([i.name, self.op.remote_node])
8919 ial = IAllocator(self.cfg, self.rpc,
8920 mode=constants.IALLOCATOR_MODE_MEVAC,
8921 evac_nodes=self.op.nodes)
8922 ial.Run(self.op.iallocator, validate=True)
8924 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8930 class LUGrowDisk(LogicalUnit):
8931 """Grow a disk of an instance.
8935 HTYPE = constants.HTYPE_INSTANCE
8938 ("disk", ht.NoDefault, ht.TInt),
8939 ("amount", ht.NoDefault, ht.TInt),
8940 ("wait_for_sync", True, ht.TBool),
8944 def ExpandNames(self):
8945 self._ExpandAndLockInstance()
8946 self.needed_locks[locking.LEVEL_NODE] = []
8947 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8949 def DeclareLocks(self, level):
8950 if level == locking.LEVEL_NODE:
8951 self._LockInstancesNodes()
8953 def BuildHooksEnv(self):
8956 This runs on the master, the primary and all the secondaries.
8960 "DISK": self.op.disk,
8961 "AMOUNT": self.op.amount,
8963 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8964 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8967 def CheckPrereq(self):
8968 """Check prerequisites.
8970 This checks that the instance is in the cluster.
8973 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8974 assert instance is not None, \
8975 "Cannot retrieve locked instance %s" % self.op.instance_name
8976 nodenames = list(instance.all_nodes)
8977 for node in nodenames:
8978 _CheckNodeOnline(self, node)
8980 self.instance = instance
8982 if instance.disk_template not in constants.DTS_GROWABLE:
8983 raise errors.OpPrereqError("Instance's disk layout does not support"
8984 " growing.", errors.ECODE_INVAL)
8986 self.disk = instance.FindDisk(self.op.disk)
8988 if instance.disk_template != constants.DT_FILE:
8989 # TODO: check the free disk space for file, when that feature
8991 _CheckNodesFreeDiskPerVG(self, nodenames,
8992 {self.disk.physical_id[0]: self.op.amount})
8994 def Exec(self, feedback_fn):
8995 """Execute disk grow.
8998 instance = self.instance
9001 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9003 raise errors.OpExecError("Cannot activate block device to grow")
9005 for node in instance.all_nodes:
9006 self.cfg.SetDiskID(disk, node)
9007 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9008 result.Raise("Grow request failed to node %s" % node)
9010 # TODO: Rewrite code to work properly
9011 # DRBD goes into sync mode for a short amount of time after executing the
9012 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9013 # calling "resize" in sync mode fails. Sleeping for a short amount of
9014 # time is a work-around.
9017 disk.RecordGrow(self.op.amount)
9018 self.cfg.Update(instance, feedback_fn)
9019 if self.op.wait_for_sync:
9020 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9022 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9023 " status.\nPlease check the instance.")
9024 if not instance.admin_up:
9025 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9026 elif not instance.admin_up:
9027 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9028 " not supposed to be running because no wait for"
9029 " sync mode was requested.")
9032 class LUQueryInstanceData(NoHooksLU):
9033 """Query runtime instance data.
9037 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9038 ("static", False, ht.TBool),
9042 def ExpandNames(self):
9043 self.needed_locks = {}
9044 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9046 if self.op.instances:
9047 self.wanted_names = []
9048 for name in self.op.instances:
9049 full_name = _ExpandInstanceName(self.cfg, name)
9050 self.wanted_names.append(full_name)
9051 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9053 self.wanted_names = None
9054 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9056 self.needed_locks[locking.LEVEL_NODE] = []
9057 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9059 def DeclareLocks(self, level):
9060 if level == locking.LEVEL_NODE:
9061 self._LockInstancesNodes()
9063 def CheckPrereq(self):
9064 """Check prerequisites.
9066 This only checks the optional instance list against the existing names.
9069 if self.wanted_names is None:
9070 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9072 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9073 in self.wanted_names]
9075 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9076 """Returns the status of a block device
9079 if self.op.static or not node:
9082 self.cfg.SetDiskID(dev, node)
9084 result = self.rpc.call_blockdev_find(node, dev)
9088 result.Raise("Can't compute disk status for %s" % instance_name)
9090 status = result.payload
9094 return (status.dev_path, status.major, status.minor,
9095 status.sync_percent, status.estimated_time,
9096 status.is_degraded, status.ldisk_status)
9098 def _ComputeDiskStatus(self, instance, snode, dev):
9099 """Compute block device status.
9102 if dev.dev_type in constants.LDS_DRBD:
9103 # we change the snode then (otherwise we use the one passed in)
9104 if dev.logical_id[0] == instance.primary_node:
9105 snode = dev.logical_id[1]
9107 snode = dev.logical_id[0]
9109 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9111 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9114 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9115 for child in dev.children]
9120 "iv_name": dev.iv_name,
9121 "dev_type": dev.dev_type,
9122 "logical_id": dev.logical_id,
9123 "physical_id": dev.physical_id,
9124 "pstatus": dev_pstatus,
9125 "sstatus": dev_sstatus,
9126 "children": dev_children,
9133 def Exec(self, feedback_fn):
9134 """Gather and return data"""
9137 cluster = self.cfg.GetClusterInfo()
9139 for instance in self.wanted_instances:
9140 if not self.op.static:
9141 remote_info = self.rpc.call_instance_info(instance.primary_node,
9143 instance.hypervisor)
9144 remote_info.Raise("Error checking node %s" % instance.primary_node)
9145 remote_info = remote_info.payload
9146 if remote_info and "state" in remote_info:
9149 remote_state = "down"
9152 if instance.admin_up:
9155 config_state = "down"
9157 disks = [self._ComputeDiskStatus(instance, None, device)
9158 for device in instance.disks]
9161 "name": instance.name,
9162 "config_state": config_state,
9163 "run_state": remote_state,
9164 "pnode": instance.primary_node,
9165 "snodes": instance.secondary_nodes,
9167 # this happens to be the same format used for hooks
9168 "nics": _NICListToTuple(self, instance.nics),
9169 "disk_template": instance.disk_template,
9171 "hypervisor": instance.hypervisor,
9172 "network_port": instance.network_port,
9173 "hv_instance": instance.hvparams,
9174 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9175 "be_instance": instance.beparams,
9176 "be_actual": cluster.FillBE(instance),
9177 "os_instance": instance.osparams,
9178 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9179 "serial_no": instance.serial_no,
9180 "mtime": instance.mtime,
9181 "ctime": instance.ctime,
9182 "uuid": instance.uuid,
9185 result[instance.name] = idict
9190 class LUSetInstanceParams(LogicalUnit):
9191 """Modifies an instances's parameters.
9194 HPATH = "instance-modify"
9195 HTYPE = constants.HTYPE_INSTANCE
9198 ("nics", ht.EmptyList, ht.TList),
9199 ("disks", ht.EmptyList, ht.TList),
9200 ("beparams", ht.EmptyDict, ht.TDict),
9201 ("hvparams", ht.EmptyDict, ht.TDict),
9202 ("disk_template", None, ht.TMaybeString),
9203 ("remote_node", None, ht.TMaybeString),
9204 ("os_name", None, ht.TMaybeString),
9205 ("force_variant", False, ht.TBool),
9206 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9211 def CheckArguments(self):
9212 if not (self.op.nics or self.op.disks or self.op.disk_template or
9213 self.op.hvparams or self.op.beparams or self.op.os_name):
9214 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9216 if self.op.hvparams:
9217 _CheckGlobalHvParams(self.op.hvparams)
9221 for disk_op, disk_dict in self.op.disks:
9222 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9223 if disk_op == constants.DDM_REMOVE:
9226 elif disk_op == constants.DDM_ADD:
9229 if not isinstance(disk_op, int):
9230 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9231 if not isinstance(disk_dict, dict):
9232 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9233 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9235 if disk_op == constants.DDM_ADD:
9236 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9237 if mode not in constants.DISK_ACCESS_SET:
9238 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9240 size = disk_dict.get('size', None)
9242 raise errors.OpPrereqError("Required disk parameter size missing",
9246 except (TypeError, ValueError), err:
9247 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9248 str(err), errors.ECODE_INVAL)
9249 disk_dict['size'] = size
9251 # modification of disk
9252 if 'size' in disk_dict:
9253 raise errors.OpPrereqError("Disk size change not possible, use"
9254 " grow-disk", errors.ECODE_INVAL)
9256 if disk_addremove > 1:
9257 raise errors.OpPrereqError("Only one disk add or remove operation"
9258 " supported at a time", errors.ECODE_INVAL)
9260 if self.op.disks and self.op.disk_template is not None:
9261 raise errors.OpPrereqError("Disk template conversion and other disk"
9262 " changes not supported at the same time",
9265 if self.op.disk_template:
9266 _CheckDiskTemplate(self.op.disk_template)
9267 if (self.op.disk_template in constants.DTS_NET_MIRROR and
9268 self.op.remote_node is None):
9269 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9270 " one requires specifying a secondary node",
9275 for nic_op, nic_dict in self.op.nics:
9276 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9277 if nic_op == constants.DDM_REMOVE:
9280 elif nic_op == constants.DDM_ADD:
9283 if not isinstance(nic_op, int):
9284 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9285 if not isinstance(nic_dict, dict):
9286 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9287 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9289 # nic_dict should be a dict
9290 nic_ip = nic_dict.get('ip', None)
9291 if nic_ip is not None:
9292 if nic_ip.lower() == constants.VALUE_NONE:
9293 nic_dict['ip'] = None
9295 if not netutils.IPAddress.IsValid(nic_ip):
9296 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9299 nic_bridge = nic_dict.get('bridge', None)
9300 nic_link = nic_dict.get('link', None)
9301 if nic_bridge and nic_link:
9302 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9303 " at the same time", errors.ECODE_INVAL)
9304 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9305 nic_dict['bridge'] = None
9306 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9307 nic_dict['link'] = None
9309 if nic_op == constants.DDM_ADD:
9310 nic_mac = nic_dict.get('mac', None)
9312 nic_dict['mac'] = constants.VALUE_AUTO
9314 if 'mac' in nic_dict:
9315 nic_mac = nic_dict['mac']
9316 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9317 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9319 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9320 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9321 " modifying an existing nic",
9324 if nic_addremove > 1:
9325 raise errors.OpPrereqError("Only one NIC add or remove operation"
9326 " supported at a time", errors.ECODE_INVAL)
9328 def ExpandNames(self):
9329 self._ExpandAndLockInstance()
9330 self.needed_locks[locking.LEVEL_NODE] = []
9331 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9333 def DeclareLocks(self, level):
9334 if level == locking.LEVEL_NODE:
9335 self._LockInstancesNodes()
9336 if self.op.disk_template and self.op.remote_node:
9337 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9338 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9340 def BuildHooksEnv(self):
9343 This runs on the master, primary and secondaries.
9347 if constants.BE_MEMORY in self.be_new:
9348 args['memory'] = self.be_new[constants.BE_MEMORY]
9349 if constants.BE_VCPUS in self.be_new:
9350 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9351 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9352 # information at all.
9355 nic_override = dict(self.op.nics)
9356 for idx, nic in enumerate(self.instance.nics):
9357 if idx in nic_override:
9358 this_nic_override = nic_override[idx]
9360 this_nic_override = {}
9361 if 'ip' in this_nic_override:
9362 ip = this_nic_override['ip']
9365 if 'mac' in this_nic_override:
9366 mac = this_nic_override['mac']
9369 if idx in self.nic_pnew:
9370 nicparams = self.nic_pnew[idx]
9372 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9373 mode = nicparams[constants.NIC_MODE]
9374 link = nicparams[constants.NIC_LINK]
9375 args['nics'].append((ip, mac, mode, link))
9376 if constants.DDM_ADD in nic_override:
9377 ip = nic_override[constants.DDM_ADD].get('ip', None)
9378 mac = nic_override[constants.DDM_ADD]['mac']
9379 nicparams = self.nic_pnew[constants.DDM_ADD]
9380 mode = nicparams[constants.NIC_MODE]
9381 link = nicparams[constants.NIC_LINK]
9382 args['nics'].append((ip, mac, mode, link))
9383 elif constants.DDM_REMOVE in nic_override:
9384 del args['nics'][-1]
9386 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9387 if self.op.disk_template:
9388 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9389 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9392 def CheckPrereq(self):
9393 """Check prerequisites.
9395 This only checks the instance list against the existing names.
9398 # checking the new params on the primary/secondary nodes
9400 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9401 cluster = self.cluster = self.cfg.GetClusterInfo()
9402 assert self.instance is not None, \
9403 "Cannot retrieve locked instance %s" % self.op.instance_name
9404 pnode = instance.primary_node
9405 nodelist = list(instance.all_nodes)
9408 if self.op.os_name and not self.op.force:
9409 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9410 self.op.force_variant)
9411 instance_os = self.op.os_name
9413 instance_os = instance.os
9415 if self.op.disk_template:
9416 if instance.disk_template == self.op.disk_template:
9417 raise errors.OpPrereqError("Instance already has disk template %s" %
9418 instance.disk_template, errors.ECODE_INVAL)
9420 if (instance.disk_template,
9421 self.op.disk_template) not in self._DISK_CONVERSIONS:
9422 raise errors.OpPrereqError("Unsupported disk template conversion from"
9423 " %s to %s" % (instance.disk_template,
9424 self.op.disk_template),
9426 _CheckInstanceDown(self, instance, "cannot change disk template")
9427 if self.op.disk_template in constants.DTS_NET_MIRROR:
9428 if self.op.remote_node == pnode:
9429 raise errors.OpPrereqError("Given new secondary node %s is the same"
9430 " as the primary node of the instance" %
9431 self.op.remote_node, errors.ECODE_STATE)
9432 _CheckNodeOnline(self, self.op.remote_node)
9433 _CheckNodeNotDrained(self, self.op.remote_node)
9434 # FIXME: here we assume that the old instance type is DT_PLAIN
9435 assert instance.disk_template == constants.DT_PLAIN
9436 disks = [{"size": d.size, "vg": d.logical_id[0]}
9437 for d in instance.disks]
9438 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9439 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9441 # hvparams processing
9442 if self.op.hvparams:
9443 hv_type = instance.hypervisor
9444 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9445 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9446 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9449 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9450 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9451 self.hv_new = hv_new # the new actual values
9452 self.hv_inst = i_hvdict # the new dict (without defaults)
9454 self.hv_new = self.hv_inst = {}
9456 # beparams processing
9457 if self.op.beparams:
9458 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9460 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9461 be_new = cluster.SimpleFillBE(i_bedict)
9462 self.be_new = be_new # the new actual values
9463 self.be_inst = i_bedict # the new dict (without defaults)
9465 self.be_new = self.be_inst = {}
9467 # osparams processing
9468 if self.op.osparams:
9469 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9470 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9471 self.os_inst = i_osdict # the new dict (without defaults)
9477 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9478 mem_check_list = [pnode]
9479 if be_new[constants.BE_AUTO_BALANCE]:
9480 # either we changed auto_balance to yes or it was from before
9481 mem_check_list.extend(instance.secondary_nodes)
9482 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9483 instance.hypervisor)
9484 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9485 instance.hypervisor)
9486 pninfo = nodeinfo[pnode]
9487 msg = pninfo.fail_msg
9489 # Assume the primary node is unreachable and go ahead
9490 self.warn.append("Can't get info from primary node %s: %s" %
9492 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9493 self.warn.append("Node data from primary node %s doesn't contain"
9494 " free memory information" % pnode)
9495 elif instance_info.fail_msg:
9496 self.warn.append("Can't get instance runtime information: %s" %
9497 instance_info.fail_msg)
9499 if instance_info.payload:
9500 current_mem = int(instance_info.payload['memory'])
9502 # Assume instance not running
9503 # (there is a slight race condition here, but it's not very probable,
9504 # and we have no other way to check)
9506 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9507 pninfo.payload['memory_free'])
9509 raise errors.OpPrereqError("This change will prevent the instance"
9510 " from starting, due to %d MB of memory"
9511 " missing on its primary node" % miss_mem,
9514 if be_new[constants.BE_AUTO_BALANCE]:
9515 for node, nres in nodeinfo.items():
9516 if node not in instance.secondary_nodes:
9520 self.warn.append("Can't get info from secondary node %s: %s" %
9522 elif not isinstance(nres.payload.get('memory_free', None), int):
9523 self.warn.append("Secondary node %s didn't return free"
9524 " memory information" % node)
9525 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9526 self.warn.append("Not enough memory to failover instance to"
9527 " secondary node %s" % node)
9532 for nic_op, nic_dict in self.op.nics:
9533 if nic_op == constants.DDM_REMOVE:
9534 if not instance.nics:
9535 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9538 if nic_op != constants.DDM_ADD:
9540 if not instance.nics:
9541 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9542 " no NICs" % nic_op,
9544 if nic_op < 0 or nic_op >= len(instance.nics):
9545 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9547 (nic_op, len(instance.nics) - 1),
9549 old_nic_params = instance.nics[nic_op].nicparams
9550 old_nic_ip = instance.nics[nic_op].ip
9555 update_params_dict = dict([(key, nic_dict[key])
9556 for key in constants.NICS_PARAMETERS
9557 if key in nic_dict])
9559 if 'bridge' in nic_dict:
9560 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9562 new_nic_params = _GetUpdatedParams(old_nic_params,
9564 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9565 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9566 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9567 self.nic_pinst[nic_op] = new_nic_params
9568 self.nic_pnew[nic_op] = new_filled_nic_params
9569 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9571 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9572 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9573 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9575 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9577 self.warn.append(msg)
9579 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9580 if new_nic_mode == constants.NIC_MODE_ROUTED:
9581 if 'ip' in nic_dict:
9582 nic_ip = nic_dict['ip']
9586 raise errors.OpPrereqError('Cannot set the nic ip to None'
9587 ' on a routed nic', errors.ECODE_INVAL)
9588 if 'mac' in nic_dict:
9589 nic_mac = nic_dict['mac']
9591 raise errors.OpPrereqError('Cannot set the nic mac to None',
9593 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9594 # otherwise generate the mac
9595 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9597 # or validate/reserve the current one
9599 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9600 except errors.ReservationError:
9601 raise errors.OpPrereqError("MAC address %s already in use"
9602 " in cluster" % nic_mac,
9603 errors.ECODE_NOTUNIQUE)
9606 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9607 raise errors.OpPrereqError("Disk operations not supported for"
9608 " diskless instances",
9610 for disk_op, _ in self.op.disks:
9611 if disk_op == constants.DDM_REMOVE:
9612 if len(instance.disks) == 1:
9613 raise errors.OpPrereqError("Cannot remove the last disk of"
9614 " an instance", errors.ECODE_INVAL)
9615 _CheckInstanceDown(self, instance, "cannot remove disks")
9617 if (disk_op == constants.DDM_ADD and
9618 len(instance.nics) >= constants.MAX_DISKS):
9619 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9620 " add more" % constants.MAX_DISKS,
9622 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9624 if disk_op < 0 or disk_op >= len(instance.disks):
9625 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9627 (disk_op, len(instance.disks)),
9632 def _ConvertPlainToDrbd(self, feedback_fn):
9633 """Converts an instance from plain to drbd.
9636 feedback_fn("Converting template to drbd")
9637 instance = self.instance
9638 pnode = instance.primary_node
9639 snode = self.op.remote_node
9641 # create a fake disk info for _GenerateDiskTemplate
9642 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9643 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9644 instance.name, pnode, [snode],
9645 disk_info, None, None, 0, feedback_fn)
9646 info = _GetInstanceInfoText(instance)
9647 feedback_fn("Creating aditional volumes...")
9648 # first, create the missing data and meta devices
9649 for disk in new_disks:
9650 # unfortunately this is... not too nice
9651 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9653 for child in disk.children:
9654 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9655 # at this stage, all new LVs have been created, we can rename the
9657 feedback_fn("Renaming original volumes...")
9658 rename_list = [(o, n.children[0].logical_id)
9659 for (o, n) in zip(instance.disks, new_disks)]
9660 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9661 result.Raise("Failed to rename original LVs")
9663 feedback_fn("Initializing DRBD devices...")
9664 # all child devices are in place, we can now create the DRBD devices
9665 for disk in new_disks:
9666 for node in [pnode, snode]:
9667 f_create = node == pnode
9668 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9670 # at this point, the instance has been modified
9671 instance.disk_template = constants.DT_DRBD8
9672 instance.disks = new_disks
9673 self.cfg.Update(instance, feedback_fn)
9675 # disks are created, waiting for sync
9676 disk_abort = not _WaitForSync(self, instance)
9678 raise errors.OpExecError("There are some degraded disks for"
9679 " this instance, please cleanup manually")
9681 def _ConvertDrbdToPlain(self, feedback_fn):
9682 """Converts an instance from drbd to plain.
9685 instance = self.instance
9686 assert len(instance.secondary_nodes) == 1
9687 pnode = instance.primary_node
9688 snode = instance.secondary_nodes[0]
9689 feedback_fn("Converting template to plain")
9691 old_disks = instance.disks
9692 new_disks = [d.children[0] for d in old_disks]
9694 # copy over size and mode
9695 for parent, child in zip(old_disks, new_disks):
9696 child.size = parent.size
9697 child.mode = parent.mode
9699 # update instance structure
9700 instance.disks = new_disks
9701 instance.disk_template = constants.DT_PLAIN
9702 self.cfg.Update(instance, feedback_fn)
9704 feedback_fn("Removing volumes on the secondary node...")
9705 for disk in old_disks:
9706 self.cfg.SetDiskID(disk, snode)
9707 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9709 self.LogWarning("Could not remove block device %s on node %s,"
9710 " continuing anyway: %s", disk.iv_name, snode, msg)
9712 feedback_fn("Removing unneeded volumes on the primary node...")
9713 for idx, disk in enumerate(old_disks):
9714 meta = disk.children[1]
9715 self.cfg.SetDiskID(meta, pnode)
9716 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9718 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9719 " continuing anyway: %s", idx, pnode, msg)
9721 def Exec(self, feedback_fn):
9722 """Modifies an instance.
9724 All parameters take effect only at the next restart of the instance.
9727 # Process here the warnings from CheckPrereq, as we don't have a
9728 # feedback_fn there.
9729 for warn in self.warn:
9730 feedback_fn("WARNING: %s" % warn)
9733 instance = self.instance
9735 for disk_op, disk_dict in self.op.disks:
9736 if disk_op == constants.DDM_REMOVE:
9737 # remove the last disk
9738 device = instance.disks.pop()
9739 device_idx = len(instance.disks)
9740 for node, disk in device.ComputeNodeTree(instance.primary_node):
9741 self.cfg.SetDiskID(disk, node)
9742 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9744 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9745 " continuing anyway", device_idx, node, msg)
9746 result.append(("disk/%d" % device_idx, "remove"))
9747 elif disk_op == constants.DDM_ADD:
9749 if instance.disk_template == constants.DT_FILE:
9750 file_driver, file_path = instance.disks[0].logical_id
9751 file_path = os.path.dirname(file_path)
9753 file_driver = file_path = None
9754 disk_idx_base = len(instance.disks)
9755 new_disk = _GenerateDiskTemplate(self,
9756 instance.disk_template,
9757 instance.name, instance.primary_node,
9758 instance.secondary_nodes,
9762 disk_idx_base, feedback_fn)[0]
9763 instance.disks.append(new_disk)
9764 info = _GetInstanceInfoText(instance)
9766 logging.info("Creating volume %s for instance %s",
9767 new_disk.iv_name, instance.name)
9768 # Note: this needs to be kept in sync with _CreateDisks
9770 for node in instance.all_nodes:
9771 f_create = node == instance.primary_node
9773 _CreateBlockDev(self, node, instance, new_disk,
9774 f_create, info, f_create)
9775 except errors.OpExecError, err:
9776 self.LogWarning("Failed to create volume %s (%s) on"
9778 new_disk.iv_name, new_disk, node, err)
9779 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9780 (new_disk.size, new_disk.mode)))
9782 # change a given disk
9783 instance.disks[disk_op].mode = disk_dict['mode']
9784 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9786 if self.op.disk_template:
9787 r_shut = _ShutdownInstanceDisks(self, instance)
9789 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9790 " proceed with disk template conversion")
9791 mode = (instance.disk_template, self.op.disk_template)
9793 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9795 self.cfg.ReleaseDRBDMinors(instance.name)
9797 result.append(("disk_template", self.op.disk_template))
9800 for nic_op, nic_dict in self.op.nics:
9801 if nic_op == constants.DDM_REMOVE:
9802 # remove the last nic
9803 del instance.nics[-1]
9804 result.append(("nic.%d" % len(instance.nics), "remove"))
9805 elif nic_op == constants.DDM_ADD:
9806 # mac and bridge should be set, by now
9807 mac = nic_dict['mac']
9808 ip = nic_dict.get('ip', None)
9809 nicparams = self.nic_pinst[constants.DDM_ADD]
9810 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9811 instance.nics.append(new_nic)
9812 result.append(("nic.%d" % (len(instance.nics) - 1),
9813 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9814 (new_nic.mac, new_nic.ip,
9815 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9816 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9819 for key in 'mac', 'ip':
9821 setattr(instance.nics[nic_op], key, nic_dict[key])
9822 if nic_op in self.nic_pinst:
9823 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9824 for key, val in nic_dict.iteritems():
9825 result.append(("nic.%s/%d" % (key, nic_op), val))
9828 if self.op.hvparams:
9829 instance.hvparams = self.hv_inst
9830 for key, val in self.op.hvparams.iteritems():
9831 result.append(("hv/%s" % key, val))
9834 if self.op.beparams:
9835 instance.beparams = self.be_inst
9836 for key, val in self.op.beparams.iteritems():
9837 result.append(("be/%s" % key, val))
9841 instance.os = self.op.os_name
9844 if self.op.osparams:
9845 instance.osparams = self.os_inst
9846 for key, val in self.op.osparams.iteritems():
9847 result.append(("os/%s" % key, val))
9849 self.cfg.Update(instance, feedback_fn)
9853 _DISK_CONVERSIONS = {
9854 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9855 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9859 class LUQueryExports(NoHooksLU):
9860 """Query the exports list
9864 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9865 ("use_locking", False, ht.TBool),
9869 def ExpandNames(self):
9870 self.needed_locks = {}
9871 self.share_locks[locking.LEVEL_NODE] = 1
9872 if not self.op.nodes:
9873 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9875 self.needed_locks[locking.LEVEL_NODE] = \
9876 _GetWantedNodes(self, self.op.nodes)
9878 def Exec(self, feedback_fn):
9879 """Compute the list of all the exported system images.
9882 @return: a dictionary with the structure node->(export-list)
9883 where export-list is a list of the instances exported on
9887 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9888 rpcresult = self.rpc.call_export_list(self.nodes)
9890 for node in rpcresult:
9891 if rpcresult[node].fail_msg:
9892 result[node] = False
9894 result[node] = rpcresult[node].payload
9899 class LUPrepareExport(NoHooksLU):
9900 """Prepares an instance for an export and returns useful information.
9905 ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9909 def ExpandNames(self):
9910 self._ExpandAndLockInstance()
9912 def CheckPrereq(self):
9913 """Check prerequisites.
9916 instance_name = self.op.instance_name
9918 self.instance = self.cfg.GetInstanceInfo(instance_name)
9919 assert self.instance is not None, \
9920 "Cannot retrieve locked instance %s" % self.op.instance_name
9921 _CheckNodeOnline(self, self.instance.primary_node)
9923 self._cds = _GetClusterDomainSecret()
9925 def Exec(self, feedback_fn):
9926 """Prepares an instance for an export.
9929 instance = self.instance
9931 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9932 salt = utils.GenerateSecret(8)
9934 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9935 result = self.rpc.call_x509_cert_create(instance.primary_node,
9936 constants.RIE_CERT_VALIDITY)
9937 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9939 (name, cert_pem) = result.payload
9941 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9945 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9946 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9948 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9954 class LUExportInstance(LogicalUnit):
9955 """Export an instance to an image in the cluster.
9958 HPATH = "instance-export"
9959 HTYPE = constants.HTYPE_INSTANCE
9962 ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9963 ("shutdown", True, ht.TBool),
9965 ("remove_instance", False, ht.TBool),
9966 ("ignore_remove_failures", False, ht.TBool),
9967 ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9968 ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9969 ("destination_x509_ca", None, ht.TMaybeString),
9973 def CheckArguments(self):
9974 """Check the arguments.
9977 self.x509_key_name = self.op.x509_key_name
9978 self.dest_x509_ca_pem = self.op.destination_x509_ca
9980 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9981 if not self.x509_key_name:
9982 raise errors.OpPrereqError("Missing X509 key name for encryption",
9985 if not self.dest_x509_ca_pem:
9986 raise errors.OpPrereqError("Missing destination X509 CA",
9989 def ExpandNames(self):
9990 self._ExpandAndLockInstance()
9992 # Lock all nodes for local exports
9993 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9994 # FIXME: lock only instance primary and destination node
9996 # Sad but true, for now we have do lock all nodes, as we don't know where
9997 # the previous export might be, and in this LU we search for it and
9998 # remove it from its current node. In the future we could fix this by:
9999 # - making a tasklet to search (share-lock all), then create the
10000 # new one, then one to remove, after
10001 # - removing the removal operation altogether
10002 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10004 def DeclareLocks(self, level):
10005 """Last minute lock declaration."""
10006 # All nodes are locked anyway, so nothing to do here.
10008 def BuildHooksEnv(self):
10009 """Build hooks env.
10011 This will run on the master, primary node and target node.
10015 "EXPORT_MODE": self.op.mode,
10016 "EXPORT_NODE": self.op.target_node,
10017 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10018 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10019 # TODO: Generic function for boolean env variables
10020 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10023 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10025 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10027 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10028 nl.append(self.op.target_node)
10032 def CheckPrereq(self):
10033 """Check prerequisites.
10035 This checks that the instance and node names are valid.
10038 instance_name = self.op.instance_name
10040 self.instance = self.cfg.GetInstanceInfo(instance_name)
10041 assert self.instance is not None, \
10042 "Cannot retrieve locked instance %s" % self.op.instance_name
10043 _CheckNodeOnline(self, self.instance.primary_node)
10045 if (self.op.remove_instance and self.instance.admin_up and
10046 not self.op.shutdown):
10047 raise errors.OpPrereqError("Can not remove instance without shutting it"
10050 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10051 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10052 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10053 assert self.dst_node is not None
10055 _CheckNodeOnline(self, self.dst_node.name)
10056 _CheckNodeNotDrained(self, self.dst_node.name)
10059 self.dest_disk_info = None
10060 self.dest_x509_ca = None
10062 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10063 self.dst_node = None
10065 if len(self.op.target_node) != len(self.instance.disks):
10066 raise errors.OpPrereqError(("Received destination information for %s"
10067 " disks, but instance %s has %s disks") %
10068 (len(self.op.target_node), instance_name,
10069 len(self.instance.disks)),
10070 errors.ECODE_INVAL)
10072 cds = _GetClusterDomainSecret()
10074 # Check X509 key name
10076 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10077 except (TypeError, ValueError), err:
10078 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10080 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10081 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10082 errors.ECODE_INVAL)
10084 # Load and verify CA
10086 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10087 except OpenSSL.crypto.Error, err:
10088 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10089 (err, ), errors.ECODE_INVAL)
10091 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10092 if errcode is not None:
10093 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10094 (msg, ), errors.ECODE_INVAL)
10096 self.dest_x509_ca = cert
10098 # Verify target information
10100 for idx, disk_data in enumerate(self.op.target_node):
10102 (host, port, magic) = \
10103 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10104 except errors.GenericError, err:
10105 raise errors.OpPrereqError("Target info for disk %s: %s" %
10106 (idx, err), errors.ECODE_INVAL)
10108 disk_info.append((host, port, magic))
10110 assert len(disk_info) == len(self.op.target_node)
10111 self.dest_disk_info = disk_info
10114 raise errors.ProgrammerError("Unhandled export mode %r" %
10117 # instance disk type verification
10118 # TODO: Implement export support for file-based disks
10119 for disk in self.instance.disks:
10120 if disk.dev_type == constants.LD_FILE:
10121 raise errors.OpPrereqError("Export not supported for instances with"
10122 " file-based disks", errors.ECODE_INVAL)
10124 def _CleanupExports(self, feedback_fn):
10125 """Removes exports of current instance from all other nodes.
10127 If an instance in a cluster with nodes A..D was exported to node C, its
10128 exports will be removed from the nodes A, B and D.
10131 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10133 nodelist = self.cfg.GetNodeList()
10134 nodelist.remove(self.dst_node.name)
10136 # on one-node clusters nodelist will be empty after the removal
10137 # if we proceed the backup would be removed because OpQueryExports
10138 # substitutes an empty list with the full cluster node list.
10139 iname = self.instance.name
10141 feedback_fn("Removing old exports for instance %s" % iname)
10142 exportlist = self.rpc.call_export_list(nodelist)
10143 for node in exportlist:
10144 if exportlist[node].fail_msg:
10146 if iname in exportlist[node].payload:
10147 msg = self.rpc.call_export_remove(node, iname).fail_msg
10149 self.LogWarning("Could not remove older export for instance %s"
10150 " on node %s: %s", iname, node, msg)
10152 def Exec(self, feedback_fn):
10153 """Export an instance to an image in the cluster.
10156 assert self.op.mode in constants.EXPORT_MODES
10158 instance = self.instance
10159 src_node = instance.primary_node
10161 if self.op.shutdown:
10162 # shutdown the instance, but not the disks
10163 feedback_fn("Shutting down instance %s" % instance.name)
10164 result = self.rpc.call_instance_shutdown(src_node, instance,
10165 self.op.shutdown_timeout)
10166 # TODO: Maybe ignore failures if ignore_remove_failures is set
10167 result.Raise("Could not shutdown instance %s on"
10168 " node %s" % (instance.name, src_node))
10170 # set the disks ID correctly since call_instance_start needs the
10171 # correct drbd minor to create the symlinks
10172 for disk in instance.disks:
10173 self.cfg.SetDiskID(disk, src_node)
10175 activate_disks = (not instance.admin_up)
10178 # Activate the instance disks if we'exporting a stopped instance
10179 feedback_fn("Activating disks for %s" % instance.name)
10180 _StartInstanceDisks(self, instance, None)
10183 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10186 helper.CreateSnapshots()
10188 if (self.op.shutdown and instance.admin_up and
10189 not self.op.remove_instance):
10190 assert not activate_disks
10191 feedback_fn("Starting instance %s" % instance.name)
10192 result = self.rpc.call_instance_start(src_node, instance, None, None)
10193 msg = result.fail_msg
10195 feedback_fn("Failed to start instance: %s" % msg)
10196 _ShutdownInstanceDisks(self, instance)
10197 raise errors.OpExecError("Could not start instance: %s" % msg)
10199 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10200 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10201 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10202 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10203 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10205 (key_name, _, _) = self.x509_key_name
10208 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10211 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10212 key_name, dest_ca_pem,
10217 # Check for backwards compatibility
10218 assert len(dresults) == len(instance.disks)
10219 assert compat.all(isinstance(i, bool) for i in dresults), \
10220 "Not all results are boolean: %r" % dresults
10224 feedback_fn("Deactivating disks for %s" % instance.name)
10225 _ShutdownInstanceDisks(self, instance)
10227 if not (compat.all(dresults) and fin_resu):
10230 failures.append("export finalization")
10231 if not compat.all(dresults):
10232 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10234 failures.append("disk export: disk(s) %s" % fdsk)
10236 raise errors.OpExecError("Export failed, errors in %s" %
10237 utils.CommaJoin(failures))
10239 # At this point, the export was successful, we can cleanup/finish
10241 # Remove instance if requested
10242 if self.op.remove_instance:
10243 feedback_fn("Removing instance %s" % instance.name)
10244 _RemoveInstance(self, feedback_fn, instance,
10245 self.op.ignore_remove_failures)
10247 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10248 self._CleanupExports(feedback_fn)
10250 return fin_resu, dresults
10253 class LURemoveExport(NoHooksLU):
10254 """Remove exports related to the named instance.
10262 def ExpandNames(self):
10263 self.needed_locks = {}
10264 # We need all nodes to be locked in order for RemoveExport to work, but we
10265 # don't need to lock the instance itself, as nothing will happen to it (and
10266 # we can remove exports also for a removed instance)
10267 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10269 def Exec(self, feedback_fn):
10270 """Remove any export.
10273 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10274 # If the instance was not found we'll try with the name that was passed in.
10275 # This will only work if it was an FQDN, though.
10277 if not instance_name:
10279 instance_name = self.op.instance_name
10281 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10282 exportlist = self.rpc.call_export_list(locked_nodes)
10284 for node in exportlist:
10285 msg = exportlist[node].fail_msg
10287 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10289 if instance_name in exportlist[node].payload:
10291 result = self.rpc.call_export_remove(node, instance_name)
10292 msg = result.fail_msg
10294 logging.error("Could not remove export for instance %s"
10295 " on node %s: %s", instance_name, node, msg)
10297 if fqdn_warn and not found:
10298 feedback_fn("Export not found. If trying to remove an export belonging"
10299 " to a deleted instance please use its Fully Qualified"
10303 class LUAddGroup(LogicalUnit):
10304 """Logical unit for creating node groups.
10307 HPATH = "group-add"
10308 HTYPE = constants.HTYPE_GROUP
10316 def ExpandNames(self):
10317 # We need the new group's UUID here so that we can create and acquire the
10318 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10319 # that it should not check whether the UUID exists in the configuration.
10320 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10321 self.needed_locks = {}
10322 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10324 def CheckPrereq(self):
10325 """Check prerequisites.
10327 This checks that the given group name is not an existing node group
10332 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10333 except errors.OpPrereqError:
10336 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10337 " node group (UUID: %s)" %
10338 (self.op.group_name, existing_uuid),
10339 errors.ECODE_EXISTS)
10341 def BuildHooksEnv(self):
10342 """Build hooks env.
10346 "GROUP_NAME": self.op.group_name,
10348 mn = self.cfg.GetMasterNode()
10349 return env, [mn], [mn]
10351 def Exec(self, feedback_fn):
10352 """Add the node group to the cluster.
10355 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10356 uuid=self.group_uuid)
10358 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10359 del self.remove_locks[locking.LEVEL_NODEGROUP]
10362 class LUQueryGroups(NoHooksLU):
10363 """Logical unit for querying node groups.
10366 # pylint: disable-msg=W0142
10369 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10374 _FIELDS_DYNAMIC = utils.FieldSet()
10376 _SIMPLE_FIELDS = ["name", "uuid", "ctime", "mtime", "serial_no"]
10378 _FIELDS_STATIC = utils.FieldSet(
10379 "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10381 def CheckArguments(self):
10382 _CheckOutputFields(static=self._FIELDS_STATIC,
10383 dynamic=self._FIELDS_DYNAMIC,
10384 selected=self.op.output_fields)
10386 def ExpandNames(self):
10387 self.needed_locks = {}
10389 def Exec(self, feedback_fn):
10390 """Computes the list of groups and their attributes.
10393 all_groups = self.cfg.GetAllNodeGroupsInfo()
10395 if not self.op.names:
10396 my_groups = utils.NiceSort(all_groups.keys())
10398 # Accept names to be either names or UUIDs.
10399 all_uuid = frozenset(all_groups.keys())
10400 name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10404 for name in self.op.names:
10405 if name in all_uuid:
10406 my_groups.append(name)
10407 elif name in name_to_uuid:
10408 my_groups.append(name_to_uuid[name])
10410 missing.append(name)
10413 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10414 errors.ECODE_NOENT)
10416 do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10417 intersection(self.op.output_fields))
10419 do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10420 intersection(self.op.output_fields))
10422 # We need to map group->[nodes], and group->[instances]. The former is
10423 # directly attainable, but the latter we have to do through instance->node,
10424 # hence we need to process nodes even if we only need instance information.
10425 if do_nodes or do_instances:
10426 all_nodes = self.cfg.GetAllNodesInfo()
10427 group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10430 for node in all_nodes.values():
10431 if node.group in group_to_nodes:
10432 group_to_nodes[node.group].append(node.name)
10433 node_to_group[node.name] = node.group
10436 all_instances = self.cfg.GetAllInstancesInfo()
10437 group_to_instances = dict((all_groups[name].uuid, [])
10438 for name in my_groups)
10439 for instance in all_instances.values():
10440 node = instance.primary_node
10441 if node in node_to_group:
10442 group_to_instances[node_to_group[node]].append(instance.name)
10446 for name in my_groups:
10447 group = all_groups[name]
10450 for field in self.op.output_fields:
10451 if field in self._SIMPLE_FIELDS:
10452 val = getattr(group, field)
10453 elif field == "node_list":
10454 val = utils.NiceSort(group_to_nodes[group.uuid])
10455 elif field == "node_cnt":
10456 val = len(group_to_nodes[group.uuid])
10457 elif field == "pinst_list":
10458 val = utils.NiceSort(group_to_instances[group.uuid])
10459 elif field == "pinst_cnt":
10460 val = len(group_to_instances[group.uuid])
10462 raise errors.ParameterError(field)
10463 group_output.append(val)
10464 output.append(group_output)
10469 class LURemoveGroup(LogicalUnit):
10470 HPATH = "group-remove"
10471 HTYPE = constants.HTYPE_GROUP
10479 def ExpandNames(self):
10480 # This will raises errors.OpPrereqError on its own:
10481 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10482 self.needed_locks = {
10483 locking.LEVEL_NODEGROUP: [self.group_uuid],
10486 def CheckPrereq(self):
10487 """Check prerequisites.
10489 This checks that the given group name exists as a node group, that is
10490 empty (i.e., contains no nodes), and that is not the last group of the
10494 # Verify that the group is empty.
10495 group_nodes = [node.name
10496 for node in self.cfg.GetAllNodesInfo().values()
10497 if node.group == self.group_uuid]
10500 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10502 (self.op.group_name,
10503 utils.CommaJoin(utils.NiceSort(group_nodes))),
10504 errors.ECODE_STATE)
10506 # Verify the cluster would not be left group-less.
10507 if len(self.cfg.GetNodeGroupList()) == 1:
10508 raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10509 " which cannot be left without at least one"
10510 " group" % self.op.group_name,
10511 errors.ECODE_STATE)
10513 def BuildHooksEnv(self):
10514 """Build hooks env.
10518 "GROUP_NAME": self.op.group_name,
10520 mn = self.cfg.GetMasterNode()
10521 return env, [mn], [mn]
10523 def Exec(self, feedback_fn):
10524 """Remove the node group.
10528 self.cfg.RemoveNodeGroup(self.group_uuid)
10529 except errors.ConfigurationError:
10530 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10531 (self.op.group_name, self.group_uuid))
10533 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10536 class LURenameGroup(LogicalUnit):
10537 HPATH = "group-rename"
10538 HTYPE = constants.HTYPE_GROUP
10541 ("old_name", ht.NoDefault, ht.TNonEmptyString),
10542 ("new_name", ht.NoDefault, ht.TNonEmptyString),
10547 def ExpandNames(self):
10548 # This raises errors.OpPrereqError on its own:
10549 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10551 self.needed_locks = {
10552 locking.LEVEL_NODEGROUP: [self.group_uuid],
10555 def CheckPrereq(self):
10556 """Check prerequisites.
10558 This checks that the given old_name exists as a node group, and that
10563 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10564 except errors.OpPrereqError:
10567 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10568 " node group (UUID: %s)" %
10569 (self.op.new_name, new_name_uuid),
10570 errors.ECODE_EXISTS)
10572 def BuildHooksEnv(self):
10573 """Build hooks env.
10577 "OLD_NAME": self.op.old_name,
10578 "NEW_NAME": self.op.new_name,
10581 mn = self.cfg.GetMasterNode()
10582 all_nodes = self.cfg.GetAllNodesInfo()
10584 all_nodes.pop(mn, None)
10586 for node in all_nodes.values():
10587 if node.group == self.group_uuid:
10588 run_nodes.append(node.name)
10590 return env, run_nodes, run_nodes
10592 def Exec(self, feedback_fn):
10593 """Rename the node group.
10596 group = self.cfg.GetNodeGroup(self.group_uuid)
10599 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10600 (self.op.old_name, self.group_uuid))
10602 group.name = self.op.new_name
10603 self.cfg.Update(group, feedback_fn)
10605 return self.op.new_name
10608 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10609 """Generic tags LU.
10611 This is an abstract class which is the parent of all the other tags LUs.
10615 def ExpandNames(self):
10616 self.needed_locks = {}
10617 if self.op.kind == constants.TAG_NODE:
10618 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10619 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10620 elif self.op.kind == constants.TAG_INSTANCE:
10621 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10622 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10624 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10625 # not possible to acquire the BGL based on opcode parameters)
10627 def CheckPrereq(self):
10628 """Check prerequisites.
10631 if self.op.kind == constants.TAG_CLUSTER:
10632 self.target = self.cfg.GetClusterInfo()
10633 elif self.op.kind == constants.TAG_NODE:
10634 self.target = self.cfg.GetNodeInfo(self.op.name)
10635 elif self.op.kind == constants.TAG_INSTANCE:
10636 self.target = self.cfg.GetInstanceInfo(self.op.name)
10638 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10639 str(self.op.kind), errors.ECODE_INVAL)
10642 class LUGetTags(TagsLU):
10643 """Returns the tags of a given object.
10647 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10648 # Name is only meaningful for nodes and instances
10649 ("name", ht.NoDefault, ht.TMaybeString),
10653 def ExpandNames(self):
10654 TagsLU.ExpandNames(self)
10656 # Share locks as this is only a read operation
10657 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10659 def Exec(self, feedback_fn):
10660 """Returns the tag list.
10663 return list(self.target.GetTags())
10666 class LUSearchTags(NoHooksLU):
10667 """Searches the tags for a given pattern.
10671 ("pattern", ht.NoDefault, ht.TNonEmptyString),
10675 def ExpandNames(self):
10676 self.needed_locks = {}
10678 def CheckPrereq(self):
10679 """Check prerequisites.
10681 This checks the pattern passed for validity by compiling it.
10685 self.re = re.compile(self.op.pattern)
10686 except re.error, err:
10687 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10688 (self.op.pattern, err), errors.ECODE_INVAL)
10690 def Exec(self, feedback_fn):
10691 """Returns the tag list.
10695 tgts = [("/cluster", cfg.GetClusterInfo())]
10696 ilist = cfg.GetAllInstancesInfo().values()
10697 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10698 nlist = cfg.GetAllNodesInfo().values()
10699 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10701 for path, target in tgts:
10702 for tag in target.GetTags():
10703 if self.re.search(tag):
10704 results.append((path, tag))
10708 class LUAddTags(TagsLU):
10709 """Sets a tag on a given object.
10713 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10714 # Name is only meaningful for nodes and instances
10715 ("name", ht.NoDefault, ht.TMaybeString),
10716 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10720 def CheckPrereq(self):
10721 """Check prerequisites.
10723 This checks the type and length of the tag name and value.
10726 TagsLU.CheckPrereq(self)
10727 for tag in self.op.tags:
10728 objects.TaggableObject.ValidateTag(tag)
10730 def Exec(self, feedback_fn):
10735 for tag in self.op.tags:
10736 self.target.AddTag(tag)
10737 except errors.TagError, err:
10738 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10739 self.cfg.Update(self.target, feedback_fn)
10742 class LUDelTags(TagsLU):
10743 """Delete a list of tags from a given object.
10747 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10748 # Name is only meaningful for nodes and instances
10749 ("name", ht.NoDefault, ht.TMaybeString),
10750 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10754 def CheckPrereq(self):
10755 """Check prerequisites.
10757 This checks that we have the given tag.
10760 TagsLU.CheckPrereq(self)
10761 for tag in self.op.tags:
10762 objects.TaggableObject.ValidateTag(tag)
10763 del_tags = frozenset(self.op.tags)
10764 cur_tags = self.target.GetTags()
10766 diff_tags = del_tags - cur_tags
10768 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10769 raise errors.OpPrereqError("Tag(s) %s not found" %
10770 (utils.CommaJoin(diff_names), ),
10771 errors.ECODE_NOENT)
10773 def Exec(self, feedback_fn):
10774 """Remove the tag from the object.
10777 for tag in self.op.tags:
10778 self.target.RemoveTag(tag)
10779 self.cfg.Update(self.target, feedback_fn)
10782 class LUTestDelay(NoHooksLU):
10783 """Sleep for a specified amount of time.
10785 This LU sleeps on the master and/or nodes for a specified amount of
10790 ("duration", ht.NoDefault, ht.TFloat),
10791 ("on_master", True, ht.TBool),
10792 ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10793 ("repeat", 0, ht.TPositiveInt)
10797 def ExpandNames(self):
10798 """Expand names and set required locks.
10800 This expands the node list, if any.
10803 self.needed_locks = {}
10804 if self.op.on_nodes:
10805 # _GetWantedNodes can be used here, but is not always appropriate to use
10806 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10807 # more information.
10808 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10809 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10811 def _TestDelay(self):
10812 """Do the actual sleep.
10815 if self.op.on_master:
10816 if not utils.TestDelay(self.op.duration):
10817 raise errors.OpExecError("Error during master delay test")
10818 if self.op.on_nodes:
10819 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10820 for node, node_result in result.items():
10821 node_result.Raise("Failure during rpc call to node %s" % node)
10823 def Exec(self, feedback_fn):
10824 """Execute the test delay opcode, with the wanted repetitions.
10827 if self.op.repeat == 0:
10830 top_value = self.op.repeat - 1
10831 for i in range(self.op.repeat):
10832 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10836 class LUTestJobqueue(NoHooksLU):
10837 """Utility LU to test some aspects of the job queue.
10841 ("notify_waitlock", False, ht.TBool),
10842 ("notify_exec", False, ht.TBool),
10843 ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10844 ("fail", False, ht.TBool),
10848 # Must be lower than default timeout for WaitForJobChange to see whether it
10849 # notices changed jobs
10850 _CLIENT_CONNECT_TIMEOUT = 20.0
10851 _CLIENT_CONFIRM_TIMEOUT = 60.0
10854 def _NotifyUsingSocket(cls, cb, errcls):
10855 """Opens a Unix socket and waits for another program to connect.
10858 @param cb: Callback to send socket name to client
10859 @type errcls: class
10860 @param errcls: Exception class to use for errors
10863 # Using a temporary directory as there's no easy way to create temporary
10864 # sockets without writing a custom loop around tempfile.mktemp and
10866 tmpdir = tempfile.mkdtemp()
10868 tmpsock = utils.PathJoin(tmpdir, "sock")
10870 logging.debug("Creating temporary socket at %s", tmpsock)
10871 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10876 # Send details to client
10879 # Wait for client to connect before continuing
10880 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10882 (conn, _) = sock.accept()
10883 except socket.error, err:
10884 raise errcls("Client didn't connect in time (%s)" % err)
10888 # Remove as soon as client is connected
10889 shutil.rmtree(tmpdir)
10891 # Wait for client to close
10894 # pylint: disable-msg=E1101
10895 # Instance of '_socketobject' has no ... member
10896 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10898 except socket.error, err:
10899 raise errcls("Client failed to confirm notification (%s)" % err)
10903 def _SendNotification(self, test, arg, sockname):
10904 """Sends a notification to the client.
10907 @param test: Test name
10908 @param arg: Test argument (depends on test)
10909 @type sockname: string
10910 @param sockname: Socket path
10913 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10915 def _Notify(self, prereq, test, arg):
10916 """Notifies the client of a test.
10919 @param prereq: Whether this is a prereq-phase test
10921 @param test: Test name
10922 @param arg: Test argument (depends on test)
10926 errcls = errors.OpPrereqError
10928 errcls = errors.OpExecError
10930 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10934 def CheckArguments(self):
10935 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10936 self.expandnames_calls = 0
10938 def ExpandNames(self):
10939 checkargs_calls = getattr(self, "checkargs_calls", 0)
10940 if checkargs_calls < 1:
10941 raise errors.ProgrammerError("CheckArguments was not called")
10943 self.expandnames_calls += 1
10945 if self.op.notify_waitlock:
10946 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10948 self.LogInfo("Expanding names")
10950 # Get lock on master node (just to get a lock, not for a particular reason)
10951 self.needed_locks = {
10952 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10955 def Exec(self, feedback_fn):
10956 if self.expandnames_calls < 1:
10957 raise errors.ProgrammerError("ExpandNames was not called")
10959 if self.op.notify_exec:
10960 self._Notify(False, constants.JQT_EXEC, None)
10962 self.LogInfo("Executing")
10964 if self.op.log_messages:
10965 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10966 for idx, msg in enumerate(self.op.log_messages):
10967 self.LogInfo("Sending log message %s", idx + 1)
10968 feedback_fn(constants.JQT_MSGPREFIX + msg)
10969 # Report how many test messages have been sent
10970 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10973 raise errors.OpExecError("Opcode failure was requested")
10978 class IAllocator(object):
10979 """IAllocator framework.
10981 An IAllocator instance has three sets of attributes:
10982 - cfg that is needed to query the cluster
10983 - input data (all members of the _KEYS class attribute are required)
10984 - four buffer attributes (in|out_data|text), that represent the
10985 input (to the external script) in text and data structure format,
10986 and the output from it, again in two formats
10987 - the result variables from the script (success, info, nodes) for
10991 # pylint: disable-msg=R0902
10992 # lots of instance attributes
10994 "name", "mem_size", "disks", "disk_template",
10995 "os", "tags", "nics", "vcpus", "hypervisor",
10998 "name", "relocate_from",
11004 def __init__(self, cfg, rpc, mode, **kwargs):
11007 # init buffer variables
11008 self.in_text = self.out_text = self.in_data = self.out_data = None
11009 # init all input fields so that pylint is happy
11011 self.mem_size = self.disks = self.disk_template = None
11012 self.os = self.tags = self.nics = self.vcpus = None
11013 self.hypervisor = None
11014 self.relocate_from = None
11016 self.evac_nodes = None
11018 self.required_nodes = None
11019 # init result fields
11020 self.success = self.info = self.result = None
11021 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11022 keyset = self._ALLO_KEYS
11023 fn = self._AddNewInstance
11024 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11025 keyset = self._RELO_KEYS
11026 fn = self._AddRelocateInstance
11027 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11028 keyset = self._EVAC_KEYS
11029 fn = self._AddEvacuateNodes
11031 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11032 " IAllocator" % self.mode)
11034 if key not in keyset:
11035 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11036 " IAllocator" % key)
11037 setattr(self, key, kwargs[key])
11040 if key not in kwargs:
11041 raise errors.ProgrammerError("Missing input parameter '%s' to"
11042 " IAllocator" % key)
11043 self._BuildInputData(fn)
11045 def _ComputeClusterData(self):
11046 """Compute the generic allocator input data.
11048 This is the data that is independent of the actual operation.
11052 cluster_info = cfg.GetClusterInfo()
11055 "version": constants.IALLOCATOR_VERSION,
11056 "cluster_name": cfg.GetClusterName(),
11057 "cluster_tags": list(cluster_info.GetTags()),
11058 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11059 # we don't have job IDs
11061 iinfo = cfg.GetAllInstancesInfo().values()
11062 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11065 node_list = cfg.GetNodeList()
11067 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11068 hypervisor_name = self.hypervisor
11069 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11070 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11071 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11072 hypervisor_name = cluster_info.enabled_hypervisors[0]
11074 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11077 self.rpc.call_all_instances_info(node_list,
11078 cluster_info.enabled_hypervisors)
11080 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11082 data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
11084 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11086 self.in_data = data
11089 def _ComputeNodeGroupData(cfg):
11090 """Compute node groups data.
11094 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11095 ng[guuid] = { "name": gdata.name }
11099 def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
11100 """Compute global node data.
11104 for nname, nresult in node_data.items():
11105 # first fill in static (config-based) values
11106 ninfo = cfg.GetNodeInfo(nname)
11108 "tags": list(ninfo.GetTags()),
11109 "primary_ip": ninfo.primary_ip,
11110 "secondary_ip": ninfo.secondary_ip,
11111 "offline": ninfo.offline,
11112 "drained": ninfo.drained,
11113 "master_candidate": ninfo.master_candidate,
11114 "group": ninfo.group,
11115 "master_capable": ninfo.master_capable,
11116 "vm_capable": ninfo.vm_capable,
11119 if not (ninfo.offline or ninfo.drained):
11120 nresult.Raise("Can't get data for node %s" % nname)
11121 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11123 remote_info = nresult.payload
11125 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11126 'vg_size', 'vg_free', 'cpu_total']:
11127 if attr not in remote_info:
11128 raise errors.OpExecError("Node '%s' didn't return attribute"
11129 " '%s'" % (nname, attr))
11130 if not isinstance(remote_info[attr], int):
11131 raise errors.OpExecError("Node '%s' returned invalid value"
11133 (nname, attr, remote_info[attr]))
11134 # compute memory used by primary instances
11135 i_p_mem = i_p_up_mem = 0
11136 for iinfo, beinfo in i_list:
11137 if iinfo.primary_node == nname:
11138 i_p_mem += beinfo[constants.BE_MEMORY]
11139 if iinfo.name not in node_iinfo[nname].payload:
11142 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11143 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11144 remote_info['memory_free'] -= max(0, i_mem_diff)
11147 i_p_up_mem += beinfo[constants.BE_MEMORY]
11149 # compute memory used by instances
11151 "total_memory": remote_info['memory_total'],
11152 "reserved_memory": remote_info['memory_dom0'],
11153 "free_memory": remote_info['memory_free'],
11154 "total_disk": remote_info['vg_size'],
11155 "free_disk": remote_info['vg_free'],
11156 "total_cpus": remote_info['cpu_total'],
11157 "i_pri_memory": i_p_mem,
11158 "i_pri_up_memory": i_p_up_mem,
11160 pnr.update(pnr_dyn)
11162 node_results[nname] = pnr
11164 return node_results
11167 def _ComputeInstanceData(cluster_info, i_list):
11168 """Compute global instance data.
11172 for iinfo, beinfo in i_list:
11174 for nic in iinfo.nics:
11175 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11176 nic_dict = {"mac": nic.mac,
11178 "mode": filled_params[constants.NIC_MODE],
11179 "link": filled_params[constants.NIC_LINK],
11181 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11182 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11183 nic_data.append(nic_dict)
11185 "tags": list(iinfo.GetTags()),
11186 "admin_up": iinfo.admin_up,
11187 "vcpus": beinfo[constants.BE_VCPUS],
11188 "memory": beinfo[constants.BE_MEMORY],
11190 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11192 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11193 "disk_template": iinfo.disk_template,
11194 "hypervisor": iinfo.hypervisor,
11196 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11198 instance_data[iinfo.name] = pir
11200 return instance_data
11202 def _AddNewInstance(self):
11203 """Add new instance data to allocator structure.
11205 This in combination with _AllocatorGetClusterData will create the
11206 correct structure needed as input for the allocator.
11208 The checks for the completeness of the opcode must have already been
11212 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11214 if self.disk_template in constants.DTS_NET_MIRROR:
11215 self.required_nodes = 2
11217 self.required_nodes = 1
11220 "disk_template": self.disk_template,
11223 "vcpus": self.vcpus,
11224 "memory": self.mem_size,
11225 "disks": self.disks,
11226 "disk_space_total": disk_space,
11228 "required_nodes": self.required_nodes,
11232 def _AddRelocateInstance(self):
11233 """Add relocate instance data to allocator structure.
11235 This in combination with _IAllocatorGetClusterData will create the
11236 correct structure needed as input for the allocator.
11238 The checks for the completeness of the opcode must have already been
11242 instance = self.cfg.GetInstanceInfo(self.name)
11243 if instance is None:
11244 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11245 " IAllocator" % self.name)
11247 if instance.disk_template not in constants.DTS_NET_MIRROR:
11248 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11249 errors.ECODE_INVAL)
11251 if len(instance.secondary_nodes) != 1:
11252 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11253 errors.ECODE_STATE)
11255 self.required_nodes = 1
11256 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11257 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11261 "disk_space_total": disk_space,
11262 "required_nodes": self.required_nodes,
11263 "relocate_from": self.relocate_from,
11267 def _AddEvacuateNodes(self):
11268 """Add evacuate nodes data to allocator structure.
11272 "evac_nodes": self.evac_nodes
11276 def _BuildInputData(self, fn):
11277 """Build input data structures.
11280 self._ComputeClusterData()
11283 request["type"] = self.mode
11284 self.in_data["request"] = request
11286 self.in_text = serializer.Dump(self.in_data)
11288 def Run(self, name, validate=True, call_fn=None):
11289 """Run an instance allocator and return the results.
11292 if call_fn is None:
11293 call_fn = self.rpc.call_iallocator_runner
11295 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11296 result.Raise("Failure while running the iallocator script")
11298 self.out_text = result.payload
11300 self._ValidateResult()
11302 def _ValidateResult(self):
11303 """Process the allocator results.
11305 This will process and if successful save the result in
11306 self.out_data and the other parameters.
11310 rdict = serializer.Load(self.out_text)
11311 except Exception, err:
11312 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11314 if not isinstance(rdict, dict):
11315 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11317 # TODO: remove backwards compatiblity in later versions
11318 if "nodes" in rdict and "result" not in rdict:
11319 rdict["result"] = rdict["nodes"]
11322 for key in "success", "info", "result":
11323 if key not in rdict:
11324 raise errors.OpExecError("Can't parse iallocator results:"
11325 " missing key '%s'" % key)
11326 setattr(self, key, rdict[key])
11328 if not isinstance(rdict["result"], list):
11329 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11331 self.out_data = rdict
11334 class LUTestAllocator(NoHooksLU):
11335 """Run allocator tests.
11337 This LU runs the allocator tests
11341 ("direction", ht.NoDefault,
11342 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
11343 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
11344 ("name", ht.NoDefault, ht.TNonEmptyString),
11345 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
11346 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
11347 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
11348 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
11349 ("hypervisor", None, ht.TMaybeString),
11350 ("allocator", None, ht.TMaybeString),
11351 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
11352 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
11353 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
11354 ("os", None, ht.TMaybeString),
11355 ("disk_template", None, ht.TMaybeString),
11356 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
11359 def CheckPrereq(self):
11360 """Check prerequisites.
11362 This checks the opcode parameters depending on the director and mode test.
11365 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11366 for attr in ["mem_size", "disks", "disk_template",
11367 "os", "tags", "nics", "vcpus"]:
11368 if not hasattr(self.op, attr):
11369 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11370 attr, errors.ECODE_INVAL)
11371 iname = self.cfg.ExpandInstanceName(self.op.name)
11372 if iname is not None:
11373 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11374 iname, errors.ECODE_EXISTS)
11375 if not isinstance(self.op.nics, list):
11376 raise errors.OpPrereqError("Invalid parameter 'nics'",
11377 errors.ECODE_INVAL)
11378 if not isinstance(self.op.disks, list):
11379 raise errors.OpPrereqError("Invalid parameter 'disks'",
11380 errors.ECODE_INVAL)
11381 for row in self.op.disks:
11382 if (not isinstance(row, dict) or
11383 "size" not in row or
11384 not isinstance(row["size"], int) or
11385 "mode" not in row or
11386 row["mode"] not in ['r', 'w']):
11387 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11388 " parameter", errors.ECODE_INVAL)
11389 if self.op.hypervisor is None:
11390 self.op.hypervisor = self.cfg.GetHypervisorType()
11391 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11392 fname = _ExpandInstanceName(self.cfg, self.op.name)
11393 self.op.name = fname
11394 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11395 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11396 if not hasattr(self.op, "evac_nodes"):
11397 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11398 " opcode input", errors.ECODE_INVAL)
11400 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11401 self.op.mode, errors.ECODE_INVAL)
11403 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11404 if self.op.allocator is None:
11405 raise errors.OpPrereqError("Missing allocator name",
11406 errors.ECODE_INVAL)
11407 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11408 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11409 self.op.direction, errors.ECODE_INVAL)
11411 def Exec(self, feedback_fn):
11412 """Run the allocator test.
11415 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11416 ial = IAllocator(self.cfg, self.rpc,
11419 mem_size=self.op.mem_size,
11420 disks=self.op.disks,
11421 disk_template=self.op.disk_template,
11425 vcpus=self.op.vcpus,
11426 hypervisor=self.op.hypervisor,
11428 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11429 ial = IAllocator(self.cfg, self.rpc,
11432 relocate_from=list(self.relocate_from),
11434 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11435 ial = IAllocator(self.cfg, self.rpc,
11437 evac_nodes=self.op.evac_nodes)
11439 raise errors.ProgrammerError("Uncatched mode %s in"
11440 " LUTestAllocator.Exec", self.op.mode)
11442 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11443 result = ial.in_text
11445 ial.Run(self.op.allocator, validate=False)
11446 result = ial.out_text