# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Logical units dealing with nodes."""
import logging
import operator

from ganeti import constants
from ganeti import errors
from ganeti import locking
from ganeti import netutils
from ganeti import objects
from ganeti import opcodes
from ganeti import qlang
from ganeti import query
from ganeti import rpc
from ganeti import utils
from ganeti.masterd import iallocator

from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, _QueryBase, \
  ResultWithJobs
from ganeti.cmdlib.common import _CheckParamsNotGlobal, \
  _MergeAndVerifyHvState, _MergeAndVerifyDiskState, \
  _IsExclusiveStorageEnabledNode, _CheckNodePVs, \
  _RedistributeAncillaryFiles, _ExpandNodeName, _ShareAll, _SupportsOob, \
  _CheckInstanceState, INSTANCE_DOWN, _GetUpdatedParams, \
  _AdjustCandidatePool, _CheckIAllocatorOrNode, _LoadNodeEvacResult, \
  _GetWantedNodes, _MapInstanceDisksToNodes, _RunPostHook, \
  _FindFaultyInstanceDisks


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
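
# Illustrative sketch (not part of the module): how the decision above plays
# out. Assume a candidate_pool_size of 10 and a cluster that currently has
# 7 master candidates while it should have 8; adding one node raises the
# target by one, capped at the pool size:
#
#   cp_size = 10
#   mc_now, mc_should = 7, 8
#   mc_should = min(mc_should + 1, cp_size)   # -> 9
#   promote = mc_now < mc_should              # -> True, promote the new node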


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
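
# Usage sketch (illustrative, assuming a LogicalUnit as caller): pass
# prereq=True from CheckPrereq so a missing IP aborts the job before any
# change is made, and prereq=False from Exec where only an execution error
# is appropriate, e.g.:
#
#   _CheckNodeHasSecondaryIP(self, node.name, node.secondary_ip, True)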


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
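
  # Illustrative example (not part of the LU): on a cluster whose
  # configuration currently lists ["node1", "node2"], adding "node3" yields
  #
  #   pre_nodes  == ["node1", "node2"]            # hooks before the change
  #   post_nodes == ["node1", "node2", "node3"]   # hooks after the change
  #
  # so the new node only runs the post-phase hooks.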

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip
159 secondary_ip = self.op.secondary_ip
160 if not netutils.IP4Address.IsValid(secondary_ip):
161 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
162 " address" % secondary_ip, errors.ECODE_INVAL)
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
172 self.changed_primary_ip = False
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)
    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)
214 # check that the type of the node (single versus dual homed) is the
215 # same as for the master
216 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
217 master_singlehomed = myself.secondary_ip == myself.primary_ip
218 newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)
229 # checks reachability
230 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
231 raise errors.OpPrereqError("Node not reachable by ping",
232 errors.ECODE_ENVIRON)
234 if not newbie_singlehomed:
235 # check reachability from my secondary ip to newbie's secondary ip
236 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
237 source=myself.secondary_ip):
238 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
239 " based ping to node daemon port",
240 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group, ndparams={})
    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
                            "node", "cluster or group")

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
274 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
275 # it a property on the base class.
276 rpcrunner = rpc.DnsOnlyRunner()
277 result = rpcrunner.call_version([node])[node]
278 result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
288 vg_name = cfg.GetVGName()
289 if vg_name is not None:
290 vparams = {constants.NV_PVLIST: [vg_name]}
291 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
292 cname = self.cfg.GetClusterName()
293 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
      (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
      if errmsgs:
        raise errors.OpPrereqError("Checks on node PVs failed: %s" %
                                   "; ".join(errmsgs), errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False  # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip
324 # copy the master/vm_capable flags
325 for attr in self._NFLAGS:
326 setattr(new_node, attr, getattr(self.op, attr))
328 # notify the user about any possible mc promotion
329 if new_node.master_candidate:
330 self.LogInfo("Node will be a master candidate")
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state
343 # Add node to our /etc/hosts, and add key to known_hosts
344 if self.cfg.GetClusterInfo().modify_etc_hosts:
345 master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
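
  # Illustrative example (not part of the class): the two mappings are exact
  # inverses of each other, keyed by the (master_candidate, drained, offline)
  # flag tuple:
  #
  #   _F2R[(True, False, False)]   # -> _ROLE_CANDIDATE
  #   _F2R[(False, False, False)]  # -> _ROLE_REGULAR
  #   _R2F[_ROLE_OFFLINE]          # -> (False, False, True)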

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)
428 # Boolean value that tells us whether we might be demoting from MC
429 self.might_demote = (self.op.master_candidate is False or
430 self.op.offline is True or
431 self.op.drained is True or
432 self.op.master_capable is False)
    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
441 self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)
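
  # Illustrative example (not part of the class): a DRBD instance whose
  # primary or secondary node is self.op.node_name matches the filter and
  # gets locked, while a non-mirrored instance on the same node does not,
  # because only internally mirrored templates care about the secondary IP:
  #
  #   inst.disk_template == constants.DT_DRBD8 and
  #   self.op.node_name in inst.all_nodes        # -> True, lock it
  #   inst.disk_template == constants.DT_PLAIN   # -> False, not affected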

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,

        # Block allocations when all nodes are locked
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: self.op.node_name,
        }
463 # Since modifying a node can have severe effects on currently running
464 # operations the resource lock is at least acquired in shared mode
465 self.needed_locks[locking.LEVEL_NODE_RES] = \
466 self.needed_locks[locking.LEVEL_NODE]
468 # Get all locks except nodes in shared mode; they are not used for anything
469 # but read-only access
470 self.share_locks = _ShareAll()
471 self.share_locks[locking.LEVEL_NODE] = 0
472 self.share_locks[locking.LEVEL_NODE_RES] = 0
473 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
475 if self.lock_instances:
476 self.needed_locks[locking.LEVEL_INSTANCE] = \
477 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
509 if self.lock_instances:
510 affected_instances = \
511 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
513 # Verify instance locks
514 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
515 wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None
    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)
    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)
561 self.old_flags = old_flags = (node.master_candidate,
562 node.drained, node.offline)
563 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
564 self.old_role = old_role = self._F2R[old_flags]
566 # Check for ineffective changes
567 for attr in self._FLAGS:
568 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
569 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
570 setattr(self.op, attr, None)
572 # Past this point, any flag change to False means a transition
573 # away from the respective state, as only real changes are kept
575 # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)
588 # If we're being deofflined/drained, we'll MC ourself if needed
589 if (self.op.drained is False or self.op.offline is False or
590 (self.op.master_capable and not node.master_capable)):
591 if _DecideSelfPromotion(self):
592 self.op.master_candidate = True
593 self.LogInfo("Auto-promoting node to master candidate")
595 # If we're no longer master capable, we'll demote ourselves from MC
596 if self.op.master_capable is False and node.master_candidate:
597 self.LogInfo("Demoting from master candidate")
598 self.op.master_candidate = False
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster; all nodes will require a secondary IP"
                          " address")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster; secondary IP addresses will have to be"
                          " removed")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")
676 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
677 if master.name != node.name:
678 # check reachability from master secondary ip to new secondary ip
679 if not netutils.TcpPing(self.op.secondary_ip,
680 constants.DEFAULT_NODED_PORT,
681 source=master.secondary_ip):
682 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
683 " based ping to node daemon port",
684 errors.ECODE_ENVIRON)
    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
                            "node", "cluster or group")
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
747 if self.op.secondary_ip:
748 node.secondary_ip = self.op.secondary_ip
749 result.append(("secondary_ip", self.op.secondary_ip))
    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
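
# Illustrative example (not part of the module): for an instance "inst1" with
# primary_node "nodeA" and secondary_nodes ["nodeB"],
#
#   _GetNodePrimaryInstances(cfg, "nodeA")    # -> [inst1]
#   _GetNodeSecondaryInstances(cfg, "nodeB")  # -> [inst1]
#   _GetNodeInstances(cfg, "nodeB")           # -> [inst1] (all_nodes matches)
#   _GetNodePrimaryInstances(cfg, "nodeB")    # -> []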


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)
837 def CheckArguments(self):
838 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
840 def ExpandNames(self):
841 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
843 if self.op.remote_node is not None:
844 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
845 assert self.op.remote_node
      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes
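
  # Illustrative example (not part of the class): evacuating "node1" whose
  # node group also contains "node2" and "node3" locks {node1, node2, node3}
  # when the iallocator picks the targets, but only {node1, remote_node} when
  # an explicit remote node was given.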

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)
910 def DeclareLocks(self, level):
911 if level == locking.LEVEL_INSTANCE:
912 # Lock instances optimistically, needs verification once node and group
913 # locks have been acquired
914 self.needed_locks[locking.LEVEL_INSTANCE] = \
915 set(i.name for i in self._DetermineInstances())
917 elif level == locking.LEVEL_NODEGROUP:
918 # Lock node groups for all potential target nodes optimistically, needs
919 # verification once nodes have been acquired
920 self.needed_locks[locking.LEVEL_NODEGROUP] = \
921 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
923 elif level == locking.LEVEL_NODE:
924 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)
944 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
945 if owned_groups != wanted_groups:
946 raise errors.OpExecError("Node groups changed since locks were acquired,"
947 " current groups are '%s', used to be '%s';"
948 " retry the operation" %
949 (utils.CommaJoin(wanted_groups),
950 utils.CommaJoin(owned_groups)))
952 # Determine affected instances
953 self.instances = self._DetermineInstances()
954 self.instance_names = [i.name for i in self.instances]
    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)
    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary node" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
1074 # TODO: Run iallocator in this opcode and pass correct placement options to
1075 # OpInstanceMigrate. Since other jobs can modify the cluster between
1076 # running the iallocator and the actual migration, a good consistency model
1077 # will have to be found.
1079 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
1080 frozenset([self.op.node_name]))
1082 return ResultWithJobs(jobs)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
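
# Illustrative example (not part of the module): file-based storage gets the
# configured directories as extra arguments, every other type gets none:
#
#   _GetStorageTypeArgs(cfg, constants.ST_FILE)
#   # -> [["/srv/ganeti/file-storage", "/srv/ganeti/shared-file-storage"]]
#   #    (paths depend on the cluster configuration)
#   _GetStorageTypeArgs(cfg, constants.ST_LVM_VG)
#   # -> []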


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
1170 # Gather data as requested
1171 if query.NQ_LIVE in self.requested_data:
1172 # filter out non-vm_capable nodes
1173 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
1175 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
1176 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
1177 [lu.cfg.GetHypervisorType()], es_flags)
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])
1188 inst_data = lu.cfg.GetAllInstancesInfo()
1190 for inst in inst_data.values():
1191 if inst.primary_node in node_to_primary:
1192 node_to_primary[inst.primary_node].add(inst.name)
1193 for secnode in inst.secondary_nodes:
1194 if secnode in node_to_secondary:
          node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = None
1211 return query.NodeQueryData([all_info[name] for name in nodenames],
1212 live_data, lu.cfg.GetMasterNode(),
1213 node_to_primary, node_to_secondary, groups,
1214 oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
1225 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
1226 self.op.output_fields, self.op.use_locking)
1228 def ExpandNames(self):
1229 self.nq.ExpandNames(self)
1231 def DeclareLocks(self, level):
1232 self.nq.DeclareLocks(self, level)
1234 def Exec(self, feedback_fn):
1235 return self.nq.OldStyleQuery(self)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
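
# Illustrative example (not part of the module): with static fields {"node"}
# and dynamic fields {"name", "size"}, asking for ["node", "size"] passes,
# while ["node", "foo"] raises OpPrereqError because "foo" matches neither
# set:
#
#   _CheckOutputFields(static=utils.FieldSet("node"),
#                      dynamic=utils.FieldSet("name", "size"),
#                      selected=["node", "size"])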


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]
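
    # Illustrative example (not part of the LU): if the caller asked for
    # ["size", "used"], the list queried from the nodes becomes
    # ["name", "size", "used"], so
    #
    #   field_idx == {"name": 0, "size": 1, "used": 2}
    #   name_idx  == 0
    #
    # and each result row can later be keyed by its name column.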
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)

        result.append(out)

    return result


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)
1491 # Run post hooks on the node before it's removed
1492 _RunPostHook(self, node.name)
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
1500 # Remove node from our /etc/hosts
1501 if self.cfg.GetClusterInfo().modify_etc_hosts:
1502 master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
1507 _RedistributeAncillaryFiles(self)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))