4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Module implementing the logic behind the cluster operations.

This module implements the logic for doing operations in the cluster. There
are two kinds of classes defined:
  - logical units, which know how to deal with their specific opcode only
  - the processor, which dispatches the opcodes to their logical units

"""
32 from ganeti import opcodes
33 from ganeti import constants
34 from ganeti import errors
35 from ganeti import rpc
36 from ganeti import cmdlib
37 from ganeti import ssconf
38 from ganeti import logger
39 from ganeti import locking
class Processor(object):
    """Object which runs OpCodes.

    A Processor looks up the logical unit (LU) class registered for an
    opcode in DISPATCH_TABLE, takes the cluster-wide lock plus whatever
    locks the LU declares, and runs the LU together with its hooks.

    NOTE(review): this class was restored from a listing that had lost its
    indentation and a number of lines (block openers such as ``try:`` and
    ``else:``, closing brackets, continuation lines and a few plain
    statements).  Restored lines that are not forced by the syntax of their
    neighbours carry their own NOTE(review) marker and should be confirmed
    against the upstream file.

    """
    DISPATCH_TABLE = {
        # cluster
        opcodes.OpDestroyCluster: cmdlib.LUDestroyCluster,
        opcodes.OpQueryClusterInfo: cmdlib.LUQueryClusterInfo,
        opcodes.OpVerifyCluster: cmdlib.LUVerifyCluster,
        opcodes.OpDumpClusterConfig: cmdlib.LUDumpClusterConfig,
        opcodes.OpRenameCluster: cmdlib.LURenameCluster,
        opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
        opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
        # nodes
        opcodes.OpAddNode: cmdlib.LUAddNode,
        opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
        opcodes.OpQueryNodeVolumes: cmdlib.LUQueryNodeVolumes,
        opcodes.OpRemoveNode: cmdlib.LURemoveNode,
        # instances
        opcodes.OpCreateInstance: cmdlib.LUCreateInstance,
        opcodes.OpReinstallInstance: cmdlib.LUReinstallInstance,
        opcodes.OpRemoveInstance: cmdlib.LURemoveInstance,
        opcodes.OpRenameInstance: cmdlib.LURenameInstance,
        opcodes.OpActivateInstanceDisks: cmdlib.LUActivateInstanceDisks,
        opcodes.OpShutdownInstance: cmdlib.LUShutdownInstance,
        opcodes.OpStartupInstance: cmdlib.LUStartupInstance,
        opcodes.OpRebootInstance: cmdlib.LURebootInstance,
        opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks,
        opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks,
        opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance,
        opcodes.OpConnectConsole: cmdlib.LUConnectConsole,
        opcodes.OpQueryInstances: cmdlib.LUQueryInstances,
        opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData,
        opcodes.OpSetInstanceParams: cmdlib.LUSetInstanceParams,
        opcodes.OpGrowDisk: cmdlib.LUGrowDisk,
        # os
        opcodes.OpDiagnoseOS: cmdlib.LUDiagnoseOS,
        # exports
        opcodes.OpQueryExports: cmdlib.LUQueryExports,
        opcodes.OpExportInstance: cmdlib.LUExportInstance,
        opcodes.OpRemoveExport: cmdlib.LURemoveExport,
        # tags
        opcodes.OpGetTags: cmdlib.LUGetTags,
        opcodes.OpSearchTags: cmdlib.LUSearchTags,
        opcodes.OpAddTags: cmdlib.LUAddTags,
        opcodes.OpDelTags: cmdlib.LUDelTags,
        # test
        opcodes.OpTestDelay: cmdlib.LUTestDelay,
        opcodes.OpTestAllocator: cmdlib.LUTestAllocator,
        }

    def __init__(self, context):
        """Constructor for Processor.

        Args:
          - context: shared object handed on to every logical unit; this
                     class itself reads its ``cfg.write_count`` and uses
                     its ``glm`` attribute to acquire and release locks

        The feedback function (taking one string, to be run when
        interesting events are happening) is not a constructor argument:
        ExecOpCode stores the one it is called with.

        """
        self.context = context
        self._feedback_fn = None
        # True only while ExecOpCode runs an LU that holds the BGL
        # exclusively; ChainOpCode checks it.
        self.exclusive_BGL = False

    def _ExecLU(self, lu):
        """Logical Unit execution sequence.

        Runs the pre-phase hooks, executes the LU and runs the post-phase
        hooks.  If the configuration's write counter changed in the
        meantime, the configuration update hook is run as well.

        Args:
          - lu: the logical unit instance to run

        Returns:
          the value returned by the LU's post-phase HooksCallBack

        """
        write_count = self.context.cfg.write_count
        # NOTE(review): exactly one statement is missing from the listing
        # here; restored as the LU prerequisite check -- confirm.
        lu.CheckPrereq()
        hm = HooksMaster(rpc.call_hooks_runner, self, lu)
        h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
        lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results,
                         self._feedback_fn, None)
        # NOTE(review): the try/finally lines were dropped from the listing
        # and are restored from the gap pattern -- confirm.
        try:
            result = lu.Exec(self._feedback_fn)
            h_results = hm.RunPhase(constants.HOOKS_PHASE_POST)
            result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results,
                                      self._feedback_fn, result)
        finally:
            # FIXME: This needs locks if not lu_class.REQ_BGL
            if write_count != self.context.cfg.write_count:
                hm.RunConfigUpdate()

        return result

    def _LockAndExecLU(self, lu, level):
        """Execute a Logical Unit, with the needed locks.

        This is a recursive function that starts locking the given level,
        and proceeds up, till there are no more locks to acquire. Then it
        executes the given LU and its opcodes.

        Args:
          - lu: the logical unit instance to run
          - level: the locking level to handle in this call

        Returns:
          the result of _ExecLU for the given LU

        """
        if level not in locking.LEVELS:
            # Past the last level: everything is locked, run the LU.
            result = self._ExecLU(lu)
        elif level in lu.needed_locks:
            # This gives a chance to LUs to make last-minute changes after
            # acquiring locks at any preceding level.
            lu.DeclareLocks(level)
            needed_locks = lu.needed_locks[level]
            share = lu.share_locks[level]
            # This is always safe to do, as we can't acquire more/less
            # locks than what was requested.
            lu.needed_locks[level] = self.context.glm.acquire(level,
                                                              needed_locks,
                                                              shared=share)
            try:
                result = self._LockAndExecLU(lu, level + 1)
            finally:
                # Only release if something was actually acquired.
                if lu.needed_locks[level]:
                    self.context.glm.release(level)
        else:
            # The LU wants nothing at this level, move on to the next one.
            result = self._LockAndExecLU(lu, level + 1)

        return result

    def ExecOpCode(self, op, feedback_fn):
        """Execute an opcode.

        Args:
          - op: the opcode to be executed
          - feedback_fn: the feedback function (taking one string) passed
                         to the LU and used by the Log* methods

        Returns:
          the result of running the logical unit registered for the opcode

        Raises:
          errors.ProgrammerError: if op is not an opcodes.OpCode instance
          errors.OpCodeUnknown: if no logical unit is registered for op

        """
        if not isinstance(op, opcodes.OpCode):
            # NOTE(review): the second half of this message was dropped
            # from the listing; restored -- confirm the exact wording.
            raise errors.ProgrammerError("Non-opcode instance passed"
                                         " to ExecOpcode")

        self._feedback_fn = feedback_fn
        lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
        if lu_class is None:
            raise errors.OpCodeUnknown("Unknown opcode")

        if lu_class.REQ_WSSTORE:
            sstore = ssconf.WritableSimpleStore()
        else:
            sstore = ssconf.SimpleStore()

        # Acquire the Big Ganeti Lock exclusively if this LU requires it,
        # and in a shared fashion otherwise (to prevent concurrent run with
        # an exclusive LU).
        self.context.glm.acquire(locking.LEVEL_CLUSTER, [locking.BGL],
                                 shared=not lu_class.REQ_BGL)
        try:
            self.exclusive_BGL = lu_class.REQ_BGL
            lu = lu_class(self, op, self.context, sstore)
            # NOTE(review): exactly one statement is missing from the
            # listing here; restored as the call that lets the LU fill in
            # needed_locks (checked by the assert below) -- confirm.
            lu.ExpandNames()
            assert lu.needed_locks is not None, "needed_locks not set by LU"
            result = self._LockAndExecLU(lu, locking.LEVEL_INSTANCE)
        finally:
            self.context.glm.release(locking.LEVEL_CLUSTER)
            self.exclusive_BGL = False

        return result

    def ChainOpCode(self, op):
        """Chain and execute an opcode.

        This is used by LUs when they need to execute a child LU.

        Args:
          - op: the opcode to be executed

        Returns:
          the value returned by the child LU's Exec

        Raises:
          errors.ProgrammerError: if op is not an opcodes.OpCode instance,
              or if the child LU requires the BGL while the running LU
              does not hold it exclusively
          errors.OpCodeUnknown: if no logical unit is registered for op

        """
        if not isinstance(op, opcodes.OpCode):
            # NOTE(review): the second half of this message was dropped
            # from the listing; restored -- confirm the exact wording.
            raise errors.ProgrammerError("Non-opcode instance passed"
                                         " to ExecOpcode")

        lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
        if lu_class is None:
            raise errors.OpCodeUnknown("Unknown opcode")

        if lu_class.REQ_BGL and not self.exclusive_BGL:
            raise errors.ProgrammerError("LUs which require the BGL cannot"
                                         " be chained to granular ones.")

        assert lu_class.REQ_BGL, "ChainOpCode is still BGL-only"

        if lu_class.REQ_WSSTORE:
            sstore = ssconf.WritableSimpleStore()
        else:
            sstore = ssconf.SimpleStore()

        lu = lu_class(self, op, self.context, sstore)
        # NOTE(review): two lines are missing from the listing here (one of
        # them apparently part of the disabled hooks code); the
        # prerequisite check is restored by analogy with _ExecLU -- confirm.
        lu.CheckPrereq()
        # Hooks are not run for chained LUs: the pre/post HooksMaster calls
        # that surrounded Exec were already commented out in the original.
        result = lu.Exec(self._feedback_fn)
        return result

    def LogStep(self, current, total, message):
        """Log a change in LU execution progress.

        Args:
          - current: number of the step being started
          - total: total number of steps
          - message: description of the step

        """
        logger.Debug("Step %d/%d %s" % (current, total, message))
        self._feedback_fn("STEP %d/%d %s" % (current, total, message))

    def LogWarning(self, message, hint=None):
        """Log a warning to the logs and the user.

        Args:
          - message: the warning text
          - hint: optional extra line shown to the user after the warning

        """
        logger.Error(message)
        self._feedback_fn(" - WARNING: %s" % message)
        if hint:
            self._feedback_fn(" Hint: %s" % hint)

    def LogInfo(self, message):
        """Log an informational message to the logs and the user.

        Args:
          - message: the text to log

        """
        # NOTE(review): the logger call was dropped from the listing;
        # restored by analogy with LogStep/LogWarning -- confirm.
        logger.Info(message)
        self._feedback_fn(" - INFO: %s" % message)
class HooksMaster(object):
    """Hooks master.

    This class distributes the run commands to the nodes based on the
    logical unit it was created for.

    In order to remove the direct dependency on the rpc module, the
    constructor needs a function which actually does the remote
    call. This will usually be rpc.call_hooks_runner, but any function
    which behaves the same works.

    NOTE(review): this class was restored from a listing that had lost its
    indentation and several structural lines (attribute assignments in
    __init__, ``def _BuildEnv``, ``if``/``for``/``else`` openers, bare
    ``return``/``continue`` statements).  They were re-inserted from the
    way the surrounding code uses them; confirm against the upstream file.

    """
    def __init__(self, callfn, proc, lu):
        """Store the collaborators and precompute environment and nodes.

        Args:
          - callfn: function doing the remote call; it is invoked as
                    callfn(node_list, hpath, phase, env)
          - proc: the processor, used here for LogWarning
          - lu: the logical unit whose hooks are to be run

        """
        self.callfn = callfn
        self.proc = proc
        # NOTE(review): self.op is read by _BuildEnv; taking it from the LU
        # is inferred -- confirm.
        self.op = lu.op
        self.lu = lu
        self.env, node_list_pre, node_list_post = self._BuildEnv()
        self.node_list = {constants.HOOKS_PHASE_PRE: node_list_pre,
                          constants.HOOKS_PHASE_POST: node_list_post}

    def _BuildEnv(self):
        """Compute the environment and the target nodes.

        Based on the opcode and the current node list, this builds the
        environment for the hooks and the target node list for the run.

        Returns:
          a tuple (env, pre_nodes, post_nodes); the two node collections
          are frozensets and are empty when the LU has no hooks path

        """
        env = {
            "PATH": "/sbin:/bin:/usr/sbin:/usr/bin",
            "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION,
            "GANETI_OP_CODE": self.op.OP_ID,
            "GANETI_OBJECT_TYPE": self.lu.HTYPE,
            "GANETI_DATA_DIR": constants.DATA_DIR,
            }

        if self.lu.HPATH is not None:
            lu_env, lu_nodes_pre, lu_nodes_post = self.lu.BuildHooksEnv()
            if lu_env:
                # LU-provided variables are exported under a GANETI_ prefix.
                for key in lu_env:
                    env["GANETI_" + key] = lu_env[key]
        else:
            lu_nodes_pre = lu_nodes_post = []

        return env, frozenset(lu_nodes_pre), frozenset(lu_nodes_post)

    def _RunWrapper(self, node_list, hpath, phase):
        """Simple wrapper over self.callfn.

        This method fixes the environment before doing the rpc call.

        Args:
          - node_list: the nodes to run the hooks on
          - hpath: the hooks path
          - phase: the hooks phase

        Returns:
          whatever self.callfn returns

        """
        # Work on a copy so the per-call additions never leak into self.env.
        env = self.env.copy()
        env["GANETI_HOOKS_PHASE"] = phase
        env["GANETI_HOOKS_PATH"] = hpath
        if self.lu.sstore is not None:
            env["GANETI_CLUSTER"] = self.lu.sstore.GetClusterName()
            env["GANETI_MASTER"] = self.lu.sstore.GetMasterNode()

        # Keys and values are all passed on as strings; items() (rather
        # than iteritems()) behaves the same here and also works on
        # Python 3.
        env = dict((str(key), str(val)) for key, val in env.items())

        return self.callfn(node_list, hpath, phase, env)

    def RunPhase(self, phase):
        """Run all the scripts for a phase.

        This is the main function of the HookMaster.

        Args:
          - phase: the hooks phase to run

        Returns:
          the result of the hooks multi-node rpc call, or None if there
          are no nodes to run the phase on

        Raises:
          errors.HooksFailure: pre phase only, if the call gave no results
          errors.HooksAbort: pre phase only, if any script reported
              HKR_FAIL; carries a list of (node, script, output) tuples

        """
        if not self.node_list[phase]:
            # empty node list, we should not attempt to run this as either
            # we're in the cluster init phase and the rpc client part can't
            # even attempt to run, or this LU doesn't do hooks at all
            return
        hpath = self.lu.HPATH
        results = self._RunWrapper(self.node_list[phase], hpath, phase)
        if phase == constants.HOOKS_PHASE_PRE:
            errs = []
            if not results:
                raise errors.HooksFailure("Communication failure")
            for node_name in results:
                res = results[node_name]
                if res is False or not isinstance(res, list):
                    # An unreachable node only produces a warning.
                    self.proc.LogWarning("Communication failure to node %s" %
                                         node_name)
                    continue
                for script, hkr, output in res:
                    if hkr == constants.HKR_FAIL:
                        # NOTE: the "string_escape" codec only exists on
                        # Python 2.
                        output = output.strip().encode("string_escape")
                        errs.append((node_name, script, output))
            if errs:
                raise errors.HooksAbort(errs)
        return results

    def RunConfigUpdate(self):
        """Run the special configuration update hook

        This is a special hook that runs only on the master after each
        top-level LU if the configuration has been updated.

        Raises:
          errors.ProgrammerError: if the LU has no sstore

        """
        phase = constants.HOOKS_PHASE_POST
        hpath = constants.HOOKS_NAME_CFGUPDATE
        if self.lu.sstore is None:
            raise errors.ProgrammerError("Null sstore on config update hook")
        nodes = [self.lu.sstore.GetMasterNode()]
        results = self._RunWrapper(nodes, hpath, phase)