X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/dcb939711bb4ed751a71f522e41c9af986bfe96e..3956cee12cc47c8c5c63411a2a0b3532841f920c:/lib/mcpu.py diff --git a/lib/mcpu.py b/lib/mcpu.py index daf7923..a802051 100644 --- a/lib/mcpu.py +++ b/lib/mcpu.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +# # # Copyright (C) 2006, 2007 Google Inc. @@ -29,133 +29,230 @@ are two kinds of classes defined: """ -import os -import os.path -import time - from ganeti import opcodes -from ganeti import logger from ganeti import constants -from ganeti import utils from ganeti import errors from ganeti import rpc from ganeti import cmdlib -from ganeti import config from ganeti import ssconf +from ganeti import logger +from ganeti import locking + class Processor(object): """Object which runs OpCodes""" DISPATCH_TABLE = { # Cluster - opcodes.OpInitCluster: cmdlib.LUInitCluster, opcodes.OpDestroyCluster: cmdlib.LUDestroyCluster, opcodes.OpQueryClusterInfo: cmdlib.LUQueryClusterInfo, - opcodes.OpClusterCopyFile: cmdlib.LUClusterCopyFile, - opcodes.OpRunClusterCommand: cmdlib.LURunClusterCommand, opcodes.OpVerifyCluster: cmdlib.LUVerifyCluster, - opcodes.OpMasterFailover: cmdlib.LUMasterFailover, opcodes.OpDumpClusterConfig: cmdlib.LUDumpClusterConfig, + opcodes.OpRenameCluster: cmdlib.LURenameCluster, + opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks, + opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams, # node lu opcodes.OpAddNode: cmdlib.LUAddNode, opcodes.OpQueryNodes: cmdlib.LUQueryNodes, - opcodes.OpQueryNodeData: cmdlib.LUQueryNodeData, opcodes.OpQueryNodeVolumes: cmdlib.LUQueryNodeVolumes, opcodes.OpRemoveNode: cmdlib.LURemoveNode, # instance lu opcodes.OpCreateInstance: cmdlib.LUCreateInstance, + opcodes.OpReinstallInstance: cmdlib.LUReinstallInstance, opcodes.OpRemoveInstance: cmdlib.LURemoveInstance, + opcodes.OpRenameInstance: cmdlib.LURenameInstance, opcodes.OpActivateInstanceDisks: cmdlib.LUActivateInstanceDisks, opcodes.OpShutdownInstance: cmdlib.LUShutdownInstance, opcodes.OpStartupInstance: cmdlib.LUStartupInstance, + opcodes.OpRebootInstance: cmdlib.LURebootInstance, opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks, - opcodes.OpAddMDDRBDComponent: cmdlib.LUAddMDDRBDComponent, - opcodes.OpRemoveMDDRBDComponent: cmdlib.LURemoveMDDRBDComponent, opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks, opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance, opcodes.OpConnectConsole: cmdlib.LUConnectConsole, opcodes.OpQueryInstances: cmdlib.LUQueryInstances, opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData, - opcodes.OpSetInstanceParms: cmdlib.LUSetInstanceParms, + opcodes.OpSetInstanceParams: cmdlib.LUSetInstanceParams, + opcodes.OpGrowDisk: cmdlib.LUGrowDisk, # os lu opcodes.OpDiagnoseOS: cmdlib.LUDiagnoseOS, # exports lu opcodes.OpQueryExports: cmdlib.LUQueryExports, opcodes.OpExportInstance: cmdlib.LUExportInstance, + opcodes.OpRemoveExport: cmdlib.LURemoveExport, + # tags lu + opcodes.OpGetTags: cmdlib.LUGetTags, + opcodes.OpSearchTags: cmdlib.LUSearchTags, + opcodes.OpAddTags: cmdlib.LUAddTags, + opcodes.OpDelTags: cmdlib.LUDelTags, + # test lu + opcodes.OpTestDelay: cmdlib.LUTestDelay, + opcodes.OpTestAllocator: cmdlib.LUTestAllocator, } - - def __init__(self): + def __init__(self, context): """Constructor for Processor + Args: + - feedback_fn: the feedback function (taking one string) to be run when + interesting events are happening + """ + self.context = context + self._feedback_fn = None + self.exclusive_BGL = False + + def _ExecLU(self, lu): + """Logical Unit execution sequence. + """ - self.cfg = None - self.sstore = None + write_count = self.context.cfg.write_count + lu.CheckPrereq() + hm = HooksMaster(rpc.call_hooks_runner, self, lu) + h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE) + lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results, + self._feedback_fn, None) + try: + result = lu.Exec(self._feedback_fn) + h_results = hm.RunPhase(constants.HOOKS_PHASE_POST) + result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results, + self._feedback_fn, result) + finally: + # FIXME: This needs locks if not lu_class.REQ_BGL + if write_count != self.context.cfg.write_count: + hm.RunConfigUpdate() + + return result + + def _LockAndExecLU(self, lu, level): + """Execute a Logical Unit, with the needed locks. + + This is a recursive function that starts locking the given level, and + proceeds up, till there are no more locks to acquire. Then it executes the + given LU and its opcodes. + + """ + if level in lu.needed_locks: + # This gives a chance to LUs to make last-minute changes after acquiring + # locks at any preceding level. + lu.DeclareLocks(level) + needed_locks = lu.needed_locks[level] + share = lu.share_locks[level] + # This is always safe to do, as we can't acquire more/less locks than + # what was requested. + lu.needed_locks[level] = self.context.glm.acquire(level, + needed_locks, + shared=share) + try: + result = self._LockAndExecLU(lu, level + 1) + finally: + if lu.needed_locks[level]: + self.context.glm.release(level) + else: + result = self._ExecLU(lu) + + return result def ExecOpCode(self, op, feedback_fn): """Execute an opcode. Args: - - cfg: the configuration in which we execute this opcode - - opcode: the opcode to be executed - - feedback_fn: the feedback function (taking one string) to be run when - interesting events are happening + op: the opcode to be executed """ if not isinstance(op, opcodes.OpCode): - raise errors.ProgrammerError, ("Non-opcode instance passed" - " to ExecOpcode") + raise errors.ProgrammerError("Non-opcode instance passed" + " to ExecOpcode") + self._feedback_fn = feedback_fn lu_class = self.DISPATCH_TABLE.get(op.__class__, None) if lu_class is None: - raise errors.OpCodeUnknown, "Unknown opcode" + raise errors.OpCodeUnknown("Unknown opcode") + + if lu_class.REQ_WSSTORE: + sstore = ssconf.WritableSimpleStore() + else: + sstore = ssconf.SimpleStore() + + # Acquire the Big Ganeti Lock exclusively if this LU requires it, and in a + # shared fashion otherwise (to prevent concurrent run with an exclusive LU. + self.context.glm.acquire(locking.LEVEL_CLUSTER, [locking.BGL], + shared=not lu_class.REQ_BGL) + try: + self.exclusive_BGL = lu_class.REQ_BGL + lu = lu_class(self, op, self.context, sstore) + lu.ExpandNames() + assert lu.needed_locks is not None, "needed_locks not set by LU" + result = self._LockAndExecLU(lu, locking.LEVEL_INSTANCE) + finally: + self.context.glm.release(locking.LEVEL_CLUSTER) + self.exclusive_BGL = False - if lu_class.REQ_CLUSTER and self.cfg is None: - self.cfg = config.ConfigWriter() - self.sstore = ssconf.SimpleStore() - lu = lu_class(self, op, self.cfg, self.sstore) - lu.CheckPrereq() - do_hooks = lu_class.HPATH is not None - if do_hooks: - hm = HooksMaster(rpc.call_hooks_runner, self.cfg, self.sstore, lu) - hm.RunPhase(constants.HOOKS_PHASE_PRE) - result = lu.Exec(feedback_fn) - if do_hooks: - hm.RunPhase(constants.HOOKS_PHASE_POST) return result - def ChainOpCode(self, op, feedback_fn): + def ChainOpCode(self, op): """Chain and execute an opcode. This is used by LUs when they need to execute a child LU. Args: - opcode: the opcode to be executed - - feedback_fn: the feedback function (taking one string) to be run when - interesting events are happening """ if not isinstance(op, opcodes.OpCode): - raise errors.ProgrammerError, ("Non-opcode instance passed" - " to ExecOpcode") + raise errors.ProgrammerError("Non-opcode instance passed" + " to ExecOpcode") lu_class = self.DISPATCH_TABLE.get(op.__class__, None) if lu_class is None: - raise errors.OpCodeUnknown, "Unknown opcode" + raise errors.OpCodeUnknown("Unknown opcode") - if lu_class.REQ_CLUSTER and self.cfg is None: - self.cfg = config.ConfigWriter() - self.sstore = ssconf.SimpleStore() - do_hooks = lu_class.HPATH is not None - lu = lu_class(self, op, self.cfg, self.sstore) + if lu_class.REQ_BGL and not self.exclusive_BGL: + raise errors.ProgrammerError("LUs which require the BGL cannot" + " be chained to granular ones.") + + assert lu_class.REQ_BGL, "ChainOpCode is still BGL-only" + + if lu_class.REQ_WSSTORE: + sstore = ssconf.WritableSimpleStore() + else: + sstore = ssconf.SimpleStore() + + #do_hooks = lu_class.HPATH is not None + lu = lu_class(self, op, self.context, sstore) lu.CheckPrereq() #if do_hooks: - # hm = HooksMaster(rpc.call_hooks_runner, self.cfg, self.sstore, lu) - # hm.RunPhase(constants.HOOKS_PHASE_PRE) - result = lu.Exec(feedback_fn) + # hm = HooksMaster(rpc.call_hooks_runner, self, lu) + # h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE) + # lu.HooksCallBack(constants.HOOKS_PHASE_PRE, + # h_results, self._feedback_fn, None) + result = lu.Exec(self._feedback_fn) #if do_hooks: - # hm.RunPhase(constants.HOOKS_PHASE_POST) + # h_results = hm.RunPhase(constants.HOOKS_PHASE_POST) + # result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, + # h_results, self._feedback_fn, result) return result + def LogStep(self, current, total, message): + """Log a change in LU execution progress. + + """ + logger.Debug("Step %d/%d %s" % (current, total, message)) + self._feedback_fn("STEP %d/%d %s" % (current, total, message)) + + def LogWarning(self, message, hint=None): + """Log a warning to the logs and the user. + + """ + logger.Error(message) + self._feedback_fn(" - WARNING: %s" % message) + if hint: + self._feedback_fn(" Hint: %s" % hint) + + def LogInfo(self, message): + """Log an informational message to the logs and the user. + + """ + logger.Info(message) + self._feedback_fn(" - INFO: %s" % message) + class HooksMaster(object): """Hooks master. @@ -169,15 +266,12 @@ class HooksMaster(object): which behaves the same works. """ - def __init__(self, callfn, cfg, sstore, lu): + def __init__(self, callfn, proc, lu): self.callfn = callfn - self.cfg = cfg - self.sstore = sstore + self.proc = proc self.lu = lu self.op = lu.op - self.hpath = self.lu.HPATH self.env, node_list_pre, node_list_post = self._BuildEnv() - self.node_list = {constants.HOOKS_PHASE_PRE: node_list_pre, constants.HOOKS_PHASE_POST: node_list_post} @@ -193,49 +287,82 @@ class HooksMaster(object): "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION, "GANETI_OP_CODE": self.op.OP_ID, "GANETI_OBJECT_TYPE": self.lu.HTYPE, + "GANETI_DATA_DIR": constants.DATA_DIR, } - lu_env, lu_nodes_pre, lu_nodes_post = self.lu.BuildHooksEnv() - if lu_env: - for key in lu_env: - env["GANETI_" + key] = lu_env[key] + if self.lu.HPATH is not None: + lu_env, lu_nodes_pre, lu_nodes_post = self.lu.BuildHooksEnv() + if lu_env: + for key in lu_env: + env["GANETI_" + key] = lu_env[key] + else: + lu_nodes_pre = lu_nodes_post = [] - if self.cfg is not None: - env["GANETI_CLUSTER"] = self.cfg.GetClusterName() - if self.sstore is not None: - env["GANETI_MASTER"] = self.sstore.GetMasterNode() + return env, frozenset(lu_nodes_pre), frozenset(lu_nodes_post) - for key in env: - if not isinstance(env[key], str): - env[key] = str(env[key]) + def _RunWrapper(self, node_list, hpath, phase): + """Simple wrapper over self.callfn. - return env, frozenset(lu_nodes_pre), frozenset(lu_nodes_post) + This method fixes the environment before doing the rpc call. + + """ + env = self.env.copy() + env["GANETI_HOOKS_PHASE"] = phase + env["GANETI_HOOKS_PATH"] = hpath + if self.lu.sstore is not None: + env["GANETI_CLUSTER"] = self.lu.sstore.GetClusterName() + env["GANETI_MASTER"] = self.lu.sstore.GetMasterNode() + + env = dict([(str(key), str(val)) for key, val in env.iteritems()]) + + return self.callfn(node_list, hpath, phase, env) def RunPhase(self, phase): """Run all the scripts for a phase. This is the main function of the HookMaster. + Args: + phase: the hooks phase to run + + Returns: + the result of the hooks multi-node rpc call + """ if not self.node_list[phase]: - # empty node list, we should not attempt to run this - # as most probably we're in the cluster init phase and the rpc client - # part can't even attempt to run + # empty node list, we should not attempt to run this as either + # we're in the cluster init phase and the rpc client part can't + # even attempt to run, or this LU doesn't do hooks at all return - self.env["GANETI_HOOKS_PHASE"] = str(phase) - results = self.callfn(self.node_list[phase], self.hpath, phase, self.env) + hpath = self.lu.HPATH + results = self._RunWrapper(self.node_list[phase], hpath, phase) if phase == constants.HOOKS_PHASE_PRE: errs = [] if not results: - raise errors.HooksFailure, "Communication failure" + raise errors.HooksFailure("Communication failure") for node_name in results: res = results[node_name] if res is False or not isinstance(res, list): - raise errors.HooksFailure, ("Communication failure to node %s" % - node_name) + self.proc.LogWarning("Communication failure to node %s" % node_name) + continue for script, hkr, output in res: if hkr == constants.HKR_FAIL: output = output.strip().encode("string_escape") errs.append((node_name, script, output)) if errs: - raise errors.HooksAbort, errs + raise errors.HooksAbort(errs) + return results + + def RunConfigUpdate(self): + """Run the special configuration update hook + + This is a special hook that runs only on the master after each + top-level LI if the configuration has been updated. + + """ + phase = constants.HOOKS_PHASE_POST + hpath = constants.HOOKS_NAME_CFGUPDATE + if self.lu.sstore is None: + raise errors.ProgrammerError("Null sstore on config update hook") + nodes = [self.lu.sstore.GetMasterNode()] + results = self._RunWrapper(nodes, hpath, phase)