X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/29921401f2fb372efac3d5e022a4622b37c8e07e..19bed813438c89914ead80e5f3a8e57fcb5bd5f2:/lib/mcpu.py diff --git a/lib/mcpu.py b/lib/mcpu.py index 11fa743..f9d5ab7 100644 --- a/lib/mcpu.py +++ b/lib/mcpu.py @@ -36,12 +36,37 @@ from ganeti import errors from ganeti import rpc from ganeti import cmdlib from ganeti import locking +from ganeti import utils + + +class OpExecCbBase: + """Base class for OpCode execution callbacks. + + """ + def NotifyStart(self): + """Called when we are about to execute the LU. + + This function is called when we're about to start the lu's Exec() method, + that is, after we have acquired all locks. + + """ + + def Feedback(self, *args): + """Sends feedback from the LU code to the end-user. + + """ + + def ReportLocks(self, msg): + """Report lock operations. + + """ class Processor(object): """Object which runs OpCodes""" DISPATCH_TABLE = { # Cluster + opcodes.OpPostInitCluster: cmdlib.LUPostInitCluster, opcodes.OpDestroyCluster: cmdlib.LUDestroyCluster, opcodes.OpQueryClusterInfo: cmdlib.LUQueryClusterInfo, opcodes.OpVerifyCluster: cmdlib.LUVerifyCluster, @@ -50,13 +75,19 @@ class Processor(object): opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks, opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams, opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig, + opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes, # node lu opcodes.OpAddNode: cmdlib.LUAddNode, opcodes.OpQueryNodes: cmdlib.LUQueryNodes, opcodes.OpQueryNodeVolumes: cmdlib.LUQueryNodeVolumes, + opcodes.OpQueryNodeStorage: cmdlib.LUQueryNodeStorage, + opcodes.OpModifyNodeStorage: cmdlib.LUModifyNodeStorage, + opcodes.OpRepairNodeStorage: cmdlib.LURepairNodeStorage, opcodes.OpRemoveNode: cmdlib.LURemoveNode, opcodes.OpSetNodeParams: cmdlib.LUSetNodeParams, opcodes.OpPowercycleNode: cmdlib.LUPowercycleNode, + opcodes.OpEvacuateNode: cmdlib.LUEvacuateNode, + opcodes.OpMigrateNode: cmdlib.LUMigrateNode, # instance lu opcodes.OpCreateInstance: cmdlib.LUCreateInstance, opcodes.OpReinstallInstance: cmdlib.LUReinstallInstance, @@ -68,8 +99,10 @@ class Processor(object): opcodes.OpRebootInstance: cmdlib.LURebootInstance, opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks, opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks, + opcodes.OpRecreateInstanceDisks: cmdlib.LURecreateInstanceDisks, opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance, opcodes.OpMigrateInstance: cmdlib.LUMigrateInstance, + opcodes.OpMoveInstance: cmdlib.LUMoveInstance, opcodes.OpConnectConsole: cmdlib.LUConnectConsole, opcodes.OpQueryInstances: cmdlib.LUQueryInstances, opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData, @@ -94,14 +127,54 @@ class Processor(object): def __init__(self, context): """Constructor for Processor - Args: - - feedback_fn: the feedback function (taking one string) to be run when - interesting events are happening """ self.context = context - self._feedback_fn = None + self._cbs = None self.exclusive_BGL = False self.rpc = rpc.RpcRunner(context.cfg) + self.hmclass = HooksMaster + + def _ReportLocks(self, level, names, shared, acquired): + """Reports lock operations. + + @type level: int + @param level: Lock level + @type names: list or string + @param names: Lock names + @type shared: bool + @param shared: Whether the lock should be acquired in shared mode + @type acquired: bool + @param acquired: Whether the lock has already been acquired + + """ + parts = [] + + # Build message + if acquired: + parts.append("acquired") + else: + parts.append("waiting") + + parts.append(locking.LEVEL_NAMES[level]) + + if names == locking.ALL_SET: + parts.append("ALL") + elif isinstance(names, basestring): + parts.append(names) + else: + parts.append(",".join(names)) + + if shared: + parts.append("shared") + else: + parts.append("exclusive") + + msg = "/".join(parts) + + logging.debug("LU locks %s", msg) + + if self._cbs: + self._cbs.ReportLocks(msg) def _ExecLU(self, lu): """Logical Unit execution sequence. @@ -109,15 +182,24 @@ class Processor(object): """ write_count = self.context.cfg.write_count lu.CheckPrereq() - hm = HooksMaster(self.rpc.call_hooks_runner, self, lu) + hm = HooksMaster(self.rpc.call_hooks_runner, lu) h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE) lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results, - self._feedback_fn, None) + self._Feedback, None) + + if getattr(lu.op, "dry_run", False): + # in this mode, no post-hooks are run, and the config is not + # written (as it might have been modified by another LU, and we + # shouldn't do writeout on behalf of other threads + self.LogInfo("dry-run mode requested, not actually executing" + " the operation") + return lu.dry_run_result + try: - result = lu.Exec(self._feedback_fn) + result = lu.Exec(self._Feedback) h_results = hm.RunPhase(constants.HOOKS_PHASE_POST) result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results, - self._feedback_fn, result) + self._Feedback, result) finally: # FIXME: This needs locks if not lu_class.REQ_BGL if write_count != self.context.cfg.write_count: @@ -136,8 +218,9 @@ class Processor(object): adding_locks = level in lu.add_locks acquiring_locks = level in lu.needed_locks if level not in locking.LEVELS: - if callable(self._run_notifier): - self._run_notifier() + if self._cbs: + self._cbs.NotifyStart() + result = self._ExecLU(lu) elif adding_locks and acquiring_locks: # We could both acquire and add locks at the same level, but for now we @@ -149,9 +232,13 @@ class Processor(object): share = lu.share_locks[level] if acquiring_locks: needed_locks = lu.needed_locks[level] + + self._ReportLocks(level, needed_locks, share, False) lu.acquired_locks[level] = self.context.glm.acquire(level, needed_locks, shared=share) + self._ReportLocks(level, needed_locks, share, True) + else: # adding_locks add_locks = lu.add_locks[level] lu.remove_locks[level] = add_locks @@ -159,7 +246,7 @@ class Processor(object): self.context.glm.add(level, add_locks, acquired=1, shared=share) except errors.LockError: raise errors.OpPrereqError( - "Coudn't add locks (%s), probably because of a race condition" + "Couldn't add locks (%s), probably because of a race condition" " with another job, who added them first" % add_locks) try: try: @@ -177,52 +264,63 @@ class Processor(object): return result - def ExecOpCode(self, op, feedback_fn, run_notifier): + def ExecOpCode(self, op, cbs): """Execute an opcode. @type op: an OpCode instance @param op: the opcode to be executed - @type feedback_fn: a function that takes a single argument - @param feedback_fn: this function will be used as feedback from the LU - code to the end-user - @type run_notifier: callable (no arguments) or None - @param run_notifier: this function (if callable) will be called when - we are about to call the lu's Exec() method, that - is, after we have aquired all locks + @type cbs: L{OpExecCbBase} + @param cbs: Runtime callbacks """ if not isinstance(op, opcodes.OpCode): raise errors.ProgrammerError("Non-opcode instance passed" " to ExecOpcode") - self._feedback_fn = feedback_fn - self._run_notifier = run_notifier - lu_class = self.DISPATCH_TABLE.get(op.__class__, None) - if lu_class is None: - raise errors.OpCodeUnknown("Unknown opcode") - - # Acquire the Big Ganeti Lock exclusively if this LU requires it, and in a - # shared fashion otherwise (to prevent concurrent run with an exclusive LU. - self.context.glm.acquire(locking.LEVEL_CLUSTER, [locking.BGL], - shared=not lu_class.REQ_BGL) + self._cbs = cbs try: - self.exclusive_BGL = lu_class.REQ_BGL - lu = lu_class(self, op, self.context, self.rpc) - lu.ExpandNames() - assert lu.needed_locks is not None, "needed_locks not set by LU" - result = self._LockAndExecLU(lu, locking.LEVEL_INSTANCE) + lu_class = self.DISPATCH_TABLE.get(op.__class__, None) + if lu_class is None: + raise errors.OpCodeUnknown("Unknown opcode") + + # Acquire the Big Ganeti Lock exclusively if this LU requires it, and in a + # shared fashion otherwise (to prevent concurrent run with an exclusive + # LU. + self._ReportLocks(locking.LEVEL_CLUSTER, [locking.BGL], + not lu_class.REQ_BGL, False) + try: + self.context.glm.acquire(locking.LEVEL_CLUSTER, [locking.BGL], + shared=not lu_class.REQ_BGL) + finally: + self._ReportLocks(locking.LEVEL_CLUSTER, [locking.BGL], + not lu_class.REQ_BGL, True) + try: + self.exclusive_BGL = lu_class.REQ_BGL + lu = lu_class(self, op, self.context, self.rpc) + lu.ExpandNames() + assert lu.needed_locks is not None, "needed_locks not set by LU" + result = self._LockAndExecLU(lu, locking.LEVEL_INSTANCE) + finally: + self.context.glm.release(locking.LEVEL_CLUSTER) + self.exclusive_BGL = False finally: - self.context.glm.release(locking.LEVEL_CLUSTER) - self.exclusive_BGL = False + self._cbs = None return result + def _Feedback(self, *args): + """Forward call to feedback callback function. + + """ + if self._cbs: + self._cbs.Feedback(*args) + def LogStep(self, current, total, message): """Log a change in LU execution progress. """ logging.debug("Step %d/%d %s", current, total, message) - self._feedback_fn("STEP %d/%d %s" % (current, total, message)) + self._Feedback("STEP %d/%d %s" % (current, total, message)) def LogWarning(self, message, *args, **kwargs): """Log a warning to the logs and the user. @@ -239,9 +337,9 @@ class Processor(object): message = message % tuple(args) if message: logging.warning(message) - self._feedback_fn(" - WARNING: %s" % message) + self._Feedback(" - WARNING: %s" % message) if "hint" in kwargs: - self._feedback_fn(" Hint: %s" % kwargs["hint"]) + self._Feedback(" Hint: %s" % kwargs["hint"]) def LogInfo(self, message, *args): """Log an informational message to the logs and the user. @@ -250,7 +348,7 @@ class Processor(object): if args: message = message % tuple(args) logging.info(message) - self._feedback_fn(" - INFO: %s" % message) + self._Feedback(" - INFO: %s" % message) class HooksMaster(object): @@ -265,9 +363,8 @@ class HooksMaster(object): which behaves the same works. """ - def __init__(self, callfn, proc, lu): + def __init__(self, callfn, lu): self.callfn = callfn - self.proc = proc self.lu = lu self.op = lu.op self.env, node_list_pre, node_list_post = self._BuildEnv() @@ -316,42 +413,57 @@ class HooksMaster(object): return self.callfn(node_list, hpath, phase, env) - def RunPhase(self, phase): + def RunPhase(self, phase, nodes=None): """Run all the scripts for a phase. This is the main function of the HookMaster. @param phase: one of L{constants.HOOKS_PHASE_POST} or L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase + @param nodes: overrides the predefined list of nodes for the given phase @return: the processed results of the hooks multi-node rpc call @raise errors.HooksFailure: on communication failure to the nodes + @raise errors.HooksAbort: on failure of one of the hooks """ - if not self.node_list[phase]: + if not self.node_list[phase] and not nodes: # empty node list, we should not attempt to run this as either # we're in the cluster init phase and the rpc client part can't # even attempt to run, or this LU doesn't do hooks at all return hpath = self.lu.HPATH - results = self._RunWrapper(self.node_list[phase], hpath, phase) - if phase == constants.HOOKS_PHASE_PRE: - errs = [] - if not results: - raise errors.HooksFailure("Communication failure") - for node_name in results: - res = results[node_name] - if res.offline: - continue - msg = res.RemoteFailMsg() - if msg: - self.proc.LogWarning("Communication failure to node %s: %s", - node_name, msg) - continue - for script, hkr, output in res.payload: - if hkr == constants.HKR_FAIL: + if nodes is not None: + results = self._RunWrapper(nodes, hpath, phase) + else: + results = self._RunWrapper(self.node_list[phase], hpath, phase) + errs = [] + if not results: + msg = "Communication Failure" + if phase == constants.HOOKS_PHASE_PRE: + raise errors.HooksFailure(msg) + else: + self.lu.LogWarning(msg) + return results + for node_name in results: + res = results[node_name] + if res.offline: + continue + msg = res.fail_msg + if msg: + self.lu.LogWarning("Communication failure to node %s: %s", + node_name, msg) + continue + for script, hkr, output in res.payload: + if hkr == constants.HKR_FAIL: + if phase == constants.HOOKS_PHASE_PRE: errs.append((node_name, script, output)) - if errs: - raise errors.HooksAbort(errs) + else: + if not output: + output = "(no output)" + self.lu.LogWarning("On %s script %s failed, output: %s" % + (node_name, script, output)) + if errs and phase == constants.HOOKS_PHASE_PRE: + raise errors.HooksAbort(errs) return results def RunConfigUpdate(self):