+ opctx = _OpExecContext(op, idx, "Op %s/%s" % (idx + 1, len(job.ops)),
+ timeout_strategy_factory)
+
+ if op.status not in constants.OPS_FINALIZED:
+ return opctx
+
+ # This is a job that was partially completed before master daemon
+ # shutdown, so it can be expected that some opcodes are already
+ # completed successfully (if any did error out, then the whole job
+ # should have been aborted and not resubmitted for processing).
+ logging.info("%s: opcode %s already processed, skipping",
+ opctx.log_prefix, opctx.summary)
+
+ @staticmethod
+ def _MarkWaitlock(job, op):
+ """Marks an opcode as waiting for locks.
+
+ The job's start timestamp is also set if necessary.
+
+ @type job: L{_QueuedJob}
+ @param job: Job object
+ @type op: L{_QueuedOpCode}
+ @param op: Opcode object
+
+ """
+ assert op in job.ops
+ assert op.status in (constants.OP_STATUS_QUEUED,
+ constants.OP_STATUS_WAITING)
+
+ update = False
+
+ op.result = None
+
+ if op.status == constants.OP_STATUS_QUEUED:
+ op.status = constants.OP_STATUS_WAITING
+ update = True
+
+ if op.start_timestamp is None:
+ op.start_timestamp = TimeStampNow()
+ update = True
+
+ if job.start_timestamp is None:
+ job.start_timestamp = op.start_timestamp
+ update = True
+
+ assert op.status == constants.OP_STATUS_WAITING
+
+ return update
+
+ @staticmethod
+ def _CheckDependencies(queue, job, opctx):
+ """Checks if an opcode has dependencies and if so, processes them.
+
+ @type queue: L{JobQueue}
+ @param queue: Queue object
+ @type job: L{_QueuedJob}
+ @param job: Job object
+ @type opctx: L{_OpExecContext}
+ @param opctx: Opcode execution context
+ @rtype: bool
+ @return: Whether opcode will be re-scheduled by dependency tracker
+
+ """
+ op = opctx.op
+
+ result = False
+
+ while opctx.jobdeps:
+ (dep_job_id, dep_status) = opctx.jobdeps[0]
+
+ (depresult, depmsg) = queue.depmgr.CheckAndRegister(job, dep_job_id,
+ dep_status)
+ assert ht.TNonEmptyString(depmsg), "No dependency message"
+
+ logging.info("%s: %s", opctx.log_prefix, depmsg)
+
+ if depresult == _JobDependencyManager.CONTINUE:
+ # Remove dependency and continue
+ opctx.jobdeps.pop(0)
+
+ elif depresult == _JobDependencyManager.WAIT:
+ # Need to wait for notification, dependency tracker will re-add job
+ # to workerpool
+ result = True
+ break
+
+ elif depresult == _JobDependencyManager.CANCEL:
+ # Job was cancelled, cancel this job as well
+ job.Cancel()
+ assert op.status == constants.OP_STATUS_CANCELING
+ break
+
+ elif depresult in (_JobDependencyManager.WRONGSTATUS,
+ _JobDependencyManager.ERROR):
+ # Job failed or there was an error, this job must fail
+ op.status = constants.OP_STATUS_ERROR
+ op.result = _EncodeOpError(errors.OpExecError(depmsg))
+ break
+
+ else:
+ raise errors.ProgrammerError("Unknown dependency result '%s'" %
+ depresult)
+
+ return result
+
+ def _ExecOpCodeUnlocked(self, opctx):
+ """Processes one opcode and returns the result.
+
+ """
+ op = opctx.op
+
+ assert op.status == constants.OP_STATUS_WAITING
+
+ timeout = opctx.GetNextLockTimeout()
+
+ try:
+ # Make sure not to hold queue lock while calling ExecOpCode
+ result = self.opexec_fn(op.input,
+ _OpExecCallbacks(self.queue, self.job, op),
+ timeout=timeout, priority=op.priority)
+ except mcpu.LockAcquireTimeout:
+ assert timeout is not None, "Received timeout for blocking acquire"
+ logging.debug("Couldn't acquire locks in %0.6fs", timeout)
+
+ assert op.status in (constants.OP_STATUS_WAITING,
+ constants.OP_STATUS_CANCELING)
+
+ # Was job cancelled while we were waiting for the lock?
+ if op.status == constants.OP_STATUS_CANCELING:
+ return (constants.OP_STATUS_CANCELING, None)
+
+ # Stay in waitlock while trying to re-acquire lock
+ return (constants.OP_STATUS_WAITING, None)
+ except CancelJob:
+ logging.exception("%s: Canceling job", opctx.log_prefix)
+ assert op.status == constants.OP_STATUS_CANCELING
+ return (constants.OP_STATUS_CANCELING, None)
+ except Exception, err: # pylint: disable=W0703
+ logging.exception("%s: Caught exception in %s",
+ opctx.log_prefix, opctx.summary)
+ return (constants.OP_STATUS_ERROR, _EncodeOpError(err))
+ else:
+ logging.debug("%s: %s successful",
+ opctx.log_prefix, opctx.summary)
+ return (constants.OP_STATUS_SUCCESS, result)
+
+ def __call__(self, _nextop_fn=None):
+ """Continues execution of a job.
+
+ @param _nextop_fn: Callback function for tests
+ @return: C{FINISHED} if job is fully processed, C{DEFER} if the job should
+ be deferred and C{WAITDEP} if the dependency manager
+ (L{_JobDependencyManager}) will re-schedule the job when appropriate
+
+ """
+ queue = self.queue
+ job = self.job
+
+ logging.debug("Processing job %s", job.id)
+
+ queue.acquire(shared=1)
+ try:
+ opcount = len(job.ops)
+
+ assert job.writable, "Expected writable job"
+
+ # Don't do anything for finalized jobs
+ if job.CalcStatus() in constants.JOBS_FINALIZED:
+ return self.FINISHED
+
+ # Is a previous opcode still pending?
+ if job.cur_opctx:
+ opctx = job.cur_opctx
+ job.cur_opctx = None
+ else:
+ if __debug__ and _nextop_fn:
+ _nextop_fn()
+ opctx = self._FindNextOpcode(job, self._timeout_strategy_factory)
+
+ op = opctx.op
+
+ # Consistency check
+ assert compat.all(i.status in (constants.OP_STATUS_QUEUED,
+ constants.OP_STATUS_CANCELING)
+ for i in job.ops[opctx.index + 1:])
+
+ assert op.status in (constants.OP_STATUS_QUEUED,
+ constants.OP_STATUS_WAITING,
+ constants.OP_STATUS_CANCELING)
+
+ assert (op.priority <= constants.OP_PRIO_LOWEST and
+ op.priority >= constants.OP_PRIO_HIGHEST)
+
+ waitjob = None
+
+ if op.status != constants.OP_STATUS_CANCELING:
+ assert op.status in (constants.OP_STATUS_QUEUED,
+ constants.OP_STATUS_WAITING)
+
+ # Prepare to start opcode
+ if self._MarkWaitlock(job, op):
+ # Write to disk
+ queue.UpdateJobUnlocked(job)
+
+ assert op.status == constants.OP_STATUS_WAITING
+ assert job.CalcStatus() == constants.JOB_STATUS_WAITING
+ assert job.start_timestamp and op.start_timestamp
+ assert waitjob is None
+
+ # Check if waiting for a job is necessary
+ waitjob = self._CheckDependencies(queue, job, opctx)
+
+ assert op.status in (constants.OP_STATUS_WAITING,
+ constants.OP_STATUS_CANCELING,
+ constants.OP_STATUS_ERROR)
+
+ if not (waitjob or op.status in (constants.OP_STATUS_CANCELING,
+ constants.OP_STATUS_ERROR)):
+ logging.info("%s: opcode %s waiting for locks",
+ opctx.log_prefix, opctx.summary)
+
+ assert not opctx.jobdeps, "Not all dependencies were removed"
+
+ queue.release()
+ try:
+ (op_status, op_result) = self._ExecOpCodeUnlocked(opctx)
+ finally: