X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/df5a573043c2341b56d2ba338a8b70a7fe5f7ddd..81f7ea25fa942612873eb159e24c44e24ce84e69:/lib/jqueue.py

diff --git a/lib/jqueue.py b/lib/jqueue.py
index 84f6120..34ac6a7 100644
--- a/lib/jqueue.py
+++ b/lib/jqueue.py
@@ -1447,28 +1447,39 @@ class _JobDependencyManager:
                  " not one of '%s' as required" %
                  (dep_job_id, status, utils.CommaJoin(dep_status)))
 
-  @locking.ssynchronized(_LOCK)
+  def _RemoveEmptyWaitersUnlocked(self):
+    """Remove all jobs without actual waiters.
+
+    """
+    for job_id in [job_id for (job_id, waiters) in self._waiters.items()
+                   if not waiters]:
+      del self._waiters[job_id]
+
   def NotifyWaiters(self, job_id):
     """Notifies all jobs waiting for a certain job ID.
 
+    @attention: Do not call until L{CheckAndRegister} returned a status other
+      than C{WAITDEP} for C{job_id}, or behaviour is undefined
     @type job_id: string
    @param job_id: Job ID
 
    """
    assert ht.TString(job_id)
 
-    jobs = self._waiters.pop(job_id, None)
+    self._lock.acquire()
+    try:
+      self._RemoveEmptyWaitersUnlocked()
+
+      jobs = self._waiters.pop(job_id, None)
+    finally:
+      self._lock.release()
+
     if jobs:
       # Re-add jobs to workerpool
       logging.debug("Re-adding %s jobs which were waiting for job %s",
                     len(jobs), job_id)
       self._enqueue_fn(jobs)
 
-    # Remove all jobs without actual waiters
-    for job_id in [job_id for (job_id, waiters) in self._waiters.items()
-                   if not waiters]:
-      del self._waiters[job_id]
-
 
 def _RequireOpenQueue(fn):
   """Decorator for "public" functions.
@@ -1495,6 +1506,31 @@ def _RequireOpenQueue(fn):
   return wrapper
 
 
+def _RequireNonDrainedQueue(fn):
+  """Decorator checking for a non-drained queue.
+
+  To be used with functions submitting new jobs.
+
+  """
+  def wrapper(self, *args, **kwargs):
+    """Wrapper function.
+
+    @raise errors.JobQueueDrainError: if the job queue is marked for draining
+
+    """
+    # Ok when sharing the big job queue lock, as the drain file is created when
+    # the lock is exclusive.
+    # Needs access to protected member, pylint: disable=W0212
+    if self._drained:
+      raise errors.JobQueueDrainError("Job queue is drained, refusing job")
+
+    if not self._accepting_jobs:
+      raise errors.JobQueueError("Job queue is shutting down, refusing job")
+
+    return fn(self, *args, **kwargs)
+  return wrapper
+
+
 class JobQueue(object):
   """Queue used to manage the jobs.
 
@@ -1526,6 +1562,9 @@ class JobQueue(object):
     self.acquire = self._lock.acquire
     self.release = self._lock.release
 
+    # Accept jobs by default
+    self._accepting_jobs = True
+
     # Initialize the queue, and acquire the filelock.
     # This ensures no other process is working on the job queue.
     self._queue_filelock = jstore.InitAndVerifyQueue(must_lock=True)
@@ -1545,8 +1584,9 @@ class JobQueue(object):
 
     # TODO: Check consistency across nodes
 
-    self._queue_size = 0
+    self._queue_size = None
     self._UpdateQueueSizeUnlocked()
+    assert ht.TInt(self._queue_size)
     self._drained = jstore.CheckDrainFlag()
 
     # Job dependencies
@@ -1619,6 +1659,12 @@ class JobQueue(object):
 
     logging.info("Job queue inspection finished")
 
+  def _GetRpc(self, address_list):
+    """Gets RPC runner with context.
+
+    """
+    return rpc.JobQueueRunner(self.context, address_list)
+
   @locking.ssynchronized(_LOCK)
   @_RequireOpenQueue
   def AddNode(self, node):
@@ -1632,7 +1678,7 @@ class JobQueue(object):
     assert node_name != self._my_hostname
 
     # Clean queue directory on added node
-    result = rpc.RpcRunner.call_jobqueue_purge(node_name)
+    result = self._GetRpc(None).call_jobqueue_purge(node_name)
     msg = result.fail_msg
     if msg:
       logging.warning("Cannot cleanup queue directory on node %s: %s",
@@ -1650,13 +1696,15 @@ class JobQueue(object):
     # Upload current serial file
     files.append(constants.JOB_QUEUE_SERIAL_FILE)
 
+    # Static address list
+    addrs = [node.primary_ip]
+
     for file_name in files:
       # Read file content
       content = utils.ReadFile(file_name)
 
-      result = rpc.RpcRunner.call_jobqueue_update([node_name],
-                                                  [node.primary_ip],
-                                                  file_name, content)
+      result = self._GetRpc(addrs).call_jobqueue_update([node_name], file_name,
+                                                        content)
       msg = result[node_name].fail_msg
       if msg:
         logging.error("Failed to upload file %s to node %s: %s",
@@ -1740,7 +1788,7 @@ class JobQueue(object):
 
     if replicate:
       names, addrs = self._GetNodeIp()
-      result = rpc.RpcRunner.call_jobqueue_update(names, addrs, file_name, data)
+      result = self._GetRpc(addrs).call_jobqueue_update(names, file_name, data)
       self._CheckRpcResult(result, self._nodes, "Updating %s" % file_name)
 
   def _RenameFilesUnlocked(self, rename):
@@ -1759,7 +1807,7 @@ class JobQueue(object):
 
     # ... and on all nodes
     names, addrs = self._GetNodeIp()
-    result = rpc.RpcRunner.call_jobqueue_rename(names, addrs, rename)
+    result = self._GetRpc(addrs).call_jobqueue_rename(names, rename)
     self._CheckRpcResult(result, self._nodes, "Renaming files (%r)" % rename)
 
   @staticmethod
@@ -2014,16 +2062,10 @@ class JobQueue(object):
     @param ops: The list of OpCodes that will become the new job.
     @rtype: L{_QueuedJob}
     @return: the job object to be queued
-    @raise errors.JobQueueDrainError: if the job queue is marked for draining
     @raise errors.JobQueueFull: if the job queue has too many jobs in it
     @raise errors.GenericError: If an opcode is not valid
 
     """
-    # Ok when sharing the big job queue lock, as the drain file is created when
-    # the lock is exclusive.
-    if self._drained:
-      raise errors.JobQueueDrainError("Job queue is drained, refusing job")
-
     if self._queue_size >= constants.JOB_QUEUE_SIZE_HARD_LIMIT:
       raise errors.JobQueueFull()
 
@@ -2055,6 +2097,7 @@ class JobQueue(object):
 
   @locking.ssynchronized(_LOCK)
   @_RequireOpenQueue
+  @_RequireNonDrainedQueue
   def SubmitJob(self, ops):
     """Create and store a new job.
 
@@ -2067,6 +2110,7 @@ class JobQueue(object):
 
   @locking.ssynchronized(_LOCK)
   @_RequireOpenQueue
+  @_RequireNonDrainedQueue
   def SubmitManyJobs(self, jobs):
     """Create and store multiple jobs.
 
@@ -2223,7 +2267,7 @@ class JobQueue(object):
     assert job.writable, "Can't update read-only job"
 
     filename = self._GetJobPath(job.id)
-    data = serializer.DumpJson(job.Serialize(), indent=False)
+    data = serializer.DumpJson(job.Serialize())
     logging.debug("Writing job %s to %s", job.id, filename)
     self._UpdateJobQueueFile(filename, data, replicate)
 
@@ -2434,6 +2478,31 @@ class JobQueue(object):
     return jobs
 
   @locking.ssynchronized(_LOCK)
+  def PrepareShutdown(self):
+    """Prepare to stop the job queue.
+
+    Disables execution of jobs in the workerpool and returns whether there are
+    any jobs currently running. If the latter is the case, the job queue is not
+    yet ready for shutdown. Once this function returns C{False}, L{Shutdown}
+    can be called without interfering with any job. Queued and unfinished jobs
+    will be resumed next time.
+
+    Once this function has been called no new job submissions will be accepted
+    (see L{_RequireNonDrainedQueue}).
+
+    @rtype: bool
+    @return: Whether there are any running jobs
+
+    """
+    if self._accepting_jobs:
+      self._accepting_jobs = False
+
+      # Tell worker pool to stop processing pending tasks
+      self._wpool.SetActive(False)
+
+    return self._wpool.HasRunningTasks()
+
+  @locking.ssynchronized(_LOCK)
   @_RequireOpenQueue
   def Shutdown(self):
     """Stops the job queue.
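
For context, the guard added by _RequireNonDrainedQueue simply stacks with the
existing decorators on SubmitJob/SubmitManyJobs. A minimal, self-contained
sketch of the same pattern follows; MiniQueue and MiniQueueError are simplified
stand-ins for illustration, not Ganeti classes:

    # Sketch of the guard-decorator pattern used by the patch above.
    class MiniQueueError(Exception):
      pass

    def _RequireNonDrainedQueue(fn):
      """Reject calls while the queue is drained or shutting down."""
      def wrapper(self, *args, **kwargs):
        if self._drained:
          raise MiniQueueError("Job queue is drained, refusing job")
        if not self._accepting_jobs:
          raise MiniQueueError("Job queue is shutting down, refusing job")
        return fn(self, *args, **kwargs)
      return wrapper

    class MiniQueue(object):
      def __init__(self):
        self._drained = False
        self._accepting_jobs = True
        self._jobs = []

      @_RequireNonDrainedQueue
      def SubmitJob(self, ops):
        self._jobs.append(ops)
        return len(self._jobs)

    queue = MiniQueue()
    queue.SubmitJob(["OP_TEST_DUMMY"])  # accepted
    queue._accepting_jobs = False       # what PrepareShutdown does first
    try:
      queue.SubmitJob(["OP_TEST_DUMMY"])
    except MiniQueueError as err:
      print("Refused: %s" % err)

Centralizing both checks in one decorator keeps the drain test out of
_SubmitJobUnlocked (removed by the hunk at old line 2014) and guarantees every
submission path refuses work once shutdown has begun.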
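The PrepareShutdown/Shutdown pair is meant to be driven as a two-step
handshake: stop accepting jobs, poll until no tasks are running, then shut
down. A caller-side sketch under stated assumptions; the ShutdownJobQueue
helper and the one-second poll interval are illustrative, not part of this
patch:

    import time

    def ShutdownJobQueue(queue):
      """Drive the two-step shutdown of a JobQueue instance."""
      # The first call flips _accepting_jobs to False and deactivates the
      # worker pool; it keeps returning True while jobs are still running.
      while queue.PrepareShutdown():
        time.sleep(1)  # hypothetical poll interval

      # No running tasks remain; queued jobs stay on disk and are resumed
      # the next time the queue starts.
      queue.Shutdown()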