import sys
import SocketServer
-import threading
import time
import collections
import Queue
from ganeti import errors
from ganeti import ssconf
from ganeti import logger
+from ganeti import workerpool
+CLIENT_REQUEST_WORKERS = 16
+
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
+class ClientRequestWorker(workerpool.BaseWorker):
+  """Worker pool task runner that serves one client connection."""
+
+  def RunTask(self, server, request, client_address):
+    """Process the request.
+
+    Adapted from ThreadingMixIn.process_request_thread, restructured so
+    that the request is closed exactly once on every path.
+
+    Args:
+      server: the server object; must provide finish_request,
+        handle_error and close_request
+      request: the request object handed to the server's process_request
+      client_address: the client address handed to process_request
+
+    """
+    # Nested try blocks instead of a combined try/except/finally, which
+    # only exists from Python 2.5 on.
+    try:
+      try:
+        server.finish_request(request, client_address)
+      except:
+        # Deliberately broad, as in ThreadingMixIn: any failure while
+        # serving the request is reported through the server's handler.
+        server.handle_error(request, client_address)
+    finally:
+      # Runs even when handle_error itself raises, so the request is not
+      # leaked, and never runs twice for the same request.
+      server.close_request(request)
+
+
class IOServer(SocketServer.UnixStreamServer):
"""IO thread class.
cleanup at shutdown.
"""
- QUEUE_PROCESSOR_SIZE = 5
-
def __init__(self, address, rqhandler, context):
"""IOServer constructor
"""
SocketServer.UnixStreamServer.__init__(self, address, rqhandler)
self.do_quit = False
- self.queue = jqueue.QueueManager()
self.context = context
- self.processors = []
# We'll only start threads once we've forked.
self.jobqueue = None
+ self.request_workers = None
signal.signal(signal.SIGINT, self.handle_quit_signals)
signal.signal(signal.SIGTERM, self.handle_quit_signals)
def setup_queue(self):
self.jobqueue = jqueue.JobQueue(self.context)
-
- def setup_processors(self):
- """Spawn the processors threads.
-
- This initializes the queue and the thread processors. It is done
- separately from the constructor because we want the clone()
- syscalls to happen after the daemonize part.
-
- """
- for i in range(self.QUEUE_PROCESSOR_SIZE):
- self.processors.append(threading.Thread(target=PoolWorker,
- args=(i, self.queue.new_queue,
- self.context)))
- for t in self.processors:
- t.start()
-
- def process_request_thread(self, request, client_address):
- """Process the request.
-
- This is copied from the code in ThreadingMixIn.
-
- """
- try:
- self.finish_request(request, client_address)
- self.close_request(request)
- except:
- self.handle_error(request, client_address)
- self.close_request(request)
+ self.request_workers = workerpool.WorkerPool(CLIENT_REQUEST_WORKERS,
+ ClientRequestWorker)
def process_request(self, request, client_address):
- """Start a new thread to process the request.
-
- This is copied from the coode in ThreadingMixIn.
+ """Add task to workerpool to process request.
"""
- t = threading.Thread(target=self.process_request_thread,
- args=(request, client_address))
- t.start()
+ self.request_workers.AddTask(self, request, client_address)
def handle_quit_signals(self, signum, frame):
print "received %s in %s" % (signum, frame)
try:
self.server_close()
utils.RemoveFile(constants.MASTER_SOCKET)
- for i in range(self.QUEUE_PROCESSOR_SIZE):
- self.queue.new_queue.put(None)
- for idx, t in enumerate(self.processors):
- logging.debug("waiting for processor thread %s...", idx)
- t.join()
- logging.debug("threads done")
finally:
+ if self.request_workers:
+ self.request_workers.TerminateWorkers()
if self.jobqueue:
self.jobqueue.Shutdown()
while True:
msg = self.read_message()
if msg is None:
- print "client closed connection"
+ logging.info("client closed connection")
break
+
request = simplejson.loads(msg)
+ logging.debug("request: %s", request)
if not isinstance(request, dict):
- print "wrong request received: %s" % msg
+ logging.error("wrong request received: %s", msg)
break
- method = request.get('request', None)
- data = request.get('data', None)
- if method is None or data is None:
- print "no method or data in request"
+
+ method = request.get(luxi.KEY_METHOD, None)
+ args = request.get(luxi.KEY_ARGS, None)
+ if method is None or args is None:
+ logging.error("no method or args in request")
break
- print "request:", method, data
- result = self._ops.handle_request(method, data)
- print "result:", result
- self.send_message(simplejson.dumps({'success': True, 'result': result}))
+
+ success = False
+ try:
+ result = self._ops.handle_request(method, args)
+ success = True
+ except:
+ logging.error("Unexpected exception", exc_info=True)
+ err = sys.exc_info()
+ result = "Caught exception: %s" % str(err[1])
+
+ response = {
+ luxi.KEY_SUCCESS: success,
+ luxi.KEY_RESULT: result,
+ }
+ logging.debug("response: %s", response)
+ self.send_message(simplejson.dumps(response))
def read_message(self):
while not self._msgs:
"""Class holding high-level client operations."""
def __init__(self, server):
self.server = server
- self._cpu = None
-
- def _getcpu(self):
- if self._cpu is None:
- self._cpu = mcpu.Processor(lambda x: None)
- return self._cpu
-
- def handle_request(self, operation, args):
- print operation, args
- if operation == "submit":
- return self.put(args)
- elif operation == "query":
- return self.query(args)
- else:
- raise ValueError("Invalid operation")
-
- def put(self, args):
- job = luxi.UnserializeJob(args)
- rid = self.server.queue.put(job)
- return rid
-
- def query(self, args):
- path = args["object"]
- fields = args["fields"]
- names = args["names"]
- if path == "instances":
- opclass = opcodes.OpQueryInstances
- elif path == "jobs":
- # early exit because job query-ing is special (not via opcodes)
- return self.query_jobs(fields, names)
- else:
- raise ValueError("Invalid object %s" % path)
- op = opclass(output_fields = fields, names=names)
- cpu = self._getcpu()
- result = cpu.ExecOpCode(op)
- return result
+ def handle_request(self, method, args):
+ queue = self.server.jobqueue
- def query_jobs(self, fields, names):
- return self.server.queue.query_jobs(fields, names)
-
-
-def JobRunner(proc, job, context):
- """Job executor.
-
- This functions processes a single job in the context of given
- processor instance.
-
- Args:
- proc: Ganeti Processor to run the job on
- job: The job to run (unserialized format)
- context: Ganeti shared context
-
- """
- job.SetStatus(opcodes.Job.STATUS_RUNNING)
- fail = False
- for idx, op in enumerate(job.data.op_list):
- job.data.op_status[idx] = opcodes.Job.STATUS_RUNNING
- try:
- job.data.op_result[idx] = proc.ExecOpCode(op)
- job.data.op_status[idx] = opcodes.Job.STATUS_SUCCESS
- except (errors.OpPrereqError, errors.OpExecError), err:
- fail = True
- job.data.op_result[idx] = str(err)
- job.data.op_status[idx] = opcodes.Job.STATUS_FAIL
- if fail:
- job.SetStatus(opcodes.Job.STATUS_FAIL)
- else:
- job.SetStatus(opcodes.Job.STATUS_SUCCESS)
+ # TODO: Parameter validation
+ if method == luxi.REQ_SUBMIT_JOB:
+ ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
+ return queue.SubmitJob(ops)
-def PoolWorker(worker_id, incoming_queue, context):
- """A worker thread function.
+ elif method == luxi.REQ_CANCEL_JOB:
+ (job_id, ) = args
+ return queue.CancelJob(job_id)
- This is the actual processor of a single thread of Job execution.
+ elif method == luxi.REQ_ARCHIVE_JOB:
+ (job_id, ) = args
+ return queue.ArchiveJob(job_id)
- Args:
- worker_id: the unique id for this worker
- incoming_queue: a queue to get jobs from
- context: the common server context, containing all shared data and
- synchronization structures.
+ elif method == luxi.REQ_QUERY_JOBS:
+ (job_ids, fields) = args
+ return queue.QueryJobs(job_ids, fields)
- """
- while True:
- logging.debug("worker %s sleeping", worker_id)
- item = incoming_queue.get(True)
- if item is None:
- break
- logging.debug("worker %s processing job %s", worker_id, item.data.job_id)
- proc = mcpu.Processor(context, feedback=lambda x: None)
- try:
- JobRunner(proc, item, context)
- except errors.GenericError, err:
- msg = "ganeti exception"
- logging.error(msg, exc_info=err)
- item.SetStatus(opcodes.Job.STATUS_FAIL, result=[msg])
- except Exception, err:
- msg = "unhandled exception"
- logging.error(msg, exc_info=err)
- item.SetStatus(opcodes.Job.STATUS_FAIL, result=[msg])
- except:
- msg = "unhandled unknown exception"
- logging.error(msg, exc_info=True)
- item.SetStatus(opcodes.Job.STATUS_FAIL, result=[msg])
- logging.debug("worker %s finish job %s", worker_id, item.data.job_id)
- logging.debug("worker %s exiting", worker_id)
+ else:
+ raise ValueError("Invalid operation")
class GanetiContext(object):
utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
noclose_fds=[master.fileno()])
- logger.SetupDaemon(constants.LOG_MASTERDAEMON, debug=options.debug)
+ logger.SetupDaemon(constants.LOG_MASTERDAEMON, debug=options.debug,
+ stderr_logging=not options.fork)
- logger.Info("ganeti master daemon startup")
+ logging.info("ganeti master daemon startup")
+ master.setup_queue()
try:
- utils.Lock('cmd', debug=options.debug)
- except errors.LockError, err:
- print >> sys.stderr, str(err)
- master.server_cleanup()
- return
-
- try:
- master.setup_processors()
- master.setup_queue()
- try:
- master.serve_forever()
- finally:
- master.server_cleanup()
+ master.serve_forever()
finally:
- utils.Unlock('cmd')
- utils.LockCleanup()
+ master.server_cleanup()
if __name__ == "__main__":