import Queue
import random
import signal
-import simplejson
import logging
from cStringIO import StringIO
from ganeti import workerpool
from ganeti import rpc
from ganeti import bootstrap
+from ganeti import serializer
CLIENT_REQUEST_WORKERS = 16
def __init__(self, address, rqhandler):
"""IOServer constructor
- Args:
- address: the address to bind this IOServer to
- rqhandler: RequestHandler type object
+ @param address: the address to bind this IOServer to
+ @param rqhandler: RequestHandler type object
"""
SocketServer.UnixStreamServer.__init__(self, address, rqhandler)
while True:
msg = self.read_message()
if msg is None:
- logging.info("client closed connection")
+ logging.debug("client closed connection")
break
- request = simplejson.loads(msg)
+ request = serializer.LoadJson(msg)
logging.debug("request: %s", request)
if not isinstance(request, dict):
logging.error("wrong request received: %s", msg)
luxi.KEY_RESULT: result,
}
logging.debug("response: %s", response)
- self.send_message(simplejson.dumps(response))
+ self.send_message(serializer.DumpJson(response))
def read_message(self):
while not self._msgs:
# TODO: Parameter validation
if method == luxi.REQ_SUBMIT_JOB:
+ logging.info("Received new job")
ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
return queue.SubmitJob(ops)
+ if method == luxi.REQ_SUBMIT_MANY_JOBS:
+ logging.info("Received multiple jobs")
+ jobs = []
+ for ops in args:
+ jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops])
+ return queue.SubmitManyJobs(jobs)
+
elif method == luxi.REQ_CANCEL_JOB:
job_id = args
+ logging.info("Received job cancel request for %s", job_id)
return queue.CancelJob(job_id)
elif method == luxi.REQ_ARCHIVE_JOB:
job_id = args
+ logging.info("Received job archive request for %s", job_id)
return queue.ArchiveJob(job_id)
elif method == luxi.REQ_AUTOARCHIVE_JOBS:
- age = args
- return queue.AutoArchiveJobs(age)
+ (age, timeout) = args
+ logging.info("Received job autoarchive request for age %s, timeout %s",
+ age, timeout)
+ return queue.AutoArchiveJobs(age, timeout)
elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
(job_id, fields, prev_job_info, prev_log_serial, timeout) = args
+ logging.info("Received job poll request for %s", job_id)
return queue.WaitForJobChanges(job_id, fields, prev_job_info,
prev_log_serial, timeout)
elif method == luxi.REQ_QUERY_JOBS:
(job_ids, fields) = args
+ if isinstance(job_ids, (tuple, list)) and job_ids:
+ msg = ", ".join(job_ids)
+ else:
+ msg = str(job_ids)
+ logging.info("Received job query request for %s", msg)
return queue.QueryJobs(job_ids, fields)
elif method == luxi.REQ_QUERY_INSTANCES:
- (names, fields) = args
- op = opcodes.OpQueryInstances(names=names, output_fields=fields)
+ (names, fields, use_locking) = args
+ logging.info("Received instance query request for %s", names)
+ if use_locking:
+ raise errors.OpPrereqError("Sync queries are not allowed")
+ op = opcodes.OpQueryInstances(names=names, output_fields=fields,
+ use_locking=use_locking)
return self._Query(op)
elif method == luxi.REQ_QUERY_NODES:
- (names, fields) = args
- op = opcodes.OpQueryNodes(names=names, output_fields=fields)
+ (names, fields, use_locking) = args
+ logging.info("Received node query request for %s", names)
+ if use_locking:
+ raise errors.OpPrereqError("Sync queries are not allowed")
+ op = opcodes.OpQueryNodes(names=names, output_fields=fields,
+ use_locking=use_locking)
return self._Query(op)
elif method == luxi.REQ_QUERY_EXPORTS:
- nodes = args
- op = opcodes.OpQueryExports(nodes=nodes)
+ nodes, use_locking = args
+ if use_locking:
+ raise errors.OpPrereqError("Sync queries are not allowed")
+ logging.info("Received exports query request")
+ op = opcodes.OpQueryExports(nodes=nodes, use_locking=use_locking)
return self._Query(op)
elif method == luxi.REQ_QUERY_CONFIG_VALUES:
fields = args
+ logging.info("Received config values query request for %s", fields)
op = opcodes.OpQueryConfigValues(output_fields=fields)
return self._Query(op)
+ elif method == luxi.REQ_QUERY_CLUSTER_INFO:
+ logging.info("Received cluster info query request")
+ op = opcodes.OpQueryClusterInfo()
+ return self._Query(op)
+
elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
drain_flag = args
+ logging.info("Received queue drain flag change request to %s",
+ drain_flag)
return queue.SetDrainFlag(drain_flag)
else:
- raise ValueError("Invalid operation")
+ logging.info("Received invalid request '%s'", method)
+ raise ValueError("Invalid operation '%s'" % method)
# No-op: accepts any positional arguments and discards them.
# NOTE(review): presumably passed where a logging callback is required but
# no output is wanted -- confirm against the callers.
def _DummyLog(self, *args):
pass
def ParseOptions():
"""Parse the command line options.
- Returns:
- (options, args) as from OptionParser.parse_args()
+ @return: (options, args) as from OptionParser.parse_args()
"""
parser = OptionParser(description="Ganeti master daemon",
parser.add_option("-d", "--debug", dest="debug",
help="Enable some debug messages",
default=False, action="store_true")
+ parser.add_option("--no-voting", dest="no_voting",
+ help="Do not check that the nodes agree on this node"
+ " being the master and start the daemon unconditionally",
+ default=False, action="store_true")
options, args = parser.parse_args()
return options, args
continue
break
if retries == 0:
- logging.critical("Cluster inconsistent, most of the nodes didn't answer"
- " after multiple retries. Aborting startup")
- return False
+ logging.critical("Cluster inconsistent, most of the nodes didn't answer"
+ " after multiple retries. Aborting startup")
+ return False
# here a real node is at the top of the list
all_votes = sum(item[1] for item in votes)
top_node, top_votes = votes[0]
result = False
if top_node != myself:
logging.critical("It seems we are not the master (top-voted node"
- " is %s)", top_node)
+ " is %s with %d out of %d votes)", top_node, top_votes,
+ all_votes)
elif top_votes < all_votes - top_votes:
logging.critical("It seems we are not the master (%d votes for,"
" %d votes against)", top_votes, all_votes - top_votes)
utils.debug = options.debug
utils.no_fork = True
+ if options.fork:
+ utils.CloseFDs()
+
rpc.Init()
try:
ssconf.CheckMaster(options.debug)
# we believe we are the master, let's ask the other nodes...
- if not CheckAgreement():
- return
+ if options.no_voting:
+ sys.stdout.write("The 'no voting' option has been selected.\n")
+ sys.stdout.write("This is dangerous, please confirm by"
+ " typing uppercase 'yes': ")
+ sys.stdout.flush()
+ confirmation = sys.stdin.readline().strip()
+ if confirmation != "YES":
+ print "Aborting."
+ return
+ else:
+ if not CheckAgreement():
+ return
- try:
- os.mkdir(constants.SOCKET_DIR, constants.SOCKET_DIR_MODE)
- except EnvironmentError, err:
- if err.errno != errno.EEXIST:
- raise errors.GenericError("Cannot create socket directory"
- " '%s': %s" % (constants.SOCKET_DIR, err))
+ dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
+ (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
+ ]
+ utils.EnsureDirs(dirs)
# This is safe to do as the pid file guarantees against
# concurrent execution.
# become a daemon
if options.fork:
- utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
- noclose_fds=[master.fileno()])
+ utils.Daemonize(logfile=constants.LOG_MASTERDAEMON)
utils.WritePidFile(constants.MASTERD_PID)
try:
utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
- stderr_logging=not options.fork)
+ stderr_logging=not options.fork, multithreaded=True)
logging.info("Ganeti master daemon startup")