"""
+import os
+import errno
import sys
import SocketServer
import time
from ganeti import utils
from ganeti import errors
from ganeti import ssconf
-from ganeti import logger
from ganeti import workerpool
from ganeti import rpc
+from ganeti import bootstrap
CLIENT_REQUEST_WORKERS = 16
def __init__(self, address, rqhandler):
"""IOServer constructor
- Args:
- address: the address to bind this IOServer to
- rqhandler: RequestHandler type object
+ @param address: the address to bind this IOServer to
+ @param rqhandler: RequestHandler type object
"""
SocketServer.UnixStreamServer.__init__(self, address, rqhandler)
try:
result = self._ops.handle_request(method, args)
success = True
+ except errors.GenericError, err:
+ success = False
+ result = (err.__class__.__name__, err.args)
except:
logging.error("Unexpected exception", exc_info=True)
err = sys.exc_info()
job_id = args
return queue.ArchiveJob(job_id)
+ elif method == luxi.REQ_AUTOARCHIVE_JOBS:
+ (age, timeout) = args
+ return queue.AutoArchiveJobs(age, timeout)
+
elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
- (job_id, fields, previous) = args
- return queue.WaitForJobChanges(job_id, fields, previous)
+ (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
+ return queue.WaitForJobChanges(job_id, fields, prev_job_info,
+ prev_log_serial, timeout)
elif method == luxi.REQ_QUERY_JOBS:
(job_ids, fields) = args
op = opcodes.OpQueryExports(nodes=nodes)
return self._Query(op)
+ elif method == luxi.REQ_QUERY_CONFIG_VALUES:
+ fields = args
+ op = opcodes.OpQueryConfigValues(output_fields=fields)
+ return self._Query(op)
+
+ elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
+ drain_flag = args
+ return queue.SetDrainFlag(drain_flag)
+
else:
raise ValueError("Invalid operation")
"""
proc = mcpu.Processor(self.server.context)
# TODO: Where should log messages go?
- return proc.ExecOpCode(op, self._DummyLog)
+ return proc.ExecOpCode(op, self._DummyLog, None)
class GanetiContext(object):
self.cfg.AddNode(node)
# If preseeding fails it'll not be added
- self.jobqueue.AddNode(node.name)
+ self.jobqueue.AddNode(node)
# Add the new node to the Ganeti Lock Manager
self.glm.add(locking.LEVEL_NODE, node.name)
"""
# Synchronize the queue again
- self.jobqueue.AddNode(node.name)
+ self.jobqueue.AddNode(node)
def RemoveNode(self, name):
"""Removes a node from the configuration and lock manager.
def ParseOptions():
"""Parse the command line options.
- Returns:
- (options, args) as from OptionParser.parse_args()
+ @return: (options, args) as from OptionParser.parse_args()
"""
parser = OptionParser(description="Ganeti master daemon",
return options, args
+def CheckAgreement():
+  """Check the agreement on who is the master.
+
+  The function uses a very simple algorithm: we must get more positive
+  than negative answers. Since in most of the cases we are the master,
+  we'll use our own config file for getting the node list. In the
+  future we could collect the current node list from our (possibly
+  obsolete) known nodes.
+
+  In order to account for cold-start of all nodes, we retry for up to
+  a minute until we get a real answer as the top-voted one. If the
+  nodes are more out-of-sync, for now manual startup of the master
+  should be attempted.
+
+  Note that for an even number of nodes cluster, we need at least half
+  of the nodes (beside ourselves) to vote for us. This creates a
+  problem on two-node clusters, since in this case we require the
+  other node to be up too to confirm our status.
+
+  @return: True if we believe we hold the master role, False otherwise
+
+  """
+  myself = utils.HostInfo().name
+  # Temporary instantiation of a config writer, used only to get the
+  # node list; dropped immediately so we don't keep the config open.
+  cfg = config.ConfigWriter()
+  node_list = cfg.GetNodeList()
+  del cfg
+  # Up to 6 tries, 10 seconds apart (~1 minute), waiting for the
+  # top-voted answer to come from a real node rather than a timeout.
+  retries = 6
+  while retries > 0:
+    votes = bootstrap.GatherMasterVotes(node_list)
+    if not votes:
+      # empty node list, this is a one node cluster
+      return True
+    if votes[0][0] is None:
+      # most nodes did not answer; sleep and retry
+      retries -= 1
+      time.sleep(10)
+      continue
+    break
+  if retries == 0:
+    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
+                     " after multiple retries. Aborting startup")
+    return False
+  # here a real node is at the top of the list
+  all_votes = sum(item[1] for item in votes)
+  top_node, top_votes = votes[0]
+  result = False
+  if top_node != myself:
+    logging.critical("It seems we are not the master (top-voted node"
+                     " is %s with %d out of %d votes)", top_node, top_votes,
+                     all_votes)
+  elif top_votes < all_votes - top_votes:
+    # we need a strict majority of the votes to claim the master role
+    logging.critical("It seems we are not the master (%d votes for,"
+                     " %d votes against)", top_votes, all_votes - top_votes)
+  else:
+    result = True
+
+  return result
+
+
+
def main():
  """Main function"""
  utils.debug = options.debug
  utils.no_fork = True
-  ssconf.CheckMaster(options.debug)
+  # When we will fork, close inherited file descriptors early so the
+  # daemon does not keep the parent's open files alive.
+  if options.fork:
+    utils.CloseFDs()
-  master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
+  # First rpc.Init()/Shutdown() cycle: used only for the pre-fork
+  # checks (master-role verification and runtime dir/socket setup).
+  rpc.Init()
+  try:
+    ssconf.CheckMaster(options.debug)
+
+    # we believe we are the master, let's ask the other nodes...
+    if not CheckAgreement():
+      return
+
+    dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
+            (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
+           ]
+    for dirname, mode in dirs:
+      try:
+        os.mkdir(dirname, mode)
+      except EnvironmentError, err:
+        if err.errno != errno.EEXIST:
+          # BUGFIX: report the directory that actually failed instead of
+          # unconditionally naming constants.SOCKET_DIR; also renamed the
+          # loop variable from "dir", which shadowed the builtin.
+          raise errors.GenericError("Cannot create needed directory"
+                                    " '%s': %s" % (dirname, err))
+      if not os.path.isdir(dirname):
+        raise errors.GenericError("%s is not a directory" % dirname)
+
+    # This is safe to do as the pid file guarantees against
+    # concurrent execution.
+    utils.RemoveFile(constants.MASTER_SOCKET)
+
+    master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
+  finally:
+    rpc.Shutdown()
  # become a daemon
  if options.fork:
-    utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
-                    noclose_fds=[master.fileno()])
+    utils.Daemonize(logfile=constants.LOG_MASTERDAEMON)
  utils.WritePidFile(constants.MASTERD_PID)
+  try:
+    utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
+                       stderr_logging=not options.fork)
-  logger.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
-                      stderr_logging=not options.fork)
-
-  logging.info("ganeti master daemon startup")
+    logging.info("Ganeti master daemon startup")
-  # activate ip
-  master_node = ssconf.SimpleStore().GetMasterNode()
-  if not rpc.call_node_start_master(master_node, False):
-    logging.error("Can't activate master IP address")
+    # Second rpc.Init()/Shutdown() cycle: the one actually used for
+    # serving, re-initialized after the daemonization fork.
+    rpc.Init()
+    try:
+      # activate ip
+      master_node = ssconf.SimpleConfigReader().GetMasterNode()
+      if not rpc.RpcRunner.call_node_start_master(master_node, False):
+        logging.error("Can't activate master IP address")
-  master.setup_queue()
-  try:
-    master.serve_forever()
+      master.setup_queue()
+      try:
+        master.serve_forever()
+      finally:
+        master.server_cleanup()
+    finally:
+      rpc.Shutdown()
  finally:
-    master.server_cleanup()
    utils.RemovePidFile(constants.MASTERD_PID)
+    utils.RemoveFile(constants.MASTER_SOCKET)
if __name__ == "__main__":