X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/1e063ccd7d0791271ed6b0317e61b7bb9bf25877..a6682fdcc173998be87b77043bfa56c0d12b1ca4:/lib/daemon.py diff --git a/lib/daemon.py b/lib/daemon.py index 98b9fce..c26a176 100644 --- a/lib/daemon.py +++ b/lib/daemon.py @@ -1,7 +1,7 @@ # # -# Copyright (C) 2006, 2007, 2008 Google Inc. +# Copyright (C) 2006, 2007, 2008, 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,9 +25,7 @@ import asyncore import asynchat import collections -import grp import os -import pwd import signal import logging import sched @@ -39,10 +37,9 @@ import sys from ganeti import utils from ganeti import constants from ganeti import errors - - -_DEFAULT_RUN_USER = "root" -_DEFAULT_RUN_GROUP = "root" +from ganeti import netutils +from ganeti import ssconf +from ganeti import runtime class SchedulerBreakout(Exception): @@ -99,23 +96,6 @@ class GanetiBaseAsyncoreDispatcher(asyncore.dispatcher): return False -def FormatAddress(family, address): - """Format a client's address - - @type family: integer - @param family: socket family (one of socket.AF_*) - @type address: family specific (usually tuple) - @param address: address, as reported by this class - - """ - if family == socket.AF_INET and len(address) == 2: - return "%s:%d" % address - elif family == socket.AF_UNIX and len(address) == 3: - return "pid=%s, uid=%s, gid=%s" % address - else: - return str(address) - - class AsyncStreamServer(GanetiBaseAsyncoreDispatcher): """A stream server to use with asyncore. @@ -156,9 +136,9 @@ class AsyncStreamServer(GanetiBaseAsyncoreDispatcher): if self.family == socket.AF_UNIX: # override the client address, as for unix sockets nothing meaningful # is passed in from accept anyway - client_address = utils.GetSocketCredentials(connected_socket) + client_address = netutils.GetSocketCredentials(connected_socket) logging.info("Accepted connection from %s", - FormatAddress(self.family, client_address)) + netutils.FormatAddress(client_address, family=self.family)) self.handle_connection(connected_socket, client_address) def handle_connection(self, connected_socket, client_address): @@ -175,7 +155,8 @@ class AsyncTerminatedMessageStream(asynchat.async_chat): separator. For each complete message handle_message is called. """ - def __init__(self, connected_socket, peer_address, terminator, family): + def __init__(self, connected_socket, peer_address, terminator, family, + unhandled_limit): """AsyncTerminatedMessageStream constructor. @type connected_socket: socket.socket @@ -185,6 +166,8 @@ class AsyncTerminatedMessageStream(asynchat.async_chat): @param terminator: terminator separating messages in the stream @type family: integer @param family: socket family + @type unhandled_limit: integer or None + @param unhandled_limit: maximum unanswered messages """ # python 2.4/2.5 uses conn=... while 2.6 has sock=... we have to cheat by @@ -197,22 +180,36 @@ class AsyncTerminatedMessageStream(asynchat.async_chat): self.family = family self.peer_address = peer_address self.terminator = terminator + self.unhandled_limit = unhandled_limit self.set_terminator(terminator) self.ibuffer = [] - self.next_incoming_message = 0 + self.receive_count = 0 + self.send_count = 0 self.oqueue = collections.deque() + self.iqueue = collections.deque() # this method is overriding an asynchat.async_chat method def collect_incoming_data(self, data): self.ibuffer.append(data) + def _can_handle_message(self): + return (self.unhandled_limit is None or + (self.receive_count < self.send_count + self.unhandled_limit) and + not self.iqueue) + # this method is overriding an asynchat.async_chat method def found_terminator(self): message = "".join(self.ibuffer) self.ibuffer = [] - message_id = self.next_incoming_message - self.next_incoming_message += 1 - self.handle_message(message, message_id) + message_id = self.receive_count + # We need to increase the receive_count after checking if the message can + # be handled, but before calling handle_message + can_handle = self._can_handle_message() + self.receive_count += 1 + if can_handle: + self.handle_message(message, message_id) + else: + self.iqueue.append((message, message_id)) def handle_message(self, message, message_id): """Handle a terminated message. @@ -240,10 +237,17 @@ class AsyncTerminatedMessageStream(asynchat.async_chat): """ # If we just append the message we received to the output queue, this # function can be safely called by multiple threads at the same time, and - # we don't need locking, since deques are thread safe. + # we don't need locking, since deques are thread safe. handle_write in the + # asyncore thread will handle the next input message if there are any + # enqueued. self.oqueue.append(message) # this method is overriding an asyncore.dispatcher method + def readable(self): + # read from the socket if we can handle the next requests + return self._can_handle_message() and asynchat.async_chat.readable(self) + + # this method is overriding an asyncore.dispatcher method def writable(self): # the output queue may become full just after we called writable. This only # works if we know we'll have something else waking us up from the select, @@ -253,13 +257,19 @@ class AsyncTerminatedMessageStream(asynchat.async_chat): # this method is overriding an asyncore.dispatcher method def handle_write(self): if self.oqueue: + # if we have data in the output queue, then send_message was called. + # this means we can process one more message from the input queue, if + # there are any. data = self.oqueue.popleft() self.push(data + self.terminator) + self.send_count += 1 + if self.iqueue: + self.handle_message(*self.iqueue.popleft()) self.initiate_send() def close_log(self): logging.info("Closing connection from %s", - FormatAddress(self.family, self.peer_address)) + netutils.FormatAddress(self.peer_address, family=self.family)) self.close() # this method is overriding an asyncore.dispatcher method @@ -279,13 +289,14 @@ class AsyncUDPSocket(GanetiBaseAsyncoreDispatcher): """An improved asyncore udp socket. """ - def __init__(self): + def __init__(self, family): """Constructor for AsyncUDPSocket """ GanetiBaseAsyncoreDispatcher.__init__(self) self._out_queue = [] - self.create_socket(socket.AF_INET, socket.SOCK_DGRAM) + self._family = family + self.create_socket(family, socket.SOCK_DGRAM) # this method is overriding an asyncore.dispatcher method def handle_connect(self): @@ -300,7 +311,12 @@ class AsyncUDPSocket(GanetiBaseAsyncoreDispatcher): constants.MAX_UDP_DATA_SIZE) if recv_result is not None: payload, address = recv_result - ip, port = address + if self._family == socket.AF_INET6: + # we ignore 'flow info' and 'scope id' as we don't need them + ip, port, _, _ = address + else: + ip, port = address + self.handle_datagram(payload, ip, port) def handle_datagram(self, payload, ip, port): @@ -472,10 +488,61 @@ class Mainloop(object): self._signal_wait.append(owner) -def GenericMain(daemon_name, optionparser, dirs, check_fn, exec_fn, +def _VerifyDaemonUser(daemon_name): + """Verifies the process uid matches the configured uid. + + This method verifies that a daemon is started as the user it is + intended to be run + + @param daemon_name: The name of daemon to be started + @return: A tuple with the first item indicating success or not, + the second item current uid and third with expected uid + + """ + getents = runtime.GetEnts() + running_uid = os.getuid() + daemon_uids = { + constants.MASTERD: getents.masterd_uid, + constants.RAPI: getents.rapi_uid, + constants.NODED: getents.noded_uid, + constants.CONFD: getents.confd_uid, + } + + return (daemon_uids[daemon_name] == running_uid, running_uid, + daemon_uids[daemon_name]) + + +def _BeautifyError(err): + """Try to format an error better. + + Since we're dealing with daemon startup errors, in many cases this + will be due to socket error and such, so we try to format these cases better. + + @param err: an exception object + @rtype: string + @return: the formatted error description + + """ + try: + if isinstance(err, socket.error): + return "Socket-related error: %s (errno=%s)" % (err.args[1], err.args[0]) + elif isinstance(err, EnvironmentError): + if err.filename is None: + return "%s (errno=%s)" % (err.strerror, err.errno) + else: + return "%s (file %s) (errno=%s)" % (err.strerror, err.filename, + err.errno) + else: + return str(err) + except Exception: # pylint: disable-msg=W0703 + logging.exception("Error while handling existing error %s", err) + return "%s" % str(err) + + +def GenericMain(daemon_name, optionparser, + check_fn, prepare_fn, exec_fn, multithreaded=False, console_logging=False, - default_ssl_cert=None, default_ssl_key=None, - user=_DEFAULT_RUN_USER, group=_DEFAULT_RUN_GROUP): + default_ssl_cert=None, default_ssl_key=None): """Shared main function for daemons. @type daemon_name: string @@ -483,13 +550,14 @@ def GenericMain(daemon_name, optionparser, dirs, check_fn, exec_fn, @type optionparser: optparse.OptionParser @param optionparser: initialized optionparser with daemon-specific options (common -f -d options will be handled by this module) - @type dirs: list of (string, integer) - @param dirs: list of directories that must be created if they don't exist, - and the permissions to be used to create them @type check_fn: function which accepts (options, args) @param check_fn: function that checks start conditions and exits if they're not met - @type exec_fn: function which accepts (options, args) + @type prepare_fn: function which accepts (options, args) + @param prepare_fn: function that is run before forking, or None; + it's result will be passed as the third parameter to exec_fn, or + if None was passed in, we will just pass None to exec_fn + @type exec_fn: function which accepts (options, args, prepare_results) @param exec_fn: function that's executed with the daemon's pid file held, and runs the daemon itself. @type multithreaded: bool @@ -501,10 +569,6 @@ def GenericMain(daemon_name, optionparser, dirs, check_fn, exec_fn, @param default_ssl_cert: Default SSL certificate path @type default_ssl_key: string @param default_ssl_key: Default SSL key path - @param user: Default user to run as - @type user: string - @param group: Default group to run as - @type group: string """ optionparser.add_option("-f", "--foreground", dest="fork", @@ -521,15 +585,22 @@ def GenericMain(daemon_name, optionparser, dirs, check_fn, exec_fn, choices=["no", "yes", "only"]) if daemon_name in constants.DAEMONS_PORTS: - default_bind_address = "0.0.0.0" - default_port = utils.GetDaemonPort(daemon_name) + default_bind_address = constants.IP4_ADDRESS_ANY + family = ssconf.SimpleStore().GetPrimaryIPFamily() + # family will default to AF_INET if there is no ssconf file (e.g. when + # upgrading a cluster from 2.2 -> 2.3. This is intended, as Ganeti clusters + # <= 2.2 can not be AF_INET6 + if family == netutils.IP6Address.family: + default_bind_address = constants.IP6_ADDRESS_ANY + + default_port = netutils.GetDaemonPort(daemon_name) # For networked daemons we allow choosing the port and bind address optionparser.add_option("-p", "--port", dest="port", help="Network port (default: %s)" % default_port, default=default_port, type="int") optionparser.add_option("-b", "--bind", dest="bind_address", - help=("Bind address (default: %s)" % + help=("Bind address (default: '%s')" % default_bind_address), default=default_bind_address, metavar="ADDRESS") @@ -549,7 +620,8 @@ def GenericMain(daemon_name, optionparser, dirs, check_fn, exec_fn, metavar="SSL_CERT_PATH") # Disable the use of fork(2) if the daemon uses threads - utils.no_fork = multithreaded + if multithreaded: + utils.DisableFork() options, args = optionparser.parse_args() @@ -568,31 +640,46 @@ def GenericMain(daemon_name, optionparser, dirs, check_fn, exec_fn, # once and have a proper validation (isfile returns False on directories) # at the same time. + result, running_uid, expected_uid = _VerifyDaemonUser(daemon_name) + if not result: + msg = ("%s started using wrong user ID (%d), expected %d" % + (daemon_name, running_uid, expected_uid)) + print >> sys.stderr, msg + sys.exit(constants.EXIT_FAILURE) + if check_fn is not None: check_fn(options, args) - utils.EnsureDirs(dirs) - if options.fork: - try: - uid = pwd.getpwnam(user).pw_uid - gid = grp.getgrnam(group).gr_gid - except KeyError: - raise errors.ConfigurationError("User or group not existing on system:" - " %s:%s" % (user, group)) utils.CloseFDs() - utils.Daemonize(constants.DAEMONS_LOGFILES[daemon_name], uid, gid) + wpipe = utils.Daemonize(logfile=constants.DAEMONS_LOGFILES[daemon_name]) + else: + wpipe = None - utils.WritePidFile(daemon_name) + utils.WritePidFile(utils.DaemonPidFileName(daemon_name)) try: - utils.SetupLogging(logfile=constants.DAEMONS_LOGFILES[daemon_name], - debug=options.debug, - stderr_logging=not options.fork, - multithreaded=multithreaded, - program=daemon_name, - syslog=options.syslog, - console_logging=console_logging) - logging.info("%s daemon startup", daemon_name) - exec_fn(options, args) + try: + utils.SetupLogging(logfile=constants.DAEMONS_LOGFILES[daemon_name], + debug=options.debug, + stderr_logging=not options.fork, + multithreaded=multithreaded, + program=daemon_name, + syslog=options.syslog, + console_logging=console_logging) + if callable(prepare_fn): + prep_results = prepare_fn(options, args) + else: + prep_results = None + logging.info("%s daemon startup", daemon_name) + except Exception, err: + utils.WriteErrorToFD(wpipe, _BeautifyError(err)) + raise + + if wpipe is not None: + # we're done with the preparation phase, we close the pipe to + # let the parent know it's safe to exit + os.close(wpipe) + + exec_fn(options, args, prep_results) finally: - utils.RemovePidFile(daemon_name) + utils.RemoveFile(utils.DaemonPidFileName(daemon_name))