X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/62c9ec9297bffb3e9054cdf665ad8e24a3a4d796..ee844e2001c61fc404e004b8f8f4e4968ea7f9ad:/daemons/ganeti-noded diff --git a/daemons/ganeti-noded b/daemons/ganeti-noded index 495b65c..30087f1 100755 --- a/daemons/ganeti-noded +++ b/daemons/ganeti-noded @@ -21,32 +21,56 @@ """Ganeti node daemon""" -# functions in this module need to have a given name structure, so: -# pylint: disable-msg=C0103 +# pylint: disable-msg=C0103,W0142 + +# C0103: Functions in this module need to have a given name structure, +# and the name of the daemon doesn't match + +# W0142: Used * or ** magic, since we do use it extensively in this +# module import os import sys -import traceback -import SocketServer -import errno import logging import signal from optparse import OptionParser from ganeti import backend -from ganeti import logger from ganeti import constants from ganeti import objects from ganeti import errors from ganeti import jstore +from ganeti import daemon from ganeti import http from ganeti import utils +from ganeti import storage + +import ganeti.http.server # pylint: disable-msg=W0611 queue_lock = None +def _PrepareQueueLock(): + """Try to prepare the queue lock. + + @return: None for success, otherwise an exception object + + """ + global queue_lock # pylint: disable-msg=W0603 + + if queue_lock is not None: + return None + + # Prepare job queue + try: + queue_lock = jstore.InitAndVerifyQueue(must_lock=False) + return None + except EnvironmentError, err: + return err + + def _RequireJobQueueLock(fn): """Decorator for job queue manipulating functions. @@ -56,6 +80,9 @@ def _RequireJobQueueLock(fn): def wrapper(*args, **kwargs): # Locking in exclusive, blocking mode because there could be several # children running at the same time. Waiting up to 10 seconds. + if _PrepareQueueLock() is not None: + raise errors.JobQueueError("Job queue failed initialization," + " cannot update jobs") queue_lock.Exclusive(blocking=True, timeout=QUEUE_LOCK_TIMEOUT) try: return fn(*args, **kwargs) @@ -65,36 +92,54 @@ def _RequireJobQueueLock(fn): return wrapper -class NodeDaemonRequestHandler(http.HTTPRequestHandler): +class NodeHttpServer(http.server.HttpServer): """The server implementation. This class holds all methods exposed over the RPC interface. """ - def HandleRequest(self): + # too many public methods, and unused args - all methods get params + # due to the API + # pylint: disable-msg=R0904,W0613 + def __init__(self, *args, **kwargs): + http.server.HttpServer.__init__(self, *args, **kwargs) + self.noded_pid = os.getpid() + + def HandleRequest(self, req): """Handle a request. """ - if self.command.upper() != "PUT": - raise http.HTTPBadRequest() + if req.request_method.upper() != http.HTTP_PUT: + raise http.HttpBadRequest() - path = self.path + path = req.request_path if path.startswith("/"): path = path[1:] method = getattr(self, "perspective_%s" % path, None) if method is None: - raise httperror.HTTPNotFound() + raise http.HttpNotFound() try: - try: - return method(self.post_data) - except: - logging.exception("Error in RPC call") - raise + rvalue = method(req.request_body) + return True, rvalue + + except backend.RPCFail, err: + # our custom failure exception; str(err) works fine if the + # exception was constructed with a single argument, and in + # this case, err.message == err.args[0] == str(err) + return (False, str(err)) except errors.QuitGanetiException, err: # Tell parent to quit - os.kill(self.server.noded_pid, signal.SIGTERM) + logging.info("Shutting down the node daemon, arguments: %s", + str(err.args)) + os.kill(self.noded_pid, signal.SIGTERM) + # And return the error's arguments, which must be already in + # correct tuple format + return err.args + except Exception, err: + logging.exception("Error in RPC call") + return False, "Error while executing backend function: %s" % str(err) # the new block devices -------------------------- @@ -107,7 +152,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") - return backend.CreateBlockDevice(bdev, size, owner, on_primary, info) + return backend.BlockdevCreate(bdev, size, owner, on_primary, info) @staticmethod def perspective_blockdev_remove(params): @@ -116,7 +161,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ bdev_s = params[0] bdev = objects.Disk.FromDict(bdev_s) - return backend.RemoveBlockDevice(bdev) + return backend.BlockdevRemove(bdev) @staticmethod def perspective_blockdev_rename(params): @@ -124,7 +169,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ devlist = [(objects.Disk.FromDict(ds), uid) for ds, uid in params] - return backend.RenameBlockDevices(devlist) + return backend.BlockdevRename(devlist) @staticmethod def perspective_blockdev_assemble(params): @@ -135,7 +180,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") - return backend.AssembleBlockDevice(bdev, owner, on_primary) + return backend.BlockdevAssemble(bdev, owner, on_primary) @staticmethod def perspective_blockdev_shutdown(params): @@ -146,7 +191,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") - return backend.ShutdownBlockDevice(bdev) + return backend.BlockdevShutdown(bdev) @staticmethod def perspective_blockdev_addchildren(params): @@ -161,7 +206,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s] if bdev is None or ndevs.count(None) > 0: raise ValueError("can't unserialize data!") - return backend.MirrorAddChildren(bdev, ndevs) + return backend.BlockdevAddchildren(bdev, ndevs) @staticmethod def perspective_blockdev_removechildren(params): @@ -176,7 +221,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s] if bdev is None or ndevs.count(None) > 0: raise ValueError("can't unserialize data!") - return backend.MirrorRemoveChildren(bdev, ndevs) + return backend.BlockdevRemovechildren(bdev, ndevs) @staticmethod def perspective_blockdev_getmirrorstatus(params): @@ -184,8 +229,9 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ disks = [objects.Disk.FromDict(dsk_s) - for dsk_s in params] - return backend.GetMirrorStatus(disks) + for dsk_s in params] + return [status.ToDict() + for status in backend.BlockdevGetmirrorstatus(disks)] @staticmethod def perspective_blockdev_find(params): @@ -195,7 +241,12 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ disk = objects.Disk.FromDict(params[0]) - return backend.FindBlockDevice(disk) + + result = backend.BlockdevFind(disk) + if result is None: + return None + + return result.ToDict() @staticmethod def perspective_blockdev_snapshot(params): @@ -207,7 +258,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ cfbd = objects.Disk.FromDict(params[0]) - return backend.SnapshotBlockDevice(cfbd) + return backend.BlockdevSnapshot(cfbd) @staticmethod def perspective_blockdev_grow(params): @@ -216,15 +267,71 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ cfbd = objects.Disk.FromDict(params[0]) amount = params[1] - return backend.GrowBlockDevice(cfbd, amount) + return backend.BlockdevGrow(cfbd, amount) @staticmethod def perspective_blockdev_close(params): """Closes the given block devices. """ - disks = [objects.Disk.FromDict(cf) for cf in params] - return backend.CloseBlockDevices(disks) + disks = [objects.Disk.FromDict(cf) for cf in params[1]] + return backend.BlockdevClose(params[0], disks) + + @staticmethod + def perspective_blockdev_getsize(params): + """Compute the sizes of the given block devices. + + """ + disks = [objects.Disk.FromDict(cf) for cf in params[0]] + return backend.BlockdevGetsize(disks) + + @staticmethod + def perspective_blockdev_export(params): + """Compute the sizes of the given block devices. + + """ + disk = objects.Disk.FromDict(params[0]) + dest_node, dest_path, cluster_name = params[1:] + return backend.BlockdevExport(disk, dest_node, dest_path, cluster_name) + + # blockdev/drbd specific methods ---------- + + @staticmethod + def perspective_drbd_disconnect_net(params): + """Disconnects the network connection of drbd disks. + + Note that this is only valid for drbd disks, so the members of the + disk list must all be drbd devices. + + """ + nodes_ip, disks = params + disks = [objects.Disk.FromDict(cf) for cf in disks] + return backend.DrbdDisconnectNet(nodes_ip, disks) + + @staticmethod + def perspective_drbd_attach_net(params): + """Attaches the network connection of drbd disks. + + Note that this is only valid for drbd disks, so the members of the + disk list must all be drbd devices. + + """ + nodes_ip, disks, instance_name, multimaster = params + disks = [objects.Disk.FromDict(cf) for cf in disks] + return backend.DrbdAttachNet(nodes_ip, disks, + instance_name, multimaster) + + @staticmethod + def perspective_drbd_wait_sync(params): + """Wait until DRBD disks are synched. + + Note that this is only valid for drbd disks, so the members of the + disk list must all be drbd devices. + + """ + nodes_ip, disks = params + disks = [objects.Disk.FromDict(cf) for cf in disks] + return backend.DrbdWaitSync(nodes_ip, disks) # export/import -------------------------- @@ -237,7 +344,10 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): dest_node = params[1] instance = objects.Instance.FromDict(params[2]) cluster_name = params[3] - return backend.ExportSnapshot(disk, dest_node, instance, cluster_name) + dev_idx = params[4] + debug = params[5] + return backend.ExportSnapshot(disk, dest_node, instance, + cluster_name, dev_idx, debug) @staticmethod def perspective_finalize_export(params): @@ -258,10 +368,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ path = params[0] - einfo = backend.ExportInfo(path) - if einfo is None: - return einfo - return einfo.Dumps() + return backend.ExportInfo(path) @staticmethod def perspective_export_list(params): @@ -285,7 +392,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): # volume -------------------------- @staticmethod - def perspective_volume_list(params): + def perspective_lv_list(params): """Query the list of logical volumes in a given volume group. """ @@ -299,6 +406,32 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ return backend.ListVolumeGroups() + # Storage -------------------------- + + @staticmethod + def perspective_storage_list(params): + """Get list of storage units. + + """ + (su_name, su_args, name, fields) = params + return storage.GetStorage(su_name, *su_args).List(name, fields) + + @staticmethod + def perspective_storage_modify(params): + """Modify a storage unit. + + """ + (su_name, su_args, name, changes) = params + return storage.GetStorage(su_name, *su_args).Modify(name, changes) + + @staticmethod + def perspective_storage_execute(params): + """Execute an operation on a storage unit. + + """ + (su_name, su_args, name, op) = params + return storage.GetStorage(su_name, *su_args).Execute(name, op) + # bridge -------------------------- @staticmethod @@ -316,28 +449,30 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """Install an OS on a given instance. """ - inst_s, os_disk, swap_disk = params + inst_s = params[0] inst = objects.Instance.FromDict(inst_s) - return backend.AddOSToInstance(inst, os_disk, swap_disk) + reinstall = params[1] + debug = params[2] + return backend.InstanceOsAdd(inst, reinstall, debug) @staticmethod def perspective_instance_run_rename(params): """Runs the OS rename script for an instance. """ - inst_s, old_name, os_disk, swap_disk = params + inst_s, old_name, debug = params inst = objects.Instance.FromDict(inst_s) - return backend.RunRenameInstance(inst, old_name, os_disk, swap_disk) + return backend.RunRenameInstance(inst, old_name, debug) @staticmethod def perspective_instance_os_import(params): """Run the import function of an OS onto a given instance. """ - inst_s, os_disk, swap_disk, src_node, src_image, cluster_name = params + inst_s, src_node, src_images, cluster_name, debug = params inst = objects.Instance.FromDict(inst_s) - return backend.ImportOSIntoInstance(inst, os_disk, swap_disk, - src_node, src_image, cluster_name) + return backend.ImportOSIntoInstance(inst, src_node, src_images, + cluster_name, debug) @staticmethod def perspective_instance_shutdown(params): @@ -345,7 +480,8 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ instance = objects.Instance.FromDict(params[0]) - return backend.ShutdownInstance(instance) + timeout = params[1] + return backend.InstanceShutdown(instance, timeout) @staticmethod def perspective_instance_start(params): @@ -353,8 +489,33 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ instance = objects.Instance.FromDict(params[0]) - extra_args = params[1] - return backend.StartInstance(instance, extra_args) + return backend.StartInstance(instance) + + @staticmethod + def perspective_migration_info(params): + """Gather information about an instance to be migrated. + + """ + instance = objects.Instance.FromDict(params[0]) + return backend.MigrationInfo(instance) + + @staticmethod + def perspective_accept_instance(params): + """Prepare the node to accept an instance. + + """ + instance, info, target = params + instance = objects.Instance.FromDict(instance) + return backend.AcceptInstance(instance, info, target) + + @staticmethod + def perspective_finalize_migration(params): + """Finalize the instance migration. + + """ + instance, info, success = params + instance = objects.Instance.FromDict(instance) + return backend.FinalizeMigration(instance, info, success) @staticmethod def perspective_instance_migrate(params): @@ -362,6 +523,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ instance, target, live = params + instance = objects.Instance.FromDict(instance) return backend.MigrateInstance(instance, target, live) @staticmethod @@ -371,29 +533,37 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ instance = objects.Instance.FromDict(params[0]) reboot_type = params[1] - extra_args = params[2] - return backend.RebootInstance(instance, reboot_type, extra_args) + shutdown_timeout = params[2] + return backend.InstanceReboot(instance, reboot_type, shutdown_timeout) @staticmethod def perspective_instance_info(params): """Query instance information. """ - return backend.GetInstanceInfo(params[0]) + return backend.GetInstanceInfo(params[0], params[1]) + + @staticmethod + def perspective_instance_migratable(params): + """Query whether the specified instance can be migrated. + + """ + instance = objects.Instance.FromDict(params[0]) + return backend.GetInstanceMigratable(instance) @staticmethod def perspective_all_instances_info(params): """Query information about all instances. """ - return backend.GetAllInstancesInfo() + return backend.GetAllInstancesInfo(params[0]) @staticmethod def perspective_instance_list(params): """Query the list of running instances. """ - return backend.GetInstanceList() + return backend.GetInstanceList(params[0]) # node -------------------------- @@ -406,12 +576,19 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): live_port_needed=params[4], source=params[0]) @staticmethod + def perspective_node_has_ip_address(params): + """Checks if a node has the given ip address. + + """ + return utils.OwnIpAddress(params[0]) + + @staticmethod def perspective_node_info(params): """Query node information. """ - vgname = params[0] - return backend.GetNodeInfo(vgname) + vgname, hypervisor_type = params + return backend.GetNodeInfo(vgname, hypervisor_type) @staticmethod def perspective_node_add(params): @@ -433,7 +610,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """Promote this node to master status. """ - return backend.StartMaster(params[0]) + return backend.StartMaster(params[0], params[1]) @staticmethod def perspective_node_stop_master(params): @@ -447,7 +624,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """Cleanup after leaving a cluster. """ - return backend.LeaveCluster() + return backend.LeaveCluster(params[0]) @staticmethod def perspective_node_volumes(params): @@ -456,6 +633,23 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ return backend.NodeVolumes() + @staticmethod + def perspective_node_demote_from_mc(params): + """Demote a node from the master candidate role. + + """ + return backend.DemoteFromMC() + + + @staticmethod + def perspective_node_powercycle(params): + """Tries to powercycle the nod. + + """ + hypervisor_type = params[0] + return backend.PowercycleNode(hypervisor_type) + + # cluster -------------------------- @staticmethod @@ -482,6 +676,14 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ return backend.GetMasterInfo() + @staticmethod + def perspective_write_ssconf_files(params): + """Write ssconf files. + + """ + (values,) = params + return backend.WriteSsconfFiles(values) + # os ----------------------- @staticmethod @@ -489,7 +691,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """Query detailed information about existing OSes. """ - return [os.ToDict() for os in backend.DiagnoseOS()] + return backend.DiagnoseOS() @staticmethod def perspective_os_get(params): @@ -497,10 +699,7 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ name = params[0] - try: - os_obj = backend.OSFromDisk(name) - except errors.InvalidOS, err: - os_obj = objects.OS.FromInvalidOS(err) + os_obj = backend.OSFromDisk(name) return os_obj.ToDict() # hooks ----------------------- @@ -533,7 +732,10 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """ duration = params[0] - return utils.TestDelay(duration) + status, rval = utils.TestDelay(duration) + if not status: + raise backend.RPCFail(rval) + return rval # file storage --------------- @@ -588,115 +790,79 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler): """Rename a job queue file. """ - (old, new) = params + # TODO: What if a file fails to rename? + return [backend.JobQueueRename(old, new) for old, new in params] - return backend.JobQueueRename(old, new) + @staticmethod + def perspective_jobqueue_set_drain(params): + """Set/unset the queue drain flag. + """ + drain_flag = params[0] + return backend.JobQueueSetDrainFlag(drain_flag) -class NodeDaemonHttpServer(http.HTTPServer): - def __init__(self, server_address): - http.HTTPServer.__init__(self, server_address, NodeDaemonRequestHandler) - self.noded_pid = os.getpid() - def serve_forever(self): - """Handle requests until told to quit.""" - sighandler = utils.SignalHandler([signal.SIGINT, signal.SIGTERM]) - try: - while not sighandler.called: - self.handle_request() - # TODO: There could be children running at this point - finally: - sighandler.Reset() + # hypervisor --------------- + @staticmethod + def perspective_hypervisor_validate_params(params): + """Validate the hypervisor parameters. -class ForkingHTTPServer(SocketServer.ForkingMixIn, NodeDaemonHttpServer): - """Forking HTTP Server. + """ + (hvname, hvparams) = params + return backend.ValidateHVParams(hvname, hvparams) - This inherits from ForkingMixIn and HTTPServer in order to fork for each - request we handle. This allows more requests to be handled concurrently. - """ +def CheckNoded(_, args): + """Initial checks whether to run or exit with a failure. + """ + if args: # noded doesn't take any arguments + print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" % + sys.argv[0]) + sys.exit(constants.EXIT_FAILURE) -def ParseOptions(): - """Parse the command line options. - Returns: - (options, args) as from OptionParser.parse_args() +def ExecNoded(options, _): + """Main node daemon function, executed with the PID file held. """ - parser = OptionParser(description="Ganeti node daemon", - usage="%prog [-f] [-d]", - version="%%prog (ganeti) %s" % - constants.RELEASE_VERSION) - - parser.add_option("-f", "--foreground", dest="fork", - help="Don't detach from the current terminal", - default=True, action="store_false") - parser.add_option("-d", "--debug", dest="debug", - help="Enable some debug messages", - default=False, action="store_true") - options, args = parser.parse_args() - return options, args + # Read SSL certificate + if options.ssl: + ssl_params = http.HttpSslParams(ssl_key_path=options.ssl_key, + ssl_cert_path=options.ssl_cert) + else: + ssl_params = None + + err = _PrepareQueueLock() + if err is not None: + # this might be some kind of file-system/permission error; while + # this breaks the job queue functionality, we shouldn't prevent + # startup of the whole node daemon because of this + logging.critical("Can't init/verify the queue, proceeding anyway: %s", err) + + mainloop = daemon.Mainloop() + server = NodeHttpServer(mainloop, options.bind_address, options.port, + ssl_params=ssl_params, ssl_verify_peer=True) + server.Start() + try: + mainloop.Run() + finally: + server.Stop() def main(): """Main function for the node daemon. """ - global queue_lock - - options, args = ParseOptions() - utils.debug = options.debug - for fname in (constants.SSL_CERT_FILE,): - if not os.path.isfile(fname): - print "config %s not there, will not run." % fname - sys.exit(5) - - try: - port = utils.GetNodeDaemonPort() - pwdata = utils.GetNodeDaemonPassword() - except errors.ConfigurationError, err: - print "Cluster configuration incomplete: '%s'" % str(err) - sys.exit(5) - - # create the various SUB_RUN_DIRS, if not existing, so that we handle the - # situation where RUN_DIR is tmpfs - for dir_name in constants.SUB_RUN_DIRS: - if not os.path.exists(dir_name): - try: - os.mkdir(dir_name, 0755) - except EnvironmentError, err: - if err.errno != errno.EEXIST: - print ("Node setup wrong, cannot create directory %s: %s" % - (dir_name, err)) - sys.exit(5) - if not os.path.isdir(dir_name): - print ("Node setup wrong, %s is not a directory" % dir_name) - sys.exit(5) - - # become a daemon - if options.fork: - utils.Daemonize(logfile=constants.LOG_NODESERVER) - - utils.WritePidFile(constants.NODED_PID) - - logger.SetupLogging(logfile=constants.LOG_NODESERVER, debug=options.debug, - stderr_logging=not options.fork) - logging.info("ganeti node daemon startup") - - # Prepare job queue - queue_lock = jstore.InitAndVerifyQueue(must_lock=False) - - if options.fork: - server = ForkingHTTPServer(('', port)) - else: - server = NodeDaemonHttpServer(('', port)) - - try: - server.serve_forever() - finally: - utils.RemovePidFile(constants.NODED_PID) + parser = OptionParser(description="Ganeti node daemon", + usage="%prog [-f] [-d] [-p port] [-b ADDRESS]", + version="%%prog (ganeti) %s" % + constants.RELEASE_VERSION) + dirs = [(val, constants.RUN_DIRS_MODE) for val in constants.SUB_RUN_DIRS] + dirs.append((constants.LOG_OS_DIR, 0750)) + dirs.append((constants.LOCK_DIR, 1777)) + daemon.GenericMain(constants.NODED, parser, dirs, CheckNoded, ExecNoded) if __name__ == '__main__':