Merge branch 'devel-2.1'
[ganeti-local] / daemons / ganeti-noded
index 58d755e..732d681 100755 (executable)
 
 """Ganeti node daemon"""
 
-# functions in this module need to have a given name structure, so:
-# pylint: disable-msg=C0103
+# pylint: disable-msg=C0103,W0142
+
+# C0103: Functions in this module need to have a given name structure,
+# and the name of the daemon doesn't match
+
+# W0142: Used * or ** magic, since we do use it extensively in this
+# module
 
 import os
 import sys
-import SocketServer
 import logging
 import signal
 
@@ -41,13 +45,33 @@ from ganeti import daemon
 from ganeti import http
 from ganeti import utils
 from ganeti import storage
+from ganeti import serializer
 
-import ganeti.http.server
+import ganeti.http.server # pylint: disable-msg=W0611
 
 
 queue_lock = None
 
 
+def _PrepareQueueLock():
+  """Try to prepare the queue lock.
+
+  @return: None for success, otherwise an exception object
+
+  """
+  global queue_lock # pylint: disable-msg=W0603
+
+  if queue_lock is not None:
+    return None
+
+  # Prepare job queue
+  try:
+    queue_lock = jstore.InitAndVerifyQueue(must_lock=False)
+    return None
+  except EnvironmentError, err:
+    return err
+
+
 def _RequireJobQueueLock(fn):
   """Decorator for job queue manipulating functions.
 
@@ -57,6 +81,9 @@ def _RequireJobQueueLock(fn):
   def wrapper(*args, **kwargs):
     # Locking in exclusive, blocking mode because there could be several
     # children running at the same time. Waiting up to 10 seconds.
+    if _PrepareQueueLock() is not None:
+      raise errors.JobQueueError("Job queue failed initialization,"
+                                 " cannot update jobs")
     queue_lock.Exclusive(blocking=True, timeout=QUEUE_LOCK_TIMEOUT)
     try:
       return fn(*args, **kwargs)
@@ -66,12 +93,41 @@ def _RequireJobQueueLock(fn):
   return wrapper
 
 
+def _DecodeImportExportIO(ieio, ieioargs):
+  """Decodes import/export I/O information.
+
+  """
+  if ieio == constants.IEIO_RAW_DISK:
+    assert len(ieioargs) == 1
+    return (objects.Disk.FromDict(ieioargs[0]), )
+
+  if ieio == constants.IEIO_SCRIPT:
+    assert len(ieioargs) == 2
+    return (objects.Disk.FromDict(ieioargs[0]), ieioargs[1])
+
+  return ieioargs
+
+
+class MlockallRequestExecutor(http.server.HttpServerRequestExecutor):
+  """Custom Request Executor class that ensures NodeHttpServer children are
+  locked in RAM.
+
+  """
+  def __init__(self, *args, **kwargs):
+    utils.Mlockall()
+
+    http.server.HttpServerRequestExecutor.__init__(self, *args, **kwargs)
+
+
 class NodeHttpServer(http.server.HttpServer):
   """The server implementation.
 
   This class holds all methods exposed over the RPC interface.
 
   """
+  # too many public methods, and unused args - all methods get params
+  # due to the API
+  # pylint: disable-msg=R0904,W0613
   def __init__(self, *args, **kwargs):
     http.server.HttpServer.__init__(self, *args, **kwargs)
     self.noded_pid = os.getpid()
@@ -92,14 +148,13 @@ class NodeHttpServer(http.server.HttpServer):
       raise http.HttpNotFound()
 
     try:
-      rvalue = method(req.request_body)
-      return True, rvalue
+      result = (True, method(serializer.LoadJson(req.request_body)))
 
     except backend.RPCFail, err:
       # our custom failure exception; str(err) works fine if the
       # exception was constructed with a single argument, and in
       # this case, err.message == err.args[0] == str(err)
-      return (False, str(err))
+      result = (False, str(err))
     except errors.QuitGanetiException, err:
       # Tell parent to quit
       logging.info("Shutting down the node daemon, arguments: %s",
@@ -107,10 +162,12 @@ class NodeHttpServer(http.server.HttpServer):
       os.kill(self.noded_pid, signal.SIGTERM)
       # And return the error's arguments, which must be already in
       # correct tuple format
-      return err.args
+      result = err.args
     except Exception, err:
       logging.exception("Error in RPC call")
-      return False, "Error while executing backend function: %s" % str(err)
+      result = (False, "Error while executing backend function: %s" % str(err))
+
+    return serializer.DumpJson(result, indent=False)
 
   # the new block devices  --------------------------
 
@@ -256,6 +313,15 @@ class NodeHttpServer(http.server.HttpServer):
     disks = [objects.Disk.FromDict(cf) for cf in params[0]]
     return backend.BlockdevGetsize(disks)
 
+  @staticmethod
+  def perspective_blockdev_export(params):
+    """Export a given block device to a path on a destination node.
+
+    """
+    disk = objects.Disk.FromDict(params[0])
+    dest_node, dest_path, cluster_name = params[1:]
+    return backend.BlockdevExport(disk, dest_node, dest_path, cluster_name)
+
   # blockdev/drbd specific methods ----------
 
   @staticmethod
@@ -295,20 +361,14 @@ class NodeHttpServer(http.server.HttpServer):
     disks = [objects.Disk.FromDict(cf) for cf in disks]
     return backend.DrbdWaitSync(nodes_ip, disks)
 
-  # export/import  --------------------------
-
   @staticmethod
-  def perspective_snapshot_export(params):
-    """Export a given snapshot.
+  def perspective_drbd_helper(params):
+    """Query drbd helper.
 
     """
-    disk = objects.Disk.FromDict(params[0])
-    dest_node = params[1]
-    instance = objects.Instance.FromDict(params[2])
-    cluster_name = params[3]
-    dev_idx = params[4]
-    return backend.ExportSnapshot(disk, dest_node, instance,
-                                  cluster_name, dev_idx)
+    return backend.GetDrbdUsermodeHelper()
+
+  # export/import  --------------------------
 
   @staticmethod
   def perspective_finalize_export(params):
@@ -316,8 +376,14 @@ class NodeHttpServer(http.server.HttpServer):
 
     """
     instance = objects.Instance.FromDict(params[0])
-    snap_disks = [objects.Disk.FromDict(str_data)
-                  for str_data in params[1]]
+
+    snap_disks = []
+    for disk in params[1]:
+      if isinstance(disk, bool):
+        snap_disks.append(disk)
+      else:
+        snap_disks.append(objects.Disk.FromDict(disk))
+
     return backend.FinalizeExport(instance, snap_disks)
 
   @staticmethod
@@ -413,26 +479,17 @@ class NodeHttpServer(http.server.HttpServer):
     inst_s = params[0]
     inst = objects.Instance.FromDict(inst_s)
     reinstall = params[1]
-    return backend.InstanceOsAdd(inst, reinstall)
+    debug = params[2]
+    return backend.InstanceOsAdd(inst, reinstall, debug)
 
   @staticmethod
   def perspective_instance_run_rename(params):
     """Runs the OS rename script for an instance.
 
     """
-    inst_s, old_name = params
-    inst = objects.Instance.FromDict(inst_s)
-    return backend.RunRenameInstance(inst, old_name)
-
-  @staticmethod
-  def perspective_instance_os_import(params):
-    """Run the import function of an OS onto a given instance.
-
-    """
-    inst_s, src_node, src_images, cluster_name = params
+    inst_s, old_name, debug = params
     inst = objects.Instance.FromDict(inst_s)
-    return backend.ImportOSIntoInstance(inst, src_node, src_images,
-                                        cluster_name)
+    return backend.RunRenameInstance(inst, old_name, debug)
 
   @staticmethod
   def perspective_instance_shutdown(params):
@@ -440,7 +497,8 @@ class NodeHttpServer(http.server.HttpServer):
 
     """
     instance = objects.Instance.FromDict(params[0])
-    return backend.InstanceShutdown(instance)
+    timeout = params[1]
+    return backend.InstanceShutdown(instance, timeout)
 
   @staticmethod
   def perspective_instance_start(params):
@@ -492,7 +550,8 @@ class NodeHttpServer(http.server.HttpServer):
     """
     instance = objects.Instance.FromDict(params[0])
     reboot_type = params[1]
-    return backend.InstanceReboot(instance, reboot_type)
+    shutdown_timeout = params[2]
+    return backend.InstanceReboot(instance, reboot_type, shutdown_timeout)
 
   @staticmethod
   def perspective_instance_info(params):
@@ -582,7 +641,7 @@ class NodeHttpServer(http.server.HttpServer):
     """Cleanup after leaving a cluster.
 
     """
-    return backend.LeaveCluster()
+    return backend.LeaveCluster(params[0])
 
   @staticmethod
   def perspective_node_volumes(params):
@@ -660,6 +719,14 @@ class NodeHttpServer(http.server.HttpServer):
     os_obj = backend.OSFromDisk(name)
     return os_obj.ToDict()
 
+  @staticmethod
+  def perspective_os_validate(params):
+    """Run a given OS' validation routine.
+
+    """
+    required, name, checks, params = params
+    return backend.ValidateOS(required, name, checks, params)
+
   # hooks -----------------------
 
   @staticmethod
@@ -751,15 +818,6 @@ class NodeHttpServer(http.server.HttpServer):
     # TODO: What if a file fails to rename?
     return [backend.JobQueueRename(old, new) for old, new in params]
 
-  @staticmethod
-  def perspective_jobqueue_set_drain(params):
-    """Set/unset the queue drain flag.
-
-    """
-    drain_flag = params[0]
-    return backend.JobQueueSetDrainFlag(drain_flag)
-
-
   # hypervisor ---------------
 
   @staticmethod
@@ -770,12 +828,103 @@ class NodeHttpServer(http.server.HttpServer):
     (hvname, hvparams) = params
     return backend.ValidateHVParams(hvname, hvparams)
 
+  # Crypto
+
+  @staticmethod
+  def perspective_x509_cert_create(params):
+    """Creates a new X509 certificate for SSL/TLS.
+
+    """
+    (validity, ) = params
+    return backend.CreateX509Certificate(validity)
+
+  @staticmethod
+  def perspective_x509_cert_remove(params):
+    """Removes an X509 certificate.
+
+    """
+    (name, ) = params
+    return backend.RemoveX509Certificate(name)
+
+  # Import and export
+
+  @staticmethod
+  def perspective_import_start(params):
+    """Starts an import daemon.
+
+    """
+    (opts_s, instance, dest, dest_args) = params
+
+    opts = objects.ImportExportOptions.FromDict(opts_s)
+
+    return backend.StartImportExportDaemon(constants.IEM_IMPORT, opts,
+                                           None, None,
+                                           objects.Instance.FromDict(instance),
+                                           dest,
+                                           _DecodeImportExportIO(dest,
+                                                                 dest_args))
+
+  @staticmethod
+  def perspective_export_start(params):
+    """Starts an export daemon.
+
+    """
+    (opts_s, host, port, instance, source, source_args) = params
+
+    opts = objects.ImportExportOptions.FromDict(opts_s)
+
+    return backend.StartImportExportDaemon(constants.IEM_EXPORT, opts,
+                                           host, port,
+                                           objects.Instance.FromDict(instance),
+                                           source,
+                                           _DecodeImportExportIO(source,
+                                                                 source_args))
+
+  @staticmethod
+  def perspective_impexp_status(params):
+    """Retrieves the status of an import or export daemon.
+
+    """
+    return backend.GetImportExportStatus(params[0])
+
+  @staticmethod
+  def perspective_impexp_abort(params):
+    """Aborts an import or export.
+
+    """
+    return backend.AbortImportExport(params[0])
 
-def ExecNODED(options, args):
-  """Main NODED function, executed with the pidfile held.
+  @staticmethod
+  def perspective_impexp_cleanup(params):
+    """Cleans up after an import or export.
+
+    """
+    return backend.CleanupImportExport(params[0])
+
+
+def CheckNoded(_, args):
+  """Initial checks whether to run or exit with a failure.
 
   """
-  global queue_lock
+  if args: # noded doesn't take any arguments
+    print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" %
+                          sys.argv[0])
+    sys.exit(constants.EXIT_FAILURE)
+
+
+def ExecNoded(options, _):
+  """Main node daemon function, executed with the PID file held.
+
+  """
+  if options.mlock:
+    request_executor_class = MlockallRequestExecutor
+    try:
+      utils.Mlockall()
+    except errors.NoCtypesError:
+      logging.warning("Cannot set memory lock, ctypes module not found")
+      request_executor_class = http.server.HttpServerRequestExecutor
+  else:
+    request_executor_class = http.server.HttpServerRequestExecutor
 
   # Read SSL certificate
   if options.ssl:
@@ -784,12 +933,17 @@ def ExecNODED(options, args):
   else:
     ssl_params = None
 
-  # Prepare job queue
-  queue_lock = jstore.InitAndVerifyQueue(must_lock=False)
+  err = _PrepareQueueLock()
+  if err is not None:
+    # this might be some kind of file-system/permission error; while
+    # this breaks the job queue functionality, we shouldn't prevent
+    # startup of the whole node daemon because of this
+    logging.critical("Can't init/verify the queue, proceeding anyway: %s", err)
 
   mainloop = daemon.Mainloop()
   server = NodeHttpServer(mainloop, options.bind_address, options.port,
-                          ssl_params=ssl_params, ssl_verify_peer=True)
+                          ssl_params=ssl_params, ssl_verify_peer=True,
+                          request_executor_class=request_executor_class)
   server.Start()
   try:
     mainloop.Run()
@@ -805,10 +959,19 @@ def main():
                         usage="%prog [-f] [-d] [-p port] [-b ADDRESS]",
                         version="%%prog (ganeti) %s" %
                         constants.RELEASE_VERSION)
+  parser.add_option("--no-mlock", dest="mlock",
+                    help="Do not mlock the node memory in ram",
+                    default=True, action="store_false")
+
   dirs = [(val, constants.RUN_DIRS_MODE) for val in constants.SUB_RUN_DIRS]
   dirs.append((constants.LOG_OS_DIR, 0750))
   dirs.append((constants.LOCK_DIR, 1777))
-  daemon.GenericMain(constants.NODED, parser, dirs, None, ExecNODED)
+  dirs.append((constants.CRYPTO_KEYS_DIR, constants.CRYPTO_KEYS_DIR_MODE))
+  dirs.append((constants.IMPORT_EXPORT_DIR, constants.IMPORT_EXPORT_DIR_MODE))
+  daemon.GenericMain(constants.NODED, parser, dirs, CheckNoded, ExecNoded,
+                     default_ssl_cert=constants.NODED_CERT_FILE,
+                     default_ssl_key=constants.NODED_CERT_FILE,
+                     console_logging=True)
 
 
 if __name__ == '__main__':