4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti node daemon"""
24 # pylint: disable=C0103,W0142
26 # C0103: Functions in this module need to have a given name structure,
27 # and the name of the daemon doesn't match
29 # W0142: Used * or ** magic, since we do use it extensively in this
38 from optparse import OptionParser
40 from ganeti import backend
41 from ganeti import constants
42 from ganeti import objects
43 from ganeti import errors
44 from ganeti import jstore
45 from ganeti import daemon
46 from ganeti import http
47 from ganeti import utils
48 from ganeti import storage
49 from ganeti import serializer
50 from ganeti import netutils
52 import ganeti.http.server # pylint: disable=W0611
# Sets up the module-level ``queue_lock`` by calling
# jstore.InitAndVerifyQueue(must_lock=False) when it has not been set yet.
58 def _PrepareQueueLock():
59 """Try to prepare the queue lock.
61 @return: None for success, otherwise an exception object
64 global queue_lock # pylint: disable=W0603
# NOTE(review): the body of this check is not visible here; presumably it
# returns early when the lock was already prepared -- TODO confirm.
66 if queue_lock is not None:
71 queue_lock = jstore.InitAndVerifyQueue(must_lock=False)
# Python 2 "except X, err" form. Per the docstring the error object is
# presumably handed back to the caller rather than propagated -- TODO confirm.
73 except EnvironmentError, err:
# Wraps ``fn`` so that the job queue lock is taken in exclusive mode before
# ``fn`` is called with its original arguments.
# NOTE(review): releasing the lock is not visible in these lines -- confirm
# that the call to fn is followed by an unlock (e.g. in a finally clause).
77 def _RequireJobQueueLock(fn):
78 """Decorator for job queue manipulating functions.
# Upper bound, in seconds, passed as ``timeout`` when acquiring the lock.
81 QUEUE_LOCK_TIMEOUT = 10
83 def wrapper(*args, **kwargs):
84 # Locking in exclusive, blocking mode because there could be several
85 # children running at the same time. Waiting up to 10 seconds.
# A non-None result means preparing the queue lock failed.
86 if _PrepareQueueLock() is not None:
87 raise errors.JobQueueError("Job queue failed initialization,"
88 " cannot update jobs")
89 queue_lock.Exclusive(blocking=True, timeout=QUEUE_LOCK_TIMEOUT)
91 return fn(*args, **kwargs)
# Rebuilds the I/O arguments of an import/export request: the first argument
# is turned back into a Disk object via objects.Disk.FromDict; for
# IEIO_SCRIPT the second argument is passed through unchanged.
# NOTE(review): argument counts are checked with assert only, which Python
# skips when run with -O.
# NOTE(review): handling of any other ieio value is not visible here.
98 def _DecodeImportExportIO(ieio, ieioargs):
99 """Decodes import/export I/O information.
102 if ieio == constants.IEIO_RAW_DISK:
103 assert len(ieioargs) == 1
104 return (objects.Disk.FromDict(ieioargs[0]), )
106 if ieio == constants.IEIO_SCRIPT:
107 assert len(ieioargs) == 2
108 return (objects.Disk.FromDict(ieioargs[0]), ieioargs[1])
# Request executor subclass whose constructor forwards all positional and
# keyword arguments unchanged to HttpServerRequestExecutor.__init__.
# NOTE(review): the memory-locking step implied by the class name is not
# visible in these lines -- TODO confirm where it happens.
113 class MlockallRequestExecutor(http.server.HttpServerRequestExecutor):
114 """Custom Request Executor class that ensures NodeHttpServer children are
118 def __init__(self, *args, **kwargs):
121 http.server.HttpServerRequestExecutor.__init__(self, *args, **kwargs)
# HTTP server whose "perspective_*" methods are looked up by name from the
# request path in HandleRequest.
124 class NodeHttpServer(http.server.HttpServer):
125 """The server implementation.
127 This class holds all methods exposed over the RPC interface.
130 # too many public methods, and unused args - all methods get params
132 # pylint: disable=R0904,W0613
133 def __init__(self, *args, **kwargs):
134 http.server.HttpServer.__init__(self, *args, **kwargs)
# Remember the PID at construction time; HandleRequest sends SIGTERM to
# this PID when a QuitGanetiException is raised.
135 self.noded_pid = os.getpid()
# Dispatches one RPC request: the request path selects the method
# "perspective_<path>", the JSON-decoded request body is its only argument,
# and the outcome is returned as a JSON-encoded (success, payload) pair.
137 def HandleRequest(self, req):
# Only PUT requests are accepted; anything else is a bad request.
141 if req.request_method.upper() != http.HTTP_PUT:
142 raise http.HttpBadRequest()
144 path = req.request_path
# NOTE(review): the body of this check is not visible here; presumably the
# leading slash is stripped before the lookup -- TODO confirm.
145 if path.startswith("/"):
# Look the handler up by name; None when no such method exists.
148 method = getattr(self, "perspective_%s" % path, None)
150 raise http.HttpNotFound()
153 result = (True, method(serializer.LoadJson(req.request_body)))
155 except backend.RPCFail, err:
156 # our custom failure exception; str(err) works fine if the
157 # exception was constructed with a single argument, and in
158 # this case, err.message == err.args[0] == str(err)
159 result = (False, str(err))
160 except errors.QuitGanetiException, err:
161 # Tell parent to quit
162 logging.info("Shutting down the node daemon, arguments: %s",
164 os.kill(self.noded_pid, signal.SIGTERM)
165 # And return the error's arguments, which must be already in
166 # correct tuple format
# Catch-all: the traceback is logged and the error text is sent back as a
# failed result instead of propagating.
168 except Exception, err:
169 logging.exception("Error in RPC call")
170 result = (False, "Error while executing backend function: %s" % str(err))
172 return serializer.DumpJson(result, indent=False)
174 # the new block devices --------------------------
# Unpacks five parameters, rebuilds the disk with objects.Disk.FromDict and
# forwards everything to backend.BlockdevCreate.
# NOTE(review): the condition guarding the ValueError is not visible here;
# presumably it fires when deserialisation yields no object -- TODO confirm.
177 def perspective_blockdev_create(params):
178 """Create a block device.
181 bdev_s, size, owner, on_primary, info = params
182 bdev = objects.Disk.FromDict(bdev_s)
184 raise ValueError("can't unserialize data!")
185 return backend.BlockdevCreate(bdev, size, owner, on_primary, info)
# Takes a list of serialised disks plus a pause value; every disk is
# deserialised before both are passed to backend.BlockdevPauseResumeSync.
188 def perspective_blockdev_pause_resume_sync(params):
189 """Pause/resume sync of a block device.
192 disks_s, pause = params
193 disks = [objects.Disk.FromDict(bdev_s) for bdev_s in disks_s]
194 return backend.BlockdevPauseResumeSync(disks, pause)
# Takes a serialised disk, an offset and a size; the disk is deserialised
# and all three are passed to backend.BlockdevWipe.
197 def perspective_blockdev_wipe(params):
198 """Wipe a block device.
201 bdev_s, offset, size = params
202 bdev = objects.Disk.FromDict(bdev_s)
203 return backend.BlockdevWipe(bdev, offset, size)
# Deserialises a disk and passes it to backend.BlockdevRemove.
# NOTE(review): the assignment of bdev_s is not visible here; presumably it
# is taken from params -- TODO confirm.
206 def perspective_blockdev_remove(params):
207 """Remove a block device.
211 bdev = objects.Disk.FromDict(bdev_s)
212 return backend.BlockdevRemove(bdev)
# params is a sequence of (serialised disk, uid) pairs; each disk is
# deserialised and the resulting pairs go to backend.BlockdevRename.
215 def perspective_blockdev_rename(params):
216 """Rename a block device.
219 devlist = [(objects.Disk.FromDict(ds), uid) for ds, uid in params]
220 return backend.BlockdevRename(devlist)
# Unpacks four parameters, deserialises the disk and forwards everything to
# backend.BlockdevAssemble.
# NOTE(review): the condition guarding the ValueError is not visible here;
# presumably it fires when deserialisation yields no object -- TODO confirm.
223 def perspective_blockdev_assemble(params):
224 """Assemble a block device.
227 bdev_s, owner, on_primary, idx = params
228 bdev = objects.Disk.FromDict(bdev_s)
230 raise ValueError("can't unserialize data!")
231 return backend.BlockdevAssemble(bdev, owner, on_primary, idx)
# Deserialises a disk and passes it to backend.BlockdevShutdown.
# NOTE(review): neither the assignment of bdev_s nor the condition guarding
# the ValueError is visible here -- TODO confirm both.
234 def perspective_blockdev_shutdown(params):
235 """Shutdown a block device.
239 bdev = objects.Disk.FromDict(bdev_s)
241 raise ValueError("can't unserialize data!")
242 return backend.BlockdevShutdown(bdev)
# Takes a serialised parent disk and a list of serialised child disks,
# deserialises all of them and passes them to backend.BlockdevAddchildren.
245 def perspective_blockdev_addchildren(params):
246 """Add a child to a mirror device.
248 Note: this is only valid for mirror devices. It's the caller's duty
249 to send a correct disk, otherwise we raise an error.
252 bdev_s, ndev_s = params
253 bdev = objects.Disk.FromDict(bdev_s)
254 ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s]
# Reject the request if the parent or any child deserialised to None.
255 if bdev is None or ndevs.count(None) > 0:
256 raise ValueError("can't unserialize data!")
257 return backend.BlockdevAddchildren(bdev, ndevs)
# Takes a serialised parent disk and a list of serialised child disks,
# deserialises all of them and passes them to
# backend.BlockdevRemovechildren.
260 def perspective_blockdev_removechildren(params):
261 """Remove a child from a mirror device.
263 This is only valid for mirror devices, of course. It's the caller's
264 duty to send a correct disk, otherwise we raise an error.
267 bdev_s, ndev_s = params
268 bdev = objects.Disk.FromDict(bdev_s)
269 ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s]
# Reject the request if the parent or any child deserialised to None.
270 if bdev is None or ndevs.count(None) > 0:
271 raise ValueError("can't unserialize data!")
272 return backend.BlockdevRemovechildren(bdev, ndevs)
# Deserialises the disks, asks backend.BlockdevGetmirrorstatus for their
# status objects and returns each one converted with ToDict().
# NOTE(review): the tail of the comprehension building ``disks`` is not
# visible here, so the exact source of the serialised disks is unconfirmed.
275 def perspective_blockdev_getmirrorstatus(params):
276 """Return the mirror status for a list of disks.
279 disks = [objects.Disk.FromDict(dsk_s)
281 return [status.ToDict()
282 for status in backend.BlockdevGetmirrorstatus(disks)]
# params holds a single mapping keyed by node name; only the entry for this
# node's own name is used, and a missing entry means no disks.
285 def perspective_blockdev_getmirrorstatus_multi(params):
286 """Return the mirror status for a list of disks.
289 (node_disks, ) = params
291 node_name = netutils.Hostname.GetSysName()
293 disks = [objects.Disk.FromDict(dsk_s)
294 for dsk_s in node_disks.get(node_name, [])]
# Each backend entry is a (success, status) pair. One append converts the
# status with ToDict(), the other passes it through unchanged.
# NOTE(review): the condition choosing between the two appends is not
# visible here; presumably it depends on ``success`` -- TODO confirm.
298 for (success, status) in backend.BlockdevGetmirrorstatusMulti(disks):
300 result.append((success, status.ToDict()))
302 result.append((success, status))
# Deserialises the disk in params[0], looks it up with backend.BlockdevFind
# and returns the result converted with ToDict().
# NOTE(review): any handling between the lookup and the return is not
# visible here -- TODO confirm.
307 def perspective_blockdev_find(params):
308 """Expose the FindBlockDevice functionality for a disk.
310 This will try to find but not activate a disk.
313 disk = objects.Disk.FromDict(params[0])
315 result = backend.BlockdevFind(disk)
319 return result.ToDict()
# Deserialises the disk in params[0] and passes it to
# backend.BlockdevSnapshot.
322 def perspective_blockdev_snapshot(params):
323 """Create a snapshot device.
325 Note that this is only valid for LVM disks, if we get passed
326 something else we raise an exception. The snapshot device can be
327 removed by calling the generic block device remove call.
330 cfbd = objects.Disk.FromDict(params[0])
331 return backend.BlockdevSnapshot(cfbd)
# Deserialises the disk in params[0] and calls backend.BlockdevGrow with it,
# an amount and a dry-run flag.
# NOTE(review): the assignments of ``amount`` and ``dryrun`` are not visible
# here; presumably they come from later elements of params -- TODO confirm.
334 def perspective_blockdev_grow(params):
335 """Grow a stack of devices.
338 cfbd = objects.Disk.FromDict(params[0])
341 return backend.BlockdevGrow(cfbd, amount, dryrun)
# params[1] is a list of serialised disks; they are deserialised and passed
# to backend.BlockdevClose together with params[0], which goes through
# unchanged as the first argument.
344 def perspective_blockdev_close(params):
345 """Closes the given block devices.
348 disks = [objects.Disk.FromDict(cf) for cf in params[1]]
349 return backend.BlockdevClose(params[0], disks)
# params[0] is a list of serialised disks; they are deserialised and passed
# to backend.BlockdevGetsize.
352 def perspective_blockdev_getsize(params):
353 """Compute the sizes of the given block devices.
356 disks = [objects.Disk.FromDict(cf) for cf in params[0]]
357 return backend.BlockdevGetsize(disks)
# params is (serialised disk, dest_node, dest_path, cluster_name); the disk
# is deserialised and all four values go to backend.BlockdevExport.
360 def perspective_blockdev_export(params):
361 """Export a block device to a remote node.
364 disk = objects.Disk.FromDict(params[0])
365 dest_node, dest_path, cluster_name = params[1:]
366 return backend.BlockdevExport(disk, dest_node, dest_path, cluster_name)
368 # blockdev/drbd specific methods ----------
# Takes (nodes_ip, list of serialised disks); the disks are deserialised and
# both values are passed to backend.DrbdDisconnectNet.
371 def perspective_drbd_disconnect_net(params):
372 """Disconnects the network connection of drbd disks.
374 Note that this is only valid for drbd disks, so the members of the
375 disk list must all be drbd devices.
378 nodes_ip, disks = params
379 disks = [objects.Disk.FromDict(cf) for cf in disks]
380 return backend.DrbdDisconnectNet(nodes_ip, disks)
# Takes (nodes_ip, list of serialised disks, instance_name, multimaster);
# the disks are deserialised and all four values are passed to
# backend.DrbdAttachNet.
383 def perspective_drbd_attach_net(params):
384 """Attaches the network connection of drbd disks.
386 Note that this is only valid for drbd disks, so the members of the
387 disk list must all be drbd devices.
390 nodes_ip, disks, instance_name, multimaster = params
391 disks = [objects.Disk.FromDict(cf) for cf in disks]
392 return backend.DrbdAttachNet(nodes_ip, disks,
393 instance_name, multimaster)
# Takes (nodes_ip, list of serialised disks); the disks are deserialised and
# both values are passed to backend.DrbdWaitSync.
396 def perspective_drbd_wait_sync(params):
397 """Wait until DRBD disks are synched.
399 Note that this is only valid for drbd disks, so the members of the
400 disk list must all be drbd devices.
403 nodes_ip, disks = params
404 disks = [objects.Disk.FromDict(cf) for cf in disks]
405 return backend.DrbdWaitSync(nodes_ip, disks)
# params is not used; returns whatever backend.GetDrbdUsermodeHelper()
# reports.
408 def perspective_drbd_helper(params):
409 """Query drbd helper.
412 return backend.GetDrbdUsermodeHelper()
414 # export/import --------------------------
# Deserialises the instance in params[0] and the disk entries in params[1],
# then calls backend.FinalizeExport with both.
# NOTE(review): the initialisation of snap_disks and the branch keyword
# between the two appends are not visible here -- TODO confirm.
417 def perspective_finalize_export(params):
418 """Expose the finalize export functionality.
421 instance = objects.Instance.FromDict(params[0])
424 for disk in params[1]:
# Boolean entries are kept as they are instead of being deserialised.
425 if isinstance(disk, bool):
426 snap_disks.append(disk)
428 snap_disks.append(objects.Disk.FromDict(disk))
430 return backend.FinalizeExport(instance, snap_disks)
# Returns backend.ExportInfo(path).
# NOTE(review): the assignment of ``path`` is not visible here; presumably
# it is taken from params -- TODO confirm.
433 def perspective_export_info(params):
434 """Query information about an existing export on this node.
436 The given path may not contain an export, in which case we return
441 return backend.ExportInfo(path)
# params is not used; returns the result of backend.ListExports().
444 def perspective_export_list(params):
445 """List the available exports on this node.
447 Note that as opposed to export_info, which may query data about an
448 export in any path, this only queries the standard Ganeti path
449 (constants.EXPORT_DIR).
452 return backend.ListExports()
# Returns backend.RemoveExport(export).
# NOTE(review): the docstring and the assignment of ``export`` are not
# visible here; presumably it is taken from params -- TODO confirm.
455 def perspective_export_remove(params):
460 return backend.RemoveExport(export)
462 # block device ---------------------
# Returns backend.GetBlockDevSizes(devices).
# NOTE(review): the assignment of ``devices`` is not visible here;
# presumably it is taken from params -- TODO confirm.
464 def perspective_bdev_sizes(params):
465 """Query the list of block devices
469 return backend.GetBlockDevSizes(devices)
471 # volume --------------------------
# Returns backend.GetVolumeList(vgname).
# NOTE(review): the assignment of ``vgname`` is not visible here; presumably
# it is taken from params -- TODO confirm.
474 def perspective_lv_list(params):
475 """Query the list of logical volumes in a given volume group.
479 return backend.GetVolumeList(vgname)
# params is not used; returns the result of backend.ListVolumeGroups().
482 def perspective_vg_list(params):
483 """Query the list of volume groups.
486 return backend.ListVolumeGroups()
488 # Storage --------------------------
# Obtains a storage object from storage.GetStorage(su_name, *su_args) and
# returns the result of its List(name, fields) method.
491 def perspective_storage_list(params):
492 """Get list of storage units.
495 (su_name, su_args, name, fields) = params
496 return storage.GetStorage(su_name, *su_args).List(name, fields)
# Obtains a storage object from storage.GetStorage(su_name, *su_args) and
# returns the result of its Modify(name, changes) method.
499 def perspective_storage_modify(params):
500 """Modify a storage unit.
503 (su_name, su_args, name, changes) = params
504 return storage.GetStorage(su_name, *su_args).Modify(name, changes)
# Obtains a storage object from storage.GetStorage(su_name, *su_args) and
# returns the result of its Execute(name, op) method.
507 def perspective_storage_execute(params):
508 """Execute an operation on a storage unit.
511 (su_name, su_args, name, op) = params
512 return storage.GetStorage(su_name, *su_args).Execute(name, op)
514 # bridge --------------------------
# Passes params[0] to backend.BridgesExist and returns its result.
517 def perspective_bridges_exist(params):
518 """Check if all bridges given exist on this node.
521 bridges_list = params[0]
522 return backend.BridgesExist(bridges_list)
524 # instance --------------------------
# Deserialises an instance and calls backend.InstanceOsAdd with it, the
# reinstall value from params[1] and a debug value.
# NOTE(review): the assignments of ``inst_s`` and ``debug`` are not visible
# here; presumably they come from params -- TODO confirm.
527 def perspective_instance_os_add(params):
528 """Install an OS on a given instance.
532 inst = objects.Instance.FromDict(inst_s)
533 reinstall = params[1]
535 return backend.InstanceOsAdd(inst, reinstall, debug)
# Takes (serialised instance, old_name, debug); the instance is deserialised
# and all three values are passed to backend.RunRenameInstance.
538 def perspective_instance_run_rename(params):
539 """Runs the OS rename script for an instance.
542 inst_s, old_name, debug = params
543 inst = objects.Instance.FromDict(inst_s)
544 return backend.RunRenameInstance(inst, old_name, debug)
# Deserialises the instance in params[0] and calls backend.InstanceShutdown
# with it and a timeout.
# NOTE(review): the assignment of ``timeout`` is not visible here;
# presumably it is taken from params -- TODO confirm.
547 def perspective_instance_shutdown(params):
548 """Shutdown an instance.
551 instance = objects.Instance.FromDict(params[0])
553 return backend.InstanceShutdown(instance, timeout)
# Takes two parameters and passes the deserialised instance plus
# startup_paused to backend.StartInstance.
556 def perspective_instance_start(params):
557 """Start an instance.
560 (instance_name, startup_paused) = params
# Despite its name, instance_name is fed to Instance.FromDict, i.e. it
# carries the serialised instance rather than a plain name.
561 instance = objects.Instance.FromDict(instance_name)
562 return backend.StartInstance(instance, startup_paused)
# Deserialises the instance in params[0] and returns
# backend.MigrationInfo for it.
565 def perspective_migration_info(params):
566 """Gather information about an instance to be migrated.
569 instance = objects.Instance.FromDict(params[0])
570 return backend.MigrationInfo(instance)
# Takes (serialised instance, info, target); the instance is deserialised in
# place and all three values are passed to backend.AcceptInstance.
573 def perspective_accept_instance(params):
574 """Prepare the node to accept an instance.
577 instance, info, target = params
578 instance = objects.Instance.FromDict(instance)
579 return backend.AcceptInstance(instance, info, target)
# Takes (serialised instance, info, success); the instance is deserialised
# in place and all three values are passed to backend.FinalizeMigration.
582 def perspective_finalize_migration(params):
583 """Finalize the instance migration.
586 instance, info, success = params
587 instance = objects.Instance.FromDict(instance)
588 return backend.FinalizeMigration(instance, info, success)
# Takes (serialised instance, target, live); the instance is deserialised in
# place and all three values are passed to backend.MigrateInstance.
591 def perspective_instance_migrate(params):
592 """Migrates an instance.
595 instance, target, live = params
596 instance = objects.Instance.FromDict(instance)
597 return backend.MigrateInstance(instance, target, live)
# params is (serialised instance, reboot_type, shutdown_timeout); the
# instance is deserialised and all three go to backend.InstanceReboot.
600 def perspective_instance_reboot(params):
601 """Reboot an instance.
604 instance = objects.Instance.FromDict(params[0])
605 reboot_type = params[1]
606 shutdown_timeout = params[2]
607 return backend.InstanceReboot(instance, reboot_type, shutdown_timeout)
# Passes the first two parameters unchanged to backend.GetInstanceInfo.
610 def perspective_instance_info(params):
611 """Query instance information.
614 return backend.GetInstanceInfo(params[0], params[1])
# Deserialises the instance in params[0] and returns
# backend.GetInstanceMigratable for it.
617 def perspective_instance_migratable(params):
618 """Query whether the specified instance can be migrated.
621 instance = objects.Instance.FromDict(params[0])
622 return backend.GetInstanceMigratable(instance)
# Passes params[0] unchanged to backend.GetAllInstancesInfo.
625 def perspective_all_instances_info(params):
626 """Query information about all instances.
629 return backend.GetAllInstancesInfo(params[0])
# Passes params[0] unchanged to backend.GetInstanceList.
632 def perspective_instance_list(params):
633 """Query the list of running instances.
636 return backend.GetInstanceList(params[0])
638 # node --------------------------
# Maps the five parameters onto netutils.TcpPing: params[0] is the source,
# params[3] the timeout and params[4] the live_port_needed flag, while
# params[1] and params[2] are the two leading positional arguments
# (presumably target and port -- TODO confirm against netutils.TcpPing).
641 def perspective_node_tcp_ping(params):
642 """Do a TcpPing on the remote node.
645 return netutils.TcpPing(params[1], params[2], timeout=params[3],
646 live_port_needed=params[4], source=params[0])
# Passes params[0] to netutils.IPAddress.Own and returns its result.
649 def perspective_node_has_ip_address(params):
650 """Checks if a node has the given ip address.
653 return netutils.IPAddress.Own(params[0])
# Takes (vgname, hypervisor_type) and passes both to backend.GetNodeInfo.
656 def perspective_node_info(params):
657 """Query node information.
660 vgname, hypervisor_type = params
661 return backend.GetNodeInfo(vgname, hypervisor_type)
# Passes the first three parameters to backend.EtcHostsModify.
# NOTE(review): the backend result is not returned by the visible call; what
# the method itself returns is not visible here -- TODO confirm.
664 def perspective_etc_hosts_modify(params):
665 """Modify a node entry in /etc/hosts.
668 backend.EtcHostsModify(params[0], params[1], params[2])
# Passes the first two parameters unchanged to backend.VerifyNode.
673 def perspective_node_verify(params):
674 """Run a verify sequence on this node.
677 return backend.VerifyNode(params[0], params[1])
# Passes the first two parameters unchanged to backend.StartMaster.
680 def perspective_node_start_master(params):
681 """Promote this node to master status.
684 return backend.StartMaster(params[0], params[1])
# Passes params[0] unchanged to backend.StopMaster.
687 def perspective_node_stop_master(params):
688 """Demote this node from master status.
691 return backend.StopMaster(params[0])
# Passes params[0] unchanged to backend.LeaveCluster.
694 def perspective_node_leave_cluster(params):
695 """Cleanup after leaving a cluster.
698 return backend.LeaveCluster(params[0])
# params is not used; returns the result of backend.NodeVolumes().
701 def perspective_node_volumes(params):
702 """Query the list of all logical volume groups.
705 return backend.NodeVolumes()
# params is not used; returns the result of backend.DemoteFromMC().
708 def perspective_node_demote_from_mc(params):
709 """Demote a node from the master candidate role.
712 return backend.DemoteFromMC()
# Passes params[0], the hypervisor type, to backend.PowercycleNode.
715 def perspective_node_powercycle(params):
716 """Tries to powercycle the node.
719 hypervisor_type = params[0]
720 return backend.PowercycleNode(hypervisor_type)
722 # cluster --------------------------
# params is not used; returns constants.PROTOCOL_VERSION.
725 def perspective_version(params):
726 """Query version information.
729 return constants.PROTOCOL_VERSION
# Spreads params as positional arguments into backend.UploadFile, so the
# parameter list must match that function's signature.
732 def perspective_upload_file(params):
735 Note that the backend implementation imposes strict rules on which
739 return backend.UploadFile(*params)
# params is not used; returns the result of backend.GetMasterInfo().
742 def perspective_master_info(params):
743 """Query master information.
746 return backend.GetMasterInfo()
# Passes the first four parameters to backend.RunOob and parses its output
# as JSON with serializer.LoadJson.
# NOTE(review): the docstring, any condition around the JSON parsing and the
# return statement are not visible here -- TODO confirm.
749 def perspective_run_oob(params):
753 output = backend.RunOob(params[0], params[1], params[2], params[3])
755 result = serializer.LoadJson(output)
# Returns backend.WriteSsconfFiles(values).
# NOTE(review): the assignment of ``values`` is not visible here; presumably
# it is taken from params -- TODO confirm.
761 def perspective_write_ssconf_files(params):
762 """Write ssconf files.
766 return backend.WriteSsconfFiles(values)
768 # os -----------------------
# params is not used; returns the result of backend.DiagnoseOS().
771 def perspective_os_diagnose(params):
772 """Query detailed information about existing OSes.
775 return backend.DiagnoseOS()
# Loads an OS object with backend.OSFromDisk(name) and returns it converted
# with ToDict().
# NOTE(review): the assignment of ``name`` is not visible here; presumably
# it is taken from params -- TODO confirm.
778 def perspective_os_get(params):
779 """Query information about a given OS.
783 os_obj = backend.OSFromDisk(name)
784 return os_obj.ToDict()
# Unpacks four values and passes them to backend.ValidateOS.
787 def perspective_os_validate(params):
788 """Run a given OS' validation routine.
# Note: the unpacking rebinds ``params`` to its own fourth element, so from
# here on the name no longer refers to the full RPC argument list.
791 required, name, checks, params = params
792 return backend.ValidateOS(required, name, checks, params)
794 # hooks -----------------------
# Takes (hpath, phase, env), creates a backend.HooksRunner and returns the
# result of its RunHooks call with those three values.
797 def perspective_hooks_runner(params):
801 hpath, phase, env = params
802 hr = backend.HooksRunner()
803 return hr.RunHooks(hpath, phase, env)
805 # iallocator -----------------
# Creates a backend.IAllocatorRunner and returns the result of
# Run(name, idata).
# NOTE(review): the assignment of ``name`` and ``idata`` is not visible
# here; presumably both are unpacked from params -- TODO confirm.
808 def perspective_iallocator_runner(params):
809 """Run an iallocator script.
813 iar = backend.IAllocatorRunner()
814 return iar.Run(name, idata)
816 # test -----------------------
# Calls utils.TestDelay(duration), which yields a (status, rval) pair; rval
# becomes the message of the backend.RPCFail that can be raised.
# NOTE(review): the docstring, the assignment of ``duration``, the condition
# guarding the raise and the success return are not visible here; presumably
# the raise happens when status is false -- TODO confirm.
819 def perspective_test_delay(params):
824 status, rval = utils.TestDelay(duration)
826 raise backend.RPCFail(rval)
829 # file storage ---------------
# Passes params[0], the directory, to backend.CreateFileStorageDir.
832 def perspective_file_storage_dir_create(params):
833 """Create the file storage directory.
836 file_storage_dir = params[0]
837 return backend.CreateFileStorageDir(file_storage_dir)
# Passes params[0], the directory, to backend.RemoveFileStorageDir.
840 def perspective_file_storage_dir_remove(params):
841 """Remove the file storage directory.
844 file_storage_dir = params[0]
845 return backend.RemoveFileStorageDir(file_storage_dir)
# params is (old directory, new directory); both are passed in that order to
# backend.RenameFileStorageDir.
848 def perspective_file_storage_dir_rename(params):
849 """Rename the file storage directory.
852 old_file_storage_dir = params[0]
853 new_file_storage_dir = params[1]
854 return backend.RenameFileStorageDir(old_file_storage_dir,
855 new_file_storage_dir)
857 # jobs ------------------------
# Runs under the job queue lock taken by _RequireJobQueueLock; takes
# (file_name, content) and passes both to backend.JobQueueUpdate.
860 @_RequireJobQueueLock
861 def perspective_jobqueue_update(params):
865 (file_name, content) = params
866 return backend.JobQueueUpdate(file_name, content)
# Runs under the job queue lock taken by _RequireJobQueueLock; params is not
# used and the result of backend.JobQueuePurge() is returned.
869 @_RequireJobQueueLock
870 def perspective_jobqueue_purge(params):
874 return backend.JobQueuePurge()
# Runs under the job queue lock taken by _RequireJobQueueLock; params is a
# sequence of (old, new) pairs and the per-pair results of
# backend.JobQueueRename are returned as a list in the same order.
877 @_RequireJobQueueLock
878 def perspective_jobqueue_rename(params):
879 """Rename a job queue file.
882 # TODO: What if a file fails to rename?
883 return [backend.JobQueueRename(old, new) for old, new in params]
885 # hypervisor ---------------
# Takes (hvname, hvparams) and passes both to backend.ValidateHVParams.
888 def perspective_hypervisor_validate_params(params):
889 """Validate the hypervisor parameters.
892 (hvname, hvparams) = params
893 return backend.ValidateHVParams(hvname, hvparams)
# params must hold exactly one element, the validity, which is passed to
# backend.CreateX509Certificate.
898 def perspective_x509_cert_create(params):
899 """Creates a new X509 certificate for SSL/TLS.
902 (validity, ) = params
903 return backend.CreateX509Certificate(validity)
# Returns backend.RemoveX509Certificate(name).
# NOTE(review): the assignment of ``name`` is not visible here; presumably
# it is unpacked from params -- TODO confirm.
906 def perspective_x509_cert_remove(params):
907 """Removes a X509 certificate.
911 return backend.RemoveX509Certificate(name)
# Unpacks five values, deserialises the options and the instance, decodes
# the destination with _DecodeImportExportIO and starts the daemon in
# IEM_IMPORT mode via backend.StartImportExportDaemon.
# NOTE(review): several argument lines of the backend call are not visible
# here, so the full argument order cannot be confirmed from this view.
916 def perspective_import_start(params):
917 """Starts an import daemon.
920 (opts_s, instance, component, dest, dest_args) = params
922 opts = objects.ImportExportOptions.FromDict(opts_s)
924 return backend.StartImportExportDaemon(constants.IEM_IMPORT, opts,
926 objects.Instance.FromDict(instance),
928 _DecodeImportExportIO(dest,
# Unpacks seven values, deserialises the options and the instance, decodes
# the source with _DecodeImportExportIO and starts the daemon in IEM_EXPORT
# mode via backend.StartImportExportDaemon.
# NOTE(review): several argument lines of the backend call are not visible
# here, so the full argument order cannot be confirmed from this view.
932 def perspective_export_start(params):
933 """Starts an export daemon.
936 (opts_s, host, port, instance, component, source, source_args) = params
938 opts = objects.ImportExportOptions.FromDict(opts_s)
940 return backend.StartImportExportDaemon(constants.IEM_EXPORT, opts,
942 objects.Instance.FromDict(instance),
944 _DecodeImportExportIO(source,
# Passes params[0] unchanged to backend.GetImportExportStatus.
948 def perspective_impexp_status(params):
949 """Retrieves the status of an import or export daemon.
952 return backend.GetImportExportStatus(params[0])
# Passes params[0] unchanged to backend.AbortImportExport.
955 def perspective_impexp_abort(params):
956 """Aborts an import or export.
959 return backend.AbortImportExport(params[0])
# Passes params[0] unchanged to backend.CleanupImportExport.
962 def perspective_impexp_cleanup(params):
963 """Cleans up after an import or export.
966 return backend.CleanupImportExport(params[0])
# Start-up checks: any positional argument prints the usage line to stderr
# and exits with constants.EXIT_FAILURE; the "string-escape" codec must also
# be loadable, otherwise an error is printed and the process exits with the
# same code. The first parameter is ignored.
969 def CheckNoded(_, args):
970 """Initial checks whether to run or exit with a failure.
973 if args: # noded doesn't take any arguments
974 print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" %
976 sys.exit(constants.EXIT_FAILURE)
# NOTE(review): the try/except around this lookup is not visible here;
# presumably a LookupError leads to the message below -- TODO confirm.
978 codecs.lookup("string-escape")
980 print >> sys.stderr, ("Can't load the string-escape code which is part"
981 " of the Python installation. Is your installation"
982 " complete/correct? Aborting.")
983 sys.exit(constants.EXIT_FAILURE)
# Builds the objects the daemon needs to run: chooses the request executor
# class, loads the SSL parameters from the options, tries to prepare the job
# queue lock and creates the main loop and HTTP server, which are returned
# as a (mainloop, server) pair. The second parameter is ignored.
986 def PrepNoded(options, _):
987 """Preparation node daemon function, executed with the PID file held.
# NOTE(review): the conditions selecting among the three executor
# assignments below are only partly visible; the plain HttpServerRequestExecutor
# is the fallback when ctypes is missing -- TODO confirm the other branch.
991 request_executor_class = MlockallRequestExecutor
994 except errors.NoCtypesError:
995 logging.warning("Cannot set memory lock, ctypes module not found")
996 request_executor_class = http.server.HttpServerRequestExecutor
998 request_executor_class = http.server.HttpServerRequestExecutor
1000 # Read SSL certificate
1002 ssl_params = http.HttpSslParams(ssl_key_path=options.ssl_key,
1003 ssl_cert_path=options.ssl_cert)
# A non-None value is the error met while preparing the queue lock.
1007 err = _PrepareQueueLock()
1009 # this might be some kind of file-system/permission error; while
1010 # this breaks the job queue functionality, we shouldn't prevent
1011 # startup of the whole node daemon because of this
1012 logging.critical("Can't init/verify the queue, proceeding anyway: %s", err)
1014 mainloop = daemon.Mainloop()
# Peer certificate verification is switched on for every connection.
1015 server = NodeHttpServer(mainloop, options.bind_address, options.port,
1016 ssl_params=ssl_params, ssl_verify_peer=True,
1017 request_executor_class=request_executor_class)
1019 return (mainloop, server)
# Receives the (mainloop, server) pair produced by PrepNoded as prep_data.
# NOTE(review): what is done with the two objects after unpacking is not
# visible here -- TODO confirm.
1022 def ExecNoded(options, args, prep_data): # pylint: disable=W0613
1023 """Main node daemon function, executed with the PID file held.
1026 (mainloop, server) = prep_data
# Builds the command-line parser and hands control to daemon.GenericMain
# together with the CheckNoded, PrepNoded and ExecNoded callbacks.
1034 """Main function for the node daemon.
1037 parser = OptionParser(description="Ganeti node daemon",
1038 usage="%prog [-f] [-d] [-p port] [-b ADDRESS]",
1039 version="%%prog (ganeti) %s" %
1040 constants.RELEASE_VERSION)
# --no-mlock stores False into options.mlock, which otherwise defaults to
# True.
1041 parser.add_option("--no-mlock", dest="mlock",
1042 help="Do not mlock the node memory in ram",
1043 default=True, action="store_false")
# The same constant, NODED_CERT_FILE, is given as both the default
# certificate and the default key.
1045 daemon.GenericMain(constants.NODED, parser, CheckNoded, PrepNoded, ExecNoded,
1046 default_ssl_cert=constants.NODED_CERT_FILE,
1047 default_ssl_key=constants.NODED_CERT_FILE,
1048 console_logging=True)