4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti node daemon"""
24 # pylint: disable=C0103,W0142
26 # C0103: Functions in this module need to have a given name structure,
27 # and the name of the daemon doesn't match
29 # W0142: Used * or ** magic, since we do use it extensively in this
38 from optparse import OptionParser
40 from ganeti import backend
41 from ganeti import constants
42 from ganeti import objects
43 from ganeti import errors
44 from ganeti import jstore
45 from ganeti import daemon
46 from ganeti import http
47 from ganeti import utils
48 from ganeti import storage
49 from ganeti import serializer
50 from ganeti import netutils
52 import ganeti.http.server # pylint: disable=W0611
58 def _PrepareQueueLock():
59 """Try to prepare the queue lock.
61 @return: None for success, otherwise an exception object
64 global queue_lock # pylint: disable=W0603
66 if queue_lock is not None:
71 queue_lock = jstore.InitAndVerifyQueue(must_lock=False)
73 except EnvironmentError, err:
77 def _RequireJobQueueLock(fn):
78 """Decorator for job queue manipulating functions.
81 QUEUE_LOCK_TIMEOUT = 10
83 def wrapper(*args, **kwargs):
84 # Locking in exclusive, blocking mode because there could be several
85 # children running at the same time. Waiting up to 10 seconds.
86 if _PrepareQueueLock() is not None:
87 raise errors.JobQueueError("Job queue failed initialization,"
88 " cannot update jobs")
89 queue_lock.Exclusive(blocking=True, timeout=QUEUE_LOCK_TIMEOUT)
91 return fn(*args, **kwargs)
98 def _DecodeImportExportIO(ieio, ieioargs):
99 """Decodes import/export I/O information.
102 if ieio == constants.IEIO_RAW_DISK:
103 assert len(ieioargs) == 1
104 return (objects.Disk.FromDict(ieioargs[0]), )
106 if ieio == constants.IEIO_SCRIPT:
107 assert len(ieioargs) == 2
108 return (objects.Disk.FromDict(ieioargs[0]), ieioargs[1])
113 class MlockallRequestExecutor(http.server.HttpServerRequestExecutor):
114 """Custom Request Executor class that ensures NodeHttpServer children are
118 def __init__(self, *args, **kwargs):
121 http.server.HttpServerRequestExecutor.__init__(self, *args, **kwargs)
124 class NodeHttpServer(http.server.HttpServer):
125 """The server implementation.
127 This class holds all methods exposed over the RPC interface.
130 # too many public methods, and unused args - all methods get params
132 # pylint: disable=R0904,W0613
133 def __init__(self, *args, **kwargs):
134 http.server.HttpServer.__init__(self, *args, **kwargs)
135 self.noded_pid = os.getpid()
137 def HandleRequest(self, req):
141 if req.request_method.upper() != http.HTTP_PUT:
142 raise http.HttpBadRequest()
144 path = req.request_path
145 if path.startswith("/"):
148 method = getattr(self, "perspective_%s" % path, None)
150 raise http.HttpNotFound()
153 result = (True, method(serializer.LoadJson(req.request_body)))
155 except backend.RPCFail, err:
156 # our custom failure exception; str(err) works fine if the
157 # exception was constructed with a single argument, and in
158 # this case, err.message == err.args[0] == str(err)
159 result = (False, str(err))
160 except errors.QuitGanetiException, err:
161 # Tell parent to quit
162 logging.info("Shutting down the node daemon, arguments: %s",
164 os.kill(self.noded_pid, signal.SIGTERM)
165 # And return the error's arguments, which must be already in
166 # correct tuple format
168 except Exception, err:
169 logging.exception("Error in RPC call")
170 result = (False, "Error while executing backend function: %s" % str(err))
172 return serializer.DumpJson(result, indent=False)
174 # the new block devices --------------------------
177 def perspective_blockdev_create(params):
178 """Create a block device.
181 bdev_s, size, owner, on_primary, info = params
182 bdev = objects.Disk.FromDict(bdev_s)
184 raise ValueError("can't unserialize data!")
185 return backend.BlockdevCreate(bdev, size, owner, on_primary, info)
188 def perspective_blockdev_pause_resume_sync(params):
189 """Pause/resume sync of a block device.
192 disks_s, pause = params
193 disks = [objects.Disk.FromDict(bdev_s) for bdev_s in disks_s]
194 return backend.BlockdevPauseResumeSync(disks, pause)
197 def perspective_blockdev_wipe(params):
198 """Wipe a block device.
201 bdev_s, offset, size = params
202 bdev = objects.Disk.FromDict(bdev_s)
203 return backend.BlockdevWipe(bdev, offset, size)
206 def perspective_blockdev_remove(params):
207 """Remove a block device.
211 bdev = objects.Disk.FromDict(bdev_s)
212 return backend.BlockdevRemove(bdev)
215 def perspective_blockdev_rename(params):
216 """Rename a block device.
219 devlist = [(objects.Disk.FromDict(ds), uid) for ds, uid in params]
220 return backend.BlockdevRename(devlist)
223 def perspective_blockdev_assemble(params):
224 """Assemble a block device.
227 bdev_s, owner, on_primary, idx = params
228 bdev = objects.Disk.FromDict(bdev_s)
230 raise ValueError("can't unserialize data!")
231 return backend.BlockdevAssemble(bdev, owner, on_primary, idx)
234 def perspective_blockdev_shutdown(params):
235 """Shutdown a block device.
239 bdev = objects.Disk.FromDict(bdev_s)
241 raise ValueError("can't unserialize data!")
242 return backend.BlockdevShutdown(bdev)
245 def perspective_blockdev_addchildren(params):
246 """Add a child to a mirror device.
248 Note: this is only valid for mirror devices. It's the caller's duty
249 to send a correct disk, otherwise we raise an error.
252 bdev_s, ndev_s = params
253 bdev = objects.Disk.FromDict(bdev_s)
254 ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s]
255 if bdev is None or ndevs.count(None) > 0:
256 raise ValueError("can't unserialize data!")
257 return backend.BlockdevAddchildren(bdev, ndevs)
260 def perspective_blockdev_removechildren(params):
261 """Remove a child from a mirror device.
263 This is only valid for mirror devices, of course. It's the caller's
264 duty to send a correct disk, otherwise we raise an error.
267 bdev_s, ndev_s = params
268 bdev = objects.Disk.FromDict(bdev_s)
269 ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s]
270 if bdev is None or ndevs.count(None) > 0:
271 raise ValueError("can't unserialize data!")
272 return backend.BlockdevRemovechildren(bdev, ndevs)
275 def perspective_blockdev_getmirrorstatus(params):
276 """Return the mirror status for a list of disks.
279 disks = [objects.Disk.FromDict(dsk_s)
281 return [status.ToDict()
282 for status in backend.BlockdevGetmirrorstatus(disks)]
285 def perspective_blockdev_getmirrorstatus_multi(params):
286 """Return the mirror status for the disks listed for this node.
289 (node_disks, ) = params
291 node_name = netutils.Hostname.GetSysName()
293 disks = [objects.Disk.FromDict(dsk_s)
294 for dsk_s in node_disks.get(node_name, [])]
298 for (success, status) in backend.BlockdevGetmirrorstatusMulti(disks):
300 result.append((success, status.ToDict()))
302 result.append((success, status))
307 def perspective_blockdev_find(params):
308 """Expose the FindBlockDevice functionality for a disk.
310 This will try to find but not activate a disk.
313 disk = objects.Disk.FromDict(params[0])
315 result = backend.BlockdevFind(disk)
319 return result.ToDict()
322 def perspective_blockdev_snapshot(params):
323 """Create a snapshot device.
325 Note that this is only valid for LVM disks, if we get passed
326 something else we raise an exception. The snapshot device can be
327 removed by calling the generic block device remove call.
330 cfbd = objects.Disk.FromDict(params[0])
331 return backend.BlockdevSnapshot(cfbd)
334 def perspective_blockdev_grow(params):
335 """Grow a stack of devices.
338 cfbd = objects.Disk.FromDict(params[0])
341 return backend.BlockdevGrow(cfbd, amount, dryrun)
344 def perspective_blockdev_close(params):
345 """Closes the given block devices.
348 disks = [objects.Disk.FromDict(cf) for cf in params[1]]
349 return backend.BlockdevClose(params[0], disks)
352 def perspective_blockdev_getsize(params):
353 """Compute the sizes of the given block devices.
356 disks = [objects.Disk.FromDict(cf) for cf in params[0]]
357 return backend.BlockdevGetsize(disks)
360 def perspective_blockdev_export(params):
361 """Export a block device to a remote node.
364 disk = objects.Disk.FromDict(params[0])
365 dest_node, dest_path, cluster_name = params[1:]
366 return backend.BlockdevExport(disk, dest_node, dest_path, cluster_name)
368 # blockdev/drbd specific methods ----------
371 def perspective_drbd_disconnect_net(params):
372 """Disconnects the network connection of drbd disks.
374 Note that this is only valid for drbd disks, so the members of the
375 disk list must all be drbd devices.
378 nodes_ip, disks = params
379 disks = [objects.Disk.FromDict(cf) for cf in disks]
380 return backend.DrbdDisconnectNet(nodes_ip, disks)
383 def perspective_drbd_attach_net(params):
384 """Attaches the network connection of drbd disks.
386 Note that this is only valid for drbd disks, so the members of the
387 disk list must all be drbd devices.
390 nodes_ip, disks, instance_name, multimaster = params
391 disks = [objects.Disk.FromDict(cf) for cf in disks]
392 return backend.DrbdAttachNet(nodes_ip, disks,
393 instance_name, multimaster)
396 def perspective_drbd_wait_sync(params):
397 """Wait until DRBD disks are synched.
399 Note that this is only valid for drbd disks, so the members of the
400 disk list must all be drbd devices.
403 nodes_ip, disks = params
404 disks = [objects.Disk.FromDict(cf) for cf in disks]
405 return backend.DrbdWaitSync(nodes_ip, disks)
408 def perspective_drbd_helper(params):
409 """Query drbd helper.
412 return backend.GetDrbdUsermodeHelper()
414 # export/import --------------------------
417 def perspective_finalize_export(params):
418 """Expose the finalize export functionality.
421 instance = objects.Instance.FromDict(params[0])
424 for disk in params[1]:
425 if isinstance(disk, bool):
426 snap_disks.append(disk)
428 snap_disks.append(objects.Disk.FromDict(disk))
430 return backend.FinalizeExport(instance, snap_disks)
433 def perspective_export_info(params):
434 """Query information about an existing export on this node.
436 The given path may not contain an export, in which case we return
441 return backend.ExportInfo(path)
444 def perspective_export_list(params):
445 """List the available exports on this node.
447 Note that as opposed to export_info, which may query data about an
448 export in any path, this only queries the standard Ganeti path
449 (constants.EXPORT_DIR).
452 return backend.ListExports()
455 def perspective_export_remove(params):
460 return backend.RemoveExport(export)
462 # block device ---------------------
464 def perspective_bdev_sizes(params):
465 """Query the sizes of the given block devices.
469 return backend.GetBlockDevSizes(devices)
471 # volume --------------------------
474 def perspective_lv_list(params):
475 """Query the list of logical volumes in a given volume group.
479 return backend.GetVolumeList(vgname)
482 def perspective_vg_list(params):
483 """Query the list of volume groups.
486 return backend.ListVolumeGroups()
488 # Storage --------------------------
491 def perspective_storage_list(params):
492 """Get list of storage units.
495 (su_name, su_args, name, fields) = params
496 return storage.GetStorage(su_name, *su_args).List(name, fields)
499 def perspective_storage_modify(params):
500 """Modify a storage unit.
503 (su_name, su_args, name, changes) = params
504 return storage.GetStorage(su_name, *su_args).Modify(name, changes)
507 def perspective_storage_execute(params):
508 """Execute an operation on a storage unit.
511 (su_name, su_args, name, op) = params
512 return storage.GetStorage(su_name, *su_args).Execute(name, op)
514 # bridge --------------------------
517 def perspective_bridges_exist(params):
518 """Check if all bridges given exist on this node.
521 bridges_list = params[0]
522 return backend.BridgesExist(bridges_list)
524 # instance --------------------------
527 def perspective_instance_os_add(params):
528 """Install an OS on a given instance.
532 inst = objects.Instance.FromDict(inst_s)
533 reinstall = params[1]
535 return backend.InstanceOsAdd(inst, reinstall, debug)
538 def perspective_instance_run_rename(params):
539 """Runs the OS rename script for an instance.
542 inst_s, old_name, debug = params
543 inst = objects.Instance.FromDict(inst_s)
544 return backend.RunRenameInstance(inst, old_name, debug)
547 def perspective_instance_shutdown(params):
548 """Shutdown an instance.
551 instance = objects.Instance.FromDict(params[0])
553 return backend.InstanceShutdown(instance, timeout)
556 def perspective_instance_start(params):
557 """Start an instance.
560 (instance_name, startup_paused) = params
561 instance = objects.Instance.FromDict(instance_name)
562 return backend.StartInstance(instance, startup_paused)
565 def perspective_migration_info(params):
566 """Gather information about an instance to be migrated.
569 instance = objects.Instance.FromDict(params[0])
570 return backend.MigrationInfo(instance)
573 def perspective_accept_instance(params):
574 """Prepare the node to accept an instance.
577 instance, info, target = params
578 instance = objects.Instance.FromDict(instance)
579 return backend.AcceptInstance(instance, info, target)
582 def perspective_finalize_migration(params):
583 """Finalize the instance migration.
586 instance, info, success = params
587 instance = objects.Instance.FromDict(instance)
588 return backend.FinalizeMigration(instance, info, success)
591 def perspective_instance_migrate(params):
592 """Migrates an instance.
595 instance, target, live = params
596 instance = objects.Instance.FromDict(instance)
597 return backend.MigrateInstance(instance, target, live)
600 def perspective_instance_reboot(params):
601 """Reboot an instance.
604 instance = objects.Instance.FromDict(params[0])
605 reboot_type = params[1]
606 shutdown_timeout = params[2]
607 return backend.InstanceReboot(instance, reboot_type, shutdown_timeout)
610 def perspective_instance_info(params):
611 """Query instance information.
614 return backend.GetInstanceInfo(params[0], params[1])
617 def perspective_instance_migratable(params):
618 """Query whether the specified instance can be migrated.
621 instance = objects.Instance.FromDict(params[0])
622 return backend.GetInstanceMigratable(instance)
625 def perspective_all_instances_info(params):
626 """Query information about all instances.
629 return backend.GetAllInstancesInfo(params[0])
632 def perspective_instance_list(params):
633 """Query the list of running instances.
636 return backend.GetInstanceList(params[0])
638 # node --------------------------
641 def perspective_node_tcp_ping(params):
642 """Do a TcpPing on the remote node.
645 return netutils.TcpPing(params[1], params[2], timeout=params[3],
646 live_port_needed=params[4], source=params[0])
649 def perspective_node_has_ip_address(params):
650 """Checks if a node has the given ip address.
653 return netutils.IPAddress.Own(params[0])
656 def perspective_node_info(params):
657 """Query node information.
660 vgname, hypervisor_type = params
661 return backend.GetNodeInfo(vgname, hypervisor_type)
664 def perspective_etc_hosts_modify(params):
665 """Modify a node entry in /etc/hosts.
668 backend.EtcHostsModify(params[0], params[1], params[2])
673 def perspective_node_verify(params):
674 """Run a verify sequence on this node.
677 return backend.VerifyNode(params[0], params[1])
680 def perspective_node_start_master_daemons(params):
681 """Start the master daemons on this node.
684 return backend.StartMasterDaemons(params[0])
687 def perspective_node_activate_master_ip(params):
688 """Activate the master IP on this node.
691 return backend.ActivateMasterIp()
694 def perspective_node_deactivate_master_ip(params):
695 """Deactivate the master IP on this node.
698 return backend.DeactivateMasterIp()
701 def perspective_node_stop_master(params):
702 """Deactivate the master IP and stop the master daemons on this node.
704 Sometimes both operations need to be executed at the same time (doing one of
705 the two would make it impossible to do the other one).
708 backend.DeactivateMasterIp()
709 return backend.StopMasterDaemons()
712 def perspective_node_leave_cluster(params):
713 """Cleanup after leaving a cluster.
716 return backend.LeaveCluster(params[0])
719 def perspective_node_volumes(params):
720 """Query the list of all logical volume groups.
723 return backend.NodeVolumes()
726 def perspective_node_demote_from_mc(params):
727 """Demote a node from the master candidate role.
730 return backend.DemoteFromMC()
733 def perspective_node_powercycle(params):
734 """Tries to powercycle the node.
737 hypervisor_type = params[0]
738 return backend.PowercycleNode(hypervisor_type)
740 # cluster --------------------------
743 def perspective_version(params):
744 """Query version information.
747 return constants.PROTOCOL_VERSION
750 def perspective_upload_file(params):
753 Note that the backend implementation imposes strict rules on which
757 return backend.UploadFile(*params)
760 def perspective_master_info(params):
761 """Query master information.
764 return backend.GetMasterInfo()
767 def perspective_run_oob(params):
771 output = backend.RunOob(params[0], params[1], params[2], params[3])
773 result = serializer.LoadJson(output)
779 def perspective_write_ssconf_files(params):
780 """Write ssconf files.
784 return backend.WriteSsconfFiles(values)
786 # os -----------------------
789 def perspective_os_diagnose(params):
790 """Query detailed information about existing OSes.
793 return backend.DiagnoseOS()
796 def perspective_os_get(params):
797 """Query information about a given OS.
801 os_obj = backend.OSFromDisk(name)
802 return os_obj.ToDict()
805 def perspective_os_validate(params):
806 """Run a given OS' validation routine.
809 required, name, checks, params = params
810 return backend.ValidateOS(required, name, checks, params)
812 # hooks -----------------------
815 def perspective_hooks_runner(params):
819 hpath, phase, env = params
820 hr = backend.HooksRunner()
821 return hr.RunHooks(hpath, phase, env)
823 # iallocator -----------------
826 def perspective_iallocator_runner(params):
827 """Run an iallocator script.
831 iar = backend.IAllocatorRunner()
832 return iar.Run(name, idata)
834 # test -----------------------
837 def perspective_test_delay(params):
842 status, rval = utils.TestDelay(duration)
844 raise backend.RPCFail(rval)
847 # file storage ---------------
850 def perspective_file_storage_dir_create(params):
851 """Create the file storage directory.
854 file_storage_dir = params[0]
855 return backend.CreateFileStorageDir(file_storage_dir)
858 def perspective_file_storage_dir_remove(params):
859 """Remove the file storage directory.
862 file_storage_dir = params[0]
863 return backend.RemoveFileStorageDir(file_storage_dir)
866 def perspective_file_storage_dir_rename(params):
867 """Rename the file storage directory.
870 old_file_storage_dir = params[0]
871 new_file_storage_dir = params[1]
872 return backend.RenameFileStorageDir(old_file_storage_dir,
873 new_file_storage_dir)
875 # jobs ------------------------
878 @_RequireJobQueueLock
879 def perspective_jobqueue_update(params):
883 (file_name, content) = params
884 return backend.JobQueueUpdate(file_name, content)
887 @_RequireJobQueueLock
888 def perspective_jobqueue_purge(params):
892 return backend.JobQueuePurge()
895 @_RequireJobQueueLock
896 def perspective_jobqueue_rename(params):
897 """Rename a job queue file.
900 # TODO: What if a file fails to rename?
901 return [backend.JobQueueRename(old, new) for old, new in params]
903 # hypervisor ---------------
906 def perspective_hypervisor_validate_params(params):
907 """Validate the hypervisor parameters.
910 (hvname, hvparams) = params
911 return backend.ValidateHVParams(hvname, hvparams)
916 def perspective_x509_cert_create(params):
917 """Creates a new X509 certificate for SSL/TLS.
920 (validity, ) = params
921 return backend.CreateX509Certificate(validity)
924 def perspective_x509_cert_remove(params):
925 """Removes a X509 certificate.
929 return backend.RemoveX509Certificate(name)
934 def perspective_import_start(params):
935 """Starts an import daemon.
938 (opts_s, instance, component, dest, dest_args) = params
940 opts = objects.ImportExportOptions.FromDict(opts_s)
942 return backend.StartImportExportDaemon(constants.IEM_IMPORT, opts,
944 objects.Instance.FromDict(instance),
946 _DecodeImportExportIO(dest,
950 def perspective_export_start(params):
951 """Starts an export daemon.
954 (opts_s, host, port, instance, component, source, source_args) = params
956 opts = objects.ImportExportOptions.FromDict(opts_s)
958 return backend.StartImportExportDaemon(constants.IEM_EXPORT, opts,
960 objects.Instance.FromDict(instance),
962 _DecodeImportExportIO(source,
966 def perspective_impexp_status(params):
967 """Retrieves the status of an import or export daemon.
970 return backend.GetImportExportStatus(params[0])
973 def perspective_impexp_abort(params):
974 """Aborts an import or export.
977 return backend.AbortImportExport(params[0])
980 def perspective_impexp_cleanup(params):
981 """Cleans up after an import or export.
984 return backend.CleanupImportExport(params[0])
987 def CheckNoded(_, args):
988 """Initial checks whether to run or exit with a failure.
991 if args: # noded doesn't take any arguments
992 print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" %
994 sys.exit(constants.EXIT_FAILURE)
996 codecs.lookup("string-escape")
998 print >> sys.stderr, ("Can't load the string-escape code which is part"
999 " of the Python installation. Is your installation"
1000 " complete/correct? Aborting.")
1001 sys.exit(constants.EXIT_FAILURE)
1004 def PrepNoded(options, _):
1005 """Preparation node daemon function, executed with the PID file held.
1009 request_executor_class = MlockallRequestExecutor
1012 except errors.NoCtypesError:
1013 logging.warning("Cannot set memory lock, ctypes module not found")
1014 request_executor_class = http.server.HttpServerRequestExecutor
1016 request_executor_class = http.server.HttpServerRequestExecutor
1018 # Read SSL certificate
1020 ssl_params = http.HttpSslParams(ssl_key_path=options.ssl_key,
1021 ssl_cert_path=options.ssl_cert)
1025 err = _PrepareQueueLock()
1027 # this might be some kind of file-system/permission error; while
1028 # this breaks the job queue functionality, we shouldn't prevent
1029 # startup of the whole node daemon because of this
1030 logging.critical("Can't init/verify the queue, proceeding anyway: %s", err)
1032 mainloop = daemon.Mainloop()
1033 server = NodeHttpServer(mainloop, options.bind_address, options.port,
1034 ssl_params=ssl_params, ssl_verify_peer=True,
1035 request_executor_class=request_executor_class)
1037 return (mainloop, server)
1040 def ExecNoded(options, args, prep_data): # pylint: disable=W0613
1041 """Main node daemon function, executed with the PID file held.
1044 (mainloop, server) = prep_data
1052 """Main function for the node daemon.
1055 parser = OptionParser(description="Ganeti node daemon",
1056 usage="%prog [-f] [-d] [-p port] [-b ADDRESS]",
1057 version="%%prog (ganeti) %s" %
1058 constants.RELEASE_VERSION)
1059 parser.add_option("--no-mlock", dest="mlock",
1060 help="Do not mlock the node memory in ram",
1061 default=True, action="store_false")
1063 daemon.GenericMain(constants.NODED, parser, CheckNoded, PrepNoded, ExecNoded,
1064 default_ssl_cert=constants.NODED_CERT_FILE,
1065 default_ssl_key=constants.NODED_CERT_FILE,
1066 console_logging=True)