4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti node daemon"""
24 # pylint: disable=C0103,W0142
26 # C0103: Functions in this module need to have a given name structure,
27 # and the name of the daemon doesn't match
29 # W0142: Used * or ** magic, since we do use it extensively in this
38 from optparse import OptionParser
40 from ganeti import backend
41 from ganeti import constants
42 from ganeti import objects
43 from ganeti import errors
44 from ganeti import jstore
45 from ganeti import daemon
46 from ganeti import http
47 from ganeti import utils
48 from ganeti import storage
49 from ganeti import serializer
50 from ganeti import netutils
52 import ganeti.http.server # pylint: disable=W0611
58 def _PrepareQueueLock():
59 """Try to prepare the queue lock.
61 @return: None for success, otherwise an exception object
64 global queue_lock # pylint: disable=W0603
66 if queue_lock is not None:
71 queue_lock = jstore.InitAndVerifyQueue(must_lock=False)
73 except EnvironmentError, err:
77 def _RequireJobQueueLock(fn):
78 """Decorator for job queue manipulating functions.
81 QUEUE_LOCK_TIMEOUT = 10
83 def wrapper(*args, **kwargs):
84 # Locking in exclusive, blocking mode because there could be several
85 # children running at the same time. Waiting up to 10 seconds.
86 if _PrepareQueueLock() is not None:
87 raise errors.JobQueueError("Job queue failed initialization,"
88 " cannot update jobs")
89 queue_lock.Exclusive(blocking=True, timeout=QUEUE_LOCK_TIMEOUT)
91 return fn(*args, **kwargs)
98 def _DecodeImportExportIO(ieio, ieioargs):
99 """Decodes import/export I/O information.
102 if ieio == constants.IEIO_RAW_DISK:
103 assert len(ieioargs) == 1
104 return (objects.Disk.FromDict(ieioargs[0]), )
106 if ieio == constants.IEIO_SCRIPT:
107 assert len(ieioargs) == 2
108 return (objects.Disk.FromDict(ieioargs[0]), ieioargs[1])
113 class MlockallRequestExecutor(http.server.HttpServerRequestExecutor):
114 """Custom Request Executor class that ensures NodeHttpServer children are
118 def __init__(self, *args, **kwargs):
121 http.server.HttpServerRequestExecutor.__init__(self, *args, **kwargs)
124 class NodeHttpServer(http.server.HttpServer):
125 """The server implementation.
127 This class holds all methods exposed over the RPC interface.
130 # too many public methods, and unused args - all methods get params
132 # pylint: disable=R0904,W0613
def __init__(self, *args, **kwargs):
  """Set up the HTTP server and remember the daemon's process ID.

  All positional and keyword arguments are handed on unchanged to
  L{http.server.HttpServer.__init__}.

  """
  http.server.HttpServer.__init__(self, *args, **kwargs)
  # HandleRequest sends SIGTERM to this PID when asked to quit
  self.noded_pid = os.getpid()
137 def HandleRequest(self, req):
141 if req.request_method.upper() != http.HTTP_PUT:
142 raise http.HttpBadRequest()
144 path = req.request_path
145 if path.startswith("/"):
148 method = getattr(self, "perspective_%s" % path, None)
150 raise http.HttpNotFound()
153 result = (True, method(serializer.LoadJson(req.request_body)))
155 except backend.RPCFail, err:
156 # our custom failure exception; str(err) works fine if the
157 # exception was constructed with a single argument, and in
158 # this case, err.message == err.args[0] == str(err)
159 result = (False, str(err))
160 except errors.QuitGanetiException, err:
161 # Tell parent to quit
162 logging.info("Shutting down the node daemon, arguments: %s",
164 os.kill(self.noded_pid, signal.SIGTERM)
165 # And return the error's arguments, which must be already in
166 # correct tuple format
168 except Exception, err:
169 logging.exception("Error in RPC call")
170 result = (False, "Error while executing backend function: %s" % str(err))
172 return serializer.DumpJson(result, indent=False)
174 # the new block devices --------------------------
177 def perspective_blockdev_create(params):
178 """Create a block device.
181 bdev_s, size, owner, on_primary, info = params
182 bdev = objects.Disk.FromDict(bdev_s)
184 raise ValueError("can't unserialize data!")
185 return backend.BlockdevCreate(bdev, size, owner, on_primary, info)
def perspective_blockdev_pause_resume_sync(params):
  """Pause or resume synchronisation of a set of block devices.

  @param params: two-element sequence C{(disks, pause)}; every entry of
      C{disks} is a serialized disk rebuilt via L{objects.Disk.FromDict}
  @return: the result of L{backend.BlockdevPauseResumeSync}

  """
  (serialized_disks, pause) = params
  disk_objs = []
  for entry in serialized_disks:
    disk_objs.append(objects.Disk.FromDict(entry))
  return backend.BlockdevPauseResumeSync(disk_objs, pause)
def perspective_blockdev_wipe(params):
  """Wipe a region of a block device.

  @param params: three-element sequence C{(disk, offset, size)}; the
      disk is in serialized form and is rebuilt with
      L{objects.Disk.FromDict}
  @return: the result of L{backend.BlockdevWipe}

  """
  (disk_data, offset, size) = params
  disk = objects.Disk.FromDict(disk_data)
  return backend.BlockdevWipe(disk, offset, size)
206 def perspective_blockdev_remove(params):
207 """Remove a block device.
211 bdev = objects.Disk.FromDict(bdev_s)
212 return backend.BlockdevRemove(bdev)
def perspective_blockdev_rename(params):
  """Rename a list of block devices.

  @param params: sequence whose first element is an iterable of
      C{(disk, uid)} pairs, each disk being in serialized form
  @return: the result of L{backend.BlockdevRename}

  """
  rename_pairs = []
  for (disk_data, uid) in params[0]:
    rename_pairs.append((objects.Disk.FromDict(disk_data), uid))
  return backend.BlockdevRename(rename_pairs)
223 def perspective_blockdev_assemble(params):
224 """Assemble a block device.
227 bdev_s, owner, on_primary, idx = params
228 bdev = objects.Disk.FromDict(bdev_s)
230 raise ValueError("can't unserialize data!")
231 return backend.BlockdevAssemble(bdev, owner, on_primary, idx)
234 def perspective_blockdev_shutdown(params):
235 """Shutdown a block device.
239 bdev = objects.Disk.FromDict(bdev_s)
241 raise ValueError("can't unserialize data!")
242 return backend.BlockdevShutdown(bdev)
def perspective_blockdev_addchildren(params):
  """Attach additional children to a mirror device.

  This only makes sense for mirror devices; the caller is responsible
  for sending a suitable disk, otherwise an error is raised.

  @param params: two-element sequence C{(parent, children)} holding the
      serialized parent disk and an iterable of serialized child disks
  @raise ValueError: if the parent or any child deserializes to C{None}
  @return: the result of L{backend.BlockdevAddchildren}

  """
  (parent_data, children_data) = params
  parent = objects.Disk.FromDict(parent_data)
  children = []
  for child_data in children_data:
    children.append(objects.Disk.FromDict(child_data))

  all_unserialized = parent is not None and children.count(None) == 0
  if not all_unserialized:
    raise ValueError("can't unserialize data!")
  return backend.BlockdevAddchildren(parent, children)
def perspective_blockdev_removechildren(params):
  """Detach children from a mirror device.

  This only makes sense for mirror devices; it is the caller's duty to
  send a suitable disk, otherwise an error is raised.

  @param params: two-element sequence C{(parent, children)} holding the
      serialized parent disk and an iterable of serialized child disks
  @raise ValueError: if the parent or any child deserializes to C{None}
  @return: the result of L{backend.BlockdevRemovechildren}

  """
  (parent_data, children_data) = params
  parent = objects.Disk.FromDict(parent_data)
  children = []
  for child_data in children_data:
    children.append(objects.Disk.FromDict(child_data))

  all_unserialized = parent is not None and children.count(None) == 0
  if not all_unserialized:
    raise ValueError("can't unserialize data!")
  return backend.BlockdevRemovechildren(parent, children)
def perspective_blockdev_getmirrorstatus(params):
  """Report the mirror status of a list of disks.

  @param params: sequence whose first element is an iterable of
      serialized disks
  @return: list with one C{ToDict()} result per status object returned
      by L{backend.BlockdevGetmirrorstatus}

  """
  disk_objs = []
  for disk_data in params[0]:
    disk_objs.append(objects.Disk.FromDict(disk_data))

  serialized = []
  for entry in backend.BlockdevGetmirrorstatus(disk_objs):
    serialized.append(entry.ToDict())
  return serialized
285 def perspective_blockdev_getmirrorstatus_multi(params):
286 """Return the mirror status for a list of disks.
289 (node_disks, ) = params
291 node_name = netutils.Hostname.GetSysName()
293 disks = [objects.Disk.FromDict(dsk_s)
294 for dsk_s in node_disks.get(node_name, [])]
298 for (success, status) in backend.BlockdevGetmirrorstatusMulti(disks):
300 result.append((success, status.ToDict()))
302 result.append((success, status))
307 def perspective_blockdev_find(params):
308 """Expose the FindBlockDevice functionality for a disk.
310 This will try to find but not activate a disk.
313 disk = objects.Disk.FromDict(params[0])
315 result = backend.BlockdevFind(disk)
319 return result.ToDict()
def perspective_blockdev_snapshot(params):
  """Create a snapshot of a block device.

  Snapshots are only valid for LVM disks; anything else leads to an
  exception. The resulting snapshot device can be removed through the
  generic block device remove call.

  @param params: sequence whose first element is the serialized disk
  @return: the result of L{backend.BlockdevSnapshot}

  """
  disk_data = params[0]
  return backend.BlockdevSnapshot(objects.Disk.FromDict(disk_data))
334 def perspective_blockdev_grow(params):
335 """Grow a stack of devices.
338 cfbd = objects.Disk.FromDict(params[0])
341 return backend.BlockdevGrow(cfbd, amount, dryrun)
def perspective_blockdev_close(params):
  """Close the given block devices.

  @param params: sequence whose first element is passed through
      unchanged and whose second element is an iterable of serialized
      disks
  @return: the result of L{backend.BlockdevClose}

  """
  disk_objs = []
  for disk_data in params[1]:
    disk_objs.append(objects.Disk.FromDict(disk_data))
  return backend.BlockdevClose(params[0], disk_objs)
def perspective_blockdev_getsize(params):
  """Compute the sizes of the given block devices.

  @param params: sequence whose first element is an iterable of
      serialized disks
  @return: the result of L{backend.BlockdevGetsize}

  """
  disk_objs = []
  for disk_data in params[0]:
    disk_objs.append(objects.Disk.FromDict(disk_data))
  return backend.BlockdevGetsize(disk_objs)
def perspective_blockdev_export(params):
  """Export a block device.

  @param params: four-element sequence
      C{(disk, dest_node, dest_path, cluster_name)}; the disk is in
      serialized form
  @return: the result of L{backend.BlockdevExport}

  """
  source_disk = objects.Disk.FromDict(params[0])
  # Unpacking the slice insists on exactly three trailing elements
  (dest_node, dest_path, cluster_name) = params[1:]
  return backend.BlockdevExport(source_disk, dest_node, dest_path,
                                cluster_name)
368 # blockdev/drbd specific methods ----------
def perspective_drbd_disconnect_net(params):
  """Disconnect the network connection of DRBD disks.

  Only valid for DRBD disks, so every member of the disk list must be
  a DRBD device.

  @param params: two-element sequence C{(nodes_ip, disks)}; C{disks}
      is an iterable of serialized disks
  @return: the result of L{backend.DrbdDisconnectNet}

  """
  (nodes_ip, serialized_disks) = params
  drbd_disks = []
  for disk_data in serialized_disks:
    drbd_disks.append(objects.Disk.FromDict(disk_data))
  return backend.DrbdDisconnectNet(nodes_ip, drbd_disks)
def perspective_drbd_attach_net(params):
  """Attach the network connection of DRBD disks.

  Only valid for DRBD disks, so every member of the disk list must be
  a DRBD device.

  @param params: four-element sequence
      C{(nodes_ip, disks, instance_name, multimaster)}; C{disks} is an
      iterable of serialized disks
  @return: the result of L{backend.DrbdAttachNet}

  """
  (nodes_ip, serialized_disks, instance_name, multimaster) = params
  drbd_disks = []
  for disk_data in serialized_disks:
    drbd_disks.append(objects.Disk.FromDict(disk_data))
  return backend.DrbdAttachNet(nodes_ip, drbd_disks, instance_name,
                               multimaster)
def perspective_drbd_wait_sync(params):
  """Wait until DRBD disks are synchronised.

  Only valid for DRBD disks, so every member of the disk list must be
  a DRBD device.

  @param params: two-element sequence C{(nodes_ip, disks)}; C{disks}
      is an iterable of serialized disks
  @return: the result of L{backend.DrbdWaitSync}

  """
  (nodes_ip, serialized_disks) = params
  drbd_disks = []
  for disk_data in serialized_disks:
    drbd_disks.append(objects.Disk.FromDict(disk_data))
  return backend.DrbdWaitSync(nodes_ip, drbd_disks)
def perspective_drbd_helper(params):
  """Query the DRBD usermode helper.

  @param params: ignored
  @return: the result of L{backend.GetDrbdUsermodeHelper}

  """
  helper = backend.GetDrbdUsermodeHelper()
  return helper
414 # export/import --------------------------
417 def perspective_finalize_export(params):
418 """Expose the finalize export functionality.
421 instance = objects.Instance.FromDict(params[0])
424 for disk in params[1]:
425 if isinstance(disk, bool):
426 snap_disks.append(disk)
428 snap_disks.append(objects.Disk.FromDict(disk))
430 return backend.FinalizeExport(instance, snap_disks)
433 def perspective_export_info(params):
434 """Query information about an existing export on this node.
436 The given path may not contain an export, in which case we return
441 return backend.ExportInfo(path)
def perspective_export_list(params):
  """List the exports available on this node.

  Unlike export_info, which may look at an export in any path, this
  call only inspects the standard Ganeti export location
  (constants.EXPORT_DIR).

  @param params: ignored
  @return: the result of L{backend.ListExports}

  """
  exports = backend.ListExports()
  return exports
455 def perspective_export_remove(params):
460 return backend.RemoveExport(export)
462 # block device ---------------------
464 def perspective_bdev_sizes(params):
465 """Query the list of block devices
469 return backend.GetBlockDevSizes(devices)
471 # volume --------------------------
474 def perspective_lv_list(params):
475 """Query the list of logical volumes in a given volume group.
479 return backend.GetVolumeList(vgname)
def perspective_vg_list(params):
  """Query the list of volume groups.

  @param params: ignored
  @return: the result of L{backend.ListVolumeGroups}

  """
  volume_groups = backend.ListVolumeGroups()
  return volume_groups
488 # Storage --------------------------
def perspective_storage_list(params):
  """List storage units.

  @param params: four-element sequence
      C{(su_name, su_args, name, fields)}; C{su_name} and the expanded
      C{su_args} select the storage object via L{storage.GetStorage}
  @return: the result of that object's C{List(name, fields)}

  """
  (su_name, su_args, name, fields) = params
  storage_unit = storage.GetStorage(su_name, *su_args)
  return storage_unit.List(name, fields)
def perspective_storage_modify(params):
  """Modify a storage unit.

  @param params: four-element sequence
      C{(su_name, su_args, name, changes)}; C{su_name} and the expanded
      C{su_args} select the storage object via L{storage.GetStorage}
  @return: the result of that object's C{Modify(name, changes)}

  """
  (su_name, su_args, name, changes) = params
  storage_unit = storage.GetStorage(su_name, *su_args)
  return storage_unit.Modify(name, changes)
def perspective_storage_execute(params):
  """Execute an operation on a storage unit.

  @param params: four-element sequence C{(su_name, su_args, name, op)};
      C{su_name} and the expanded C{su_args} select the storage object
      via L{storage.GetStorage}
  @return: the result of that object's C{Execute(name, op)}

  """
  (su_name, su_args, name, op) = params
  storage_unit = storage.GetStorage(su_name, *su_args)
  return storage_unit.Execute(name, op)
514 # bridge --------------------------
def perspective_bridges_exist(params):
  """Check whether all the given bridges exist on this node.

  @param params: sequence whose first element is the list of bridges
  @return: the result of L{backend.BridgesExist}

  """
  return backend.BridgesExist(params[0])
524 # instance --------------------------
527 def perspective_instance_os_add(params):
528 """Install an OS on a given instance.
532 inst = objects.Instance.FromDict(inst_s)
533 reinstall = params[1]
535 return backend.InstanceOsAdd(inst, reinstall, debug)
def perspective_instance_run_rename(params):
  """Run the OS rename script for an instance.

  @param params: three-element sequence C{(instance, old_name, debug)};
      the instance is in serialized form and is rebuilt with
      L{objects.Instance.FromDict}
  @return: the result of L{backend.RunRenameInstance}

  """
  (instance_data, old_name, debug) = params
  return backend.RunRenameInstance(objects.Instance.FromDict(instance_data),
                                   old_name, debug)
547 def perspective_instance_shutdown(params):
548 """Shutdown an instance.
551 instance = objects.Instance.FromDict(params[0])
553 return backend.InstanceShutdown(instance, timeout)
def perspective_instance_start(params):
  """Start an instance.

  @param params: two-element sequence C{(instance, startup_paused)};
      the instance is in serialized form and is rebuilt with
      L{objects.Instance.FromDict}
  @return: the result of L{backend.StartInstance}

  """
  (instance_data, startup_paused) = params
  return backend.StartInstance(objects.Instance.FromDict(instance_data),
                               startup_paused)
def perspective_migration_info(params):
  """Gather information about an instance that is to be migrated.

  @param params: sequence whose first element is the serialized
      instance
  @return: the result of L{backend.MigrationInfo}

  """
  instance_data = params[0]
  return backend.MigrationInfo(objects.Instance.FromDict(instance_data))
def perspective_accept_instance(params):
  """Prepare this node to accept an instance.

  @param params: three-element sequence C{(instance, info, target)};
      the instance is in serialized form and is rebuilt with
      L{objects.Instance.FromDict}
  @return: the result of L{backend.AcceptInstance}

  """
  (instance_data, info, target) = params
  incoming = objects.Instance.FromDict(instance_data)
  return backend.AcceptInstance(incoming, info, target)
def perspective_instance_finalize_migration_dst(params):
  """Finalize an instance migration on the destination node.

  @param params: three-element sequence C{(instance, info, success)};
      the instance is in serialized form and is rebuilt with
      L{objects.Instance.FromDict}
  @return: the result of L{backend.FinalizeMigrationDst}

  """
  (instance_data, info, success) = params
  migrated = objects.Instance.FromDict(instance_data)
  return backend.FinalizeMigrationDst(migrated, info, success)
def perspective_instance_migrate(params):
  """Migrate an instance.

  @param params: three-element sequence C{(instance, target, live)};
      the instance is in serialized form and is rebuilt with
      L{objects.Instance.FromDict}
  @return: the result of L{backend.MigrateInstance}

  """
  (instance_data, target, live) = params
  migrating = objects.Instance.FromDict(instance_data)
  return backend.MigrateInstance(migrating, target, live)
def perspective_instance_finalize_migration_src(params):
  """Finalize an instance migration on the source node.

  @param params: three-element sequence C{(instance, success, live)};
      the instance is in serialized form and is rebuilt with
      L{objects.Instance.FromDict}
  @return: the result of L{backend.FinalizeMigrationSource}

  """
  (instance_data, success, live) = params
  migrated = objects.Instance.FromDict(instance_data)
  return backend.FinalizeMigrationSource(migrated, success, live)
def perspective_instance_get_migration_status(params):
  """Report the migration status of an instance.

  @param params: sequence whose first element is the serialized
      instance
  @return: C{ToDict()} of the object returned by
      L{backend.GetMigrationStatus}

  """
  instance_data = params[0]
  status = backend.GetMigrationStatus(
    objects.Instance.FromDict(instance_data))
  return status.ToDict()
def perspective_instance_reboot(params):
  """Reboot an instance.

  @param params: sequence whose first three elements are the
      serialized instance, the reboot type and the shutdown timeout
  @return: the result of L{backend.InstanceReboot}

  """
  target = objects.Instance.FromDict(params[0])
  return backend.InstanceReboot(target, params[1], params[2])
def perspective_instance_info(params):
  """Query instance information.

  @param params: sequence whose first two elements are forwarded, in
      order, to L{backend.GetInstanceInfo}
  @return: the result of L{backend.GetInstanceInfo}

  """
  call_args = (params[0], params[1])
  return backend.GetInstanceInfo(*call_args)
def perspective_instance_migratable(params):
  """Query whether the given instance can be migrated.

  @param params: sequence whose first element is the serialized
      instance
  @return: the result of L{backend.GetInstanceMigratable}

  """
  instance_data = params[0]
  return backend.GetInstanceMigratable(
    objects.Instance.FromDict(instance_data))
def perspective_all_instances_info(params):
  """Query information about all instances.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.GetAllInstancesInfo}
  @return: the result of L{backend.GetAllInstancesInfo}

  """
  payload = params[0]
  return backend.GetAllInstancesInfo(payload)
def perspective_instance_list(params):
  """Query the list of running instances.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.GetInstanceList}
  @return: the result of L{backend.GetInstanceList}

  """
  payload = params[0]
  return backend.GetInstanceList(payload)
655 # node --------------------------
def perspective_node_has_ip_address(params):
  """Check whether this node owns the given IP address.

  @param params: sequence whose first element is the address to check
  @return: the result of L{netutils.IPAddress.Own}

  """
  address = params[0]
  return netutils.IPAddress.Own(address)
def perspective_node_info(params):
  """Query node information.

  @param params: two-element sequence C{(vgname, hypervisor_type)}
  @return: the result of L{backend.GetNodeInfo}

  """
  (vgname, hypervisor_type) = params
  node_info = backend.GetNodeInfo(vgname, hypervisor_type)
  return node_info
673 def perspective_etc_hosts_modify(params):
674 """Modify a node entry in /etc/hosts.
677 backend.EtcHostsModify(params[0], params[1], params[2])
def perspective_node_verify(params):
  """Run a verify sequence on this node.

  @param params: sequence whose first two elements are forwarded, in
      order, to L{backend.VerifyNode}
  @return: the result of L{backend.VerifyNode}

  """
  call_args = (params[0], params[1])
  return backend.VerifyNode(*call_args)
def perspective_node_start_master_daemons(params):
  """Start the master daemons on this node.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.StartMasterDaemons}
  @return: the result of L{backend.StartMasterDaemons}

  """
  payload = params[0]
  return backend.StartMasterDaemons(payload)
def perspective_node_activate_master_ip(params):
  """Activate the master IP on this node.

  @param params: sequence whose first four elements are forwarded, in
      order, to L{backend.ActivateMasterIp}
  @return: the result of L{backend.ActivateMasterIp}

  """
  # Index explicitly so that a too-short request still fails here
  call_args = (params[0], params[1], params[2], params[3])
  return backend.ActivateMasterIp(*call_args)
def perspective_node_deactivate_master_ip(params):
  """Deactivate the master IP on this node.

  @param params: sequence whose first three elements are forwarded, in
      order, to L{backend.DeactivateMasterIp}
  @return: the result of L{backend.DeactivateMasterIp}

  """
  # Index explicitly so that a too-short request still fails here
  call_args = (params[0], params[1], params[2])
  return backend.DeactivateMasterIp(*call_args)
def perspective_node_stop_master(params):
  """Stop the master daemons on this node.

  @param params: ignored
  @return: the result of L{backend.StopMasterDaemons}

  """
  outcome = backend.StopMasterDaemons()
  return outcome
def perspective_node_change_master_netmask(params):
  """Change the netmask of the master IP.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.ChangeMasterNetmask}
  @return: the result of L{backend.ChangeMasterNetmask}

  """
  payload = params[0]
  return backend.ChangeMasterNetmask(payload)
def perspective_node_leave_cluster(params):
  """Clean up after this node has left a cluster.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.LeaveCluster}
  @return: the result of L{backend.LeaveCluster}

  """
  payload = params[0]
  return backend.LeaveCluster(payload)
def perspective_node_volumes(params):
  """Query the volumes known to this node.

  @param params: ignored
  @return: the result of L{backend.NodeVolumes}

  """
  volumes = backend.NodeVolumes()
  return volumes
def perspective_node_demote_from_mc(params):
  """Demote this node from the master candidate role.

  @param params: ignored
  @return: the result of L{backend.DemoteFromMC}

  """
  outcome = backend.DemoteFromMC()
  return outcome
def perspective_node_powercycle(params):
  """Try to powercycle the node.

  @param params: sequence whose first element is the hypervisor type
  @return: the result of L{backend.PowercycleNode}

  """
  return backend.PowercycleNode(params[0])
752 # cluster --------------------------
def perspective_version(params):
  """Query version information.

  @param params: ignored
  @return: the value of L{constants.PROTOCOL_VERSION}

  """
  protocol_version = constants.PROTOCOL_VERSION
  return protocol_version
def perspective_upload_file(params):
  """Upload a file.

  The backend implementation imposes its own strict rules on what it
  accepts.

  @param params: sequence whose first element is itself a sequence
      that is expanded into the positional arguments of
      L{backend.UploadFile}
  @return: the result of L{backend.UploadFile}

  """
  upload_args = params[0]
  return backend.UploadFile(*upload_args)
def perspective_master_info(params):
  """Query master information.

  @param params: ignored
  @return: the result of L{backend.GetMasterInfo}

  """
  master_info = backend.GetMasterInfo()
  return master_info
779 def perspective_run_oob(params):
783 output = backend.RunOob(params[0], params[1], params[2], params[3])
785 result = serializer.LoadJson(output)
791 def perspective_write_ssconf_files(params):
792 """Write ssconf files.
796 return backend.WriteSsconfFiles(values)
798 # os -----------------------
def perspective_os_diagnose(params):
  """Query detailed information about the existing OSes.

  @param params: ignored
  @return: the result of L{backend.DiagnoseOS}

  """
  diagnosis = backend.DiagnoseOS()
  return diagnosis
808 def perspective_os_get(params):
809 """Query information about a given OS.
813 os_obj = backend.OSFromDisk(name)
814 return os_obj.ToDict()
def perspective_os_validate(params):
  """Run the validation routine of a given OS.

  @param params: four-element sequence whose items are forwarded, in
      order, as the C{required}, C{name}, C{checks} and C{params}
      arguments of L{backend.ValidateOS}
  @return: the result of L{backend.ValidateOS}

  """
  # A separate local name avoids rebinding the function argument
  (required, name, checks, os_params) = params
  return backend.ValidateOS(required, name, checks, os_params)
824 # hooks -----------------------
def perspective_hooks_runner(params):
  """Run hooks through a L{backend.HooksRunner}.

  @param params: three-element sequence C{(hpath, phase, env)}
  @return: the result of the runner's C{RunHooks(hpath, phase, env)}

  """
  (hooks_path, phase, env) = params
  return backend.HooksRunner().RunHooks(hooks_path, phase, env)
835 # iallocator -----------------
838 def perspective_iallocator_runner(params):
839 """Run an iallocator script.
843 iar = backend.IAllocatorRunner()
844 return iar.Run(name, idata)
846 # test -----------------------
849 def perspective_test_delay(params):
854 status, rval = utils.TestDelay(duration)
856 raise backend.RPCFail(rval)
859 # file storage ---------------
def perspective_file_storage_dir_create(params):
  """Create the file storage directory.

  @param params: sequence whose first element is the directory
  @return: the result of L{backend.CreateFileStorageDir}

  """
  return backend.CreateFileStorageDir(params[0])
def perspective_file_storage_dir_remove(params):
  """Remove the file storage directory.

  @param params: sequence whose first element is the directory
  @return: the result of L{backend.RemoveFileStorageDir}

  """
  return backend.RemoveFileStorageDir(params[0])
def perspective_file_storage_dir_rename(params):
  """Rename the file storage directory.

  @param params: sequence whose first two elements are the old and the
      new file storage directory
  @return: the result of L{backend.RenameFileStorageDir}

  """
  (old_dir, new_dir) = (params[0], params[1])
  return backend.RenameFileStorageDir(old_dir, new_dir)
887 # jobs ------------------------
@_RequireJobQueueLock
def perspective_jobqueue_update(params):
  """Update a job queue file.

  @param params: two-element sequence C{(file_name, content)}
  @return: the result of L{backend.JobQueueUpdate}

  """
  (queue_file, new_content) = params
  return backend.JobQueueUpdate(queue_file, new_content)
@_RequireJobQueueLock
def perspective_jobqueue_purge(params):
  """Purge the job queue.

  @param params: ignored
  @return: the result of L{backend.JobQueuePurge}

  """
  outcome = backend.JobQueuePurge()
  return outcome
@_RequireJobQueueLock
def perspective_jobqueue_rename(params):
  """Rename a batch of job queue files.

  @param params: sequence whose first element is an iterable of
      C{(old, new)} pairs
  @return: list holding the L{backend.JobQueueRename} result for each
      pair, in input order

  """
  # TODO: What if a file fails to rename?
  outcomes = []
  for (old_name, new_name) in params[0]:
    outcomes.append(backend.JobQueueRename(old_name, new_name))
  return outcomes
915 # hypervisor ---------------
def perspective_hypervisor_validate_params(params):
  """Validate the hypervisor parameters.

  @param params: two-element sequence C{(hvname, hvparams)}
  @return: the result of L{backend.ValidateHVParams}

  """
  (hv_name, hv_params) = params
  verdict = backend.ValidateHVParams(hv_name, hv_params)
  return verdict
def perspective_x509_cert_create(params):
  """Create a new X509 certificate for SSL/TLS.

  @param params: single-element sequence C{(validity, )}
  @return: the result of L{backend.CreateX509Certificate}

  """
  # One-element unpacking rejects requests carrying extra arguments
  [validity] = params
  return backend.CreateX509Certificate(validity)
936 def perspective_x509_cert_remove(params):
937 """Removes a X509 certificate.
941 return backend.RemoveX509Certificate(name)
946 def perspective_import_start(params):
947 """Starts an import daemon.
950 (opts_s, instance, component, (dest, dest_args)) = params
952 opts = objects.ImportExportOptions.FromDict(opts_s)
954 return backend.StartImportExportDaemon(constants.IEM_IMPORT, opts,
956 objects.Instance.FromDict(instance),
958 _DecodeImportExportIO(dest,
962 def perspective_export_start(params):
963 """Starts an export daemon.
966 (opts_s, host, port, instance, component, (source, source_args)) = params
968 opts = objects.ImportExportOptions.FromDict(opts_s)
970 return backend.StartImportExportDaemon(constants.IEM_EXPORT, opts,
972 objects.Instance.FromDict(instance),
974 _DecodeImportExportIO(source,
def perspective_impexp_status(params):
  """Retrieve the status of an import or export daemon.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.GetImportExportStatus}
  @return: the result of L{backend.GetImportExportStatus}

  """
  payload = params[0]
  return backend.GetImportExportStatus(payload)
def perspective_impexp_abort(params):
  """Abort an import or export.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.AbortImportExport}
  @return: the result of L{backend.AbortImportExport}

  """
  payload = params[0]
  return backend.AbortImportExport(payload)
def perspective_impexp_cleanup(params):
  """Clean up after an import or export.

  @param params: sequence whose first element is forwarded unchanged
      to L{backend.CleanupImportExport}
  @return: the result of L{backend.CleanupImportExport}

  """
  payload = params[0]
  return backend.CleanupImportExport(payload)
999 def CheckNoded(_, args):
1000 """Initial checks whether to run or exit with a failure.
1003 if args: # noded doesn't take any arguments
1004 print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" %
1006 sys.exit(constants.EXIT_FAILURE)
1008 codecs.lookup("string-escape")
1010 print >> sys.stderr, ("Can't load the string-escape code which is part"
1011 " of the Python installation. Is your installation"
1012 " complete/correct? Aborting.")
1013 sys.exit(constants.EXIT_FAILURE)
1016 def PrepNoded(options, _):
1017 """Preparation node daemon function, executed with the PID file held.
1021 request_executor_class = MlockallRequestExecutor
1024 except errors.NoCtypesError:
1025 logging.warning("Cannot set memory lock, ctypes module not found")
1026 request_executor_class = http.server.HttpServerRequestExecutor
1028 request_executor_class = http.server.HttpServerRequestExecutor
1030 # Read SSL certificate
1032 ssl_params = http.HttpSslParams(ssl_key_path=options.ssl_key,
1033 ssl_cert_path=options.ssl_cert)
1037 err = _PrepareQueueLock()
1039 # this might be some kind of file-system/permission error; while
1040 # this breaks the job queue functionality, we shouldn't prevent
1041 # startup of the whole node daemon because of this
1042 logging.critical("Can't init/verify the queue, proceeding anyway: %s", err)
1044 mainloop = daemon.Mainloop()
1045 server = NodeHttpServer(mainloop, options.bind_address, options.port,
1046 ssl_params=ssl_params, ssl_verify_peer=True,
1047 request_executor_class=request_executor_class)
1049 return (mainloop, server)
1052 def ExecNoded(options, args, prep_data): # pylint: disable=W0613
1053 """Main node daemon function, executed with the PID file held.
1056 (mainloop, server) = prep_data
1064 """Main function for the node daemon.
1067 parser = OptionParser(description="Ganeti node daemon",
1068 usage="%prog [-f] [-d] [-p port] [-b ADDRESS]",
1069 version="%%prog (ganeti) %s" %
1070 constants.RELEASE_VERSION)
1071 parser.add_option("--no-mlock", dest="mlock",
1072 help="Do not mlock the node memory in ram",
1073 default=True, action="store_false")
1075 daemon.GenericMain(constants.NODED, parser, CheckNoded, PrepNoded, ExecNoded,
1076 default_ssl_cert=constants.NODED_CERT_FILE,
1077 default_ssl_key=constants.NODED_CERT_FILE,
1078 console_logging=True)