X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/d57ae7f7db24a4a123a83abe79706205f509cf1d..77921a951861ca1dd5136e0f4b84cb6e7ac7b8d2:/lib/rpc.py diff --git a/lib/rpc.py b/lib/rpc.py index f82a90c..70dd312 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -32,14 +32,18 @@ import os import socket -import httplib import logging +import zlib +import base64 from ganeti import utils from ganeti import objects from ganeti import http from ganeti import serializer from ganeti import constants +from ganeti import errors + +import ganeti.http.client # Module level variable @@ -56,7 +60,7 @@ def Init(): assert not _http_manager, "RPC module initialized more than once" - _http_manager = http.HttpClientManager() + _http_manager = http.client.HttpClientManager() def Shutdown(): @@ -72,6 +76,84 @@ def Shutdown(): _http_manager = None +class RpcResult(object): + """RPC Result class. + + This class holds an RPC result. It is needed since in multi-node + calls we can't raise an exception just because one out of many + failed, and therefore we use this class to encapsulate the result. 
+ + @ivar data: the data payload, for successful results, or None + @type failed: boolean + @ivar failed: whether the operation failed at RPC level (not + application level on the remote node) + @ivar call: the name of the RPC call + @ivar node: the name of the node to which we made the call + @ivar offline: whether the operation failed because the node was + offline, as opposed to actual failure; offline=True will always + imply failed=True, in order to allow simpler checking if + the user doesn't care about the exact failure mode + + """ + def __init__(self, data=None, failed=False, offline=False, + call=None, node=None): + self.failed = failed + self.offline = offline + self.call = call + self.node = node + if offline: + self.failed = True + self.error = "Node is marked offline" + self.data = self.payload = None + elif failed: + self.error = data + self.data = self.payload = None + else: + self.data = data + self.error = None + if isinstance(data, (tuple, list)) and len(data) == 2: + self.payload = data[1] + else: + self.payload = None + + def Raise(self): + """If the result has failed, raise an OpExecError. + + This is used so that LU code doesn't have to check for each + result, but instead can call this function. + + """ + if self.failed: + raise errors.OpExecError("Call '%s' to node '%s' has failed: %s" % + (self.call, self.node, self.error)) + + def RemoteFailMsg(self): + """Check if the remote procedure failed. + + This is valid only for RPC calls which return result of the form + (status, data | error_msg). 
+ + @return: empty string for success, otherwise an error message + + """ + def _EnsureErr(val): + """Helper to ensure we return a 'True' value for error.""" + if val: + return val + else: + return "No error information" + + if self.failed: + return _EnsureErr(self.error) + if not isinstance(self.data, (tuple, list)): + return "Invalid result type (%s)" % type(self.data) + if len(self.data) != 2: + return "Invalid result length (%d), expected 2" % len(self.data) + if not self.data[0]: + return _EnsureErr(self.data[1]) + return "" + + class Client: """RPC Client class. @@ -84,12 +166,10 @@ class Client: cause bugs. """ - def __init__(self, procedure, args): + def __init__(self, procedure, body, port): self.procedure = procedure - self.args = args - self.body = serializer.DumpJson(args, indent=False) - - self.port = utils.GetNodeDaemonPort() + self.body = body + self.port = port self.nc = {} self._ssl_params = \ @@ -126,17 +206,18 @@ class Client: if address is None: address = name - self.nc[name] = http.HttpClientRequest(address, self.port, http.HTTP_PUT, - "/%s" % self.procedure, - post_data=self.body, - ssl_params=self._ssl_params, - ssl_verify_peer=True) + self.nc[name] = \ + http.client.HttpClientRequest(address, self.port, http.HTTP_PUT, + "/%s" % self.procedure, + post_data=self.body, + ssl_params=self._ssl_params, + ssl_verify_peer=True) def GetResults(self): """Call nodes and return results. 
@rtype: list - @returns: List of RPC results + @return: List of RPC results """ assert _http_manager, "RPC module not intialized" @@ -146,8 +227,9 @@ class Client: results = {} for name, req in self.nc.iteritems(): - if req.success and req.resp_status == http.HTTP_OK: - results[name] = serializer.LoadJson(req.resp_body) + if req.success and req.resp_status_code == http.HTTP_OK: + results[name] = RpcResult(data=serializer.LoadJson(req.resp_body), + node=name, call=self.procedure) continue # TODO: Better error reporting @@ -156,8 +238,10 @@ class Client: else: msg = req.resp_body - logging.error("RPC error from node %s: %s", name, msg) - results[name] = False + logging.error("RPC error in %s from node %s: %s", + self.procedure, name, msg) + results[name] = RpcResult(data=msg, failed=True, node=name, + call=self.procedure) return results @@ -174,6 +258,7 @@ class RpcRunner(object): """ self._cfg = cfg + self.port = utils.GetNodeDaemonPort() def _InstDict(self, instance): """Convert the given instance to a dict. @@ -194,73 +279,123 @@ class RpcRunner(object): idict["beparams"] = cluster.FillBE(instance) return idict - def _ConnectList(self, client, node_list): + def _ConnectList(self, client, node_list, call): """Helper for computing node addresses. 
@type client: L{Client} @param client: a C{Client} instance @type node_list: list @param node_list: the node list we should connect + @type call: string + @param call: the name of the remote procedure call, for filling in + correctly any eventual offline nodes' results """ all_nodes = self._cfg.GetAllNodesInfo() + name_list = [] addr_list = [] + skip_dict = {} for node in node_list: if node in all_nodes: + if all_nodes[node].offline: + skip_dict[node] = RpcResult(node=node, offline=True, call=call) + continue val = all_nodes[node].primary_ip else: val = None addr_list.append(val) - client.ConnectList(node_list, address_list=addr_list) + name_list.append(node) + if name_list: + client.ConnectList(name_list, address_list=addr_list) + return skip_dict - def _ConnectNode(self, client, node): + def _ConnectNode(self, client, node, call): """Helper for computing one node's address. @type client: L{Client} @param client: a C{Client} instance @type node: str @param node: the node we should connect + @type call: string + @param call: the name of the remote procedure call, for filling in + correctly any eventual offline nodes' results """ node_info = self._cfg.GetNodeInfo(node) if node_info is not None: + if node_info.offline: + return RpcResult(node=node, offline=True, call=call) addr = node_info.primary_ip else: addr = None client.ConnectNode(node, address=addr) - def _MultiNodeCall(self, node_list, procedure, args, - address_list=None): - c = Client(procedure, args) - if address_list is None: - self._ConnectList(c, node_list) - else: - c.ConnectList(node_list, address_list=address_list) - return c.GetResults() + def _MultiNodeCall(self, node_list, procedure, args): + """Helper for making a multi-node call + + """ + body = serializer.DumpJson(args, indent=False) + c = Client(procedure, body, self.port) + skip_dict = self._ConnectList(c, node_list, procedure) + skip_dict.update(c.GetResults()) + return skip_dict @classmethod def _StaticMultiNodeCall(cls, node_list, 
procedure, args, address_list=None): - c = Client(procedure, args) + """Helper for making a multi-node static call + + """ + body = serializer.DumpJson(args, indent=False) + c = Client(procedure, body, utils.GetNodeDaemonPort()) c.ConnectList(node_list, address_list=address_list) return c.GetResults() def _SingleNodeCall(self, node, procedure, args): - """ + """Helper for making a single-node call """ - c = Client(procedure, args) - self._ConnectNode(c, node) - return c.GetResults().get(node, False) + body = serializer.DumpJson(args, indent=False) + c = Client(procedure, body, self.port) + result = self._ConnectNode(c, node, procedure) + if result is None: + # we did connect, node is not offline + result = c.GetResults()[node] + return result @classmethod def _StaticSingleNodeCall(cls, node, procedure, args): + """Helper for making a single-node static call + """ + body = serializer.DumpJson(args, indent=False) + c = Client(procedure, body, utils.GetNodeDaemonPort()) + c.ConnectNode(node) + return c.GetResults()[node] + + @staticmethod + def _Compress(data): + """Compresses a string for transport over RPC. + + Small amounts of data are not compressed. + + @type data: str + @param data: Data + @rtype: tuple + @return: Encoded data to send """ - c = Client(procedure, args) - c.ConnectNode(c, node) - return c.GetResults().get(node, False) + # Small amounts of data are not compressed + if len(data) < 512: + return (constants.RPC_ENCODING_NONE, data) + + # Compress with zlib and encode in base64 + return (constants.RPC_ENCODING_ZLIB_BASE64, + base64.b64encode(zlib.compress(data, 3))) + + # + # Begin RPC calls + # def call_volume_list(self, node_list, vg_name): """Gets the logical volumes present in a given volume group. @@ -290,14 +425,14 @@ class RpcRunner(object): """ return self._SingleNodeCall(node, "bridges_exist", [bridges_list]) - def call_instance_start(self, node, instance, extra_args): + def call_instance_start(self, node, instance): """Starts an instance. 
This is a single-node call. """ return self._SingleNodeCall(node, "instance_start", - [self._InstDict(instance), extra_args]) + [self._InstDict(instance)]) def call_instance_shutdown(self, node, instance): """Stops an instance. @@ -308,6 +443,59 @@ class RpcRunner(object): return self._SingleNodeCall(node, "instance_shutdown", [self._InstDict(instance)]) + def call_migration_info(self, node, instance): + """Gather the information necessary to prepare an instance migration. + + This is a single-node call. + + @type node: string + @param node: the node on which the instance is currently running + @type instance: C{objects.Instance} + @param instance: the instance definition + + """ + return self._SingleNodeCall(node, "migration_info", + [self._InstDict(instance)]) + + def call_accept_instance(self, node, instance, info, target): + """Prepare a node to accept an instance. + + This is a single-node call. + + @type node: string + @param node: the target node for the migration + @type instance: C{objects.Instance} + @param instance: the instance definition + @type info: opaque/hypervisor specific (string/data) + @param info: result for the call_migration_info call + @type target: string + @param target: target hostname (usually ip address) (on the node itself) + + """ + return self._SingleNodeCall(node, "accept_instance", + [self._InstDict(instance), info, target]) + + def call_finalize_migration(self, node, instance, info, success): + """Finalize any target-node migration specific operation. + + This is called both in case of a successful migration and in case of error + (in which case it should abort the migration). + + This is a single-node call. 
+ + @type node: string + @param node: the target node for the migration + @type instance: C{objects.Instance} + @param instance: the instance definition + @type info: opaque/hypervisor specific (string/data) + @param info: result for the call_migration_info call + @type success: boolean + @param success: whether the migration was a success or a failure + + """ + return self._SingleNodeCall(node, "finalize_migration", + [self._InstDict(instance), info, success]) + def call_instance_migrate(self, node, instance, target, live): """Migrate an instance. @@ -327,15 +515,14 @@ class RpcRunner(object): return self._SingleNodeCall(node, "instance_migrate", [self._InstDict(instance), target, live]) - def call_instance_reboot(self, node, instance, reboot_type, extra_args): + def call_instance_reboot(self, node, instance, reboot_type): """Reboots an instance. This is a single-node call. """ return self._SingleNodeCall(node, "instance_reboot", - [self._InstDict(instance), reboot_type, - extra_args]) + [self._InstDict(instance), reboot_type]) def call_instance_os_add(self, node, inst): """Installs an OS on the given instance. @@ -370,6 +557,20 @@ class RpcRunner(object): """ return self._SingleNodeCall(node, "instance_info", [instance, hname]) + def call_instance_migratable(self, node, instance): + """Checks whether the given instance can be migrated. + + This is a single-node call. + + @param node: the node to query + @type instance: L{objects.Instance} + @param instance: the instance to check + + + """ + return self._SingleNodeCall(node, "instance_migratable", + [self._InstDict(instance)]) + def call_all_instances_info(self, node_list, hypervisor_list): """Returns information about all instances on the given nodes. 
@@ -426,8 +627,8 @@ class RpcRunner(object): @type node_list: list @param node_list: the list of nodes to query - @type vgname: C{string} - @param vgname: the name of the volume group to ask for disk space + @type vg_name: C{string} + @param vg_name: the name of the volume group to ask for disk space information @type hypervisor_type: C{str} @param hypervisor_type: the name of the hypervisor to ask for @@ -437,19 +638,21 @@ class RpcRunner(object): retux = self._MultiNodeCall(node_list, "node_info", [vg_name, hypervisor_type]) - for node_name in retux: - ret = retux.get(node_name, False) - if type(ret) != dict: - logging.error("could not connect to node %s", node_name) - ret = {} - - utils.CheckDict(ret, { - 'memory_total' : '-', - 'memory_dom0' : '-', - 'memory_free' : '-', - 'vg_size' : 'node_unreachable', - 'vg_free' : '-', - }, "call_node_info") + for result in retux.itervalues(): + if result.failed or not isinstance(result.data, dict): + result.data = {} + if result.offline: + log_name = None + else: + log_name = "call_node_info" + + utils.CheckDict(result.data, { + 'memory_total' : '-', + 'memory_dom0' : '-', + 'memory_free' : '-', + 'vg_size' : 'node_unreachable', + 'vg_free' : '-', + }, log_name) return retux def call_node_add(self, node, dsa, dsapub, rsa, rsapub, ssh, sshpub): @@ -587,14 +790,43 @@ class RpcRunner(object): """ return self._SingleNodeCall(node, "blockdev_find", [disk.ToDict()]) - def call_blockdev_close(self, node, disks): + def call_blockdev_close(self, node, instance_name, disks): """Closes the given block devices. This is a single-node call. """ - return self._SingleNodeCall(node, "blockdev_close", - [cf.ToDict() for cf in disks]) + params = [instance_name, [cf.ToDict() for cf in disks]] + return self._SingleNodeCall(node, "blockdev_close", params) + + def call_drbd_disconnect_net(self, node_list, nodes_ip, disks): + """Disconnects the network of the given drbd devices. + + This is a multi-node call. 
+ + """ + return self._MultiNodeCall(node_list, "drbd_disconnect_net", + [nodes_ip, [cf.ToDict() for cf in disks]]) + + def call_drbd_attach_net(self, node_list, nodes_ip, + disks, instance_name, multimaster): + """Attaches the network of the given drbd devices. + + This is a multi-node call. + + """ + return self._MultiNodeCall(node_list, "drbd_attach_net", + [nodes_ip, [cf.ToDict() for cf in disks], + instance_name, multimaster]) + + def call_drbd_wait_sync(self, node_list, nodes_ip, disks): + """Waits until the synchronization of drbd devices is complete. + + This is a multi-node call. + + """ + return self._MultiNodeCall(node_list, "drbd_wait_sync", + [nodes_ip, [cf.ToDict() for cf in disks]]) @classmethod def call_upload_file(cls, node_list, file_name, address_list=None): @@ -614,7 +846,8 @@ class RpcRunner(object): to optimize the RPC speed """ - data = utils.ReadFile(file_name) + file_contents = utils.ReadFile(file_name) + data = cls._Compress(file_contents) st = os.stat(file_name) params = [file_name, data, st.st_mode, st.st_uid, st.st_gid, st.st_atime, st.st_mtime] @@ -622,13 +855,13 @@ class RpcRunner(object): address_list=address_list) @classmethod - def call_write_ssconf_files(cls, node_list): + def call_write_ssconf_files(cls, node_list, values): """Write ssconf files. This is a multi-node call. """ - return cls._StaticMultiNodeCall(node_list, "write_ssconf_files", []) + return cls._StaticMultiNodeCall(node_list, "write_ssconf_files", [values]) def call_os_diagnose(self, node_list): """Request a diagnose of OS definitions. 
@@ -638,14 +871,11 @@ class RpcRunner(object): """ result = self._MultiNodeCall(node_list, "os_diagnose", []) - new_result = {} - for node_name in result: - if result[node_name]: - nr = [objects.OS.FromDict(oss) for oss in result[node_name]] - else: - nr = [] - new_result[node_name] = nr - return new_result + for node_result in result.values(): + if not node_result.failed and node_result.data: + node_result.data = [objects.OS.FromDict(oss) + for oss in node_result.data] + return result def call_os_get(self, node, name): """Returns an OS definition. @@ -654,10 +884,9 @@ class RpcRunner(object): """ result = self._SingleNodeCall(node, "os_get", [name]) - if isinstance(result, dict): - return objects.OS.FromDict(result) - else: - return result + if not result.failed and isinstance(result.data, dict): + result.data = objects.OS.FromDict(result.data) + return result def call_hooks_runner(self, node_list, hpath, phase, env): """Call the hooks runner. @@ -734,9 +963,9 @@ class RpcRunner(object): """ result = self._SingleNodeCall(node, "export_info", [path]) - if not result: - return result - return objects.SerializableConfigParser.Loads(str(result)) + if not result.failed and result.data: + result.data = objects.SerializableConfigParser.Loads(str(result.data)) + return result def call_instance_os_import(self, node, inst, src_node, src_images, cluster_name): @@ -785,6 +1014,14 @@ class RpcRunner(object): """ return self._MultiNodeCall(node_list, "node_volumes", []) + def call_node_demote_from_mc(self, node): + """Demote a node from the master candidate role. + + This is a single-node call. + + """ + return self._SingleNodeCall(node, "node_demote_from_mc", []) + def call_test_delay(self, node_list, duration): """Sleep for a fixed time on given node(s). 
@@ -829,7 +1066,7 @@ class RpcRunner(object): """ return cls._StaticMultiNodeCall(node_list, "jobqueue_update", - [file_name, content], + [file_name, cls._Compress(content)], address_list=address_list) @classmethod @@ -842,13 +1079,13 @@ class RpcRunner(object): return cls._StaticSingleNodeCall(node, "jobqueue_purge", []) @classmethod - def call_jobqueue_rename(cls, node_list, address_list, old, new): + def call_jobqueue_rename(cls, node_list, address_list, rename): """Rename a job queue file. This is a multi-node call. """ - return cls._StaticMultiNodeCall(node_list, "jobqueue_rename", [old, new], + return cls._StaticMultiNodeCall(node_list, "jobqueue_rename", rename, address_list=address_list) @classmethod