X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/1bdcbbab3d096a23946db02a779413458871a638..5c097318202f47e88b6a168f97920549416fb15a:/lib/rpc.py?ds=sidebyside

diff --git a/lib/rpc.py b/lib/rpc.py
index 57dd960..4e2693e 100644
--- a/lib/rpc.py
+++ b/lib/rpc.py
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -34,6 +34,8 @@ import os
 import logging
 import zlib
 import base64
+import pycurl
+import threading
 
 from ganeti import utils
 from ganeti import objects
@@ -41,13 +43,20 @@ from ganeti import http
 from ganeti import serializer
 from ganeti import constants
 from ganeti import errors
+from ganeti import netutils
+from ganeti import ssconf
 
 # pylint has a bug here, doesn't see this import
 import ganeti.http.client # pylint: disable-msg=W0611
 
 
-# Module level variable
-_http_manager = None
+# Timeout for connecting to nodes (seconds)
+_RPC_CONNECT_TIMEOUT = 5
+
+_RPC_CLIENT_HEADERS = [
+  "Content-type: %s" % http.HTTP_APP_JSON,
+  "Expect:",
+  ]
 
 # Various time constants for the timeout table
 _TMO_URGENT = 60 # one minute
@@ -71,29 +80,71 @@ _TIMEOUTS = {
 def Init():
   """Initializes the module-global HTTP client manager.
 
-  Must be called before using any RPC function.
+  Must be called before using any RPC function and while exactly one thread is
+  running.
 
   """
-  global _http_manager # pylint: disable-msg=W0603
-
-  assert not _http_manager, "RPC module initialized more than once"
+  # curl_global_init(3) and curl_global_cleanup(3) must be called with only
+  # one thread running. This check is just a safety measure -- it doesn't
+  # cover all cases.
+  assert threading.activeCount() == 1, \
+    "Found more than one active thread when initializing pycURL"
 
-  http.InitSsl()
+  logging.info("Using PycURL %s", pycurl.version)
 
-  _http_manager = http.client.HttpClientManager()
+  pycurl.global_init(pycurl.GLOBAL_ALL)
 
 
 def Shutdown():
   """Stops the module-global HTTP client manager.
 
-  Must be called before quitting the program.
+  Must be called before quitting the program and while exactly one thread is
+  running.
 
   """
-  global _http_manager # pylint: disable-msg=W0603
+  pycurl.global_cleanup()
+
+
+def _ConfigRpcCurl(curl):
+  noded_cert = str(constants.NODED_CERT_FILE)
+
+  curl.setopt(pycurl.FOLLOWLOCATION, False)
+  curl.setopt(pycurl.CAINFO, noded_cert)
+  curl.setopt(pycurl.SSL_VERIFYHOST, 0)
+  curl.setopt(pycurl.SSL_VERIFYPEER, True)
+  curl.setopt(pycurl.SSLCERTTYPE, "PEM")
+  curl.setopt(pycurl.SSLCERT, noded_cert)
+  curl.setopt(pycurl.SSLKEYTYPE, "PEM")
+  curl.setopt(pycurl.SSLKEY, noded_cert)
+  curl.setopt(pycurl.CONNECTTIMEOUT, _RPC_CONNECT_TIMEOUT)
+
+
+# Aliasing this module avoids the following warning by epydoc: "Warning: No
+# information available for ganeti.rpc._RpcThreadLocal's base threading.local"
+_threading = threading
+
+
+class _RpcThreadLocal(_threading.local):
+  def GetHttpClientPool(self):
+    """Returns a per-thread HTTP client pool.
+
+    @rtype: L{http.client.HttpClientPool}
+
+    """
+    try:
+      pool = self.hcp
+    except AttributeError:
+      pool = http.client.HttpClientPool(_ConfigRpcCurl)
+      self.hcp = pool
+
+    return pool
+
+
+# Remove module alias (see above)
+del _threading
 
-  if _http_manager:
-    _http_manager.Shutdown()
-    _http_manager = None
+
+_thread_local = _RpcThreadLocal()
 
 
 def _RpcTimeout(secs):
@@ -111,6 +162,23 @@ def _RpcTimeout(secs):
   return decorator
 
 
+def RunWithRPC(fn):
+  """RPC-wrapper decorator.
+
+  When applied to a function, it runs it with the RPC system
+  initialized, and it shuts down the system afterwards. This means the
+  function must be called without RPC being initialized.
+
+  """
+  def wrapper(*args, **kwargs):
+    Init()
+    try:
+      return fn(*args, **kwargs)
+    finally:
+      Shutdown()
+  return wrapper
+
+
 class RpcResult(object):
   """RPC Result class.
 
@@ -158,12 +226,9 @@ class RpcResult(object):
       self.fail_msg = None
       self.payload = data[1]
 
-    assert hasattr(self, "call")
-    assert hasattr(self, "data")
-    assert hasattr(self, "fail_msg")
-    assert hasattr(self, "node")
-    assert hasattr(self, "offline")
-    assert hasattr(self, "payload")
+    for attr_name in ["call", "data", "fail_msg",
+                      "node", "offline", "payload"]:
+      assert hasattr(self, attr_name), "Missing attribute %s" % attr_name
 
   @staticmethod
   def _EnsureErr(val):
@@ -199,6 +264,35 @@ class RpcResult(object):
     raise ec(*args) # pylint: disable-msg=W0142
 
 
+def _AddressLookup(node_list,
+                   ssc=ssconf.SimpleStore,
+                   nslookup_fn=netutils.Hostname.GetIP):
+  """Return addresses for given node names.
+
+  @type node_list: list
+  @param node_list: List of node names
+  @type ssc: class
+  @param ssc: SimpleStore class that is used to obtain node->ip mappings
+  @type nslookup_fn: callable
+  @param nslookup_fn: function used to do NS lookup
+  @rtype: list of addresses and/or None's
+  @returns: List of corresponding addresses, if found
+
+  """
+  ss = ssc()
+  iplist = ss.GetNodePrimaryIPList()
+  family = ss.GetPrimaryIPFamily()
+  addresses = []
+  ipmap = dict(entry.split() for entry in iplist)
+  for node in node_list:
+    address = ipmap.get(node)
+    if address is None:
+      address = nslookup_fn(node, family=family)
+    addresses.append(address)
+
+  return addresses
+
+
 class Client:
   """RPC Client class.
 
@@ -211,17 +305,14 @@ class Client:
   cause bugs.
 
   """
-  def __init__(self, procedure, body, port):
+  def __init__(self, procedure, body, port, address_lookup_fn=_AddressLookup):
     assert procedure in _TIMEOUTS, ("New RPC call not declared in the"
                                     " timeouts table")
     self.procedure = procedure
    self.body = body
     self.port = port
-    self.nc = {}
-
-    self._ssl_params = \
-      http.HttpSslParams(ssl_key_path=constants.NODED_CERT_FILE,
-                         ssl_cert_path=constants.NODED_CERT_FILE)
+    self._request = {}
+    self._address_lookup_fn = address_lookup_fn
 
  def ConnectList(self, node_list, address_list=None, read_timeout=None):
    """Add a list of nodes to the target nodes.
@@ -232,15 +323,16 @@ class Client:
     @keyword address_list: either None or a list with node addresses,
         which must have the same length as the node list
     @type read_timeout: int
-    @param read_timeout: overwrites the default read timeout for the
-        given operation
+    @param read_timeout: overwrites default timeout for operation
 
     """
     if address_list is None:
-      address_list = [None for _ in node_list]
-    else:
-      assert len(node_list) == len(address_list), \
-             "Name and address lists should have the same length"
+      # Always use IP address instead of node name
+      address_list = self._address_lookup_fn(node_list)
+
+    assert len(node_list) == len(address_list), \
+           "Name and address lists must have the same length"
+
     for node, address in zip(node_list, address_list):
       self.ConnectNode(node, address, read_timeout=read_timeout)
 
@@ -250,37 +342,42 @@ class Client:
     @type name: str
     @param name: the node name
     @type address: str
-    @keyword address: the node address, if known
+    @param address: the node address, if known
+    @type read_timeout: int
+    @param read_timeout: overwrites default timeout for operation
 
     """
     if address is None:
-      address = name
+      # Always use IP address instead of node name
+      address = self._address_lookup_fn([name])[0]
+
+    assert(address is not None)
 
     if read_timeout is None:
       read_timeout = _TIMEOUTS[self.procedure]
 
-    self.nc[name] = \
-      http.client.HttpClientRequest(address, self.port, http.HTTP_PUT,
-                                    "/%s" % self.procedure,
-                                    post_data=self.body,
-                                    ssl_params=self._ssl_params,
-                                    ssl_verify_peer=True,
+    self._request[name] = \
+      http.client.HttpClientRequest(str(address), self.port,
+                                    http.HTTP_PUT, str("/%s" % self.procedure),
+                                    headers=_RPC_CLIENT_HEADERS,
+                                    post_data=str(self.body),
                                     read_timeout=read_timeout)
 
-  def GetResults(self):
+  def GetResults(self, http_pool=None):
     """Call nodes and return results.
 
     @rtype: list
     @return: List of RPC results
 
     """
-    assert _http_manager, "RPC module not initialized"
+    if not http_pool:
+      http_pool = _thread_local.GetHttpClientPool()
 
-    _http_manager.ExecRequests(self.nc.values())
+    http_pool.ProcessRequests(self._request.values())
 
     results = {}
 
-    for name, req in self.nc.iteritems():
+    for name, req in self._request.iteritems():
       if req.success and req.resp_status_code == http.HTTP_OK:
         results[name] = RpcResult(data=serializer.LoadJson(req.resp_body),
                                   node=name, call=self.procedure)
@@ -327,7 +424,7 @@ class RpcRunner(object):
 
     """
     self._cfg = cfg
-    self.port = utils.GetDaemonPort(constants.NODED)
+    self.port = netutils.GetDaemonPort(constants.NODED)
 
   def _InstDict(self, instance, hvp=None, bep=None, osp=None):
     """Convert the given instance to a dict.
@@ -342,7 +439,7 @@ class RpcRunner(object):
     @type bep: dict or None
     @param bep: a dictionary with overridden backend parameters
     @type osp: dict or None
-    @param osp: a dictionary with overriden os parameters
+    @param osp: a dictionary with overridden os parameters
     @rtype: dict
     @return: the instance dict, with the hvparams filled with the
         cluster defaults
@@ -441,7 +538,7 @@ class RpcRunner(object):
 
     """
     body = serializer.DumpJson(args, indent=False)
-    c = Client(procedure, body, utils.GetDaemonPort(constants.NODED))
+    c = Client(procedure, body, netutils.GetDaemonPort(constants.NODED))
     c.ConnectList(node_list, address_list=address_list,
                   read_timeout=read_timeout)
     return c.GetResults()
@@ -464,7 +561,7 @@ class RpcRunner(object):
 
     """
     body = serializer.DumpJson(args, indent=False)
-    c = Client(procedure, body, utils.GetDaemonPort(constants.NODED))
+    c = Client(procedure, body, netutils.GetDaemonPort(constants.NODED))
     c.ConnectNode(node, read_timeout=read_timeout)
     return c.GetResults()[node]
 
@@ -661,14 +758,15 @@ class RpcRunner(object):
                                  shutdown_timeout])
 
   @_RpcTimeout(_TMO_1DAY)
-  def call_instance_os_add(self, node, inst, reinstall, debug):
+  def call_instance_os_add(self, node, inst, reinstall, debug, osparams=None):
     """Installs an OS on the given instance.
 
     This is a single-node call.
 
     """
     return self._SingleNodeCall(node, "instance_os_add",
-                                [self._InstDict(inst), reinstall, debug])
+                                [self._InstDict(inst, osp=osparams),
+                                 reinstall, debug])
 
   @_RpcTimeout(_TMO_SLOW)
   def call_instance_run_rename(self, node, inst, old_name, debug):
@@ -784,14 +882,20 @@ class RpcRunner(object):
                                [vg_name, hypervisor_type])
 
   @_RpcTimeout(_TMO_NORMAL)
-  def call_node_add(self, node, dsa, dsapub, rsa, rsapub, ssh, sshpub):
-    """Add a node to the cluster.
+  def call_etc_hosts_modify(self, node, mode, name, ip):
+    """Modify hosts file with name
 
-    This is a single-node call.
+    @type node: string
+    @param node: The node to call
+    @type mode: string
+    @param mode: The mode to operate. Currently "add" or "remove"
+    @type name: string
+    @param name: The host name to be modified
+    @type ip: string
+    @param ip: The ip of the entry (just valid if mode is "add")
 
     """
-    return self._SingleNodeCall(node, "node_add",
-                                [dsa, dsapub, rsa, rsapub, ssh, sshpub])
+    return self._SingleNodeCall(node, "etc_hosts_modify", [mode, name, ip])
 
   @_RpcTimeout(_TMO_NORMAL)
   def call_node_verify(self, node_list, checkdict, cluster_name):
@@ -855,6 +959,16 @@ class RpcRunner(object):
     return self._SingleNodeCall(node, "blockdev_create",
                                 [bdev.ToDict(), size, owner, on_primary, info])
 
+  @_RpcTimeout(_TMO_SLOW)
+  def call_blockdev_wipe(self, node, bdev, offset, size):
+    """Request wipe at given offset with given size of a block device.
+
+    This is a single-node call.
+
+    """
+    return self._SingleNodeCall(node, "blockdev_wipe",
+                                [bdev.ToDict(), offset, size])
+
   @_RpcTimeout(_TMO_NORMAL)
   def call_blockdev_remove(self, node, bdev):
     """Request removal of a given block device.
@@ -875,14 +989,24 @@ class RpcRunner(object):
                                 [(d.ToDict(), uid) for d, uid in devlist])
 
   @_RpcTimeout(_TMO_NORMAL)
-  def call_blockdev_assemble(self, node, disk, owner, on_primary):
+  def call_blockdev_pause_resume_sync(self, node, disks, pause):
+    """Request a pause/resume of given block device.
+
+    This is a single-node call.
+
+    """
+    return self._SingleNodeCall(node, "blockdev_pause_resume_sync",
+                                [[bdev.ToDict() for bdev in disks], pause])
+
+  @_RpcTimeout(_TMO_NORMAL)
+  def call_blockdev_assemble(self, node, disk, owner, on_primary, idx):
     """Request assembling of a given block device.
 
     This is a single-node call.
 
     """
     return self._SingleNodeCall(node, "blockdev_assemble",
-                                [disk.ToDict(), owner, on_primary])
+                                [disk.ToDict(), owner, on_primary, idx])
 
   @_RpcTimeout(_TMO_NORMAL)
   def call_blockdev_shutdown(self, node, disk):
@@ -930,6 +1054,26 @@ class RpcRunner(object):
     return result
 
   @_RpcTimeout(_TMO_NORMAL)
+  def call_blockdev_getmirrorstatus_multi(self, node_list, node_disks):
+    """Request status of (mirroring) devices from multiple nodes.
+
+    This is a multi-node call.
+
+    """
+    result = self._MultiNodeCall(node_list, "blockdev_getmirrorstatus_multi",
+                                 [dict((name, [dsk.ToDict() for dsk in disks])
+                                       for name, disks in node_disks.items())])
+    for nres in result.values():
+      if nres.fail_msg:
+        continue
+
+      for idx, (success, status) in enumerate(nres.payload):
+        if success:
+          nres.payload[idx] = (success, objects.BlockDevStatus.FromDict(status))
+
+    return result
+
+  @_RpcTimeout(_TMO_NORMAL)
   def call_blockdev_find(self, node, disk):
     """Request identification of a given block device.
 
@@ -952,7 +1096,7 @@ class RpcRunner(object):
     return self._SingleNodeCall(node, "blockdev_close", params)
 
   @_RpcTimeout(_TMO_NORMAL)
-  def call_blockdev_getsizes(self, node, disks):
+  def call_blockdev_getsize(self, node, disks):
     """Returns the size of the given disks.
 
     This is a single-node call.
@@ -993,6 +1137,15 @@ class RpcRunner(object):
     return self._MultiNodeCall(node_list, "drbd_wait_sync",
                                [nodes_ip, [cf.ToDict() for cf in disks]])
 
+  @_RpcTimeout(_TMO_URGENT)
+  def call_drbd_helper(self, node_list):
+    """Gets drbd helper.
+
+    This is a multi-node call.
+
+    """
+    return self._MultiNodeCall(node_list, "drbd_helper", [])
+
   @classmethod
   @_RpcTimeout(_TMO_NORMAL)
   def call_upload_file(cls, node_list, file_name, address_list=None):
@@ -1030,6 +1183,16 @@ class RpcRunner(object):
     """
     return cls._StaticMultiNodeCall(node_list, "write_ssconf_files", [values])
 
+  @_RpcTimeout(_TMO_NORMAL)
+  def call_run_oob(self, node, oob_program, command, remote_node, timeout):
+    """Runs OOB.
+
+    This is a single-node call.
+
+    """
+    return self._SingleNodeCall(node, "run_oob", [oob_program, command,
+                                                  remote_node, timeout])
+
   @_RpcTimeout(_TMO_FAST)
   def call_os_diagnose(self, node_list):
     """Request a diagnose of OS definitions.
@@ -1248,7 +1411,7 @@ class RpcRunner(object):
                                 [old_file_storage_dir, new_file_storage_dir])
 
   @classmethod
-  @_RpcTimeout(_TMO_FAST)
+  @_RpcTimeout(_TMO_URGENT)
   def call_jobqueue_update(cls, node_list, address_list, file_name, content):
     """Update job queue.
 
@@ -1270,7 +1433,7 @@ class RpcRunner(object):
     return cls._StaticSingleNodeCall(node, "jobqueue_purge", [])
 
   @classmethod
-  @_RpcTimeout(_TMO_FAST)
+  @_RpcTimeout(_TMO_URGENT)
   def call_jobqueue_rename(cls, node_list, address_list, rename):
     """Rename a job queue file.