#
#
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
import logging
import zlib
import base64
+import pycurl
+import threading
from ganeti import utils
from ganeti import objects
from ganeti import serializer
from ganeti import constants
from ganeti import errors
+from ganeti import netutils
+from ganeti import ssconf
# pylint has a bug here, doesn't see this import
import ganeti.http.client # pylint: disable-msg=W0611
-# Module level variable
-_http_manager = None
+# Timeout for connecting to nodes (seconds)
+_RPC_CONNECT_TIMEOUT = 5
+
+_RPC_CLIENT_HEADERS = [
+ "Content-type: %s" % http.HTTP_APP_JSON,
+ "Expect:",
+ ]
# Various time constants for the timeout table
_TMO_URGENT = 60 # one minute
def Init():
"""Initializes the module-global HTTP client manager.
- Must be called before using any RPC function.
+ Must be called before using any RPC function and while exactly one thread is
+ running.
"""
- global _http_manager # pylint: disable-msg=W0603
-
- assert not _http_manager, "RPC module initialized more than once"
+ # curl_global_init(3) and curl_global_cleanup(3) must be called with only
+ # one thread running. This check is just a safety measure -- it doesn't
+ # cover all cases.
+ assert threading.activeCount() == 1, \
+ "Found more than one active thread when initializing pycURL"
- http.InitSsl()
+ logging.info("Using PycURL %s", pycurl.version)
- _http_manager = http.client.HttpClientManager()
+ pycurl.global_init(pycurl.GLOBAL_ALL)
def Shutdown():
"""Stops the module-global HTTP client manager.
- Must be called before quitting the program.
+ Must be called before quitting the program and while exactly one thread is
+ running.
"""
- global _http_manager # pylint: disable-msg=W0603
+ pycurl.global_cleanup()
+
+
+def _ConfigRpcCurl(curl):
+ """Configures a pycURL object for HTTPS node RPC.
+
+ The node daemon certificate file is used as CA, client certificate
+ and client key at the same time.
+
+ @type curl: pycurl.Curl
+ @param curl: cURL object to configure
+
+ """
+ noded_cert = str(constants.NODED_CERT_FILE)
+
+ curl.setopt(pycurl.FOLLOWLOCATION, False)
+ curl.setopt(pycurl.CAINFO, noded_cert)
+ # NOTE(review): hostname verification is disabled here; the peer
+ # certificate itself is still verified against the CA file above
+ curl.setopt(pycurl.SSL_VERIFYHOST, 0)
+ curl.setopt(pycurl.SSL_VERIFYPEER, True)
+ curl.setopt(pycurl.SSLCERTTYPE, "PEM")
+ curl.setopt(pycurl.SSLCERT, noded_cert)
+ curl.setopt(pycurl.SSLKEYTYPE, "PEM")
+ curl.setopt(pycurl.SSLKEY, noded_cert)
+ curl.setopt(pycurl.CONNECTTIMEOUT, _RPC_CONNECT_TIMEOUT)
+
+
+# Aliasing this module avoids the following warning by epydoc: "Warning: No
+# information available for ganeti.rpc._RpcThreadLocal's base threading.local"
+_threading = threading
+
+
+class _RpcThreadLocal(_threading.local):
+ """Thread-local storage holding one HTTP client pool per thread.
+
+ """
+ def GetHttpClientPool(self):
+ """Returns a per-thread HTTP client pool.
+
+ @rtype: L{http.client.HttpClientPool}
+
+ """
+ try:
+ pool = self.hcp
+ except AttributeError:
+ # First use in this thread: create the pool lazily and cache it
+ # on the thread-local instance for subsequent calls
+ pool = http.client.HttpClientPool(_ConfigRpcCurl)
+ self.hcp = pool
+
+ return pool
+
+
+# Remove module alias (see above)
+del _threading
- if _http_manager:
- _http_manager.Shutdown()
- _http_manager = None
+
+_thread_local = _RpcThreadLocal()
def _RpcTimeout(secs):
return decorator
+def RunWithRPC(fn):
+ """RPC-wrapper decorator.
+
+ When applied to a function, it runs it with the RPC system
+ initialized, and it shuts down the system afterwards. This means the
+ function must be called without RPC being initialized.
+
+ @type fn: callable
+ @param fn: function to run between L{Init} and L{Shutdown}
+
+ """
+ # NOTE(review): the wrapper does not use functools.wraps, so fn's
+ # name/docstring are not preserved on the returned function
+ def wrapper(*args, **kwargs):
+ Init()
+ try:
+ return fn(*args, **kwargs)
+ finally:
+ # Always clean up, even if fn raised
+ Shutdown()
+ return wrapper
+
+
class RpcResult(object):
"""RPC Result class.
self.fail_msg = None
self.payload = data[1]
- assert hasattr(self, "call")
- assert hasattr(self, "data")
- assert hasattr(self, "fail_msg")
- assert hasattr(self, "node")
- assert hasattr(self, "offline")
- assert hasattr(self, "payload")
+ for attr_name in ["call", "data", "fail_msg",
+ "node", "offline", "payload"]:
+ assert hasattr(self, attr_name), "Missing attribute %s" % attr_name
@staticmethod
def _EnsureErr(val):
raise ec(*args) # pylint: disable-msg=W0142
+def _AddressLookup(node_list,
+ ssc=ssconf.SimpleStore,
+ nslookup_fn=netutils.Hostname.GetIP):
+ """Return addresses for given node names.
+
+ @type node_list: list
+ @param node_list: List of node names
+ @type ssc: class
+ @param ssc: SimpleStore class that is used to obtain node->ip mappings
+ @type nslookup_fn: callable
+ @param nslookup_fn: function used to do NS lookup
+ @rtype: list of addresses and/or None's
+ @return: List of corresponding addresses, if found
+
+ """
+ ss = ssc()
+ iplist = ss.GetNodePrimaryIPList()
+ family = ss.GetPrimaryIPFamily()
+ addresses = []
+ # Each ssconf entry is expected to split into exactly two fields
+ # ("name address"); build a name->address map for fast lookups
+ ipmap = dict(entry.split() for entry in iplist)
+ for node in node_list:
+ address = ipmap.get(node)
+ if address is None:
+ # Node not in ssconf; fall back to a name-service lookup
+ address = nslookup_fn(node, family=family)
+ addresses.append(address)
+
+ return addresses
+
+
class Client:
"""RPC Client class.
cause bugs.
"""
- def __init__(self, procedure, body, port):
+ def __init__(self, procedure, body, port, address_lookup_fn=_AddressLookup):
assert procedure in _TIMEOUTS, ("New RPC call not declared in the"
" timeouts table")
self.procedure = procedure
self.body = body
self.port = port
- self.nc = {}
-
- self._ssl_params = \
- http.HttpSslParams(ssl_key_path=constants.NODED_CERT_FILE,
- ssl_cert_path=constants.NODED_CERT_FILE)
+ self._request = {}
+ self._address_lookup_fn = address_lookup_fn
def ConnectList(self, node_list, address_list=None, read_timeout=None):
"""Add a list of nodes to the target nodes.
@keyword address_list: either None or a list with node addresses,
which must have the same length as the node list
@type read_timeout: int
- @param read_timeout: overwrites the default read timeout for the
- given operation
+ @param read_timeout: overwrites default timeout for operation
"""
if address_list is None:
- address_list = [None for _ in node_list]
- else:
- assert len(node_list) == len(address_list), \
- "Name and address lists should have the same length"
+ # Always use IP address instead of node name
+ address_list = self._address_lookup_fn(node_list)
+
+ assert len(node_list) == len(address_list), \
+ "Name and address lists must have the same length"
+
for node, address in zip(node_list, address_list):
self.ConnectNode(node, address, read_timeout=read_timeout)
@type name: str
@param name: the node name
@type address: str
- @keyword address: the node address, if known
+ @param address: the node address, if known
+ @type read_timeout: int
+ @param read_timeout: overwrites default timeout for operation
"""
if address is None:
- address = name
+ # Always use IP address instead of node name
+ address = self._address_lookup_fn([name])[0]
+
+ assert(address is not None)
if read_timeout is None:
read_timeout = _TIMEOUTS[self.procedure]
- self.nc[name] = \
- http.client.HttpClientRequest(address, self.port, http.HTTP_PUT,
- "/%s" % self.procedure,
- post_data=self.body,
- ssl_params=self._ssl_params,
- ssl_verify_peer=True,
+ self._request[name] = \
+ http.client.HttpClientRequest(str(address), self.port,
+ http.HTTP_PUT, str("/%s" % self.procedure),
+ headers=_RPC_CLIENT_HEADERS,
+ post_data=str(self.body),
read_timeout=read_timeout)
- def GetResults(self):
+ def GetResults(self, http_pool=None):
"""Call nodes and return results.
@rtype: list
@return: List of RPC results
"""
- assert _http_manager, "RPC module not initialized"
+ if not http_pool:
+ http_pool = _thread_local.GetHttpClientPool()
- _http_manager.ExecRequests(self.nc.values())
+ http_pool.ProcessRequests(self._request.values())
results = {}
- for name, req in self.nc.iteritems():
+ for name, req in self._request.iteritems():
if req.success and req.resp_status_code == http.HTTP_OK:
results[name] = RpcResult(data=serializer.LoadJson(req.resp_body),
node=name, call=self.procedure)
"""
self._cfg = cfg
- self.port = utils.GetDaemonPort(constants.NODED)
+ self.port = netutils.GetDaemonPort(constants.NODED)
def _InstDict(self, instance, hvp=None, bep=None, osp=None):
"""Convert the given instance to a dict.
@type bep: dict or None
@param bep: a dictionary with overridden backend parameters
@type osp: dict or None
- @param osp: a dictionary with overriden os parameters
+ @param osp: a dictionary with overridden os parameters
@rtype: dict
@return: the instance dict, with the hvparams filled with the
cluster defaults
"""
body = serializer.DumpJson(args, indent=False)
- c = Client(procedure, body, utils.GetDaemonPort(constants.NODED))
+ c = Client(procedure, body, netutils.GetDaemonPort(constants.NODED))
c.ConnectList(node_list, address_list=address_list,
read_timeout=read_timeout)
return c.GetResults()
"""
body = serializer.DumpJson(args, indent=False)
- c = Client(procedure, body, utils.GetDaemonPort(constants.NODED))
+ c = Client(procedure, body, netutils.GetDaemonPort(constants.NODED))
c.ConnectNode(node, read_timeout=read_timeout)
return c.GetResults()[node]
shutdown_timeout])
@_RpcTimeout(_TMO_1DAY)
- def call_instance_os_add(self, node, inst, reinstall, debug):
+ def call_instance_os_add(self, node, inst, reinstall, debug, osparams=None):
"""Installs an OS on the given instance.
This is a single-node call.
"""
return self._SingleNodeCall(node, "instance_os_add",
- [self._InstDict(inst), reinstall, debug])
+ [self._InstDict(inst, osp=osparams),
+ reinstall, debug])
@_RpcTimeout(_TMO_SLOW)
def call_instance_run_rename(self, node, inst, old_name, debug):
[vg_name, hypervisor_type])
@_RpcTimeout(_TMO_NORMAL)
- def call_node_add(self, node, dsa, dsapub, rsa, rsapub, ssh, sshpub):
- """Add a node to the cluster.
+ def call_etc_hosts_modify(self, node, mode, name, ip):
+ """Modify hosts file with name
- This is a single-node call.
+ @type node: string
+ @param node: The node to call
+ @type mode: string
+ @param mode: The mode to operate. Currently "add" or "remove"
+ @type name: string
+ @param name: The host name to be modified
+ @type ip: string
+ @param ip: The IP address of the entry (only valid if mode is "add")
"""
- return self._SingleNodeCall(node, "node_add",
- [dsa, dsapub, rsa, rsapub, ssh, sshpub])
+ return self._SingleNodeCall(node, "etc_hosts_modify", [mode, name, ip])
@_RpcTimeout(_TMO_NORMAL)
def call_node_verify(self, node_list, checkdict, cluster_name):
return self._SingleNodeCall(node, "blockdev_create",
[bdev.ToDict(), size, owner, on_primary, info])
+ @_RpcTimeout(_TMO_SLOW)
+ def call_blockdev_wipe(self, node, bdev, offset, size):
+ """Request wipe at given offset with given size of a block device.
+
+ This is a single-node call.
+
+ """
+ return self._SingleNodeCall(node, "blockdev_wipe",
+ [bdev.ToDict(), offset, size])
+
@_RpcTimeout(_TMO_NORMAL)
def call_blockdev_remove(self, node, bdev):
"""Request removal of a given block device.
[(d.ToDict(), uid) for d, uid in devlist])
@_RpcTimeout(_TMO_NORMAL)
- def call_blockdev_assemble(self, node, disk, owner, on_primary):
+ def call_blockdev_pause_resume_sync(self, node, disks, pause):
+ """Request a pause/resume of given block device.
+
+ This is a single-node call.
+
+ """
+ return self._SingleNodeCall(node, "blockdev_pause_resume_sync",
+ [[bdev.ToDict() for bdev in disks], pause])
+
+ @_RpcTimeout(_TMO_NORMAL)
+ def call_blockdev_assemble(self, node, disk, owner, on_primary, idx):
"""Request assembling of a given block device.
This is a single-node call.
"""
return self._SingleNodeCall(node, "blockdev_assemble",
- [disk.ToDict(), owner, on_primary])
+ [disk.ToDict(), owner, on_primary, idx])
@_RpcTimeout(_TMO_NORMAL)
def call_blockdev_shutdown(self, node, disk):
return result
@_RpcTimeout(_TMO_NORMAL)
+ def call_blockdev_getmirrorstatus_multi(self, node_list, node_disks):
+ """Request status of (mirroring) devices from multiple nodes.
+
+ This is a multi-node call.
+
+ @type node_list: list
+ @param node_list: list of node names to query
+ @type node_disks: dict
+ @param node_disks: map of node name to list of disk objects
+
+ """
+ # Serialize the per-node disk objects for the wire
+ result = self._MultiNodeCall(node_list, "blockdev_getmirrorstatus_multi",
+ [dict((name, [dsk.ToDict() for dsk in disks])
+ for name, disks in node_disks.items())])
+ for nres in result.values():
+ if nres.fail_msg:
+ # Leave failed node results untouched
+ continue
+
+ # Convert successful per-disk status dicts back into objects,
+ # in place, keeping the (success, status) tuple shape
+ for idx, (success, status) in enumerate(nres.payload):
+ if success:
+ nres.payload[idx] = (success, objects.BlockDevStatus.FromDict(status))
+
+ return result
+
+ @_RpcTimeout(_TMO_NORMAL)
def call_blockdev_find(self, node, disk):
"""Request identification of a given block device.
return self._SingleNodeCall(node, "blockdev_close", params)
@_RpcTimeout(_TMO_NORMAL)
- def call_blockdev_getsizes(self, node, disks):
+ def call_blockdev_getsize(self, node, disks):
"""Returns the size of the given disks.
This is a single-node call.
return self._MultiNodeCall(node_list, "drbd_wait_sync",
[nodes_ip, [cf.ToDict() for cf in disks]])
+ @_RpcTimeout(_TMO_URGENT)
+ def call_drbd_helper(self, node_list):
+ """Gets drbd helper.
+
+ This is a multi-node call.
+
+ """
+ return self._MultiNodeCall(node_list, "drbd_helper", [])
+
@classmethod
@_RpcTimeout(_TMO_NORMAL)
def call_upload_file(cls, node_list, file_name, address_list=None):
"""
return cls._StaticMultiNodeCall(node_list, "write_ssconf_files", [values])
+ @_RpcTimeout(_TMO_NORMAL)
+ def call_run_oob(self, node, oob_program, command, remote_node, timeout):
+ """Runs OOB.
+
+ This is a single-node call.
+
+ """
+ return self._SingleNodeCall(node, "run_oob", [oob_program, command,
+ remote_node, timeout])
+
@_RpcTimeout(_TMO_FAST)
def call_os_diagnose(self, node_list):
"""Request a diagnose of OS definitions.
[old_file_storage_dir, new_file_storage_dir])
@classmethod
- @_RpcTimeout(_TMO_FAST)
+ @_RpcTimeout(_TMO_URGENT)
def call_jobqueue_update(cls, node_list, address_list, file_name, content):
"""Update job queue.
return cls._StaticSingleNodeCall(node, "jobqueue_purge", [])
@classmethod
- @_RpcTimeout(_TMO_FAST)
+ @_RpcTimeout(_TMO_URGENT)
def call_jobqueue_rename(cls, node_list, address_list, rename):
"""Rename a job queue file.