#
#
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
"""Module for the unix socket protocol
-This module implements the local unix socket protocl. You only need
+This module implements the local unix socket protocol. You only need
this module and the opcodes module in the client program in order to
communicate with the master.
-The module is also be used by the master daemon.
+The module is also used by the master daemon.
"""
import socket
import collections
-import simplejson
import time
import errno
+import logging
+import warnings
-from ganeti import opcodes
from ganeti import serializer
from ganeti import constants
-
-
-KEY_REQUEST = 'request'
-KEY_DATA = 'data'
-REQ_SUBMIT = 'submit'
-REQ_ABORT = 'abort'
-REQ_QUERY = 'query'
+from ganeti import errors
+from ganeti import utils
+from ganeti import objects
+
+
+KEY_METHOD = "method"
+KEY_ARGS = "args"
+KEY_SUCCESS = "success"
+KEY_RESULT = "result"
+KEY_VERSION = "version"
+
+REQ_SUBMIT_JOB = "SubmitJob"
+REQ_SUBMIT_MANY_JOBS = "SubmitManyJobs"
+REQ_WAIT_FOR_JOB_CHANGE = "WaitForJobChange"
+REQ_CANCEL_JOB = "CancelJob"
+REQ_ARCHIVE_JOB = "ArchiveJob"
+REQ_AUTOARCHIVE_JOBS = "AutoArchiveJobs"
+REQ_QUERY = "Query"
+REQ_QUERY_FIELDS = "QueryFields"
+REQ_QUERY_JOBS = "QueryJobs"
+REQ_QUERY_INSTANCES = "QueryInstances"
+REQ_QUERY_NODES = "QueryNodes"
+REQ_QUERY_GROUPS = "QueryGroups"
+REQ_QUERY_EXPORTS = "QueryExports"
+REQ_QUERY_CONFIG_VALUES = "QueryConfigValues"
+REQ_QUERY_CLUSTER_INFO = "QueryClusterInfo"
+REQ_QUERY_TAGS = "QueryTags"
+REQ_QUERY_LOCKS = "QueryLocks"
+REQ_QUEUE_SET_DRAIN_FLAG = "SetDrainFlag"
+REQ_SET_WATCHER_PAUSE = "SetWatcherPause"
DEF_CTMO = 10
DEF_RWTO = 60
+# WaitForJobChange timeout
+WFJC_TIMEOUT = (DEF_RWTO - 1) / 2
-class ProtocolError(Exception):
- """Denotes an error in the server communication"""
+
+class ProtocolError(errors.LuxiError):
+ """Denotes an error in the LUXI protocol."""
class ConnectionClosedError(ProtocolError):
- """Connection closed error"""
+ """Connection closed error."""
class TimeoutError(ProtocolError):
- """Operation timeout error"""
-
-
-class EncodingError(ProtocolError):
- """Encoding failure on the sending side"""
-
-
-class DecodingError(ProtocolError):
- """Decoding failure on the receiving side"""
+ """Operation timeout error."""
class RequestError(ProtocolError):
- """Error on request
+ """Error on request.
This signifies an error in the request format or request handling,
but not (e.g.) an error in starting up an instance.
"""
+
class NoMasterError(ProtocolError):
- """The master cannot be reached
+ """The master cannot be reached.
This means that the master daemon is not running or the socket has
been removed.
"""
-def SerializeJob(job):
- """Convert a job description to a string format.
-
- """
- return simplejson.dumps(job.__getstate__())
+class PermissionError(ProtocolError):
+ """Permission denied while connecting to the master socket.
+ This means the user doesn't have the proper rights.
-def UnserializeJob(data):
- """Load a job from a string format"""
- try:
- new_data = simplejson.loads(data)
- except Exception, err:
- raise DecodingError("Error while unserializing: %s" % str(err))
- job = opcodes.Job()
- job.__setstate__(new_data)
- return job
+ """
class Transport:
"""
- def __init__(self, address, timeouts=None, eom=None):
+ def __init__(self, address, timeouts=None):
"""Constructor for the Client class.
Arguments:
- address: a valid address the the used transport class
- timeout: a list of timeouts, to be used on connect and read/write
- - eom: an identifier to be used as end-of-message which the
- upper-layer will guarantee that this identifier will not appear
- in any message
There are two timeouts used since we might want to wait for a long
time for a response, but the connect timeout should be lower.
self._buffer = ""
self._msgs = collections.deque()
- if eom is None:
- self.eom = '\3'
- else:
- self.eom = eom
-
try:
self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- self.socket.settimeout(self._ctimeout)
+
+ # Try to connect
try:
- self.socket.connect(address)
- except socket.timeout, err:
- raise TimeoutError("Connect timed out: %s" % str(err))
- except socket.error, err:
- if err.args[0] == errno.ENOENT:
- raise NoMasterError((address,))
- raise
+ utils.Retry(self._Connect, 1.0, self._ctimeout,
+ args=(self.socket, address, self._ctimeout))
+ except utils.RetryTimeout:
+ raise TimeoutError("Connect timed out")
+
self.socket.settimeout(self._rwtimeout)
except (socket.error, NoMasterError):
if self.socket is not None:
self.socket = None
raise
+ @staticmethod
+ def _Connect(sock, address, timeout):
+   """Make a single attempt to connect C{sock} to C{address}.
+
+   @param sock: socket object to connect
+   @param address: address handed to C{sock.connect}
+   @param timeout: timeout in seconds, set on the socket before connecting
+   @raise TimeoutError: if the connect attempt times out
+   @raise NoMasterError: if the error code is ENOENT or ECONNREFUSED
+   @raise PermissionError: if the error code is EPERM or EACCES
+   @raise utils.RetryAgain: if the error code is EAGAIN
+
+   Any other C{socket.error} is re-raised unchanged.
+
+   """
+   sock.settimeout(timeout)
+   try:
+     sock.connect(address)
+   except socket.timeout, err:
+     raise TimeoutError("Connect timed out: %s" % str(err))
+   except socket.error, err:
+     # A socket.error does not necessarily carry an error code; without
+     # this guard an empty "args" would turn into an IndexError instead of
+     # the original exception being re-raised below
+     if err.args:
+       error_code = err.args[0]
+     else:
+       error_code = None
+
+     if error_code in (errno.ENOENT, errno.ECONNREFUSED):
+       raise NoMasterError(address)
+     elif error_code in (errno.EPERM, errno.EACCES):
+       raise PermissionError(address)
+     elif error_code == errno.EAGAIN:
+       # Server's socket backlog is full at the moment
+       raise utils.RetryAgain()
+     raise
+
def _CheckSocket(self):
"""Make sure we are connected.
This just sends a message and doesn't wait for the response.
"""
- if self.eom in msg:
- raise EncodingError("Message terminator found in payload")
+ if constants.LUXI_EOM in msg:
+ raise ProtocolError("Message terminator found in payload")
+
self._CheckSocket()
try:
- self.socket.sendall(msg + self.eom)
+ # TODO: sendall is not guaranteed to send everything
+ self.socket.sendall(msg + constants.LUXI_EOM)
except socket.timeout, err:
raise TimeoutError("Sending timeout: %s" % str(err))
def Recv(self):
- """Try to receive a messae from the socket.
+ """Try to receive a message from the socket.
In case we already have messages queued, we just return from the
queue. Otherwise, we try to read data with a _rwtimeout network
while not self._msgs:
if time.time() > etime:
raise TimeoutError("Extended receive timeout")
- try:
- data = self.socket.recv(4096)
- except socket.timeout, err:
- raise TimeoutError("Receive timeout: %s" % str(err))
+ while True:
+ try:
+ data = self.socket.recv(4096)
+ except socket.timeout, err:
+ raise TimeoutError("Receive timeout: %s" % str(err))
+ except socket.error, err:
+ if err.args and err.args[0] == errno.EAGAIN:
+ continue
+ raise
+ break
if not data:
raise ConnectionClosedError("Connection closed while reading")
- new_msgs = (self._buffer + data).split(self.eom)
+ new_msgs = (self._buffer + data).split(constants.LUXI_EOM)
self._buffer = new_msgs.pop()
self._msgs.extend(new_msgs)
return self._msgs.popleft()
self.socket = None
+def ParseRequest(msg):
+  """Decode a serialized LUXI request.
+
+  @param msg: serialized request message
+  @return: tuple of (method, args, version); version is None when the
+    request does not contain one
+  @raise ProtocolError: if the message can not be parsed, is not a
+    dictionary or lacks the method or the arguments
+
+  """
+  try:
+    decoded = serializer.LoadJson(msg)
+  except ValueError, err:
+    raise ProtocolError("Invalid LUXI request (parsing error): %s" % err)
+
+  logging.debug("LUXI request: %s", decoded)
+
+  if not isinstance(decoded, dict):
+    logging.error("LUXI request not a dict: %r", msg)
+    raise ProtocolError("Invalid LUXI request (not a dict)")
+
+  # Missing keys come back as None
+  method = decoded.get(KEY_METHOD, None) # pylint: disable-msg=E1103
+  args = decoded.get(KEY_ARGS, None) # pylint: disable-msg=E1103
+  version = decoded.get(KEY_VERSION, None) # pylint: disable-msg=E1103
+
+  # Only the version is optional
+  if not (method is not None and args is not None):
+    logging.error("LUXI request missing method or arguments: %r", msg)
+    raise ProtocolError(("Invalid LUXI request (no method or arguments"
+                         " in request): %r") % msg)
+
+  return (method, args, version)
+
+
+def ParseResponse(msg):
+  """Decode a serialized LUXI response.
+
+  @param msg: serialized response message
+  @return: tuple of (success, result, version); version is None when the
+    response does not contain one
+  @raise ProtocolError: if the message can not be deserialized or is not
+    a dictionary holding both the success flag and the result
+
+  """
+  try:
+    payload = serializer.LoadJson(msg)
+  except KeyboardInterrupt:
+    raise
+  except Exception, err:
+    raise ProtocolError("Error while deserializing response: %s" % str(err))
+
+  # Both mandatory keys must be there before anything is read
+  well_formed = (isinstance(payload, dict) and
+                 KEY_SUCCESS in payload and
+                 KEY_RESULT in payload)
+  if not well_formed:
+    raise ProtocolError("Invalid response from server: %r" % payload)
+
+  success = payload[KEY_SUCCESS]
+  result = payload[KEY_RESULT]
+  version = payload.get(KEY_VERSION, None) # pylint: disable-msg=E1103
+
+  return (success, result, version)
+
+
+def FormatResponse(success, result, version=None):
+  """Serialize a LUXI response.
+
+  @param success: value stored as the success flag
+  @param result: value stored as the result
+  @param version: version to include; left out of the message when None
+  @return: the response as serialized by L{serializer.DumpJson}
+
+  """
+  message = {KEY_SUCCESS: success, KEY_RESULT: result}
+  if version is not None:
+    message[KEY_VERSION] = version
+
+  logging.debug("LUXI response: %s", message)
+
+  return serializer.DumpJson(message)
+
+
+def FormatRequest(method, args, version=None):
+  """Serialize a LUXI request.
+
+  @param method: value stored as the method
+  @param args: value stored as the arguments
+  @param version: version to include; left out of the message when None
+  @return: the request as serialized by L{serializer.DumpJson} with
+    indentation turned off
+
+  """
+  message = {KEY_METHOD: method, KEY_ARGS: args}
+  if version is not None:
+    message[KEY_VERSION] = version
+
+  return serializer.DumpJson(message, indent=False)
+
+
+def CallLuxiMethod(transport_cb, method, args, version=None):
+  """Perform one LUXI call through a transport callback.
+
+  @param transport_cb: callable receiving the serialized request and
+    returning the serialized response
+  @param method: method to call
+  @param args: arguments for the method
+  @param version: version put into the request and compared against the
+    version of the response, if the response has one
+  @return: the result of a successful response
+  @raise errors.LuxiError: if the response carries a different version
+  @raise RequestError: if the response signals a failure and
+    L{errors.MaybeRaise} did not raise by itself
+
+  """
+  assert callable(transport_cb)
+
+  # Round trip: serialize, hand over to the transport, decode the reply
+  reply = transport_cb(FormatRequest(method, args, version=version))
+  (success, result, resp_version) = ParseResponse(reply)
+
+  # Responses without a version are accepted as they are
+  mismatch = (resp_version is not None and resp_version != version)
+  if mismatch:
+    raise errors.LuxiError("LUXI version mismatch, client %s, response %s" %
+                           (version, resp_version))
+
+  if not success:
+    errors.MaybeRaise(result)
+    raise RequestError(result)
+
+  return result
+
+
class Client(object):
"""High-level client implementation.
"""
if address is None:
address = constants.MASTER_SOCKET
- self.transport = transport(address, timeouts=timeouts)
+ self.address = address
+ self.timeouts = timeouts
+ self.transport_class = transport
+ self.transport = None
+ self._InitTransport()
- def SendRequest(self, request, data):
- """Send a generic request and return the response.
+ def _InitTransport(self):
+   """Create the transport unless there already is one.
+
+   The transport is built from the class, address and timeouts stored
+   on this object.
+
+   """
+   if self.transport is not None:
+     return
+
+   make_transport = self.transport_class
+   self.transport = make_transport(self.address, timeouts=self.timeouts)
+
+ def _CloseTransport(self):
+ """Close the transport, ignoring errors.
"""
- msg = {KEY_REQUEST: request, KEY_DATA: data}
- result = self.transport.Call(serializer.DumpJson(msg, indent=False))
+ if self.transport is None:
+ return
+ try:
+ old_transp = self.transport
+ self.transport = None
+ old_transp.Close()
+ except Exception: # pylint: disable-msg=W0703
+ pass
+
+ def _SendMethodCall(self, data):
+ # Send request and wait for response
try:
- data = serializer.LoadJson(result)
- except Exception, err:
- raise ProtocolError("Error while deserializing response: %s" % str(err))
- if (not isinstance(data, dict) or
- 'success' not in data or
- 'result' not in data):
- raise DecodingError("Invalid response from server: %s" % str(data))
- return data
-
- def SubmitJob(self, job):
- """Submit a job"""
- result = self.SendRequest(REQ_SUBMIT, SerializeJob(job))
- if not result['success']:
- raise RequestError(result['result'])
- return result['result']
-
- def Query(self, data):
- """Make a query"""
- result = self.SendRequest(REQ_QUERY, data)
- if not result['success']:
- raise RequestError(result[result])
- result = result['result']
- if data["object"] == "jobs":
- # custom job processing of query values
- for row in result:
- for idx, field in enumerate(data["fields"]):
- if field == "op_list":
- row[idx] = [opcodes.OpCode.LoadOpCode(i) for i in row[idx]]
+ self._InitTransport()
+ return self.transport.Call(data)
+ except Exception:
+ self._CloseTransport()
+ raise
+
+ def Close(self):
+ """Close the underlying connection.
+
+ Delegates to L{_CloseTransport}.
+
+ """
+ self._CloseTransport()
+
+ def CallMethod(self, method, args):
+   """Issue a LUXI call over this client's transport.
+
+   @param method: method to call
+   @param args: arguments for the method
+   @return: whatever L{CallLuxiMethod} returns for the call made with
+     C{constants.LUXI_VERSION} as the version
+
+   """
+   send_fn = self._SendMethodCall
+   return CallLuxiMethod(send_fn, method, args,
+                         version=constants.LUXI_VERSION)
+
+ def SetQueueDrainFlag(self, drain_flag):
+ """Send a C{SetDrainFlag} request with C{drain_flag} as its arguments.
+
+ @return: the result of L{CallMethod}
+
+ """
+ return self.CallMethod(REQ_QUEUE_SET_DRAIN_FLAG, drain_flag)
+
+ def SetWatcherPause(self, until):
+   """Send a C{SetWatcherPause} request.
+
+   @param until: value sent as the only element of the argument list
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = [until]
+   return self.CallMethod(REQ_SET_WATCHER_PAUSE, call_args)
+
+ def SubmitJob(self, ops):
+   """Send a C{SubmitJob} request.
+
+   @param ops: sequence of objects; each one is serialized through its
+     C{__getstate__} method
+   @return: the result of L{CallMethod}
+
+   """
+   serialized = [op.__getstate__() for op in ops]
+   return self.CallMethod(REQ_SUBMIT_JOB, serialized)
+
+ def SubmitManyJobs(self, jobs):
+   """Send a C{SubmitManyJobs} request.
+
+   @param jobs: sequence of jobs, each a sequence of objects which are
+     serialized through their C{__getstate__} method
+   @return: the result of L{CallMethod}
+
+   """
+   serialized = [[op.__getstate__() for op in ops] for ops in jobs]
+   return self.CallMethod(REQ_SUBMIT_MANY_JOBS, serialized)
+
+ def CancelJob(self, job_id):
+ """Send a C{CancelJob} request with C{job_id} as its arguments.
+
+ @return: the result of L{CallMethod}
+
+ """
+ return self.CallMethod(REQ_CANCEL_JOB, job_id)
+
+ def ArchiveJob(self, job_id):
+ """Send an C{ArchiveJob} request with C{job_id} as its arguments.
+
+ @return: the result of L{CallMethod}
+
+ """
+ return self.CallMethod(REQ_ARCHIVE_JOB, job_id)
+
+ def AutoArchiveJobs(self, age):
+   """Send an C{AutoArchiveJobs} request.
+
+   The arguments are C{age} and a timeout derived from the default
+   read/write timeout (L{DEF_RWTO}).
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (age, (DEF_RWTO - 1) / 2)
+   return self.CallMethod(REQ_AUTOARCHIVE_JOBS, call_args)
+
+ def WaitForJobChangeOnce(self, job_id, fields,
+                          prev_job_info, prev_log_serial,
+                          timeout=WFJC_TIMEOUT):
+   """Send a single C{WaitForJobChange} request.
+
+   @param job_id: Job ID
+   @type fields: list
+   @param fields: List of field names to be observed
+   @type prev_job_info: None or list
+   @param prev_job_info: Previously received job information
+   @type prev_log_serial: None or int/long
+   @param prev_log_serial: Highest log serial number previously received
+   @type timeout: int/float
+   @param timeout: Timeout in seconds; must not be negative and is capped
+     to L{WFJC_TIMEOUT}
+   @return: the result of L{CallMethod}
+
+   """
+   assert timeout >= 0, "Timeout can not be negative"
+
+   capped_timeout = min(WFJC_TIMEOUT, timeout)
+   call_args = (job_id, fields, prev_job_info, prev_log_serial,
+                capped_timeout)
+   return self.CallMethod(REQ_WAIT_FOR_JOB_CHANGE, call_args)
+
+ def WaitForJobChange(self, job_id, fields, prev_job_info, prev_log_serial):
+ """Call L{WaitForJobChangeOnce} until it reports something.
+
+ The call is repeated, with the default timeout, for as long as the
+ result equals C{constants.JOB_NOTCHANGED}.
+
+ @return: the first result different from C{constants.JOB_NOTCHANGED}
+
+ """
+ while True:
+ result = self.WaitForJobChangeOnce(job_id, fields,
+ prev_job_info, prev_log_serial)
+ if result != constants.JOB_NOTCHANGED:
+ break
return result
+
+ def Query(self, what, fields, filter_):
+   """Send a C{Query} request for resources/items.
+
+   @param what: One of L{constants.QR_VIA_LUXI}
+   @type fields: List of strings
+   @param fields: List of requested fields
+   @type filter_: None or list
+   @param filter_: Query filter
+   @rtype: L{objects.QueryResponse}
+   @return: the response, rebuilt from the dictionary sent by the server
+
+   """
+   request = objects.QueryRequest(what=what, fields=fields, filter=filter_)
+   reply = self.CallMethod(REQ_QUERY, request.ToDict())
+   return objects.QueryResponse.FromDict(reply)
+
+ def QueryFields(self, what, fields):
+   """Send a C{QueryFields} request for the available fields.
+
+   @param what: One of L{constants.QR_VIA_LUXI}
+   @type fields: None or list of strings
+   @param fields: List of requested fields
+   @rtype: L{objects.QueryFieldsResponse}
+   @return: the response, rebuilt from the dictionary sent by the server
+
+   """
+   request = objects.QueryFieldsRequest(what=what, fields=fields)
+   reply = self.CallMethod(REQ_QUERY_FIELDS, request.ToDict())
+   return objects.QueryFieldsResponse.FromDict(reply)
+
+ def QueryJobs(self, job_ids, fields):
+   """Send a C{QueryJobs} request with C{(job_ids, fields)} as arguments.
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (job_ids, fields)
+   return self.CallMethod(REQ_QUERY_JOBS, call_args)
+
+ def QueryInstances(self, names, fields, use_locking):
+   """Send a C{QueryInstances} request.
+
+   The arguments are sent as C{(names, fields, use_locking)}.
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (names, fields, use_locking)
+   return self.CallMethod(REQ_QUERY_INSTANCES, call_args)
+
+ def QueryNodes(self, names, fields, use_locking):
+   """Send a C{QueryNodes} request.
+
+   The arguments are sent as C{(names, fields, use_locking)}.
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (names, fields, use_locking)
+   return self.CallMethod(REQ_QUERY_NODES, call_args)
+
+ def QueryGroups(self, names, fields, use_locking):
+   """Send a C{QueryGroups} request.
+
+   The arguments are sent as C{(names, fields, use_locking)}.
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (names, fields, use_locking)
+   return self.CallMethod(REQ_QUERY_GROUPS, call_args)
+
+ def QueryExports(self, nodes, use_locking):
+   """Send a C{QueryExports} request with C{(nodes, use_locking)}.
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (nodes, use_locking)
+   return self.CallMethod(REQ_QUERY_EXPORTS, call_args)
+
+ def QueryClusterInfo(self):
+   """Send a C{QueryClusterInfo} request with an empty argument tuple.
+
+   @return: the result of L{CallMethod}
+
+   """
+   no_args = ()
+   return self.CallMethod(REQ_QUERY_CLUSTER_INFO, no_args)
+
+ def QueryConfigValues(self, fields):
+ """Send a C{QueryConfigValues} request with C{fields} as its arguments.
+
+ @return: the result of L{CallMethod}
+
+ """
+ return self.CallMethod(REQ_QUERY_CONFIG_VALUES, fields)
+
+ def QueryTags(self, kind, name):
+   """Send a C{QueryTags} request with C{(kind, name)} as arguments.
+
+   @return: the result of L{CallMethod}
+
+   """
+   call_args = (kind, name)
+   return self.CallMethod(REQ_QUERY_TAGS, call_args)
+
+ def QueryLocks(self, fields, sync):
+   """Send a C{QueryLocks} request (deprecated call).
+
+   A warning pointing at C{Query} is issued before the request is made.
+   The arguments are sent as C{(fields, sync)}.
+
+   @return: the result of L{CallMethod}
+
+   """
+   notice = ("This LUXI call is deprecated and will be removed, use"
+             " Query(\"%s\", ...) instead" % constants.QR_LOCK)
+   warnings.warn(notice)
+
+   call_args = (fields, sync)
+   return self.CallMethod(REQ_QUERY_LOCKS, call_args)