Statistics
| Branch: | Tag: | Revision:

root / lib / http / __init__.py @ 7e950d31

History | View | Annotate | Download (27.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""HTTP module.
22

23
"""
24

    
25
import logging
26
import mimetools
27
import OpenSSL
28
import select
29
import socket
30
import errno
31

    
32
from cStringIO import StringIO
33

    
34
from ganeti import constants
35
from ganeti import serializer
36
from ganeti import utils
37

    
38

    
39
# Identification string built from the Ganeti release version
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION

# HTTP status codes
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# HTTP protocol version strings
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# HTTP request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# HTTP header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Error text compared against the arguments of OpenSSL.SSL.SysCallError in
# SocketOperation to recognize an unexpected end of file
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Socket operations
(SOCKOP_SEND,
 SOCKOP_RECV,
 SOCKOP_SHUTDOWN,
 SOCKOP_HANDSHAKE) = range(4)

# send/receive quantum
SOCK_BUF_SIZE = 32768
79

    
80

    
81
class HttpError(Exception):
  """Generic error raised inside the HTTP implementation.

  Intended for internal error reporting only.

  """
87

    
88

    
89
class HttpConnectionClosed(Exception):
  """Signals that the connection has been closed.

  Intended for internal error reporting only, and only as a last resort
  when the condition cannot be reported in any other way.

  """
96

    
97

    
98
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Signals an unexpected end of file during the SSL handshake.

  Intended for internal error reporting only.

  """
104

    
105

    
106
class HttpSocketTimeout(Exception):
  """Signals that a socket operation timed out.

  Intended for internal error reporting only.

  """
112

    
113

    
114
class HttpException(Exception):
  """Base class for exceptions carrying an HTTP status code.

  Subclasses override C{code} with the numeric status. The base exception
  is initialized without arguments; message and headers are only stored
  as attributes.

  @ivar message: Optional message given by the raiser
  @ivar headers: Optional headers given by the raiser

  """
  code = None
  message = None

  def __init__(self, message=None, headers=None):
    Exception.__init__(self)
    self.headers = headers
    self.message = message
122

    
123

    
124
class HttpBadRequest(HttpException):
  """400 Bad Request

  RFC2616, 10.4.1: The server could not understand the request because
  its syntax is malformed. The client SHOULD NOT send the same request
  again unless it has been modified.

  """
  code = 400
133

    
134

    
135
class HttpUnauthorized(HttpException):
  """401 Unauthorized

  RFC2616, section 10.4.2: User authentication is required for the
  request. The response MUST carry a WWW-Authenticate header field
  (section 14.47) with a challenge that applies to the requested
  resource.

  """
  code = 401
145

    
146

    
147
class HttpForbidden(HttpException):
  """403 Forbidden

  RFC2616, 10.4.4: The request was understood, but the server refuses to
  fulfill it. Authorization will not help and the request SHOULD NOT be
  repeated.

  """
  code = 403
156

    
157

    
158
class HttpNotFound(HttpException):
  """404 Not Found

  RFC2616, 10.4.5: Nothing matching the Request-URI was found by the
  server. Whether the condition is temporary or permanent is not
  indicated.

  """
  code = 404
167

    
168

    
169
class HttpMethodNotAllowed(HttpException):
  """405 Method Not Allowed

  RFC2616, 10.4.6: The resource identified by the Request-URI does not
  allow the method given in the Request-Line. The response MUST include
  an Allow header listing the valid methods for the requested resource.

  """
  code = 405
179

    
180

    
181
class HttpRequestTimeout(HttpException):
  """408 Request Timeout

  RFC2616, 10.4.9: No request was produced by the client within the time
  the server was prepared to wait. The client MAY repeat the request
  without modifications at any later time.

  """
  code = 408
190

    
191

    
192
class HttpConflict(HttpException):
  """409 Conflict

  RFC2616, 10.4.10: A conflict with the current state of the resource
  prevented the request from being completed. The code is only allowed
  where the user is expected to be able to resolve the conflict and
  resubmit the request.

  """
  code = 409
202

    
203

    
204
class HttpGone(HttpException):
  """410 Gone

  RFC2616, 10.4.11: The server no longer has the requested resource and
  knows no forwarding address. The condition is expected to be considered
  permanent.

  """
  code = 410
213

    
214

    
215
class HttpLengthRequired(HttpException):
  """411 Length Required

  RFC2616, 10.4.12: The request is refused because it lacks a defined
  Content-Length. The client MAY repeat the request after adding a valid
  Content-Length header field containing the length of the message-body
  in the request message.

  """
  code = 411
225

    
226

    
227
class HttpPreconditionFailed(HttpException):
  """412 Precondition Failed

  RFC2616, 10.4.13: A precondition given in one or more request-header
  fields evaluated to false when the server tested it.

  """
  code = 412
236

    
237

    
238
class HttpInternalServerError(HttpException):
  """500 Internal Server Error

  RFC2616, 10.5.1: An unexpected condition was encountered by the server
  and prevented it from fulfilling the request.

  """
  code = 500
246

    
247

    
248
class HttpNotImplemented(HttpException):
  """501 Not Implemented

  RFC2616, 10.5.2: The functionality required to fulfill the request is
  not supported by the server.

  """
  code = 501
256

    
257

    
258
class HttpBadGateway(HttpException):
  """502 Bad Gateway

  RFC2616, 10.5.3: While acting as a gateway or proxy, the server got an
  invalid response from the upstream server it accessed in attempting to
  fulfill the request.

  """
  code = 502
267

    
268

    
269
class HttpServiceUnavailable(HttpException):
  """503 Service Unavailable

  RFC2616, 10.5.4: Temporary overloading or maintenance of the server
  currently prevents it from handling the request.

  """
  code = 503
277

    
278

    
279
class HttpGatewayTimeout(HttpException):
  """504 Gateway Timeout

  RFC2616, 10.5.5: While acting as a gateway or proxy, the server did not
  get a timely response from the upstream server specified by the URI
  (e.g. HTTP, FTP, LDAP) or from some other auxiliary server (e.g. DNS)
  it needed to access in attempting to complete the request.

  """
  code = 504
290

    
291

    
292
class HttpVersionNotSupported(HttpException):
  """505 HTTP Version Not Supported

  RFC2616, 10.5.6: The HTTP protocol version used in the request message
  is not supported by the server, or the server refuses to support it.

  """
  code = 505
300

    
301

    
302
class HttpJsonConverter: # pylint: disable-msg=W0232
  """Converts message bodies to and from JSON via the serializer module.

  """
  CONTENT_TYPE = "application/json"

  @staticmethod
  def Encode(data):
    """Returns the result of serializer.DumpJson for the given data.

    """
    encoded = serializer.DumpJson(data)
    return encoded

  @staticmethod
  def Decode(data):
    """Returns the result of serializer.LoadJson for the given data.

    """
    decoded = serializer.LoadJson(data)
    return decoded
312

    
313

    
314
def WaitForSocketCondition(sock, event, timeout):
  """Blocks until a condition is reported for the socket or time runs out.

  @type sock: socket
  @param sock: Socket to watch for events
  @type event: int
  @param event: ORed conditions to wait for (see select module)
  @type timeout: float or None
  @param timeout: Timeout in seconds, None to wait without limit
  @rtype: int or None
  @return: None if the timeout expired, otherwise the occurred conditions

  """
  # Besides the requested conditions, urgent data and the error
  # conditions also end the wait
  interesting = (event | select.POLLPRI |
                 select.POLLNVAL | select.POLLHUP | select.POLLERR)

  # The poller object works with milliseconds
  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = timeout * 1000

  poller = select.poll()
  poller.register(sock, event)
  try:
    while True:
      # TODO: If the main thread receives a signal and we have no timeout, we
      # could wait forever. This should check a global "quit" flag or
      # something every so often.
      ready = poller.poll(poll_timeout)
      if not ready:
        # Nothing reported, the timeout expired
        return None
      for (_, flags) in ready:
        if flags & interesting:
          return flags
  finally:
    poller.unregister(sock)
350

    
351

    
352
def SocketOperation(sock, op, arg1, timeout):
353
  """Wrapper around socket functions.
354

355
  This function abstracts error handling for socket operations, especially
356
  for the complicated interaction with OpenSSL.
357

358
  @type sock: socket
359
  @param sock: Socket for the operation
360
  @type op: int
361
  @param op: Operation to execute (SOCKOP_* constants)
362
  @type arg1: any
363
  @param arg1: Parameter for function (if needed)
364
  @type timeout: None or float
365
  @param timeout: Timeout in seconds or None
366
  @return: Return value of socket function
367

368
  """
369
  # TODO: event_poll/event_check/override
370
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
371
    event_poll = select.POLLOUT
372

    
373
  elif op == SOCKOP_RECV:
374
    event_poll = select.POLLIN
375

    
376
  elif op == SOCKOP_SHUTDOWN:
377
    event_poll = None
378

    
379
    # The timeout is only used when OpenSSL requests polling for a condition.
380
    # It is not advisable to have no timeout for shutdown.
381
    assert timeout
382

    
383
  else:
384
    raise AssertionError("Invalid socket operation")
385

    
386
  # Handshake is only supported by SSL sockets
387
  if (op == SOCKOP_HANDSHAKE and
388
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
389
    return
390

    
391
  # No override by default
392
  event_override = 0
393

    
394
  while True:
395
    # Poll only for certain operations and when asked for by an override
396
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
397
      if event_override:
398
        wait_for_event = event_override
399
      else:
400
        wait_for_event = event_poll
401

    
402
      event = WaitForSocketCondition(sock, wait_for_event, timeout)
403
      if event is None:
404
        raise HttpSocketTimeout()
405

    
406
      if (op == SOCKOP_RECV and
407
          event & (select.POLLNVAL | select.POLLHUP | select.POLLERR)):
408
        return ""
409

    
410
      if not event & wait_for_event:
411
        continue
412

    
413
    # Reset override
414
    event_override = 0
415

    
416
    try:
417
      try:
418
        if op == SOCKOP_SEND:
419
          return sock.send(arg1)
420

    
421
        elif op == SOCKOP_RECV:
422
          return sock.recv(arg1)
423

    
424
        elif op == SOCKOP_SHUTDOWN:
425
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
426
            # PyOpenSSL's shutdown() doesn't take arguments
427
            return sock.shutdown()
428
          else:
429
            return sock.shutdown(arg1)
430

    
431
        elif op == SOCKOP_HANDSHAKE:
432
          return sock.do_handshake()
433

    
434
      except OpenSSL.SSL.WantWriteError:
435
        # OpenSSL wants to write, poll for POLLOUT
436
        event_override = select.POLLOUT
437
        continue
438

    
439
      except OpenSSL.SSL.WantReadError:
440
        # OpenSSL wants to read, poll for POLLIN
441
        event_override = select.POLLIN | select.POLLPRI
442
        continue
443

    
444
      except OpenSSL.SSL.WantX509LookupError:
445
        continue
446

    
447
      except OpenSSL.SSL.ZeroReturnError, err:
448
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
449
        # occurs if a closure alert has occurred in the protocol, i.e. the
450
        # connection has been closed cleanly. Note that this does not
451
        # necessarily mean that the transport layer (e.g. a socket) has been
452
        # closed.
453
        if op == SOCKOP_SEND:
454
          # Can happen during a renegotiation
455
          raise HttpConnectionClosed(err.args)
456
        elif op == SOCKOP_RECV:
457
          return ""
458

    
459
        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
460
        raise socket.error(err.args)
461

    
462
      except OpenSSL.SSL.SysCallError, err:
463
        if op == SOCKOP_SEND:
464
          # arg1 is the data when writing
465
          if err.args and err.args[0] == -1 and arg1 == "":
466
            # errors when writing empty strings are expected
467
            # and can be ignored
468
            return 0
469

    
470
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
471
          if op == SOCKOP_RECV:
472
            return ""
473
          elif op == SOCKOP_HANDSHAKE:
474
            # Can happen if peer disconnects directly after the connection is
475
            # opened.
476
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
477

    
478
        raise socket.error(err.args)
479

    
480
      except OpenSSL.SSL.Error, err:
481
        raise socket.error(err.args)
482

    
483
    except socket.error, err:
484
      if err.args and err.args[0] == errno.EAGAIN:
485
        # Ignore EAGAIN
486
        continue
487

    
488
      raise
489

    
490

    
491
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
492
  """Closes the connection.
493

494
  @type sock: socket
495
  @param sock: Socket to be shut down
496
  @type close_timeout: float
497
  @param close_timeout: How long to wait for the peer to close
498
      the connection
499
  @type write_timeout: float
500
  @param write_timeout: Write timeout for shutdown
501
  @type msgreader: http.HttpMessageReader
502
  @param msgreader: Request message reader, used to determine whether
503
      peer should close connection
504
  @type force: bool
505
  @param force: Whether to forcibly close the connection without
506
      waiting for peer
507

508
  """
509
  #print msgreader.peer_will_close, force
510
  if msgreader and msgreader.peer_will_close and not force:
511
    # Wait for peer to close
512
    try:
513
      # Check whether it's actually closed
514
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
515
        return
516
    except (socket.error, HttpError, HttpSocketTimeout):
517
      # Ignore errors at this stage
518
      pass
519

    
520
  # Close the connection from our side
521
  try:
522
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
523
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
524
                    write_timeout)
525
  except HttpSocketTimeout:
526
    raise HttpError("Timeout while shutting down connection")
527
  except socket.error, err:
528
    # Ignore ENOTCONN
529
    if not (err.args and err.args[0] == errno.ENOTCONN):
530
      raise HttpError("Error while shutting down connection: %s" % err)
531

    
532

    
533
def Handshake(sock, write_timeout):
534
  """Shakes peer's hands.
535

536
  @type sock: socket
537
  @param sock: Socket to be shut down
538
  @type write_timeout: float
539
  @param write_timeout: Write timeout for handshake
540

541
  """
542
  try:
543
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
544
  except HttpSocketTimeout:
545
    raise HttpError("Timeout during SSL handshake")
546
  except socket.error, err:
547
    raise HttpError("Error in SSL handshake: %s" % err)
548

    
549

    
550
def InitSsl():
  """Checks that OpenSSL's random number generator is usable.

  This function is idempotent.

  @raise EnvironmentError: If OpenSSL reports that the PRNG has not been
      seeded with enough data

  """
  prng_ready = OpenSSL.rand.status()
  if not prng_ready:
    raise EnvironmentError("OpenSSL could not collect enough entropy"
                           " for the PRNG")

  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
561

    
562

    
563
class HttpSslParams(object):
  """Holds the PEM data of an SSL key and certificate.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Reads key and certificate from the given files.

    @type ssl_key_path: string
    @param ssl_key_path: Path to file containing SSL key in PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Path to file containing SSL certificate
        in PEM format

    """
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)

  def GetKey(self):
    """Loads the private key from the stored PEM data.

    """
    pem_format = OpenSSL.crypto.FILETYPE_PEM
    return OpenSSL.crypto.load_privatekey(pem_format, self.ssl_key_pem)

  def GetCertificate(self):
    """Loads the certificate from the stored PEM data.

    """
    pem_format = OpenSSL.crypto.FILETYPE_PEM
    return OpenSSL.crypto.load_certificate(pem_format, self.ssl_cert_pem)
587

    
588

    
589
class HttpBase(object):
  """Common functionality of HTTP server and client.

  """
  def __init__(self):
    # All SSL state is unset until _CreateSocket is called
    self.using_ssl = None
    self._ssl_params = None
    self._ssl_key = None
    self._ssl_cert = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer):
    """Creates a TCP socket, wrapped in an SSL connection if requested.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate, None to disable SSL
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether to require client certificate
        and compare it with our certificate
    @return: Plain socket without SSL, otherwise an OpenSSL connection

    """
    self._ssl_params = ssl_params

    plain_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    # SSL is enabled by passing SSL parameters
    self.using_ssl = ssl_params is not None

    if not self.using_ssl:
      return plain_sock

    self._ssl_key = ssl_params.GetKey()
    self._ssl_cert = ssl_params.GetCertificate()

    context = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
    context.set_options(OpenSSL.SSL.OP_NO_SSLv2)

    context.use_privatekey(self._ssl_key)
    context.use_certificate(self._ssl_cert)
    context.check_privatekey()

    if ssl_verify_peer:
      verify_mode = (OpenSSL.SSL.VERIFY_PEER |
                     OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT)
      context.set_verify(verify_mode, self._SSLVerifyCallback)

    return OpenSSL.SSL.Connection(context, plain_sock)

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Checks the certificate presented by the peer.

    Only the SHA1 and MD5 digests are compared; the peer has to present
    the very same certificate as the one used on our side.

    @rtype: bool
    @return: Whether both digests match those of our certificate

    """
    assert self._ssl_params, "SSL not initialized"

    own_cert = self._ssl_cert
    for digest_name in ("sha1", "md5"):
      if own_cert.digest(digest_name) != cert.digest(digest_name):
        return False

    return True
647

    
648

    
649
class HttpMessage(object):
  """Container for the parts of an HTTP message.

  """
  def __init__(self):
    # Start line and headers
    self.start_line = None
    self.headers = None

    # Raw body and its decoded form
    self.body = None
    self.decoded_body = None
658

    
659

    
660
class HttpClientToServerStartLine(object):
  """Start line of an HTTP request.

  """
  def __init__(self, method, path, version):
    self.method = method
    self.path = path
    self.version = version

  def __str__(self):
    """Formats the start line as "method path version".

    """
    parts = (self.method, self.path, self.version)
    return "%s %s %s" % parts
671

    
672

    
673
class HttpServerToClientStartLine(object):
  """Start line of an HTTP response.

  """
  def __init__(self, version, code, reason):
    self.version = version
    self.code = code
    self.reason = reason

  def __str__(self):
    """Formats the start line as "version code reason".

    """
    parts = (self.version, self.code, self.reason)
    return "%s %s %s" % parts
684

    
685

    
686
class HttpMessageWriter(object):
  """Serializes an HTTP message and sends it over a socket.

  """
  def __init__(self, sock, msg, write_timeout):
    """Prepares, formats and writes the HTTP message to the socket.

    @type sock: socket
    @param sock: Socket to be written to
    @type msg: http.HttpMessage
    @param msg: HTTP message to be written
    @type write_timeout: float
    @param write_timeout: Write timeout for socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)

    offset = 0
    while offset < total:
      # At most SOCK_BUF_SIZE bytes are handed to the socket per call
      chunk = payload[offset:(offset + SOCK_BUF_SIZE)]

      # Continue after the bytes which were actually sent
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Sets the mandatory headers of the HTTP message.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    body = self._msg.body
    if body:
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(body)

  def _FormatMessage(self):
    """Builds the string representation of the HTTP message.

    @rtype: string
    @return: Start line, headers (not for HTTP/0.9) and body, if any

    """
    msg = self._msg
    out = StringIO()

    # Start line
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers
    if msg.start_line.version != HTTP_0_9:
      for (name, value) in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    out.write("\r\n")

    # Message body, if it is to be sent
    if self.HasMessageBody():
      out.write(msg.body)

    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Tells whether the HTTP message has a body to be written.

    Subclasses may override this method.

    """
    return bool(self._msg.body)
763

    
764

    
765
class HttpMessageReader(object):
  """Reads HTTP message from socket.

  The message is read and parsed by the constructor. Subclasses must
  implement L{ParseStartLine} and can set the length limits.

  """
  # Length limits, None disables the respective check
  START_LINE_LENGTH_MAX = None
  HEADER_LENGTH_MAX = None

  # Parser state machine
  PS_START_LINE = "start-line"
  PS_HEADERS = "headers"
  PS_BODY = "entity-body"
  PS_COMPLETE = "complete"

  def __init__(self, sock, msg, read_timeout):
    """Reads an HTTP message from a socket.

    @type sock: socket
    @param sock: Socket to be read from
    @type msg: http.HttpMessage
    @param msg: Object for the read message
    @type read_timeout: float
    @param read_timeout: Read timeout for socket
    @raise HttpError: If the connection is closed before start line and
        headers were received or a length limit is exceeded

    """
    self.sock = sock
    self.msg = msg

    self.start_line_buffer = None
    self.header_buffer = StringIO()
    self.body_buffer = StringIO()
    self.parser_status = self.PS_START_LINE
    self.content_length = None
    self.peer_will_close = None

    buf = ""
    eof = False
    while self.parser_status != self.PS_COMPLETE:
      # TODO: Don't read more than necessary (Content-Length), otherwise
      # data might be lost and/or an error could occur
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)

      if data:
        buf += data
      else:
        eof = True

      # Do some parsing and error checking while more data arrives
      buf = self._ContinueParsing(buf, eof)

      # Must be done only after the buffer has been evaluated
      # TODO: Connection-length < len(data read) and connection closed
      if (eof and
          self.parser_status in (self.PS_START_LINE,
                                 self.PS_HEADERS)):
        raise HttpError("Connection closed prematurely")

    # Parse rest
    buf = self._ContinueParsing(buf, True)

    assert self.parser_status == self.PS_COMPLETE
    assert not buf, "Parser didn't read full response"

    msg.body = self.body_buffer.getvalue()

    # TODO: Content-type, error handling
    if msg.body:
      msg.decoded_body = HttpJsonConverter().Decode(msg.body)
    else:
      msg.decoded_body = None

    if msg.decoded_body:
      logging.debug("Message body: %s", msg.decoded_body)

  def _ContinueParsing(self, buf, eof):
    """Main function for HTTP message state machine.

    Consumes as much of the buffer as the current parser state allows and
    advances the state when start line, headers or body are complete.

    @type buf: string
    @param buf: Receive buffer
    @type eof: bool
    @param eof: Whether we've reached EOF on the socket
    @rtype: string
    @return: Updated receive buffer
    @raise HttpError: If start line or headers exceed their length limit

    """
    # TODO: Use offset instead of slicing when possible
    if self.parser_status == self.PS_START_LINE:
      # Expect start line
      while True:
        idx = buf.find("\r\n")

        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
        # ignore any empty line(s) received where a Request-Line is expected.
        # In other words, if the server is reading the protocol stream at the
        # beginning of a message and receives a CRLF first, it should ignore
        # the CRLF."
        if idx == 0:
          # TODO: Limit number of CRLFs/empty lines for safety?
          # Drop the leading CRLF; keeping it would never make progress
          buf = buf[2:]
          continue

        if idx > 0:
          self.start_line_buffer = buf[:idx]

          self._CheckStartLineLength(len(self.start_line_buffer))

          # Remove status line, including CRLF
          buf = buf[idx + 2:]

          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)

          self.parser_status = self.PS_HEADERS
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckStartLineLength(len(buf))

        break

    # TODO: Handle messages without headers
    if self.parser_status == self.PS_HEADERS:
      # Wait for header end
      idx = buf.find("\r\n\r\n")
      if idx >= 0:
        # Keep the CRLF terminating the last header line
        self.header_buffer.write(buf[:idx + 2])

        self._CheckHeaderLength(self.header_buffer.tell())

        # Remove headers, including CRLF
        buf = buf[idx + 4:]

        self._ParseHeaders()

        self.parser_status = self.PS_BODY
      else:
        # Check whether incoming data is getting too large, otherwise we just
        # fill our read buffer.
        self._CheckHeaderLength(len(buf))

    if self.parser_status == self.PS_BODY:
      # TODO: Implement max size for body_buffer
      self.body_buffer.write(buf)
      buf = ""

      # Check whether we've read everything
      #
      # RFC2616, section 4.4: "When a message-body is included with a message,
      # the transfer-length of that body is determined by one of the following
      # [...] 5. By the server closing the connection. (Closing the connection
      # cannot be used to indicate the end of a request body, since that would
      # leave no possibility for the server to send back a response.)"
      #
      # TODO: Error when buffer length > Content-Length header
      if (eof or
          self.content_length is None or
          (self.content_length is not None and
           self.body_buffer.tell() >= self.content_length)):
        self.parser_status = self.PS_COMPLETE

    return buf

  def _CheckStartLineLength(self, length):
    """Limits the start line buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If the size exceeds START_LINE_LENGTH_MAX

    """
    if (self.START_LINE_LENGTH_MAX is not None and
        length > self.START_LINE_LENGTH_MAX):
      raise HttpError("Start line longer than %d chars" %
                       self.START_LINE_LENGTH_MAX)

  def _CheckHeaderLength(self, length):
    """Limits the header buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If the size exceeds HEADER_LENGTH_MAX

    """
    if (self.HEADER_LENGTH_MAX is not None and
        length > self.HEADER_LENGTH_MAX):
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)

  def ParseStartLine(self, start_line):
    """Parses the start line of a message.

    Must be overridden by subclass.

    @type start_line: string
    @param start_line: Start line string

    """
    raise NotImplementedError()

  def _WillPeerCloseConnection(self):
    """Evaluate whether peer will close the connection.

    @rtype: bool
    @return: Whether peer will close the connection

    """
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
    # for the sender to signal that the connection will be closed after
    # completion of the response. For example,
    #
    #        Connection: close
    #
    # in either the request or the response header fields indicates that the
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
    # current request/response is complete."

    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
    if hdr_connection:
      hdr_connection = hdr_connection.lower()

    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
    if self.msg.start_line.version == HTTP_1_1:
      # Convert explicitly, a missing header must yield False and not None
      return bool(hdr_connection and "close" in hdr_connection)

    # Some HTTP/1.0 implementations have support for persistent connections,
    # using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
      return False

    # At least Akamai returns a "Connection: Keep-Alive" header, which was
    # supposed to be sent by the client.
    if hdr_connection and "keep-alive" in hdr_connection:
      return False

    return True

  def _ParseHeaders(self):
    """Parses the headers.

    This function also adjusts internal variables based on header values.

    RFC2616, section 4.3: The presence of a message-body in a request is
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
    field in the request's message-headers.

    """
    # Parse headers
    self.header_buffer.seek(0, 0)
    self.msg.headers = mimetools.Message(self.header_buffer, 0)

    self.peer_will_close = self._WillPeerCloseConnection()

    # Do we have a Content-Length header?
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
    if hdr_content_length:
      try:
        self.content_length = int(hdr_content_length)
      except ValueError:
        # Not a number, treat as if no length was given
        self.content_length = None
      if self.content_length is not None and self.content_length < 0:
        self.content_length = None

    # if the connection remains open and a content-length was not provided,
    # then assume that the connection WILL close.
    if self.content_length is None:
      self.peer_will_close = True