Statistics
| Branch: | Tag: | Revision:

root / lib / http / __init__.py @ b459a848

History | View | Annotate | Download (27.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2008, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""HTTP module.
22

23
"""
24

    
25
import logging
26
import mimetools
27
import OpenSSL
28
import select
29
import socket
30
import errno
31

    
32
from cStringIO import StringIO
33

    
34
from ganeti import constants
35
from ganeti import utils
36

    
37

    
38
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION
39

    
40
HTTP_OK = 200
41
HTTP_NO_CONTENT = 204
42
HTTP_NOT_MODIFIED = 304
43

    
44
HTTP_0_9 = "HTTP/0.9"
45
HTTP_1_0 = "HTTP/1.0"
46
HTTP_1_1 = "HTTP/1.1"
47

    
48
HTTP_GET = "GET"
49
HTTP_HEAD = "HEAD"
50
HTTP_POST = "POST"
51
HTTP_PUT = "PUT"
52
HTTP_DELETE = "DELETE"
53

    
54
HTTP_ETAG = "ETag"
55
HTTP_HOST = "Host"
56
HTTP_SERVER = "Server"
57
HTTP_DATE = "Date"
58
HTTP_USER_AGENT = "User-Agent"
59
HTTP_CONTENT_TYPE = "Content-Type"
60
HTTP_CONTENT_LENGTH = "Content-Length"
61
HTTP_CONNECTION = "Connection"
62
HTTP_KEEP_ALIVE = "Keep-Alive"
63
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
64
HTTP_AUTHORIZATION = "Authorization"
65
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
66
HTTP_ALLOW = "Allow"
67

    
68
HTTP_APP_OCTET_STREAM = "application/octet-stream"
69
HTTP_APP_JSON = "application/json"
70

    
71
_SSL_UNEXPECTED_EOF = "Unexpected EOF"
72

    
73
# Socket operations
74
(SOCKOP_SEND,
75
 SOCKOP_RECV,
76
 SOCKOP_SHUTDOWN,
77
 SOCKOP_HANDSHAKE) = range(4)
78

    
79
# send/receive quantum
80
SOCK_BUF_SIZE = 32768
81

    
82

    
83
class HttpError(Exception):
84
  """Internal exception for HTTP errors.
85

86
  This should only be used for internal error reporting.
87

88
  """
89

    
90

    
91
class HttpConnectionClosed(Exception):
92
  """Internal exception for a closed connection.
93

94
  This should only be used for internal error reporting. Only use
95
  it if there's no other way to report this condition.
96

97
  """
98

    
99

    
100
class HttpSessionHandshakeUnexpectedEOF(HttpError):
101
  """Internal exception for errors during SSL handshake.
102

103
  This should only be used for internal error reporting.
104

105
  """
106

    
107

    
108
class HttpSocketTimeout(Exception):
109
  """Internal exception for socket timeouts.
110

111
  This should only be used for internal error reporting.
112

113
  """
114

    
115

    
116
class HttpException(Exception):
117
  code = None
118
  message = None
119

    
120
  def __init__(self, message=None, headers=None):
121
    Exception.__init__(self)
122
    self.message = message
123
    self.headers = headers
124

    
125

    
126
class HttpBadRequest(HttpException):
127
  """400 Bad Request
128

129
  RFC2616, 10.4.1: The request could not be understood by the server
130
  due to malformed syntax. The client SHOULD NOT repeat the request
131
  without modifications.
132

133
  """
134
  code = 400
135

    
136

    
137
class HttpUnauthorized(HttpException):
138
  """401 Unauthorized
139

140
  RFC2616, section 10.4.2: The request requires user
141
  authentication. The response MUST include a WWW-Authenticate header
142
  field (section 14.47) containing a challenge applicable to the
143
  requested resource.
144

145
  """
146
  code = 401
147

    
148

    
149
class HttpForbidden(HttpException):
150
  """403 Forbidden
151

152
  RFC2616, 10.4.4: The server understood the request, but is refusing
153
  to fulfill it.  Authorization will not help and the request SHOULD
154
  NOT be repeated.
155

156
  """
157
  code = 403
158

    
159

    
160
class HttpNotFound(HttpException):
161
  """404 Not Found
162

163
  RFC2616, 10.4.5: The server has not found anything matching the
164
  Request-URI.  No indication is given of whether the condition is
165
  temporary or permanent.
166

167
  """
168
  code = 404
169

    
170

    
171
class HttpMethodNotAllowed(HttpException):
172
  """405 Method Not Allowed
173

174
  RFC2616, 10.4.6: The method specified in the Request-Line is not
175
  allowed for the resource identified by the Request-URI. The response
176
  MUST include an Allow header containing a list of valid methods for
177
  the requested resource.
178

179
  """
180
  code = 405
181

    
182

    
183
class HttpNotAcceptable(HttpException):
184
  """406 Not Acceptable
185

186
  RFC2616, 10.4.7: The resource identified by the request is only capable of
187
  generating response entities which have content characteristics not
188
  acceptable according to the accept headers sent in the request.
189

190
  """
191
  code = 406
192

    
193

    
194
class HttpRequestTimeout(HttpException):
195
  """408 Request Timeout
196

197
  RFC2616, 10.4.9: The client did not produce a request within the
198
  time that the server was prepared to wait. The client MAY repeat the
199
  request without modifications at any later time.
200

201
  """
202
  code = 408
203

    
204

    
205
class HttpConflict(HttpException):
206
  """409 Conflict
207

208
  RFC2616, 10.4.10: The request could not be completed due to a
209
  conflict with the current state of the resource. This code is only
210
  allowed in situations where it is expected that the user might be
211
  able to resolve the conflict and resubmit the request.
212

213
  """
214
  code = 409
215

    
216

    
217
class HttpGone(HttpException):
218
  """410 Gone
219

220
  RFC2616, 10.4.11: The requested resource is no longer available at
221
  the server and no forwarding address is known. This condition is
222
  expected to be considered permanent.
223

224
  """
225
  code = 410
226

    
227

    
228
class HttpLengthRequired(HttpException):
229
  """411 Length Required
230

231
  RFC2616, 10.4.12: The server refuses to accept the request without a
232
  defined Content-Length. The client MAY repeat the request if it adds
233
  a valid Content-Length header field containing the length of the
234
  message-body in the request message.
235

236
  """
237
  code = 411
238

    
239

    
240
class HttpPreconditionFailed(HttpException):
241
  """412 Precondition Failed
242

243
  RFC2616, 10.4.13: The precondition given in one or more of the
244
  request-header fields evaluated to false when it was tested on the
245
  server.
246

247
  """
248
  code = 412
249

    
250

    
251
class HttpUnsupportedMediaType(HttpException):
252
  """415 Unsupported Media Type
253

254
  RFC2616, 10.4.16: The server is refusing to service the request because the
255
  entity of the request is in a format not supported by the requested resource
256
  for the requested method.
257

258
  """
259
  code = 415
260

    
261

    
262
class HttpInternalServerError(HttpException):
263
  """500 Internal Server Error
264

265
  RFC2616, 10.5.1: The server encountered an unexpected condition
266
  which prevented it from fulfilling the request.
267

268
  """
269
  code = 500
270

    
271

    
272
class HttpNotImplemented(HttpException):
273
  """501 Not Implemented
274

275
  RFC2616, 10.5.2: The server does not support the functionality
276
  required to fulfill the request.
277

278
  """
279
  code = 501
280

    
281

    
282
class HttpBadGateway(HttpException):
283
  """502 Bad Gateway
284

285
  RFC2616, 10.5.3: The server, while acting as a gateway or proxy,
286
  received an invalid response from the upstream server it accessed in
287
  attempting to fulfill the request.
288

289
  """
290
  code = 502
291

    
292

    
293
class HttpServiceUnavailable(HttpException):
294
  """503 Service Unavailable
295

296
  RFC2616, 10.5.4: The server is currently unable to handle the
297
  request due to a temporary overloading or maintenance of the server.
298

299
  """
300
  code = 503
301

    
302

    
303
class HttpGatewayTimeout(HttpException):
304
  """504 Gateway Timeout
305

306
  RFC2616, 10.5.5: The server, while acting as a gateway or proxy, did
307
  not receive a timely response from the upstream server specified by
308
  the URI (e.g.  HTTP, FTP, LDAP) or some other auxiliary server
309
  (e.g. DNS) it needed to access in attempting to complete the
310
  request.
311

312
  """
313
  code = 504
314

    
315

    
316
class HttpVersionNotSupported(HttpException):
317
  """505 HTTP Version Not Supported
318

319
  RFC2616, 10.5.6: The server does not support, or refuses to support,
320
  the HTTP protocol version that was used in the request message.
321

322
  """
323
  code = 505
324

    
325

    
326
def SocketOperation(sock, op, arg1, timeout):
327
  """Wrapper around socket functions.
328

329
  This function abstracts error handling for socket operations, especially
330
  for the complicated interaction with OpenSSL.
331

332
  @type sock: socket
333
  @param sock: Socket for the operation
334
  @type op: int
335
  @param op: Operation to execute (SOCKOP_* constants)
336
  @type arg1: any
337
  @param arg1: Parameter for function (if needed)
338
  @type timeout: None or float
339
  @param timeout: Timeout in seconds or None
340
  @return: Return value of socket function
341

342
  """
343
  # TODO: event_poll/event_check/override
344
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
345
    event_poll = select.POLLOUT
346

    
347
  elif op == SOCKOP_RECV:
348
    event_poll = select.POLLIN
349

    
350
  elif op == SOCKOP_SHUTDOWN:
351
    event_poll = None
352

    
353
    # The timeout is only used when OpenSSL requests polling for a condition.
354
    # It is not advisable to have no timeout for shutdown.
355
    assert timeout
356

    
357
  else:
358
    raise AssertionError("Invalid socket operation")
359

    
360
  # Handshake is only supported by SSL sockets
361
  if (op == SOCKOP_HANDSHAKE and
362
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
363
    return
364

    
365
  # No override by default
366
  event_override = 0
367

    
368
  while True:
369
    # Poll only for certain operations and when asked for by an override
370
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
371
      if event_override:
372
        wait_for_event = event_override
373
      else:
374
        wait_for_event = event_poll
375

    
376
      event = utils.WaitForFdCondition(sock, wait_for_event, timeout)
377
      if event is None:
378
        raise HttpSocketTimeout()
379

    
380
      if event & (select.POLLNVAL | select.POLLHUP | select.POLLERR):
381
        # Let the socket functions handle these
382
        break
383

    
384
      if not event & wait_for_event:
385
        continue
386

    
387
    # Reset override
388
    event_override = 0
389

    
390
    try:
391
      try:
392
        if op == SOCKOP_SEND:
393
          return sock.send(arg1)
394

    
395
        elif op == SOCKOP_RECV:
396
          return sock.recv(arg1)
397

    
398
        elif op == SOCKOP_SHUTDOWN:
399
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
400
            # PyOpenSSL's shutdown() doesn't take arguments
401
            return sock.shutdown()
402
          else:
403
            return sock.shutdown(arg1)
404

    
405
        elif op == SOCKOP_HANDSHAKE:
406
          return sock.do_handshake()
407

    
408
      except OpenSSL.SSL.WantWriteError:
409
        # OpenSSL wants to write, poll for POLLOUT
410
        event_override = select.POLLOUT
411
        continue
412

    
413
      except OpenSSL.SSL.WantReadError:
414
        # OpenSSL wants to read, poll for POLLIN
415
        event_override = select.POLLIN | select.POLLPRI
416
        continue
417

    
418
      except OpenSSL.SSL.WantX509LookupError:
419
        continue
420

    
421
      except OpenSSL.SSL.ZeroReturnError, err:
422
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
423
        # occurs if a closure alert has occurred in the protocol, i.e. the
424
        # connection has been closed cleanly. Note that this does not
425
        # necessarily mean that the transport layer (e.g. a socket) has been
426
        # closed.
427
        if op == SOCKOP_SEND:
428
          # Can happen during a renegotiation
429
          raise HttpConnectionClosed(err.args)
430
        elif op == SOCKOP_RECV:
431
          return ""
432

    
433
        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
434
        raise socket.error(err.args)
435

    
436
      except OpenSSL.SSL.SysCallError, err:
437
        if op == SOCKOP_SEND:
438
          # arg1 is the data when writing
439
          if err.args and err.args[0] == -1 and arg1 == "":
440
            # errors when writing empty strings are expected
441
            # and can be ignored
442
            return 0
443

    
444
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
445
          if op == SOCKOP_RECV:
446
            return ""
447
          elif op == SOCKOP_HANDSHAKE:
448
            # Can happen if peer disconnects directly after the connection is
449
            # opened.
450
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
451

    
452
        raise socket.error(err.args)
453

    
454
      except OpenSSL.SSL.Error, err:
455
        raise socket.error(err.args)
456

    
457
    except socket.error, err:
458
      if err.args and err.args[0] == errno.EAGAIN:
459
        # Ignore EAGAIN
460
        continue
461

    
462
      raise
463

    
464

    
465
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
466
  """Closes the connection.
467

468
  @type sock: socket
469
  @param sock: Socket to be shut down
470
  @type close_timeout: float
471
  @param close_timeout: How long to wait for the peer to close
472
      the connection
473
  @type write_timeout: float
474
  @param write_timeout: Write timeout for shutdown
475
  @type msgreader: http.HttpMessageReader
476
  @param msgreader: Request message reader, used to determine whether
477
      peer should close connection
478
  @type force: bool
479
  @param force: Whether to forcibly close the connection without
480
      waiting for peer
481

482
  """
483
  #print msgreader.peer_will_close, force
484
  if msgreader and msgreader.peer_will_close and not force:
485
    # Wait for peer to close
486
    try:
487
      # Check whether it's actually closed
488
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
489
        return
490
    except (socket.error, HttpError, HttpSocketTimeout):
491
      # Ignore errors at this stage
492
      pass
493

    
494
  # Close the connection from our side
495
  try:
496
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
497
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
498
                    write_timeout)
499
  except HttpSocketTimeout:
500
    raise HttpError("Timeout while shutting down connection")
501
  except socket.error, err:
502
    # Ignore ENOTCONN
503
    if not (err.args and err.args[0] == errno.ENOTCONN):
504
      raise HttpError("Error while shutting down connection: %s" % err)
505

    
506

    
507
def Handshake(sock, write_timeout):
508
  """Shakes peer's hands.
509

510
  @type sock: socket
511
  @param sock: Socket to be shut down
512
  @type write_timeout: float
513
  @param write_timeout: Write timeout for handshake
514

515
  """
516
  try:
517
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
518
  except HttpSocketTimeout:
519
    raise HttpError("Timeout during SSL handshake")
520
  except socket.error, err:
521
    raise HttpError("Error in SSL handshake: %s" % err)
522

    
523

    
524
def InitSsl():
525
  """Initializes the SSL infrastructure.
526

527
  This function is idempotent.
528

529
  """
530
  if not OpenSSL.rand.status():
531
    raise EnvironmentError("OpenSSL could not collect enough entropy"
532
                           " for the PRNG")
533

    
534
  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
535

    
536

    
537
class HttpSslParams(object):
538
  """Data class for SSL key and certificate.
539

540
  """
541
  def __init__(self, ssl_key_path, ssl_cert_path):
542
    """Initializes this class.
543

544
    @type ssl_key_path: string
545
    @param ssl_key_path: Path to file containing SSL key in PEM format
546
    @type ssl_cert_path: string
547
    @param ssl_cert_path: Path to file containing SSL certificate
548
        in PEM format
549

550
    """
551
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
552
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)
553
    self.ssl_cert_path = ssl_cert_path
554

    
555
  def GetKey(self):
556
    return OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
557
                                          self.ssl_key_pem)
558

    
559
  def GetCertificate(self):
560
    return OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
561
                                           self.ssl_cert_pem)
562

    
563

    
564
class HttpBase(object):
565
  """Base class for HTTP server and client.
566

567
  """
568
  def __init__(self):
569
    self.using_ssl = None
570
    self._ssl_params = None
571
    self._ssl_key = None
572
    self._ssl_cert = None
573

    
574
  def _CreateSocket(self, ssl_params, ssl_verify_peer, family):
575
    """Creates a TCP socket and initializes SSL if needed.
576

577
    @type ssl_params: HttpSslParams
578
    @param ssl_params: SSL key and certificate
579
    @type ssl_verify_peer: bool
580
    @param ssl_verify_peer: Whether to require client certificate
581
        and compare it with our certificate
582
    @type family: int
583
    @param family: socket.AF_INET | socket.AF_INET6
584

585
    """
586
    assert family in (socket.AF_INET, socket.AF_INET6)
587

    
588
    self._ssl_params = ssl_params
589
    sock = socket.socket(family, socket.SOCK_STREAM)
590

    
591
    # Should we enable SSL?
592
    self.using_ssl = ssl_params is not None
593

    
594
    if not self.using_ssl:
595
      return sock
596

    
597
    self._ssl_key = ssl_params.GetKey()
598
    self._ssl_cert = ssl_params.GetCertificate()
599

    
600
    ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
601
    ctx.set_options(OpenSSL.SSL.OP_NO_SSLv2)
602

    
603
    ciphers = self.GetSslCiphers()
604
    logging.debug("Setting SSL cipher string %s", ciphers)
605
    ctx.set_cipher_list(ciphers)
606

    
607
    ctx.use_privatekey(self._ssl_key)
608
    ctx.use_certificate(self._ssl_cert)
609
    ctx.check_privatekey()
610

    
611
    if ssl_verify_peer:
612
      ctx.set_verify(OpenSSL.SSL.VERIFY_PEER |
613
                     OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
614
                     self._SSLVerifyCallback)
615

    
616
      # Also add our certificate as a trusted CA to be sent to the client.
617
      # This is required at least for GnuTLS clients to work.
618
      try:
619
        # This will fail for PyOpenssl versions before 0.10
620
        ctx.add_client_ca(self._ssl_cert)
621
      except AttributeError:
622
        # Fall back to letting OpenSSL read the certificate file directly.
623
        ctx.load_client_ca(ssl_params.ssl_cert_path)
624

    
625
    return OpenSSL.SSL.Connection(ctx, sock)
626

    
627
  def GetSslCiphers(self): # pylint: disable=R0201
628
    """Returns the ciphers string for SSL.
629

630
    """
631
    return constants.OPENSSL_CIPHERS
632

    
633
  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
634
    """Verify the certificate provided by the peer
635

636
    We only compare fingerprints. The client must use the same certificate as
637
    we do on our side.
638

639
    """
640
    # some parameters are unused, but this is the API
641
    # pylint: disable=W0613
642
    assert self._ssl_params, "SSL not initialized"
643

    
644
    return (self._ssl_cert.digest("sha1") == cert.digest("sha1") and
645
            self._ssl_cert.digest("md5") == cert.digest("md5"))
646

    
647

    
648
class HttpMessage(object):
649
  """Data structure for HTTP message.
650

651
  """
652
  def __init__(self):
653
    self.start_line = None
654
    self.headers = None
655
    self.body = None
656

    
657

    
658
class HttpClientToServerStartLine(object):
659
  """Data structure for HTTP request start line.
660

661
  """
662
  def __init__(self, method, path, version):
663
    self.method = method
664
    self.path = path
665
    self.version = version
666

    
667
  def __str__(self):
668
    return "%s %s %s" % (self.method, self.path, self.version)
669

    
670

    
671
class HttpServerToClientStartLine(object):
672
  """Data structure for HTTP response start line.
673

674
  """
675
  def __init__(self, version, code, reason):
676
    self.version = version
677
    self.code = code
678
    self.reason = reason
679

    
680
  def __str__(self):
681
    return "%s %s %s" % (self.version, self.code, self.reason)
682

    
683

    
684
class HttpMessageWriter(object):
685
  """Writes an HTTP message to a socket.
686

687
  """
688
  def __init__(self, sock, msg, write_timeout):
689
    """Initializes this class and writes an HTTP message to a socket.
690

691
    @type sock: socket
692
    @param sock: Socket to be written to
693
    @type msg: http.HttpMessage
694
    @param msg: HTTP message to be written
695
    @type write_timeout: float
696
    @param write_timeout: Write timeout for socket
697

698
    """
699
    self._msg = msg
700

    
701
    self._PrepareMessage()
702

    
703
    buf = self._FormatMessage()
704

    
705
    pos = 0
706
    end = len(buf)
707
    while pos < end:
708
      # Send only SOCK_BUF_SIZE bytes at a time
709
      data = buf[pos:(pos + SOCK_BUF_SIZE)]
710

    
711
      sent = SocketOperation(sock, SOCKOP_SEND, data, write_timeout)
712

    
713
      # Remove sent bytes
714
      pos += sent
715

    
716
    assert pos == end, "Message wasn't sent completely"
717

    
718
  def _PrepareMessage(self):
719
    """Prepares the HTTP message by setting mandatory headers.
720

721
    """
722
    # RFC2616, section 4.3: "The presence of a message-body in a request is
723
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
724
    # field in the request's message-headers."
725
    if self._msg.body:
726
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(self._msg.body)
727

    
728
  def _FormatMessage(self):
729
    """Serializes the HTTP message into a string.
730

731
    """
732
    buf = StringIO()
733

    
734
    # Add start line
735
    buf.write(str(self._msg.start_line))
736
    buf.write("\r\n")
737

    
738
    # Add headers
739
    if self._msg.start_line.version != HTTP_0_9:
740
      for name, value in self._msg.headers.iteritems():
741
        buf.write("%s: %s\r\n" % (name, value))
742

    
743
    buf.write("\r\n")
744

    
745
    # Add message body if needed
746
    if self.HasMessageBody():
747
      buf.write(self._msg.body)
748

    
749
    elif self._msg.body:
750
      logging.warning("Ignoring message body")
751

    
752
    return buf.getvalue()
753

    
754
  def HasMessageBody(self):
755
    """Checks whether the HTTP message contains a body.
756

757
    Can be overridden by subclasses.
758

759
    """
760
    return bool(self._msg.body)
761

    
762

    
763
class HttpMessageReader(object):
764
  """Reads HTTP message from socket.
765

766
  """
767
  # Length limits
768
  START_LINE_LENGTH_MAX = None
769
  HEADER_LENGTH_MAX = None
770

    
771
  # Parser state machine
772
  PS_START_LINE = "start-line"
773
  PS_HEADERS = "headers"
774
  PS_BODY = "entity-body"
775
  PS_COMPLETE = "complete"
776

    
777
  def __init__(self, sock, msg, read_timeout):
778
    """Reads an HTTP message from a socket.
779

780
    @type sock: socket
781
    @param sock: Socket to be read from
782
    @type msg: http.HttpMessage
783
    @param msg: Object for the read message
784
    @type read_timeout: float
785
    @param read_timeout: Read timeout for socket
786

787
    """
788
    self.sock = sock
789
    self.msg = msg
790

    
791
    self.start_line_buffer = None
792
    self.header_buffer = StringIO()
793
    self.body_buffer = StringIO()
794
    self.parser_status = self.PS_START_LINE
795
    self.content_length = None
796
    self.peer_will_close = None
797

    
798
    buf = ""
799
    eof = False
800
    while self.parser_status != self.PS_COMPLETE:
801
      # TODO: Don't read more than necessary (Content-Length), otherwise
802
      # data might be lost and/or an error could occur
803
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)
804

    
805
      if data:
806
        buf += data
807
      else:
808
        eof = True
809

    
810
      # Do some parsing and error checking while more data arrives
811
      buf = self._ContinueParsing(buf, eof)
812

    
813
      # Must be done only after the buffer has been evaluated
814
      # TODO: Content-Length < len(data read) and connection closed
815
      if (eof and
816
          self.parser_status in (self.PS_START_LINE,
817
                                 self.PS_HEADERS)):
818
        raise HttpError("Connection closed prematurely")
819

    
820
    # Parse rest
821
    buf = self._ContinueParsing(buf, True)
822

    
823
    assert self.parser_status == self.PS_COMPLETE
824
    assert not buf, "Parser didn't read full response"
825

    
826
    # Body is complete
827
    msg.body = self.body_buffer.getvalue()
828

    
829
  def _ContinueParsing(self, buf, eof):
830
    """Main function for HTTP message state machine.
831

832
    @type buf: string
833
    @param buf: Receive buffer
834
    @type eof: bool
835
    @param eof: Whether we've reached EOF on the socket
836
    @rtype: string
837
    @return: Updated receive buffer
838

839
    """
840
    # TODO: Use offset instead of slicing when possible
841
    if self.parser_status == self.PS_START_LINE:
842
      # Expect start line
843
      while True:
844
        idx = buf.find("\r\n")
845

    
846
        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
847
        # ignore any empty line(s) received where a Request-Line is expected.
848
        # In other words, if the server is reading the protocol stream at the
849
        # beginning of a message and receives a CRLF first, it should ignore
850
        # the CRLF."
851
        if idx == 0:
852
          # TODO: Limit number of CRLFs/empty lines for safety?
853
          buf = buf[2:]
854
          continue
855

    
856
        if idx > 0:
857
          self.start_line_buffer = buf[:idx]
858

    
859
          self._CheckStartLineLength(len(self.start_line_buffer))
860

    
861
          # Remove status line, including CRLF
862
          buf = buf[idx + 2:]
863

    
864
          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)
865

    
866
          self.parser_status = self.PS_HEADERS
867
        else:
868
          # Check whether incoming data is getting too large, otherwise we just
869
          # fill our read buffer.
870
          self._CheckStartLineLength(len(buf))
871

    
872
        break
873

    
874
    # TODO: Handle messages without headers
875
    if self.parser_status == self.PS_HEADERS:
876
      # Wait for header end
877
      idx = buf.find("\r\n\r\n")
878
      if idx >= 0:
879
        self.header_buffer.write(buf[:idx + 2])
880

    
881
        self._CheckHeaderLength(self.header_buffer.tell())
882

    
883
        # Remove headers, including CRLF
884
        buf = buf[idx + 4:]
885

    
886
        self._ParseHeaders()
887

    
888
        self.parser_status = self.PS_BODY
889
      else:
890
        # Check whether incoming data is getting too large, otherwise we just
891
        # fill our read buffer.
892
        self._CheckHeaderLength(len(buf))
893

    
894
    if self.parser_status == self.PS_BODY:
895
      # TODO: Implement max size for body_buffer
896
      self.body_buffer.write(buf)
897
      buf = ""
898

    
899
      # Check whether we've read everything
900
      #
901
      # RFC2616, section 4.4: "When a message-body is included with a message,
902
      # the transfer-length of that body is determined by one of the following
903
      # [...] 5. By the server closing the connection. (Closing the connection
904
      # cannot be used to indicate the end of a request body, since that would
905
      # leave no possibility for the server to send back a response.)"
906
      #
907
      # TODO: Error when buffer length > Content-Length header
908
      if (eof or
909
          self.content_length is None or
910
          (self.content_length is not None and
911
           self.body_buffer.tell() >= self.content_length)):
912
        self.parser_status = self.PS_COMPLETE
913

    
914
    return buf
915

    
916
  def _CheckStartLineLength(self, length):
917
    """Limits the start line buffer size.
918

919
    @type length: int
920
    @param length: Buffer size
921

922
    """
923
    if (self.START_LINE_LENGTH_MAX is not None and
924
        length > self.START_LINE_LENGTH_MAX):
925
      raise HttpError("Start line longer than %d chars" %
926
                       self.START_LINE_LENGTH_MAX)
927

    
928
  def _CheckHeaderLength(self, length):
929
    """Limits the header buffer size.
930

931
    @type length: int
932
    @param length: Buffer size
933

934
    """
935
    if (self.HEADER_LENGTH_MAX is not None and
936
        length > self.HEADER_LENGTH_MAX):
937
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)
938

    
939
  def ParseStartLine(self, start_line):
940
    """Parses the start line of a message.
941

942
    Must be overridden by subclass.
943

944
    @type start_line: string
945
    @param start_line: Start line string
946

947
    """
948
    raise NotImplementedError()
949

    
950
  def _WillPeerCloseConnection(self):
951
    """Evaluate whether peer will close the connection.
952

953
    @rtype: bool
954
    @return: Whether peer will close the connection
955

956
    """
957
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
958
    # for the sender to signal that the connection will be closed after
959
    # completion of the response. For example,
960
    #
961
    #        Connection: close
962
    #
963
    # in either the request or the response header fields indicates that the
964
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
965
    # current request/response is complete."
966

    
967
    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
968
    if hdr_connection:
969
      hdr_connection = hdr_connection.lower()
970

    
971
    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
972
    if self.msg.start_line.version == HTTP_1_1:
973
      return (hdr_connection and "close" in hdr_connection)
974

    
975
    # Some HTTP/1.0 implementations have support for persistent connections,
976
    # using rules different than HTTP/1.1.
977

    
978
    # For older HTTP, Keep-Alive indicates persistent connection.
979
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
980
      return False
981

    
982
    # At least Akamai returns a "Connection: Keep-Alive" header, which was
983
    # supposed to be sent by the client.
984
    if hdr_connection and "keep-alive" in hdr_connection:
985
      return False
986

    
987
    return True
988

    
989
  def _ParseHeaders(self):
990
    """Parses the headers.
991

992
    This function also adjusts internal variables based on header values.
993

994
    RFC2616, section 4.3: The presence of a message-body in a request is
995
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
996
    field in the request's message-headers.
997

998
    """
999
    # Parse headers
1000
    self.header_buffer.seek(0, 0)
1001
    self.msg.headers = mimetools.Message(self.header_buffer, 0)
1002

    
1003
    self.peer_will_close = self._WillPeerCloseConnection()
1004

    
1005
    # Do we have a Content-Length header?
1006
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
1007
    if hdr_content_length:
1008
      try:
1009
        self.content_length = int(hdr_content_length)
1010
      except (TypeError, ValueError):
1011
        self.content_length = None
1012
      if self.content_length is not None and self.content_length < 0:
1013
        self.content_length = None
1014

    
1015
    # if the connection remains open and a content-length was not provided,
1016
    # then assume that the connection WILL close.
1017
    if self.content_length is None:
1018
      self.peer_will_close = True