Statistics
| Branch: | Tag: | Revision:

root / lib / http / __init__.py @ 0e632cbd

History | View | Annotate | Download (27.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2008, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""HTTP module.
22

23
"""
24

    
25
import logging
26
import mimetools
27
import OpenSSL
28
import select
29
import socket
30
import errno
31

    
32
from cStringIO import StringIO
33

    
34
from ganeti import constants
35
from ganeti import utils
36

    
37

    
38
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION
39

    
40
HTTP_OK = 200
41
HTTP_NO_CONTENT = 204
42
HTTP_NOT_MODIFIED = 304
43

    
44
HTTP_0_9 = "HTTP/0.9"
45
HTTP_1_0 = "HTTP/1.0"
46
HTTP_1_1 = "HTTP/1.1"
47

    
48
HTTP_GET = "GET"
49
HTTP_HEAD = "HEAD"
50
HTTP_POST = "POST"
51
HTTP_PUT = "PUT"
52
HTTP_DELETE = "DELETE"
53

    
54
HTTP_ETAG = "ETag"
55
HTTP_HOST = "Host"
56
HTTP_SERVER = "Server"
57
HTTP_DATE = "Date"
58
HTTP_USER_AGENT = "User-Agent"
59
HTTP_CONTENT_TYPE = "Content-Type"
60
HTTP_CONTENT_LENGTH = "Content-Length"
61
HTTP_CONNECTION = "Connection"
62
HTTP_KEEP_ALIVE = "Keep-Alive"
63
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
64
HTTP_AUTHORIZATION = "Authorization"
65
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
66
HTTP_ALLOW = "Allow"
67

    
68
HTTP_APP_OCTET_STREAM = "application/octet-stream"
69
HTTP_APP_JSON = "application/json"
70

    
71
_SSL_UNEXPECTED_EOF = "Unexpected EOF"
72

    
73
# Socket operations
74
(SOCKOP_SEND,
75
 SOCKOP_RECV,
76
 SOCKOP_SHUTDOWN,
77
 SOCKOP_HANDSHAKE) = range(4)
78

    
79
# send/receive quantum
80
SOCK_BUF_SIZE = 32768
81

    
82

    
83
class HttpError(Exception):
84
  """Internal exception for HTTP errors.
85

86
  This should only be used for internal error reporting.
87

88
  """
89

    
90

    
91
class HttpConnectionClosed(Exception):
92
  """Internal exception for a closed connection.
93

94
  This should only be used for internal error reporting. Only use
95
  it if there's no other way to report this condition.
96

97
  """
98

    
99

    
100
class HttpSessionHandshakeUnexpectedEOF(HttpError):
101
  """Internal exception for errors during SSL handshake.
102

103
  This should only be used for internal error reporting.
104

105
  """
106

    
107

    
108
class HttpSocketTimeout(Exception):
109
  """Internal exception for socket timeouts.
110

111
  This should only be used for internal error reporting.
112

113
  """
114

    
115

    
116
class HttpException(Exception):
117
  code = None
118
  message = None
119

    
120
  def __init__(self, message=None, headers=None):
121
    Exception.__init__(self)
122
    self.message = message
123
    self.headers = headers
124

    
125

    
126
class HttpBadRequest(HttpException):
127
  """400 Bad Request
128

129
  RFC2616, 10.4.1: The request could not be understood by the server
130
  due to malformed syntax. The client SHOULD NOT repeat the request
131
  without modifications.
132

133
  """
134
  code = 400
135

    
136

    
137
class HttpUnauthorized(HttpException):
138
  """401 Unauthorized
139

140
  RFC2616, section 10.4.2: The request requires user
141
  authentication. The response MUST include a WWW-Authenticate header
142
  field (section 14.47) containing a challenge applicable to the
143
  requested resource.
144

145
  """
146
  code = 401
147

    
148

    
149
class HttpForbidden(HttpException):
150
  """403 Forbidden
151

152
  RFC2616, 10.4.4: The server understood the request, but is refusing
153
  to fulfill it.  Authorization will not help and the request SHOULD
154
  NOT be repeated.
155

156
  """
157
  code = 403
158

    
159

    
160
class HttpNotFound(HttpException):
161
  """404 Not Found
162

163
  RFC2616, 10.4.5: The server has not found anything matching the
164
  Request-URI.  No indication is given of whether the condition is
165
  temporary or permanent.
166

167
  """
168
  code = 404
169

    
170

    
171
class HttpMethodNotAllowed(HttpException):
172
  """405 Method Not Allowed
173

174
  RFC2616, 10.4.6: The method specified in the Request-Line is not
175
  allowed for the resource identified by the Request-URI. The response
176
  MUST include an Allow header containing a list of valid methods for
177
  the requested resource.
178

179
  """
180
  code = 405
181

    
182

    
183
class HttpNotAcceptable(HttpException):
184
  """406 Not Acceptable
185

186
  RFC2616, 10.4.7: The resource identified by the request is only capable of
187
  generating response entities which have content characteristics not
188
  acceptable according to the accept headers sent in the request.
189

190
  """
191
  code = 406
192

    
193

    
194
class HttpRequestTimeout(HttpException):
195
  """408 Request Timeout
196

197
  RFC2616, 10.4.9: The client did not produce a request within the
198
  time that the server was prepared to wait. The client MAY repeat the
199
  request without modifications at any later time.
200

201
  """
202
  code = 408
203

    
204

    
205
class HttpConflict(HttpException):
206
  """409 Conflict
207

208
  RFC2616, 10.4.10: The request could not be completed due to a
209
  conflict with the current state of the resource. This code is only
210
  allowed in situations where it is expected that the user might be
211
  able to resolve the conflict and resubmit the request.
212

213
  """
214
  code = 409
215

    
216

    
217
class HttpGone(HttpException):
218
  """410 Gone
219

220
  RFC2616, 10.4.11: The requested resource is no longer available at
221
  the server and no forwarding address is known. This condition is
222
  expected to be considered permanent.
223

224
  """
225
  code = 410
226

    
227

    
228
class HttpLengthRequired(HttpException):
229
  """411 Length Required
230

231
  RFC2616, 10.4.12: The server refuses to accept the request without a
232
  defined Content-Length. The client MAY repeat the request if it adds
233
  a valid Content-Length header field containing the length of the
234
  message-body in the request message.
235

236
  """
237
  code = 411
238

    
239

    
240
class HttpPreconditionFailed(HttpException):
241
  """412 Precondition Failed
242

243
  RFC2616, 10.4.13: The precondition given in one or more of the
244
  request-header fields evaluated to false when it was tested on the
245
  server.
246

247
  """
248
  code = 412
249

    
250

    
251
class HttpUnsupportedMediaType(HttpException):
252
  """415 Unsupported Media Type
253

254
  RFC2616, 10.4.16: The server is refusing to service the request because the
255
  entity of the request is in a format not supported by the requested resource
256
  for the requested method.
257

258
  """
259
  code = 415
260

    
261

    
262
class HttpInternalServerError(HttpException):
263
  """500 Internal Server Error
264

265
  RFC2616, 10.5.1: The server encountered an unexpected condition
266
  which prevented it from fulfilling the request.
267

268
  """
269
  code = 500
270

    
271

    
272
class HttpNotImplemented(HttpException):
273
  """501 Not Implemented
274

275
  RFC2616, 10.5.2: The server does not support the functionality
276
  required to fulfill the request.
277

278
  """
279
  code = 501
280

    
281

    
282
class HttpBadGateway(HttpException):
283
  """502 Bad Gateway
284

285
  RFC2616, 10.5.3: The server, while acting as a gateway or proxy,
286
  received an invalid response from the upstream server it accessed in
287
  attempting to fulfill the request.
288

289
  """
290
  code = 502
291

    
292

    
293
class HttpServiceUnavailable(HttpException):
294
  """503 Service Unavailable
295

296
  RFC2616, 10.5.4: The server is currently unable to handle the
297
  request due to a temporary overloading or maintenance of the server.
298

299
  """
300
  code = 503
301

    
302

    
303
class HttpGatewayTimeout(HttpException):
304
  """504 Gateway Timeout
305

306
  RFC2616, 10.5.5: The server, while acting as a gateway or proxy, did
307
  not receive a timely response from the upstream server specified by
308
  the URI (e.g.  HTTP, FTP, LDAP) or some other auxiliary server
309
  (e.g. DNS) it needed to access in attempting to complete the
310
  request.
311

312
  """
313
  code = 504
314

    
315

    
316
class HttpVersionNotSupported(HttpException):
317
  """505 HTTP Version Not Supported
318

319
  RFC2616, 10.5.6: The server does not support, or refuses to support,
320
  the HTTP protocol version that was used in the request message.
321

322
  """
323
  code = 505
324

    
325

    
326
def ParseHeaders(buf):
327
  """Parses HTTP headers.
328

329
  @note: This is just a trivial wrapper around C{mimetools.Message}
330

331
  """
332
  return mimetools.Message(buf, 0)
333

    
334

    
335
def SocketOperation(sock, op, arg1, timeout):
336
  """Wrapper around socket functions.
337

338
  This function abstracts error handling for socket operations, especially
339
  for the complicated interaction with OpenSSL.
340

341
  @type sock: socket
342
  @param sock: Socket for the operation
343
  @type op: int
344
  @param op: Operation to execute (SOCKOP_* constants)
345
  @type arg1: any
346
  @param arg1: Parameter for function (if needed)
347
  @type timeout: None or float
348
  @param timeout: Timeout in seconds or None
349
  @return: Return value of socket function
350

351
  """
352
  # TODO: event_poll/event_check/override
353
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
354
    event_poll = select.POLLOUT
355

    
356
  elif op == SOCKOP_RECV:
357
    event_poll = select.POLLIN
358

    
359
  elif op == SOCKOP_SHUTDOWN:
360
    event_poll = None
361

    
362
    # The timeout is only used when OpenSSL requests polling for a condition.
363
    # It is not advisable to have no timeout for shutdown.
364
    assert timeout
365

    
366
  else:
367
    raise AssertionError("Invalid socket operation")
368

    
369
  # Handshake is only supported by SSL sockets
370
  if (op == SOCKOP_HANDSHAKE and
371
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
372
    return
373

    
374
  # No override by default
375
  event_override = 0
376

    
377
  while True:
378
    # Poll only for certain operations and when asked for by an override
379
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
380
      if event_override:
381
        wait_for_event = event_override
382
      else:
383
        wait_for_event = event_poll
384

    
385
      event = utils.WaitForFdCondition(sock, wait_for_event, timeout)
386
      if event is None:
387
        raise HttpSocketTimeout()
388

    
389
      if event & (select.POLLNVAL | select.POLLHUP | select.POLLERR):
390
        # Let the socket functions handle these
391
        break
392

    
393
      if not event & wait_for_event:
394
        continue
395

    
396
    # Reset override
397
    event_override = 0
398

    
399
    try:
400
      try:
401
        if op == SOCKOP_SEND:
402
          return sock.send(arg1)
403

    
404
        elif op == SOCKOP_RECV:
405
          return sock.recv(arg1)
406

    
407
        elif op == SOCKOP_SHUTDOWN:
408
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
409
            # PyOpenSSL's shutdown() doesn't take arguments
410
            return sock.shutdown()
411
          else:
412
            return sock.shutdown(arg1)
413

    
414
        elif op == SOCKOP_HANDSHAKE:
415
          return sock.do_handshake()
416

    
417
      except OpenSSL.SSL.WantWriteError:
418
        # OpenSSL wants to write, poll for POLLOUT
419
        event_override = select.POLLOUT
420
        continue
421

    
422
      except OpenSSL.SSL.WantReadError:
423
        # OpenSSL wants to read, poll for POLLIN
424
        event_override = select.POLLIN | select.POLLPRI
425
        continue
426

    
427
      except OpenSSL.SSL.WantX509LookupError:
428
        continue
429

    
430
      except OpenSSL.SSL.ZeroReturnError, err:
431
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
432
        # occurs if a closure alert has occurred in the protocol, i.e. the
433
        # connection has been closed cleanly. Note that this does not
434
        # necessarily mean that the transport layer (e.g. a socket) has been
435
        # closed.
436
        if op == SOCKOP_SEND:
437
          # Can happen during a renegotiation
438
          raise HttpConnectionClosed(err.args)
439
        elif op == SOCKOP_RECV:
440
          return ""
441

    
442
        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
443
        raise socket.error(err.args)
444

    
445
      except OpenSSL.SSL.SysCallError, err:
446
        if op == SOCKOP_SEND:
447
          # arg1 is the data when writing
448
          if err.args and err.args[0] == -1 and arg1 == "":
449
            # errors when writing empty strings are expected
450
            # and can be ignored
451
            return 0
452

    
453
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
454
          if op == SOCKOP_RECV:
455
            return ""
456
          elif op == SOCKOP_HANDSHAKE:
457
            # Can happen if peer disconnects directly after the connection is
458
            # opened.
459
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
460

    
461
        raise socket.error(err.args)
462

    
463
      except OpenSSL.SSL.Error, err:
464
        raise socket.error(err.args)
465

    
466
    except socket.error, err:
467
      if err.args and err.args[0] == errno.EAGAIN:
468
        # Ignore EAGAIN
469
        continue
470

    
471
      raise
472

    
473

    
474
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
475
  """Closes the connection.
476

477
  @type sock: socket
478
  @param sock: Socket to be shut down
479
  @type close_timeout: float
480
  @param close_timeout: How long to wait for the peer to close
481
      the connection
482
  @type write_timeout: float
483
  @param write_timeout: Write timeout for shutdown
484
  @type msgreader: http.HttpMessageReader
485
  @param msgreader: Request message reader, used to determine whether
486
      peer should close connection
487
  @type force: bool
488
  @param force: Whether to forcibly close the connection without
489
      waiting for peer
490

491
  """
492
  #print msgreader.peer_will_close, force
493
  if msgreader and msgreader.peer_will_close and not force:
494
    # Wait for peer to close
495
    try:
496
      # Check whether it's actually closed
497
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
498
        return
499
    except (socket.error, HttpError, HttpSocketTimeout):
500
      # Ignore errors at this stage
501
      pass
502

    
503
  # Close the connection from our side
504
  try:
505
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
506
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
507
                    write_timeout)
508
  except HttpSocketTimeout:
509
    raise HttpError("Timeout while shutting down connection")
510
  except socket.error, err:
511
    # Ignore ENOTCONN
512
    if not (err.args and err.args[0] == errno.ENOTCONN):
513
      raise HttpError("Error while shutting down connection: %s" % err)
514

    
515

    
516
def Handshake(sock, write_timeout):
517
  """Shakes peer's hands.
518

519
  @type sock: socket
520
  @param sock: Socket to be shut down
521
  @type write_timeout: float
522
  @param write_timeout: Write timeout for handshake
523

524
  """
525
  try:
526
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
527
  except HttpSocketTimeout:
528
    raise HttpError("Timeout during SSL handshake")
529
  except socket.error, err:
530
    raise HttpError("Error in SSL handshake: %s" % err)
531

    
532

    
533
def InitSsl():
534
  """Initializes the SSL infrastructure.
535

536
  This function is idempotent.
537

538
  """
539
  if not OpenSSL.rand.status():
540
    raise EnvironmentError("OpenSSL could not collect enough entropy"
541
                           " for the PRNG")
542

    
543
  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
544

    
545

    
546
class HttpSslParams(object):
547
  """Data class for SSL key and certificate.
548

549
  """
550
  def __init__(self, ssl_key_path, ssl_cert_path):
551
    """Initializes this class.
552

553
    @type ssl_key_path: string
554
    @param ssl_key_path: Path to file containing SSL key in PEM format
555
    @type ssl_cert_path: string
556
    @param ssl_cert_path: Path to file containing SSL certificate
557
        in PEM format
558

559
    """
560
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
561
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)
562
    self.ssl_cert_path = ssl_cert_path
563

    
564
  def GetKey(self):
565
    return OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
566
                                          self.ssl_key_pem)
567

    
568
  def GetCertificate(self):
569
    return OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
570
                                           self.ssl_cert_pem)
571

    
572

    
573
class HttpBase(object):
574
  """Base class for HTTP server and client.
575

576
  """
577
  def __init__(self):
578
    self.using_ssl = None
579
    self._ssl_params = None
580
    self._ssl_key = None
581
    self._ssl_cert = None
582

    
583
  def _CreateSocket(self, ssl_params, ssl_verify_peer, family):
584
    """Creates a TCP socket and initializes SSL if needed.
585

586
    @type ssl_params: HttpSslParams
587
    @param ssl_params: SSL key and certificate
588
    @type ssl_verify_peer: bool
589
    @param ssl_verify_peer: Whether to require client certificate
590
        and compare it with our certificate
591
    @type family: int
592
    @param family: socket.AF_INET | socket.AF_INET6
593

594
    """
595
    assert family in (socket.AF_INET, socket.AF_INET6)
596

    
597
    self._ssl_params = ssl_params
598
    sock = socket.socket(family, socket.SOCK_STREAM)
599

    
600
    # Should we enable SSL?
601
    self.using_ssl = ssl_params is not None
602

    
603
    if not self.using_ssl:
604
      return sock
605

    
606
    self._ssl_key = ssl_params.GetKey()
607
    self._ssl_cert = ssl_params.GetCertificate()
608

    
609
    ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
610
    ctx.set_options(OpenSSL.SSL.OP_NO_SSLv2)
611

    
612
    ciphers = self.GetSslCiphers()
613
    logging.debug("Setting SSL cipher string %s", ciphers)
614
    ctx.set_cipher_list(ciphers)
615

    
616
    ctx.use_privatekey(self._ssl_key)
617
    ctx.use_certificate(self._ssl_cert)
618
    ctx.check_privatekey()
619

    
620
    if ssl_verify_peer:
621
      ctx.set_verify(OpenSSL.SSL.VERIFY_PEER |
622
                     OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
623
                     self._SSLVerifyCallback)
624

    
625
      # Also add our certificate as a trusted CA to be sent to the client.
626
      # This is required at least for GnuTLS clients to work.
627
      try:
628
        # This will fail for PyOpenssl versions before 0.10
629
        ctx.add_client_ca(self._ssl_cert)
630
      except AttributeError:
631
        # Fall back to letting OpenSSL read the certificate file directly.
632
        ctx.load_client_ca(ssl_params.ssl_cert_path)
633

    
634
    return OpenSSL.SSL.Connection(ctx, sock)
635

    
636
  def GetSslCiphers(self): # pylint: disable=R0201
637
    """Returns the ciphers string for SSL.
638

639
    """
640
    return constants.OPENSSL_CIPHERS
641

    
642
  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
643
    """Verify the certificate provided by the peer
644

645
    We only compare fingerprints. The client must use the same certificate as
646
    we do on our side.
647

648
    """
649
    # some parameters are unused, but this is the API
650
    # pylint: disable=W0613
651
    assert self._ssl_params, "SSL not initialized"
652

    
653
    return (self._ssl_cert.digest("sha1") == cert.digest("sha1") and
654
            self._ssl_cert.digest("md5") == cert.digest("md5"))
655

    
656

    
657
class HttpMessage(object):
658
  """Data structure for HTTP message.
659

660
  """
661
  def __init__(self):
662
    self.start_line = None
663
    self.headers = None
664
    self.body = None
665

    
666

    
667
class HttpClientToServerStartLine(object):
668
  """Data structure for HTTP request start line.
669

670
  """
671
  def __init__(self, method, path, version):
672
    self.method = method
673
    self.path = path
674
    self.version = version
675

    
676
  def __str__(self):
677
    return "%s %s %s" % (self.method, self.path, self.version)
678

    
679

    
680
class HttpServerToClientStartLine(object):
681
  """Data structure for HTTP response start line.
682

683
  """
684
  def __init__(self, version, code, reason):
685
    self.version = version
686
    self.code = code
687
    self.reason = reason
688

    
689
  def __str__(self):
690
    return "%s %s %s" % (self.version, self.code, self.reason)
691

    
692

    
693
class HttpMessageWriter(object):
694
  """Writes an HTTP message to a socket.
695

696
  """
697
  def __init__(self, sock, msg, write_timeout):
698
    """Initializes this class and writes an HTTP message to a socket.
699

700
    @type sock: socket
701
    @param sock: Socket to be written to
702
    @type msg: http.HttpMessage
703
    @param msg: HTTP message to be written
704
    @type write_timeout: float
705
    @param write_timeout: Write timeout for socket
706

707
    """
708
    self._msg = msg
709

    
710
    self._PrepareMessage()
711

    
712
    buf = self._FormatMessage()
713

    
714
    pos = 0
715
    end = len(buf)
716
    while pos < end:
717
      # Send only SOCK_BUF_SIZE bytes at a time
718
      data = buf[pos:(pos + SOCK_BUF_SIZE)]
719

    
720
      sent = SocketOperation(sock, SOCKOP_SEND, data, write_timeout)
721

    
722
      # Remove sent bytes
723
      pos += sent
724

    
725
    assert pos == end, "Message wasn't sent completely"
726

    
727
  def _PrepareMessage(self):
728
    """Prepares the HTTP message by setting mandatory headers.
729

730
    """
731
    # RFC2616, section 4.3: "The presence of a message-body in a request is
732
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
733
    # field in the request's message-headers."
734
    if self._msg.body:
735
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(self._msg.body)
736

    
737
  def _FormatMessage(self):
738
    """Serializes the HTTP message into a string.
739

740
    """
741
    buf = StringIO()
742

    
743
    # Add start line
744
    buf.write(str(self._msg.start_line))
745
    buf.write("\r\n")
746

    
747
    # Add headers
748
    if self._msg.start_line.version != HTTP_0_9:
749
      for name, value in self._msg.headers.iteritems():
750
        buf.write("%s: %s\r\n" % (name, value))
751

    
752
    buf.write("\r\n")
753

    
754
    # Add message body if needed
755
    if self.HasMessageBody():
756
      buf.write(self._msg.body)
757

    
758
    elif self._msg.body:
759
      logging.warning("Ignoring message body")
760

    
761
    return buf.getvalue()
762

    
763
  def HasMessageBody(self):
764
    """Checks whether the HTTP message contains a body.
765

766
    Can be overridden by subclasses.
767

768
    """
769
    return bool(self._msg.body)
770

    
771

    
772
class HttpMessageReader(object):
773
  """Reads HTTP message from socket.
774

775
  """
776
  # Length limits
777
  START_LINE_LENGTH_MAX = None
778
  HEADER_LENGTH_MAX = None
779

    
780
  # Parser state machine
781
  PS_START_LINE = "start-line"
782
  PS_HEADERS = "headers"
783
  PS_BODY = "entity-body"
784
  PS_COMPLETE = "complete"
785

    
786
  def __init__(self, sock, msg, read_timeout):
787
    """Reads an HTTP message from a socket.
788

789
    @type sock: socket
790
    @param sock: Socket to be read from
791
    @type msg: http.HttpMessage
792
    @param msg: Object for the read message
793
    @type read_timeout: float
794
    @param read_timeout: Read timeout for socket
795

796
    """
797
    self.sock = sock
798
    self.msg = msg
799

    
800
    self.start_line_buffer = None
801
    self.header_buffer = StringIO()
802
    self.body_buffer = StringIO()
803
    self.parser_status = self.PS_START_LINE
804
    self.content_length = None
805
    self.peer_will_close = None
806

    
807
    buf = ""
808
    eof = False
809
    while self.parser_status != self.PS_COMPLETE:
810
      # TODO: Don't read more than necessary (Content-Length), otherwise
811
      # data might be lost and/or an error could occur
812
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)
813

    
814
      if data:
815
        buf += data
816
      else:
817
        eof = True
818

    
819
      # Do some parsing and error checking while more data arrives
820
      buf = self._ContinueParsing(buf, eof)
821

    
822
      # Must be done only after the buffer has been evaluated
823
      # TODO: Content-Length < len(data read) and connection closed
824
      if (eof and
825
          self.parser_status in (self.PS_START_LINE,
826
                                 self.PS_HEADERS)):
827
        raise HttpError("Connection closed prematurely")
828

    
829
    # Parse rest
830
    buf = self._ContinueParsing(buf, True)
831

    
832
    assert self.parser_status == self.PS_COMPLETE
833
    assert not buf, "Parser didn't read full response"
834

    
835
    # Body is complete
836
    msg.body = self.body_buffer.getvalue()
837

    
838
  def _ContinueParsing(self, buf, eof):
839
    """Main function for HTTP message state machine.
840

841
    @type buf: string
842
    @param buf: Receive buffer
843
    @type eof: bool
844
    @param eof: Whether we've reached EOF on the socket
845
    @rtype: string
846
    @return: Updated receive buffer
847

848
    """
849
    # TODO: Use offset instead of slicing when possible
850
    if self.parser_status == self.PS_START_LINE:
851
      # Expect start line
852
      while True:
853
        idx = buf.find("\r\n")
854

    
855
        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
856
        # ignore any empty line(s) received where a Request-Line is expected.
857
        # In other words, if the server is reading the protocol stream at the
858
        # beginning of a message and receives a CRLF first, it should ignore
859
        # the CRLF."
860
        if idx == 0:
861
          # TODO: Limit number of CRLFs/empty lines for safety?
862
          buf = buf[2:]
863
          continue
864

    
865
        if idx > 0:
866
          self.start_line_buffer = buf[:idx]
867

    
868
          self._CheckStartLineLength(len(self.start_line_buffer))
869

    
870
          # Remove status line, including CRLF
871
          buf = buf[idx + 2:]
872

    
873
          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)
874

    
875
          self.parser_status = self.PS_HEADERS
876
        else:
877
          # Check whether incoming data is getting too large, otherwise we just
878
          # fill our read buffer.
879
          self._CheckStartLineLength(len(buf))
880

    
881
        break
882

    
883
    # TODO: Handle messages without headers
884
    if self.parser_status == self.PS_HEADERS:
885
      # Wait for header end
886
      idx = buf.find("\r\n\r\n")
887
      if idx >= 0:
888
        self.header_buffer.write(buf[:idx + 2])
889

    
890
        self._CheckHeaderLength(self.header_buffer.tell())
891

    
892
        # Remove headers, including CRLF
893
        buf = buf[idx + 4:]
894

    
895
        self._ParseHeaders()
896

    
897
        self.parser_status = self.PS_BODY
898
      else:
899
        # Check whether incoming data is getting too large, otherwise we just
900
        # fill our read buffer.
901
        self._CheckHeaderLength(len(buf))
902

    
903
    if self.parser_status == self.PS_BODY:
904
      # TODO: Implement max size for body_buffer
905
      self.body_buffer.write(buf)
906
      buf = ""
907

    
908
      # Check whether we've read everything
909
      #
910
      # RFC2616, section 4.4: "When a message-body is included with a message,
911
      # the transfer-length of that body is determined by one of the following
912
      # [...] 5. By the server closing the connection. (Closing the connection
913
      # cannot be used to indicate the end of a request body, since that would
914
      # leave no possibility for the server to send back a response.)"
915
      #
916
      # TODO: Error when buffer length > Content-Length header
917
      if (eof or
918
          self.content_length is None or
919
          (self.content_length is not None and
920
           self.body_buffer.tell() >= self.content_length)):
921
        self.parser_status = self.PS_COMPLETE
922

    
923
    return buf
924

    
925
  def _CheckStartLineLength(self, length):
926
    """Limits the start line buffer size.
927

928
    @type length: int
929
    @param length: Buffer size
930

931
    """
932
    if (self.START_LINE_LENGTH_MAX is not None and
933
        length > self.START_LINE_LENGTH_MAX):
934
      raise HttpError("Start line longer than %d chars" %
935
                       self.START_LINE_LENGTH_MAX)
936

    
937
  def _CheckHeaderLength(self, length):
938
    """Limits the header buffer size.
939

940
    @type length: int
941
    @param length: Buffer size
942

943
    """
944
    if (self.HEADER_LENGTH_MAX is not None and
945
        length > self.HEADER_LENGTH_MAX):
946
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)
947

    
948
  def ParseStartLine(self, start_line):
949
    """Parses the start line of a message.
950

951
    Must be overridden by subclass.
952

953
    @type start_line: string
954
    @param start_line: Start line string
955

956
    """
957
    raise NotImplementedError()
958

    
959
  def _WillPeerCloseConnection(self):
960
    """Evaluate whether peer will close the connection.
961

962
    @rtype: bool
963
    @return: Whether peer will close the connection
964

965
    """
966
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
967
    # for the sender to signal that the connection will be closed after
968
    # completion of the response. For example,
969
    #
970
    #        Connection: close
971
    #
972
    # in either the request or the response header fields indicates that the
973
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
974
    # current request/response is complete."
975

    
976
    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
977
    if hdr_connection:
978
      hdr_connection = hdr_connection.lower()
979

    
980
    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
981
    if self.msg.start_line.version == HTTP_1_1:
982
      return (hdr_connection and "close" in hdr_connection)
983

    
984
    # Some HTTP/1.0 implementations have support for persistent connections,
985
    # using rules different than HTTP/1.1.
986

    
987
    # For older HTTP, Keep-Alive indicates persistent connection.
988
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
989
      return False
990

    
991
    # At least Akamai returns a "Connection: Keep-Alive" header, which was
992
    # supposed to be sent by the client.
993
    if hdr_connection and "keep-alive" in hdr_connection:
994
      return False
995

    
996
    return True
997

    
998
  def _ParseHeaders(self):
999
    """Parses the headers.
1000

1001
    This function also adjusts internal variables based on header values.
1002

1003
    RFC2616, section 4.3: The presence of a message-body in a request is
1004
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
1005
    field in the request's message-headers.
1006

1007
    """
1008
    # Parse headers
1009
    self.header_buffer.seek(0, 0)
1010
    self.msg.headers = ParseHeaders(self.header_buffer, 0)
1011

    
1012
    self.peer_will_close = self._WillPeerCloseConnection()
1013

    
1014
    # Do we have a Content-Length header?
1015
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
1016
    if hdr_content_length:
1017
      try:
1018
        self.content_length = int(hdr_content_length)
1019
      except (TypeError, ValueError):
1020
        self.content_length = None
1021
      if self.content_length is not None and self.content_length < 0:
1022
        self.content_length = None
1023

    
1024
    # if the connection remains open and a content-length was not provided,
1025
    # then assume that the connection WILL close.
1026
    if self.content_length is None:
1027
      self.peer_will_close = True