Statistics
| Branch: | Tag: | Revision:

root / lib / http / __init__.py @ 16b037a9

History | View | Annotate | Download (28 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""HTTP module.
22

23
"""
24

    
25
import logging
26
import mimetools
27
import OpenSSL
28
import select
29
import socket
30
import errno
31

    
32
from cStringIO import StringIO
33

    
34
from ganeti import constants
35
from ganeti import utils
36

    
37

    
38
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION
39

    
40
HTTP_OK = 200
41
HTTP_NO_CONTENT = 204
42
HTTP_NOT_MODIFIED = 304
43

    
44
HTTP_0_9 = "HTTP/0.9"
45
HTTP_1_0 = "HTTP/1.0"
46
HTTP_1_1 = "HTTP/1.1"
47

    
48
HTTP_GET = "GET"
49
HTTP_HEAD = "HEAD"
50
HTTP_POST = "POST"
51
HTTP_PUT = "PUT"
52
HTTP_DELETE = "DELETE"
53

    
54
HTTP_ETAG = "ETag"
55
HTTP_HOST = "Host"
56
HTTP_SERVER = "Server"
57
HTTP_DATE = "Date"
58
HTTP_USER_AGENT = "User-Agent"
59
HTTP_CONTENT_TYPE = "Content-Type"
60
HTTP_CONTENT_LENGTH = "Content-Length"
61
HTTP_CONNECTION = "Connection"
62
HTTP_KEEP_ALIVE = "Keep-Alive"
63
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
64
HTTP_AUTHORIZATION = "Authorization"
65
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
66
HTTP_ALLOW = "Allow"
67

    
68
HTTP_APP_OCTET_STREAM = "application/octet-stream"
69
HTTP_APP_JSON = "application/json"
70

    
71
_SSL_UNEXPECTED_EOF = "Unexpected EOF"
72

    
73
# Socket operations
74
(SOCKOP_SEND,
75
 SOCKOP_RECV,
76
 SOCKOP_SHUTDOWN,
77
 SOCKOP_HANDSHAKE) = range(4)
78

    
79
# send/receive quantum
80
SOCK_BUF_SIZE = 32768
81

    
82

    
83
class HttpError(Exception):
84
  """Internal exception for HTTP errors.
85

86
  This should only be used for internal error reporting.
87

88
  """
89

    
90

    
91
class HttpConnectionClosed(Exception):
92
  """Internal exception for a closed connection.
93

94
  This should only be used for internal error reporting. Only use
95
  it if there's no other way to report this condition.
96

97
  """
98

    
99

    
100
class HttpSessionHandshakeUnexpectedEOF(HttpError):
101
  """Internal exception for errors during SSL handshake.
102

103
  This should only be used for internal error reporting.
104

105
  """
106

    
107

    
108
class HttpSocketTimeout(Exception):
109
  """Internal exception for socket timeouts.
110

111
  This should only be used for internal error reporting.
112

113
  """
114

    
115

    
116
class HttpException(Exception):
117
  code = None
118
  message = None
119

    
120
  def __init__(self, message=None, headers=None):
121
    Exception.__init__(self)
122
    self.message = message
123
    self.headers = headers
124

    
125

    
126
class HttpBadRequest(HttpException):
127
  """400 Bad Request
128

129
  RFC2616, 10.4.1: The request could not be understood by the server
130
  due to malformed syntax. The client SHOULD NOT repeat the request
131
  without modifications.
132

133
  """
134
  code = 400
135

    
136

    
137
class HttpUnauthorized(HttpException):
138
  """401 Unauthorized
139

140
  RFC2616, section 10.4.2: The request requires user
141
  authentication. The response MUST include a WWW-Authenticate header
142
  field (section 14.47) containing a challenge applicable to the
143
  requested resource.
144

145
  """
146
  code = 401
147

    
148

    
149
class HttpForbidden(HttpException):
150
  """403 Forbidden
151

152
  RFC2616, 10.4.4: The server understood the request, but is refusing
153
  to fulfill it.  Authorization will not help and the request SHOULD
154
  NOT be repeated.
155

156
  """
157
  code = 403
158

    
159

    
160
class HttpNotFound(HttpException):
161
  """404 Not Found
162

163
  RFC2616, 10.4.5: The server has not found anything matching the
164
  Request-URI.  No indication is given of whether the condition is
165
  temporary or permanent.
166

167
  """
168
  code = 404
169

    
170

    
171
class HttpMethodNotAllowed(HttpException):
172
  """405 Method Not Allowed
173

174
  RFC2616, 10.4.6: The method specified in the Request-Line is not
175
  allowed for the resource identified by the Request-URI. The response
176
  MUST include an Allow header containing a list of valid methods for
177
  the requested resource.
178

179
  """
180
  code = 405
181

    
182

    
183
class HttpNotAcceptable(HttpException):
184
  """406 Not Acceptable
185

186
  RFC2616, 10.4.7: The resource identified by the request is only capable of
187
  generating response entities which have content characteristics not
188
  acceptable according to the accept headers sent in the request.
189

190
  """
191
  code = 406
192

    
193

    
194
class HttpRequestTimeout(HttpException):
195
  """408 Request Timeout
196

197
  RFC2616, 10.4.9: The client did not produce a request within the
198
  time that the server was prepared to wait. The client MAY repeat the
199
  request without modifications at any later time.
200

201
  """
202
  code = 408
203

    
204

    
205
class HttpConflict(HttpException):
206
  """409 Conflict
207

208
  RFC2616, 10.4.10: The request could not be completed due to a
209
  conflict with the current state of the resource. This code is only
210
  allowed in situations where it is expected that the user might be
211
  able to resolve the conflict and resubmit the request.
212

213
  """
214
  code = 409
215

    
216

    
217
class HttpGone(HttpException):
218
  """410 Gone
219

220
  RFC2616, 10.4.11: The requested resource is no longer available at
221
  the server and no forwarding address is known. This condition is
222
  expected to be considered permanent.
223

224
  """
225
  code = 410
226

    
227

    
228
class HttpLengthRequired(HttpException):
229
  """411 Length Required
230

231
  RFC2616, 10.4.12: The server refuses to accept the request without a
232
  defined Content-Length. The client MAY repeat the request if it adds
233
  a valid Content-Length header field containing the length of the
234
  message-body in the request message.
235

236
  """
237
  code = 411
238

    
239

    
240
class HttpPreconditionFailed(HttpException):
241
  """412 Precondition Failed
242

243
  RFC2616, 10.4.13: The precondition given in one or more of the
244
  request-header fields evaluated to false when it was tested on the
245
  server.
246

247
  """
248
  code = 412
249

    
250

    
251
class HttpUnsupportedMediaType(HttpException):
252
  """415 Unsupported Media Type
253

254
  RFC2616, 10.4.16: The server is refusing to service the request because the
255
  entity of the request is in a format not supported by the requested resource
256
  for the requested method.
257

258
  """
259
  code = 415
260

    
261

    
262
class HttpInternalServerError(HttpException):
263
  """500 Internal Server Error
264

265
  RFC2616, 10.5.1: The server encountered an unexpected condition
266
  which prevented it from fulfilling the request.
267

268
  """
269
  code = 500
270

    
271

    
272
class HttpNotImplemented(HttpException):
273
  """501 Not Implemented
274

275
  RFC2616, 10.5.2: The server does not support the functionality
276
  required to fulfill the request.
277

278
  """
279
  code = 501
280

    
281

    
282
class HttpBadGateway(HttpException):
283
  """502 Bad Gateway
284

285
  RFC2616, 10.5.3: The server, while acting as a gateway or proxy,
286
  received an invalid response from the upstream server it accessed in
287
  attempting to fulfill the request.
288

289
  """
290
  code = 502
291

    
292

    
293
class HttpServiceUnavailable(HttpException):
294
  """503 Service Unavailable
295

296
  RFC2616, 10.5.4: The server is currently unable to handle the
297
  request due to a temporary overloading or maintenance of the server.
298

299
  """
300
  code = 503
301

    
302

    
303
class HttpGatewayTimeout(HttpException):
304
  """504 Gateway Timeout
305

306
  RFC2616, 10.5.5: The server, while acting as a gateway or proxy, did
307
  not receive a timely response from the upstream server specified by
308
  the URI (e.g.  HTTP, FTP, LDAP) or some other auxiliary server
309
  (e.g. DNS) it needed to access in attempting to complete the
310
  request.
311

312
  """
313
  code = 504
314

    
315

    
316
class HttpVersionNotSupported(HttpException):
317
  """505 HTTP Version Not Supported
318

319
  RFC2616, 10.5.6: The server does not support, or refuses to support,
320
  the HTTP protocol version that was used in the request message.
321

322
  """
323
  code = 505
324

    
325

    
326
def WaitForSocketCondition(sock, event, timeout):
327
  """Waits for a condition to occur on the socket.
328

329
  @type sock: socket
330
  @param sock: Wait for events on this socket
331
  @type event: int
332
  @param event: ORed condition (see select module)
333
  @type timeout: float or None
334
  @param timeout: Timeout in seconds
335
  @rtype: int or None
336
  @return: None for timeout, otherwise occured conditions
337

338
  """
339
  check = (event | select.POLLPRI |
340
           select.POLLNVAL | select.POLLHUP | select.POLLERR)
341

    
342
  if timeout is not None:
343
    # Poller object expects milliseconds
344
    timeout *= 1000
345

    
346
  poller = select.poll()
347
  poller.register(sock, event)
348
  try:
349
    while True:
350
      # TODO: If the main thread receives a signal and we have no timeout, we
351
      # could wait forever. This should check a global "quit" flag or
352
      # something every so often.
353
      io_events = poller.poll(timeout)
354
      if not io_events:
355
        # Timeout
356
        return None
357
      for (_, evcond) in io_events:
358
        if evcond & check:
359
          return evcond
360
  finally:
361
    poller.unregister(sock)
362

    
363

    
364
def SocketOperation(sock, op, arg1, timeout):
365
  """Wrapper around socket functions.
366

367
  This function abstracts error handling for socket operations, especially
368
  for the complicated interaction with OpenSSL.
369

370
  @type sock: socket
371
  @param sock: Socket for the operation
372
  @type op: int
373
  @param op: Operation to execute (SOCKOP_* constants)
374
  @type arg1: any
375
  @param arg1: Parameter for function (if needed)
376
  @type timeout: None or float
377
  @param timeout: Timeout in seconds or None
378
  @return: Return value of socket function
379

380
  """
381
  # TODO: event_poll/event_check/override
382
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
383
    event_poll = select.POLLOUT
384

    
385
  elif op == SOCKOP_RECV:
386
    event_poll = select.POLLIN
387

    
388
  elif op == SOCKOP_SHUTDOWN:
389
    event_poll = None
390

    
391
    # The timeout is only used when OpenSSL requests polling for a condition.
392
    # It is not advisable to have no timeout for shutdown.
393
    assert timeout
394

    
395
  else:
396
    raise AssertionError("Invalid socket operation")
397

    
398
  # Handshake is only supported by SSL sockets
399
  if (op == SOCKOP_HANDSHAKE and
400
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
401
    return
402

    
403
  # No override by default
404
  event_override = 0
405

    
406
  while True:
407
    # Poll only for certain operations and when asked for by an override
408
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
409
      if event_override:
410
        wait_for_event = event_override
411
      else:
412
        wait_for_event = event_poll
413

    
414
      event = WaitForSocketCondition(sock, wait_for_event, timeout)
415
      if event is None:
416
        raise HttpSocketTimeout()
417

    
418
      if (op == SOCKOP_RECV and
419
          event & (select.POLLNVAL | select.POLLHUP | select.POLLERR)):
420
        return ""
421

    
422
      if not event & wait_for_event:
423
        continue
424

    
425
    # Reset override
426
    event_override = 0
427

    
428
    try:
429
      try:
430
        if op == SOCKOP_SEND:
431
          return sock.send(arg1)
432

    
433
        elif op == SOCKOP_RECV:
434
          return sock.recv(arg1)
435

    
436
        elif op == SOCKOP_SHUTDOWN:
437
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
438
            # PyOpenSSL's shutdown() doesn't take arguments
439
            return sock.shutdown()
440
          else:
441
            return sock.shutdown(arg1)
442

    
443
        elif op == SOCKOP_HANDSHAKE:
444
          return sock.do_handshake()
445

    
446
      except OpenSSL.SSL.WantWriteError:
447
        # OpenSSL wants to write, poll for POLLOUT
448
        event_override = select.POLLOUT
449
        continue
450

    
451
      except OpenSSL.SSL.WantReadError:
452
        # OpenSSL wants to read, poll for POLLIN
453
        event_override = select.POLLIN | select.POLLPRI
454
        continue
455

    
456
      except OpenSSL.SSL.WantX509LookupError:
457
        continue
458

    
459
      except OpenSSL.SSL.ZeroReturnError, err:
460
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
461
        # occurs if a closure alert has occurred in the protocol, i.e. the
462
        # connection has been closed cleanly. Note that this does not
463
        # necessarily mean that the transport layer (e.g. a socket) has been
464
        # closed.
465
        if op == SOCKOP_SEND:
466
          # Can happen during a renegotiation
467
          raise HttpConnectionClosed(err.args)
468
        elif op == SOCKOP_RECV:
469
          return ""
470

    
471
        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
472
        raise socket.error(err.args)
473

    
474
      except OpenSSL.SSL.SysCallError, err:
475
        if op == SOCKOP_SEND:
476
          # arg1 is the data when writing
477
          if err.args and err.args[0] == -1 and arg1 == "":
478
            # errors when writing empty strings are expected
479
            # and can be ignored
480
            return 0
481

    
482
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
483
          if op == SOCKOP_RECV:
484
            return ""
485
          elif op == SOCKOP_HANDSHAKE:
486
            # Can happen if peer disconnects directly after the connection is
487
            # opened.
488
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
489

    
490
        raise socket.error(err.args)
491

    
492
      except OpenSSL.SSL.Error, err:
493
        raise socket.error(err.args)
494

    
495
    except socket.error, err:
496
      if err.args and err.args[0] == errno.EAGAIN:
497
        # Ignore EAGAIN
498
        continue
499

    
500
      raise
501

    
502

    
503
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
504
  """Closes the connection.
505

506
  @type sock: socket
507
  @param sock: Socket to be shut down
508
  @type close_timeout: float
509
  @param close_timeout: How long to wait for the peer to close
510
      the connection
511
  @type write_timeout: float
512
  @param write_timeout: Write timeout for shutdown
513
  @type msgreader: http.HttpMessageReader
514
  @param msgreader: Request message reader, used to determine whether
515
      peer should close connection
516
  @type force: bool
517
  @param force: Whether to forcibly close the connection without
518
      waiting for peer
519

520
  """
521
  #print msgreader.peer_will_close, force
522
  if msgreader and msgreader.peer_will_close and not force:
523
    # Wait for peer to close
524
    try:
525
      # Check whether it's actually closed
526
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
527
        return
528
    except (socket.error, HttpError, HttpSocketTimeout):
529
      # Ignore errors at this stage
530
      pass
531

    
532
  # Close the connection from our side
533
  try:
534
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
535
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
536
                    write_timeout)
537
  except HttpSocketTimeout:
538
    raise HttpError("Timeout while shutting down connection")
539
  except socket.error, err:
540
    # Ignore ENOTCONN
541
    if not (err.args and err.args[0] == errno.ENOTCONN):
542
      raise HttpError("Error while shutting down connection: %s" % err)
543

    
544

    
545
def Handshake(sock, write_timeout):
546
  """Shakes peer's hands.
547

548
  @type sock: socket
549
  @param sock: Socket to be shut down
550
  @type write_timeout: float
551
  @param write_timeout: Write timeout for handshake
552

553
  """
554
  try:
555
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
556
  except HttpSocketTimeout:
557
    raise HttpError("Timeout during SSL handshake")
558
  except socket.error, err:
559
    raise HttpError("Error in SSL handshake: %s" % err)
560

    
561

    
562
def InitSsl():
563
  """Initializes the SSL infrastructure.
564

565
  This function is idempotent.
566

567
  """
568
  if not OpenSSL.rand.status():
569
    raise EnvironmentError("OpenSSL could not collect enough entropy"
570
                           " for the PRNG")
571

    
572
  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
573

    
574

    
575
class HttpSslParams(object):
576
  """Data class for SSL key and certificate.
577

578
  """
579
  def __init__(self, ssl_key_path, ssl_cert_path):
580
    """Initializes this class.
581

582
    @type ssl_key_path: string
583
    @param ssl_key_path: Path to file containing SSL key in PEM format
584
    @type ssl_cert_path: string
585
    @param ssl_cert_path: Path to file containing SSL certificate
586
        in PEM format
587

588
    """
589
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
590
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)
591

    
592
  def GetKey(self):
593
    return OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
594
                                          self.ssl_key_pem)
595

    
596
  def GetCertificate(self):
597
    return OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
598
                                           self.ssl_cert_pem)
599

    
600

    
601
class HttpBase(object):
602
  """Base class for HTTP server and client.
603

604
  """
605
  def __init__(self):
606
    self.using_ssl = None
607
    self._ssl_params = None
608
    self._ssl_key = None
609
    self._ssl_cert = None
610

    
611
  def _CreateSocket(self, ssl_params, ssl_verify_peer):
612
    """Creates a TCP socket and initializes SSL if needed.
613

614
    @type ssl_params: HttpSslParams
615
    @param ssl_params: SSL key and certificate
616
    @type ssl_verify_peer: bool
617
    @param ssl_verify_peer: Whether to require client certificate
618
        and compare it with our certificate
619

620
    """
621
    self._ssl_params = ssl_params
622

    
623
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
624

    
625
    # Should we enable SSL?
626
    self.using_ssl = ssl_params is not None
627

    
628
    if not self.using_ssl:
629
      return sock
630

    
631
    self._ssl_key = ssl_params.GetKey()
632
    self._ssl_cert = ssl_params.GetCertificate()
633

    
634
    ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
635
    ctx.set_options(OpenSSL.SSL.OP_NO_SSLv2)
636

    
637
    ctx.use_privatekey(self._ssl_key)
638
    ctx.use_certificate(self._ssl_cert)
639
    ctx.check_privatekey()
640

    
641
    if ssl_verify_peer:
642
      ctx.set_verify(OpenSSL.SSL.VERIFY_PEER |
643
                     OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
644
                     self._SSLVerifyCallback)
645

    
646
    return OpenSSL.SSL.Connection(ctx, sock)
647

    
648
  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
649
    """Verify the certificate provided by the peer
650

651
    We only compare fingerprints. The client must use the same certificate as
652
    we do on our side.
653

654
    """
655
    # some parameters are unused, but this is the API
656
    # pylint: disable-msg=W0613
657
    assert self._ssl_params, "SSL not initialized"
658

    
659
    return (self._ssl_cert.digest("sha1") == cert.digest("sha1") and
660
            self._ssl_cert.digest("md5") == cert.digest("md5"))
661

    
662

    
663
class HttpMessage(object):
664
  """Data structure for HTTP message.
665

666
  """
667
  def __init__(self):
668
    self.start_line = None
669
    self.headers = None
670
    self.body = None
671

    
672

    
673
class HttpClientToServerStartLine(object):
674
  """Data structure for HTTP request start line.
675

676
  """
677
  def __init__(self, method, path, version):
678
    self.method = method
679
    self.path = path
680
    self.version = version
681

    
682
  def __str__(self):
683
    return "%s %s %s" % (self.method, self.path, self.version)
684

    
685

    
686
class HttpServerToClientStartLine(object):
687
  """Data structure for HTTP response start line.
688

689
  """
690
  def __init__(self, version, code, reason):
691
    self.version = version
692
    self.code = code
693
    self.reason = reason
694

    
695
  def __str__(self):
696
    return "%s %s %s" % (self.version, self.code, self.reason)
697

    
698

    
699
class HttpMessageWriter(object):
700
  """Writes an HTTP message to a socket.
701

702
  """
703
  def __init__(self, sock, msg, write_timeout):
704
    """Initializes this class and writes an HTTP message to a socket.
705

706
    @type sock: socket
707
    @param sock: Socket to be written to
708
    @type msg: http.HttpMessage
709
    @param msg: HTTP message to be written
710
    @type write_timeout: float
711
    @param write_timeout: Write timeout for socket
712

713
    """
714
    self._msg = msg
715

    
716
    self._PrepareMessage()
717

    
718
    buf = self._FormatMessage()
719

    
720
    pos = 0
721
    end = len(buf)
722
    while pos < end:
723
      # Send only SOCK_BUF_SIZE bytes at a time
724
      data = buf[pos:(pos + SOCK_BUF_SIZE)]
725

    
726
      sent = SocketOperation(sock, SOCKOP_SEND, data, write_timeout)
727

    
728
      # Remove sent bytes
729
      pos += sent
730

    
731
    assert pos == end, "Message wasn't sent completely"
732

    
733
  def _PrepareMessage(self):
734
    """Prepares the HTTP message by setting mandatory headers.
735

736
    """
737
    # RFC2616, section 4.3: "The presence of a message-body in a request is
738
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
739
    # field in the request's message-headers."
740
    if self._msg.body:
741
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(self._msg.body)
742

    
743
  def _FormatMessage(self):
744
    """Serializes the HTTP message into a string.
745

746
    """
747
    buf = StringIO()
748

    
749
    # Add start line
750
    buf.write(str(self._msg.start_line))
751
    buf.write("\r\n")
752

    
753
    # Add headers
754
    if self._msg.start_line.version != HTTP_0_9:
755
      for name, value in self._msg.headers.iteritems():
756
        buf.write("%s: %s\r\n" % (name, value))
757

    
758
    buf.write("\r\n")
759

    
760
    # Add message body if needed
761
    if self.HasMessageBody():
762
      buf.write(self._msg.body)
763

    
764
    elif self._msg.body:
765
      logging.warning("Ignoring message body")
766

    
767
    return buf.getvalue()
768

    
769
  def HasMessageBody(self):
770
    """Checks whether the HTTP message contains a body.
771

772
    Can be overridden by subclasses.
773

774
    """
775
    return bool(self._msg.body)
776

    
777

    
778
class HttpMessageReader(object):
779
  """Reads HTTP message from socket.
780

781
  """
782
  # Length limits
783
  START_LINE_LENGTH_MAX = None
784
  HEADER_LENGTH_MAX = None
785

    
786
  # Parser state machine
787
  PS_START_LINE = "start-line"
788
  PS_HEADERS = "headers"
789
  PS_BODY = "entity-body"
790
  PS_COMPLETE = "complete"
791

    
792
  def __init__(self, sock, msg, read_timeout):
793
    """Reads an HTTP message from a socket.
794

795
    @type sock: socket
796
    @param sock: Socket to be read from
797
    @type msg: http.HttpMessage
798
    @param msg: Object for the read message
799
    @type read_timeout: float
800
    @param read_timeout: Read timeout for socket
801

802
    """
803
    self.sock = sock
804
    self.msg = msg
805

    
806
    self.start_line_buffer = None
807
    self.header_buffer = StringIO()
808
    self.body_buffer = StringIO()
809
    self.parser_status = self.PS_START_LINE
810
    self.content_length = None
811
    self.peer_will_close = None
812

    
813
    buf = ""
814
    eof = False
815
    while self.parser_status != self.PS_COMPLETE:
816
      # TODO: Don't read more than necessary (Content-Length), otherwise
817
      # data might be lost and/or an error could occur
818
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)
819

    
820
      if data:
821
        buf += data
822
      else:
823
        eof = True
824

    
825
      # Do some parsing and error checking while more data arrives
826
      buf = self._ContinueParsing(buf, eof)
827

    
828
      # Must be done only after the buffer has been evaluated
829
      # TODO: Content-Length < len(data read) and connection closed
830
      if (eof and
831
          self.parser_status in (self.PS_START_LINE,
832
                                 self.PS_HEADERS)):
833
        raise HttpError("Connection closed prematurely")
834

    
835
    # Parse rest
836
    buf = self._ContinueParsing(buf, True)
837

    
838
    assert self.parser_status == self.PS_COMPLETE
839
    assert not buf, "Parser didn't read full response"
840

    
841
    # Body is complete
842
    msg.body = self.body_buffer.getvalue()
843

    
844
  def _ContinueParsing(self, buf, eof):
845
    """Main function for HTTP message state machine.
846

847
    @type buf: string
848
    @param buf: Receive buffer
849
    @type eof: bool
850
    @param eof: Whether we've reached EOF on the socket
851
    @rtype: string
852
    @return: Updated receive buffer
853

854
    """
855
    # TODO: Use offset instead of slicing when possible
856
    if self.parser_status == self.PS_START_LINE:
857
      # Expect start line
858
      while True:
859
        idx = buf.find("\r\n")
860

    
861
        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
862
        # ignore any empty line(s) received where a Request-Line is expected.
863
        # In other words, if the server is reading the protocol stream at the
864
        # beginning of a message and receives a CRLF first, it should ignore
865
        # the CRLF."
866
        if idx == 0:
867
          # TODO: Limit number of CRLFs/empty lines for safety?
868
          buf = buf[:2]
869
          continue
870

    
871
        if idx > 0:
872
          self.start_line_buffer = buf[:idx]
873

    
874
          self._CheckStartLineLength(len(self.start_line_buffer))
875

    
876
          # Remove status line, including CRLF
877
          buf = buf[idx + 2:]
878

    
879
          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)
880

    
881
          self.parser_status = self.PS_HEADERS
882
        else:
883
          # Check whether incoming data is getting too large, otherwise we just
884
          # fill our read buffer.
885
          self._CheckStartLineLength(len(buf))
886

    
887
        break
888

    
889
    # TODO: Handle messages without headers
890
    if self.parser_status == self.PS_HEADERS:
891
      # Wait for header end
892
      idx = buf.find("\r\n\r\n")
893
      if idx >= 0:
894
        self.header_buffer.write(buf[:idx + 2])
895

    
896
        self._CheckHeaderLength(self.header_buffer.tell())
897

    
898
        # Remove headers, including CRLF
899
        buf = buf[idx + 4:]
900

    
901
        self._ParseHeaders()
902

    
903
        self.parser_status = self.PS_BODY
904
      else:
905
        # Check whether incoming data is getting too large, otherwise we just
906
        # fill our read buffer.
907
        self._CheckHeaderLength(len(buf))
908

    
909
    if self.parser_status == self.PS_BODY:
910
      # TODO: Implement max size for body_buffer
911
      self.body_buffer.write(buf)
912
      buf = ""
913

    
914
      # Check whether we've read everything
915
      #
916
      # RFC2616, section 4.4: "When a message-body is included with a message,
917
      # the transfer-length of that body is determined by one of the following
918
      # [...] 5. By the server closing the connection. (Closing the connection
919
      # cannot be used to indicate the end of a request body, since that would
920
      # leave no possibility for the server to send back a response.)"
921
      #
922
      # TODO: Error when buffer length > Content-Length header
923
      if (eof or
924
          self.content_length is None or
925
          (self.content_length is not None and
926
           self.body_buffer.tell() >= self.content_length)):
927
        self.parser_status = self.PS_COMPLETE
928

    
929
    return buf
930

    
931
  def _CheckStartLineLength(self, length):
932
    """Limits the start line buffer size.
933

934
    @type length: int
935
    @param length: Buffer size
936

937
    """
938
    if (self.START_LINE_LENGTH_MAX is not None and
939
        length > self.START_LINE_LENGTH_MAX):
940
      raise HttpError("Start line longer than %d chars" %
941
                       self.START_LINE_LENGTH_MAX)
942

    
943
  def _CheckHeaderLength(self, length):
944
    """Limits the header buffer size.
945

946
    @type length: int
947
    @param length: Buffer size
948

949
    """
950
    if (self.HEADER_LENGTH_MAX is not None and
951
        length > self.HEADER_LENGTH_MAX):
952
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)
953

    
954
  def ParseStartLine(self, start_line):
955
    """Parses the start line of a message.
956

957
    Must be overridden by subclass.
958

959
    @type start_line: string
960
    @param start_line: Start line string
961

962
    """
963
    raise NotImplementedError()
964

    
965
  def _WillPeerCloseConnection(self):
966
    """Evaluate whether peer will close the connection.
967

968
    @rtype: bool
969
    @return: Whether peer will close the connection
970

971
    """
972
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
973
    # for the sender to signal that the connection will be closed after
974
    # completion of the response. For example,
975
    #
976
    #        Connection: close
977
    #
978
    # in either the request or the response header fields indicates that the
979
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
980
    # current request/response is complete."
981

    
982
    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
983
    if hdr_connection:
984
      hdr_connection = hdr_connection.lower()
985

    
986
    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
987
    if self.msg.start_line.version == HTTP_1_1:
988
      return (hdr_connection and "close" in hdr_connection)
989

    
990
    # Some HTTP/1.0 implementations have support for persistent connections,
991
    # using rules different than HTTP/1.1.
992

    
993
    # For older HTTP, Keep-Alive indicates persistent connection.
994
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
995
      return False
996

    
997
    # At least Akamai returns a "Connection: Keep-Alive" header, which was
998
    # supposed to be sent by the client.
999
    if hdr_connection and "keep-alive" in hdr_connection:
1000
      return False
1001

    
1002
    return True
1003

    
1004
  def _ParseHeaders(self):
1005
    """Parses the headers.
1006

1007
    This function also adjusts internal variables based on header values.
1008

1009
    RFC2616, section 4.3: The presence of a message-body in a request is
1010
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
1011
    field in the request's message-headers.
1012

1013
    """
1014
    # Parse headers
1015
    self.header_buffer.seek(0, 0)
1016
    self.msg.headers = mimetools.Message(self.header_buffer, 0)
1017

    
1018
    self.peer_will_close = self._WillPeerCloseConnection()
1019

    
1020
    # Do we have a Content-Length header?
1021
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
1022
    if hdr_content_length:
1023
      try:
1024
        self.content_length = int(hdr_content_length)
1025
      except ValueError:
1026
        self.content_length = None
1027
      if self.content_length is not None and self.content_length < 0:
1028
        self.content_length = None
1029

    
1030
    # if the connection remains open and a content-length was not provided,
1031
    # then assume that the connection WILL close.
1032
    if self.content_length is None:
1033
      self.peer_will_close = True