Statistics
| Branch: | Tag: | Revision:

root / lib / http / __init__.py @ aea0ed67

History | View | Annotate | Download (26.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""HTTP module.
22

23
"""
24

    
25
import logging
26
import mimetools
27
import OpenSSL
28
import select
29
import socket
30
import errno
31

    
32
from cStringIO import StringIO
33

    
34
from ganeti import constants
35
from ganeti import serializer
36
from ganeti import utils
37

    
38

    
39
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION
40

    
41
HTTP_OK = 200
42
HTTP_NO_CONTENT = 204
43
HTTP_NOT_MODIFIED = 304
44

    
45
HTTP_0_9 = "HTTP/0.9"
46
HTTP_1_0 = "HTTP/1.0"
47
HTTP_1_1 = "HTTP/1.1"
48

    
49
HTTP_GET = "GET"
50
HTTP_HEAD = "HEAD"
51
HTTP_POST = "POST"
52
HTTP_PUT = "PUT"
53
HTTP_DELETE = "DELETE"
54

    
55
HTTP_ETAG = "ETag"
56
HTTP_HOST = "Host"
57
HTTP_SERVER = "Server"
58
HTTP_DATE = "Date"
59
HTTP_USER_AGENT = "User-Agent"
60
HTTP_CONTENT_TYPE = "Content-Type"
61
HTTP_CONTENT_LENGTH = "Content-Length"
62
HTTP_CONNECTION = "Connection"
63
HTTP_KEEP_ALIVE = "Keep-Alive"
64
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
65
HTTP_AUTHORIZATION = "Authorization"
66
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
67
HTTP_ALLOW = "Allow"
68

    
69
_SSL_UNEXPECTED_EOF = "Unexpected EOF"
70

    
71
# Socket operations
72
(SOCKOP_SEND,
73
 SOCKOP_RECV,
74
 SOCKOP_SHUTDOWN,
75
 SOCKOP_HANDSHAKE) = range(4)
76

    
77
# send/receive quantum
78
SOCK_BUF_SIZE = 32768
79

    
80

    
81
class HttpError(Exception):
82
  """Internal exception for HTTP errors.
83

84
  This should only be used for internal error reporting.
85

86
  """
87

    
88

    
89
class HttpConnectionClosed(Exception):
90
  """Internal exception for a closed connection.
91

92
  This should only be used for internal error reporting. Only use
93
  it if there's no other way to report this condition.
94

95
  """
96

    
97

    
98
class HttpSessionHandshakeUnexpectedEOF(HttpError):
99
  """Internal exception for errors during SSL handshake.
100

101
  This should only be used for internal error reporting.
102

103
  """
104

    
105

    
106
class HttpSocketTimeout(Exception):
107
  """Internal exception for socket timeouts.
108

109
  This should only be used for internal error reporting.
110

111
  """
112

    
113

    
114
class HttpException(Exception):
115
  code = None
116
  message = None
117

    
118
  def __init__(self, message=None, headers=None):
119
    Exception.__init__(self)
120
    self.message = message
121
    self.headers = headers
122

    
123

    
124
class HttpBadRequest(HttpException):
125
  """400 Bad Request
126

127
  RFC2616, 10.4.1: The request could not be understood by the server due to
128
  malformed syntax. The client SHOULD NOT repeat the request without
129
  modifications.
130

131
  """
132
  code = 400
133

    
134

    
135
class HttpUnauthorized(HttpException):
136
  """401 Unauthorized
137

138
  RFC2616, section 10.4.2: The request requires user authentication. The
139
  response MUST include a WWW-Authenticate header field (section 14.47)
140
  containing a challenge applicable to the requested resource.
141

142
  """
143
  code = 401
144

    
145

    
146
class HttpForbidden(HttpException):
147
  """403 Forbidden
148

149
  RFC2616, 10.4.4: The server understood the request, but is refusing to
150
  fulfill it.  Authorization will not help and the request SHOULD NOT be
151
  repeated.
152

153
  """
154
  code = 403
155

    
156

    
157
class HttpNotFound(HttpException):
158
  """404 Not Found
159

160
  RFC2616, 10.4.5: The server has not found anything matching the Request-URI.
161
  No indication is given of whether the condition is temporary or permanent.
162

163
  """
164
  code = 404
165

    
166

    
167
class HttpMethodNotAllowed(HttpException):
168
  """405 Method Not Allowed
169

170
  RFC2616, 10.4.6: The method specified in the Request-Line is not allowed for
171
  the resource identified by the Request-URI. The response MUST include an
172
  Allow header containing a list of valid methods for the requested resource.
173

174
  """
175
  code = 405
176

    
177

    
178
class HttpRequestTimeout(HttpException):
179
  """408 Request Timeout
180

181
  RFC2616, 10.4.9: The client did not produce a request within the time that
182
  the server was prepared to wait. The client MAY repeat the request without
183
  modifications at any later time.
184

185
  """
186
  code = 408
187

    
188

    
189
class HttpConflict(HttpException):
190
  """409 Conflict
191

192
  RFC2616, 10.4.10: The request could not be completed due to a conflict with
193
  the current state of the resource. This code is only allowed in situations
194
  where it is expected that the user might be able to resolve the conflict and
195
  resubmit the request.
196

197
  """
198
  code = 409
199

    
200

    
201
class HttpGone(HttpException):
202
  """410 Gone
203

204
  RFC2616, 10.4.11: The requested resource is no longer available at the server
205
  and no forwarding address is known. This condition is expected to be
206
  considered permanent.
207

208
  """
209
  code = 410
210

    
211

    
212
class HttpLengthRequired(HttpException):
213
  """411 Length Required
214

215
  RFC2616, 10.4.12: The server refuses to accept the request without a defined
216
  Content-Length. The client MAY repeat the request if it adds a valid
217
  Content-Length header field containing the length of the message-body in the
218
  request message.
219

220
  """
221
  code = 411
222

    
223

    
224
class HttpPreconditionFailed(HttpException):
225
  """412 Precondition Failed
226

227
  RFC2616, 10.4.13: The precondition given in one or more of the request-header
228
  fields evaluated to false when it was tested on the server.
229

230
  """
231
  code = 412
232

    
233

    
234
class HttpInternalServerError(HttpException):
235
  """500 Internal Server Error
236

237
  RFC2616, 10.5.1: The server encountered an unexpected condition which
238
  prevented it from fulfilling the request.
239

240
  """
241
  code = 500
242

    
243

    
244
class HttpNotImplemented(HttpException):
245
  """501 Not Implemented
246

247
  RFC2616, 10.5.2: The server does not support the functionality required to
248
  fulfill the request.
249

250
  """
251
  code = 501
252

    
253

    
254
class HttpServiceUnavailable(HttpException):
255
  """503 Service Unavailable
256

257
  RFC2616, 10.5.4: The server is currently unable to handle the request due to
258
  a temporary overloading or maintenance of the server.
259

260
  """
261
  code = 503
262

    
263

    
264
class HttpVersionNotSupported(HttpException):
265
  """505 HTTP Version Not Supported
266

267
  RFC2616, 10.5.6: The server does not support, or refuses to support, the HTTP
268
  protocol version that was used in the request message.
269

270
  """
271
  code = 505
272

    
273

    
274
class HttpJsonConverter:
275
  CONTENT_TYPE = "application/json"
276

    
277
  def Encode(self, data):
278
    return serializer.DumpJson(data)
279

    
280
  def Decode(self, data):
281
    return serializer.LoadJson(data)
282

    
283

    
284
def WaitForSocketCondition(sock, event, timeout):
285
  """Waits for a condition to occur on the socket.
286

287
  @type sock: socket
288
  @param sock: Wait for events on this socket
289
  @type event: int
290
  @param event: ORed condition (see select module)
291
  @type timeout: float or None
292
  @param timeout: Timeout in seconds
293
  @rtype: int or None
294
  @return: None for timeout, otherwise occured conditions
295

296
  """
297
  check = (event | select.POLLPRI |
298
           select.POLLNVAL | select.POLLHUP | select.POLLERR)
299

    
300
  if timeout is not None:
301
    # Poller object expects milliseconds
302
    timeout *= 1000
303

    
304
  poller = select.poll()
305
  poller.register(sock, event)
306
  try:
307
    while True:
308
      # TODO: If the main thread receives a signal and we have no timeout, we
309
      # could wait forever. This should check a global "quit" flag or
310
      # something every so often.
311
      io_events = poller.poll(timeout)
312
      if not io_events:
313
        # Timeout
314
        return None
315
      for (evfd, evcond) in io_events:
316
        if evcond & check:
317
          return evcond
318
  finally:
319
    poller.unregister(sock)
320

    
321

    
322
def SocketOperation(sock, op, arg1, timeout):
323
  """Wrapper around socket functions.
324

325
  This function abstracts error handling for socket operations, especially
326
  for the complicated interaction with OpenSSL.
327

328
  @type sock: socket
329
  @param sock: Socket for the operation
330
  @type op: int
331
  @param op: Operation to execute (SOCKOP_* constants)
332
  @type arg1: any
333
  @param arg1: Parameter for function (if needed)
334
  @type timeout: None or float
335
  @param timeout: Timeout in seconds or None
336
  @return: Return value of socket function
337

338
  """
339
  # TODO: event_poll/event_check/override
340
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
341
    event_poll = select.POLLOUT
342
    event_check = select.POLLOUT
343

    
344
  elif op == SOCKOP_RECV:
345
    event_poll = select.POLLIN
346
    event_check = select.POLLIN | select.POLLPRI
347

    
348
  elif op == SOCKOP_SHUTDOWN:
349
    event_poll = None
350
    event_check = None
351

    
352
    # The timeout is only used when OpenSSL requests polling for a condition.
353
    # It is not advisable to have no timeout for shutdown.
354
    assert timeout
355

    
356
  else:
357
    raise AssertionError("Invalid socket operation")
358

    
359
  # Handshake is only supported by SSL sockets
360
  if (op == SOCKOP_HANDSHAKE and
361
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
362
    return
363

    
364
  # No override by default
365
  event_override = 0
366

    
367
  while True:
368
    # Poll only for certain operations and when asked for by an override
369
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
370
      if event_override:
371
        wait_for_event = event_override
372
      else:
373
        wait_for_event = event_poll
374

    
375
      event = WaitForSocketCondition(sock, wait_for_event, timeout)
376
      if event is None:
377
        raise HttpSocketTimeout()
378

    
379
      if (op == SOCKOP_RECV and
380
          event & (select.POLLNVAL | select.POLLHUP | select.POLLERR)):
381
        return ""
382

    
383
      if not event & wait_for_event:
384
        continue
385

    
386
    # Reset override
387
    event_override = 0
388

    
389
    try:
390
      try:
391
        if op == SOCKOP_SEND:
392
          return sock.send(arg1)
393

    
394
        elif op == SOCKOP_RECV:
395
          return sock.recv(arg1)
396

    
397
        elif op == SOCKOP_SHUTDOWN:
398
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
399
            # PyOpenSSL's shutdown() doesn't take arguments
400
            return sock.shutdown()
401
          else:
402
            return sock.shutdown(arg1)
403

    
404
        elif op == SOCKOP_HANDSHAKE:
405
          return sock.do_handshake()
406

    
407
      except OpenSSL.SSL.WantWriteError:
408
        # OpenSSL wants to write, poll for POLLOUT
409
        event_override = select.POLLOUT
410
        continue
411

    
412
      except OpenSSL.SSL.WantReadError:
413
        # OpenSSL wants to read, poll for POLLIN
414
        event_override = select.POLLIN | select.POLLPRI
415
        continue
416

    
417
      except OpenSSL.SSL.WantX509LookupError:
418
        continue
419

    
420
      except OpenSSL.SSL.ZeroReturnError, err:
421
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
422
        # occurs if a closure alert has occurred in the protocol, i.e. the
423
        # connection has been closed cleanly. Note that this does not
424
        # necessarily mean that the transport layer (e.g. a socket) has been
425
        # closed.
426
        if op == SOCKOP_SEND:
427
          # Can happen during a renegotiation
428
          raise HttpConnectionClosed(err.args)
429
        elif op == SOCKOP_RECV:
430
          return ""
431

    
432
        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
433
        raise socket.error(err.args)
434

    
435
      except OpenSSL.SSL.SysCallError, err:
436
        if op == SOCKOP_SEND:
437
          # arg1 is the data when writing
438
          if err.args and err.args[0] == -1 and arg1 == "":
439
            # errors when writing empty strings are expected
440
            # and can be ignored
441
            return 0
442

    
443
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
444
          if op == SOCKOP_RECV:
445
            return ""
446
          elif op == SOCKOP_HANDSHAKE:
447
            # Can happen if peer disconnects directly after the connection is
448
            # opened.
449
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
450

    
451
        raise socket.error(err.args)
452

    
453
      except OpenSSL.SSL.Error, err:
454
        raise socket.error(err.args)
455

    
456
    except socket.error, err:
457
      if err.args and err.args[0] == errno.EAGAIN:
458
        # Ignore EAGAIN
459
        continue
460

    
461
      raise
462

    
463

    
464
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
465
  """Closes the connection.
466

467
  @type sock: socket
468
  @param sock: Socket to be shut down
469
  @type close_timeout: float
470
  @param close_timeout: How long to wait for the peer to close the connection
471
  @type write_timeout: float
472
  @param write_timeout: Write timeout for shutdown
473
  @type msgreader: http.HttpMessageReader
474
  @param msgreader: Request message reader, used to determine whether peer
475
                    should close connection
476
  @type force: bool
477
  @param force: Whether to forcibly close the connection without waiting
478
                for peer
479

480
  """
481
  #print msgreader.peer_will_close, force
482
  if msgreader and msgreader.peer_will_close and not force:
483
    # Wait for peer to close
484
    try:
485
      # Check whether it's actually closed
486
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
487
        return
488
    except (socket.error, HttpError, HttpSocketTimeout):
489
      # Ignore errors at this stage
490
      pass
491

    
492
  # Close the connection from our side
493
  try:
494
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
495
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
496
                    write_timeout)
497
  except HttpSocketTimeout:
498
    raise HttpError("Timeout while shutting down connection")
499
  except socket.error, err:
500
    # Ignore ENOTCONN
501
    if not (err.args and err.args[0] == errno.ENOTCONN):
502
      raise HttpError("Error while shutting down connection: %s" % err)
503

    
504

    
505
def Handshake(sock, write_timeout):
506
  """Shakes peer's hands.
507

508
  @type sock: socket
509
  @param sock: Socket to be shut down
510
  @type write_timeout: float
511
  @param write_timeout: Write timeout for handshake
512

513
  """
514
  try:
515
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
516
  except HttpSocketTimeout:
517
    raise HttpError("Timeout during SSL handshake")
518
  except socket.error, err:
519
    raise HttpError("Error in SSL handshake: %s" % err)
520

    
521

    
522
class HttpSslParams(object):
523
  """Data class for SSL key and certificate.
524

525
  """
526
  def __init__(self, ssl_key_path, ssl_cert_path):
527
    """Initializes this class.
528

529
    @type ssl_key_path: string
530
    @param ssl_key_path: Path to file containing SSL key in PEM format
531
    @type ssl_cert_path: string
532
    @param ssl_cert_path: Path to file containing SSL certificate in PEM format
533

534
    """
535
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
536
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)
537

    
538
  def GetKey(self):
539
    return OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
540
                                          self.ssl_key_pem)
541

    
542
  def GetCertificate(self):
543
    return OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
544
                                           self.ssl_cert_pem)
545

    
546

    
547
class HttpBase(object):
548
  """Base class for HTTP server and client.
549

550
  """
551
  def __init__(self):
552
    self.using_ssl = None
553
    self._ssl_params = None
554
    self._ssl_key = None
555
    self._ssl_cert = None
556

    
557
  def _CreateSocket(self, ssl_params, ssl_verify_peer):
558
    """Creates a TCP socket and initializes SSL if needed.
559

560
    @type ssl_params: HttpSslParams
561
    @param ssl_params: SSL key and certificate
562
    @type ssl_verify_peer: bool
563
    @param ssl_verify_peer: Whether to require client certificate and compare
564
                            it with our certificate
565

566
    """
567
    self._ssl_params = ssl_params
568

    
569
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
570

    
571
    # Should we enable SSL?
572
    self.using_ssl = ssl_params is not None
573

    
574
    if not self.using_ssl:
575
      return sock
576

    
577
    self._ssl_key = ssl_params.GetKey()
578
    self._ssl_cert = ssl_params.GetCertificate()
579

    
580
    ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
581
    ctx.set_options(OpenSSL.SSL.OP_NO_SSLv2)
582

    
583
    ctx.use_privatekey(self._ssl_key)
584
    ctx.use_certificate(self._ssl_cert)
585
    ctx.check_privatekey()
586

    
587
    if ssl_verify_peer:
588
      ctx.set_verify(OpenSSL.SSL.VERIFY_PEER |
589
                     OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
590
                     self._SSLVerifyCallback)
591

    
592
    return OpenSSL.SSL.Connection(ctx, sock)
593

    
594
  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
595
    """Verify the certificate provided by the peer
596

597
    We only compare fingerprints. The client must use the same certificate as
598
    we do on our side.
599

600
    """
601
    assert self._ssl_params, "SSL not initialized"
602

    
603
    return (self._ssl_cert.digest("sha1") == cert.digest("sha1") and
604
            self._ssl_cert.digest("md5") == cert.digest("md5"))
605

    
606

    
607
class HttpMessage(object):
608
  """Data structure for HTTP message.
609

610
  """
611
  def __init__(self):
612
    self.start_line = None
613
    self.headers = None
614
    self.body = None
615
    self.decoded_body = None
616

    
617

    
618
class HttpClientToServerStartLine(object):
619
  """Data structure for HTTP request start line.
620

621
  """
622
  def __init__(self, method, path, version):
623
    self.method = method
624
    self.path = path
625
    self.version = version
626

    
627
  def __str__(self):
628
    return "%s %s %s" % (self.method, self.path, self.version)
629

    
630

    
631
class HttpServerToClientStartLine(object):
632
  """Data structure for HTTP response start line.
633

634
  """
635
  def __init__(self, version, code, reason):
636
    self.version = version
637
    self.code = code
638
    self.reason = reason
639

    
640
  def __str__(self):
641
    return "%s %s %s" % (self.version, self.code, self.reason)
642

    
643

    
644
class HttpMessageWriter(object):
645
  """Writes an HTTP message to a socket.
646

647
  """
648
  def __init__(self, sock, msg, write_timeout):
649
    """Initializes this class and writes an HTTP message to a socket.
650

651
    @type sock: socket
652
    @param sock: Socket to be written to
653
    @type msg: http.HttpMessage
654
    @param msg: HTTP message to be written
655
    @type write_timeout: float
656
    @param write_timeout: Write timeout for socket
657

658
    """
659
    self._msg = msg
660

    
661
    self._PrepareMessage()
662

    
663
    buf = self._FormatMessage()
664

    
665
    pos = 0
666
    end = len(buf)
667
    while pos < end:
668
      # Send only SOCK_BUF_SIZE bytes at a time
669
      data = buf[pos:(pos + SOCK_BUF_SIZE)]
670

    
671
      sent = SocketOperation(sock, SOCKOP_SEND, data, write_timeout)
672

    
673
      # Remove sent bytes
674
      pos += sent
675

    
676
    assert pos == end, "Message wasn't sent completely"
677

    
678
  def _PrepareMessage(self):
679
    """Prepares the HTTP message by setting mandatory headers.
680

681
    """
682
    # RFC2616, section 4.3: "The presence of a message-body in a request is
683
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
684
    # field in the request's message-headers."
685
    if self._msg.body:
686
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(self._msg.body)
687

    
688
  def _FormatMessage(self):
689
    """Serializes the HTTP message into a string.
690

691
    """
692
    buf = StringIO()
693

    
694
    # Add start line
695
    buf.write(str(self._msg.start_line))
696
    buf.write("\r\n")
697

    
698
    # Add headers
699
    if self._msg.start_line.version != HTTP_0_9:
700
      for name, value in self._msg.headers.iteritems():
701
        buf.write("%s: %s\r\n" % (name, value))
702

    
703
    buf.write("\r\n")
704

    
705
    # Add message body if needed
706
    if self.HasMessageBody():
707
      buf.write(self._msg.body)
708

    
709
    elif self._msg.body:
710
      logging.warning("Ignoring message body")
711

    
712
    return buf.getvalue()
713

    
714
  def HasMessageBody(self):
715
    """Checks whether the HTTP message contains a body.
716

717
    Can be overriden by subclasses.
718

719
    """
720
    return bool(self._msg.body)
721

    
722

    
723
class HttpMessageReader(object):
724
  """Reads HTTP message from socket.
725

726
  """
727
  # Length limits
728
  START_LINE_LENGTH_MAX = None
729
  HEADER_LENGTH_MAX = None
730

    
731
  # Parser state machine
732
  PS_START_LINE = "start-line"
733
  PS_HEADERS = "headers"
734
  PS_BODY = "entity-body"
735
  PS_COMPLETE = "complete"
736

    
737
  def __init__(self, sock, msg, read_timeout):
738
    """Reads an HTTP message from a socket.
739

740
    @type sock: socket
741
    @param sock: Socket to be read from
742
    @type msg: http.HttpMessage
743
    @param msg: Object for the read message
744
    @type read_timeout: float
745
    @param read_timeout: Read timeout for socket
746

747
    """
748
    self.sock = sock
749
    self.msg = msg
750

    
751
    self.start_line_buffer = None
752
    self.header_buffer = StringIO()
753
    self.body_buffer = StringIO()
754
    self.parser_status = self.PS_START_LINE
755
    self.content_length = None
756
    self.peer_will_close = None
757

    
758
    buf = ""
759
    eof = False
760
    while self.parser_status != self.PS_COMPLETE:
761
      # TODO: Don't read more than necessary (Content-Length), otherwise
762
      # data might be lost and/or an error could occur
763
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)
764

    
765
      if data:
766
        buf += data
767
      else:
768
        eof = True
769

    
770
      # Do some parsing and error checking while more data arrives
771
      buf = self._ContinueParsing(buf, eof)
772

    
773
      # Must be done only after the buffer has been evaluated
774
      # TODO: Connection-length < len(data read) and connection closed
775
      if (eof and
776
          self.parser_status in (self.PS_START_LINE,
777
                                 self.PS_HEADERS)):
778
        raise HttpError("Connection closed prematurely")
779

    
780
    # Parse rest
781
    buf = self._ContinueParsing(buf, True)
782

    
783
    assert self.parser_status == self.PS_COMPLETE
784
    assert not buf, "Parser didn't read full response"
785

    
786
    msg.body = self.body_buffer.getvalue()
787

    
788
    # TODO: Content-type, error handling
789
    if msg.body:
790
      msg.decoded_body = HttpJsonConverter().Decode(msg.body)
791
    else:
792
      msg.decoded_body = None
793

    
794
    if msg.decoded_body:
795
      logging.debug("Message body: %s", msg.decoded_body)
796

    
797
  def _ContinueParsing(self, buf, eof):
798
    """Main function for HTTP message state machine.
799

800
    @type buf: string
801
    @param buf: Receive buffer
802
    @type eof: bool
803
    @param eof: Whether we've reached EOF on the socket
804
    @rtype: string
805
    @return: Updated receive buffer
806

807
    """
808
    # TODO: Use offset instead of slicing when possible
809
    if self.parser_status == self.PS_START_LINE:
810
      # Expect start line
811
      while True:
812
        idx = buf.find("\r\n")
813

    
814
        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
815
        # ignore any empty line(s) received where a Request-Line is expected.
816
        # In other words, if the server is reading the protocol stream at the
817
        # beginning of a message and receives a CRLF first, it should ignore
818
        # the CRLF."
819
        if idx == 0:
820
          # TODO: Limit number of CRLFs/empty lines for safety?
821
          buf = buf[:2]
822
          continue
823

    
824
        if idx > 0:
825
          self.start_line_buffer = buf[:idx]
826

    
827
          self._CheckStartLineLength(len(self.start_line_buffer))
828

    
829
          # Remove status line, including CRLF
830
          buf = buf[idx + 2:]
831

    
832
          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)
833

    
834
          self.parser_status = self.PS_HEADERS
835
        else:
836
          # Check whether incoming data is getting too large, otherwise we just
837
          # fill our read buffer.
838
          self._CheckStartLineLength(len(buf))
839

    
840
        break
841

    
842
    # TODO: Handle messages without headers
843
    if self.parser_status == self.PS_HEADERS:
844
      # Wait for header end
845
      idx = buf.find("\r\n\r\n")
846
      if idx >= 0:
847
        self.header_buffer.write(buf[:idx + 2])
848

    
849
        self._CheckHeaderLength(self.header_buffer.tell())
850

    
851
        # Remove headers, including CRLF
852
        buf = buf[idx + 4:]
853

    
854
        self._ParseHeaders()
855

    
856
        self.parser_status = self.PS_BODY
857
      else:
858
        # Check whether incoming data is getting too large, otherwise we just
859
        # fill our read buffer.
860
        self._CheckHeaderLength(len(buf))
861

    
862
    if self.parser_status == self.PS_BODY:
863
      # TODO: Implement max size for body_buffer
864
      self.body_buffer.write(buf)
865
      buf = ""
866

    
867
      # Check whether we've read everything
868
      #
869
      # RFC2616, section 4.4: "When a message-body is included with a message,
870
      # the transfer-length of that body is determined by one of the following
871
      # [...] 5. By the server closing the connection. (Closing the connection
872
      # cannot be used to indicate the end of a request body, since that would
873
      # leave no possibility for the server to send back a response.)"
874
      #
875
      # TODO: Error when buffer length > Content-Length header
876
      if (eof or
877
          self.content_length is None or
878
          (self.content_length is not None and
879
           self.body_buffer.tell() >= self.content_length)):
880
        self.parser_status = self.PS_COMPLETE
881

    
882
    return buf
883

    
884
  def _CheckStartLineLength(self, length):
885
    """Limits the start line buffer size.
886

887
    @type length: int
888
    @param length: Buffer size
889

890
    """
891
    if (self.START_LINE_LENGTH_MAX is not None and
892
        length > self.START_LINE_LENGTH_MAX):
893
      raise HttpError("Start line longer than %d chars" %
894
                       self.START_LINE_LENGTH_MAX)
895

    
896
  def _CheckHeaderLength(self, length):
897
    """Limits the header buffer size.
898

899
    @type length: int
900
    @param length: Buffer size
901

902
    """
903
    if (self.HEADER_LENGTH_MAX is not None and
904
        length > self.HEADER_LENGTH_MAX):
905
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)
906

    
907
  def ParseStartLine(self, start_line):
908
    """Parses the start line of a message.
909

910
    Must be overriden by subclass.
911

912
    @type start_line: string
913
    @param start_line: Start line string
914

915
    """
916
    raise NotImplementedError()
917

    
918
  def _WillPeerCloseConnection(self):
919
    """Evaluate whether peer will close the connection.
920

921
    @rtype: bool
922
    @return: Whether peer will close the connection
923

924
    """
925
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
926
    # for the sender to signal that the connection will be closed after
927
    # completion of the response. For example,
928
    #
929
    #        Connection: close
930
    #
931
    # in either the request or the response header fields indicates that the
932
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
933
    # current request/response is complete."
934

    
935
    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
936
    if hdr_connection:
937
      hdr_connection = hdr_connection.lower()
938

    
939
    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
940
    if self.msg.start_line.version == HTTP_1_1:
941
      return (hdr_connection and "close" in hdr_connection)
942

    
943
    # Some HTTP/1.0 implementations have support for persistent connections,
944
    # using rules different than HTTP/1.1.
945

    
946
    # For older HTTP, Keep-Alive indicates persistent connection.
947
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
948
      return False
949

    
950
    # At least Akamai returns a "Connection: Keep-Alive" header, which was
951
    # supposed to be sent by the client.
952
    if hdr_connection and "keep-alive" in hdr_connection:
953
      return False
954

    
955
    return True
956

    
957
  def _ParseHeaders(self):
958
    """Parses the headers.
959

960
    This function also adjusts internal variables based on header values.
961

962
    RFC2616, section 4.3: "The presence of a message-body in a request is
963
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
964
    field in the request's message-headers."
965

966
    """
967
    # Parse headers
968
    self.header_buffer.seek(0, 0)
969
    self.msg.headers = mimetools.Message(self.header_buffer, 0)
970

    
971
    self.peer_will_close = self._WillPeerCloseConnection()
972

    
973
    # Do we have a Content-Length header?
974
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
975
    if hdr_content_length:
976
      try:
977
        self.content_length = int(hdr_content_length)
978
      except ValueError:
979
        self.content_length = None
980
      if self.content_length is not None and self.content_length < 0:
981
        self.content_length = None
982

    
983
    # if the connection remains open and a content-length was not provided,
984
    # then assume that the connection WILL close.
985
    if self.content_length is None:
986
      self.peer_will_close = True