Statistics
| Branch: | Tag: | Revision:

root / lib / http / __init__.py @ 231db3a5

History | View | Annotate | Download (27.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""HTTP module.
22

23
"""
24

    
25
import logging
26
import mimetools
27
import OpenSSL
28
import select
29
import socket
30
import errno
31

    
32
from cStringIO import StringIO
33

    
34
from ganeti import constants
35
from ganeti import serializer
36
from ganeti import utils
37

    
38

    
39
# Identification string containing the Ganeti release version
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION

# Status codes
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# Protocol version strings
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# Request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# Header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Error text compared against the arguments of OpenSSL's SysCallError
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Socket operations
SOCKOP_SEND, SOCKOP_RECV, SOCKOP_SHUTDOWN, SOCKOP_HANDSHAKE = range(4)

# send/receive quantum
SOCK_BUF_SIZE = 32768
79

    
80

    
81
class HttpError(Exception):
  """Generic error raised by the HTTP code itself.

  Meant purely for reporting errors internally.

  """
87

    
88

    
89
class HttpConnectionClosed(Exception):
  """Signals internally that the connection has been closed.

  Meant purely for reporting errors internally, and only as a last
  resort when the condition can't be reported in any other way.

  """
96

    
97

    
98
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Signals internally that the SSL handshake failed.

  Meant purely for reporting errors internally.

  """
104

    
105

    
106
class HttpSocketTimeout(Exception):
  """Signals internally that a socket operation timed out.

  Meant purely for reporting errors internally.

  """
112

    
113

    
114
class HttpException(Exception):
  """Base class for exceptions which stand for an HTTP status code.

  Subclasses set C{code} to the numeric status code.

  @ivar message: Description passed by the raiser, if any
  @ivar headers: Headers passed by the raiser, if any

  """
  code = None
  message = None

  def __init__(self, message=None, headers=None):
    """Initializes this class.

    @param message: Optional description of the error
    @param headers: Optional headers; stored on the instance unchanged

    """
    # Hand the message to the base class so that str(), repr() and
    # tracebacks show it; without a message the argument tuple stays empty
    if message is None:
      Exception.__init__(self)
    else:
      Exception.__init__(self, message)

    self.message = message
    self.headers = headers
122

    
123

    
124
class HttpBadRequest(HttpException):
  """400 Bad Request

  Per RFC2616, section 10.4.1, the server could not make sense of the
  request because its syntax was malformed. The client should modify
  the request before sending it again.

  """
  code = 400
133

    
134

    
135
class HttpUnauthorized(HttpException):
  """401 Unauthorized

  Per RFC2616, section 10.4.2, the client has to authenticate itself
  for this request. A response with this status must carry a
  WWW-Authenticate header field (section 14.47) with a challenge
  applicable to the requested resource.

  """
  code = 401
145

    
146

    
147
class HttpForbidden(HttpException):
  """403 Forbidden

  Per RFC2616, section 10.4.4, the request was understood but the
  server refuses to carry it out. Authorization makes no difference
  and the request should not be repeated.

  """
  code = 403
156

    
157

    
158
class HttpNotFound(HttpException):
  """404 Not Found

  Per RFC2616, section 10.4.5, nothing matching the Request-URI was
  found on the server. Whether the condition is temporary or permanent
  is not indicated.

  """
  code = 404
167

    
168

    
169
class HttpMethodNotAllowed(HttpException):
  """405 Method Not Allowed

  Per RFC2616, section 10.4.6, the method given in the Request-Line
  may not be used on the resource named by the Request-URI. The
  response must contain an Allow header listing the methods which are
  valid for that resource.

  """
  code = 405
179

    
180

    
181
class HttpRequestTimeout(HttpException):
  """408 Request Timeout

  Per RFC2616, section 10.4.9, the client failed to send a request
  within the time the server was willing to wait. The unmodified
  request may be repeated at any later time.

  """
  code = 408
190

    
191

    
192
class HttpConflict(HttpException):
  """409 Conflict

  Per RFC2616, section 10.4.10, the request conflicts with the current
  state of the resource and could not be completed. The code is only
  allowed where the user can be expected to resolve the conflict and
  resubmit the request.

  """
  code = 409
202

    
203

    
204
class HttpGone(HttpException):
  """410 Gone

  Per RFC2616, section 10.4.11, the resource is no longer available at
  the server and no forwarding address is known. The condition is
  expected to be permanent.

  """
  code = 410
213

    
214

    
215
class HttpLengthRequired(HttpException):
  """411 Length Required

  Per RFC2616, section 10.4.12, the server insists on a defined
  Content-Length. The client may retry after adding a valid
  Content-Length header field giving the length of the request's
  message-body.

  """
  code = 411
225

    
226

    
227
class HttpPreconditionFailed(HttpException):
  """412 Precondition Failed

  Per RFC2616, section 10.4.13, a precondition given in one or more
  request-header fields evaluated to false when it was tested on the
  server.

  """
  code = 412
236

    
237

    
238
class HttpInternalServerError(HttpException):
  """500 Internal Server Error

  Per RFC2616, section 10.5.1, an unexpected condition kept the server
  from fulfilling the request.

  """
  code = 500
246

    
247

    
248
class HttpNotImplemented(HttpException):
  """501 Not Implemented

  Per RFC2616, section 10.5.2, the server lacks the functionality
  needed to fulfill the request.

  """
  code = 501
256

    
257

    
258
class HttpBadGateway(HttpException):
  """502 Bad Gateway

  Per RFC2616, section 10.5.3, the server acted as a gateway or proxy
  and got an invalid response from the upstream server it contacted
  while trying to fulfill the request.

  """
  code = 502
267

    
268

    
269
class HttpServiceUnavailable(HttpException):
  """503 Service Unavailable

  Per RFC2616, section 10.5.4, the server cannot handle the request
  right now because it is temporarily overloaded or under maintenance.

  """
  code = 503
277

    
278

    
279
class HttpGatewayTimeout(HttpException):
  """504 Gateway Timeout

  Per RFC2616, section 10.5.5, the server acted as a gateway or proxy
  and did not get a timely response from the upstream server named by
  the URI (e.g. HTTP, FTP, LDAP) or from some other auxiliary server
  (e.g. DNS) it needed to reach in order to complete the request.

  """
  code = 504
290

    
291

    
292
class HttpVersionNotSupported(HttpException):
  """505 HTTP Version Not Supported

  Per RFC2616, section 10.5.6, the server does not support, or refuses
  to support, the HTTP protocol version used in the request message.

  """
  code = 505
300

    
301

    
302
class HttpJsonConverter: # pylint: disable-msg=W0232
  """Converter between Python data and JSON text.

  Both directions are delegated to the serializer module.

  """
  CONTENT_TYPE = "application/json"

  @staticmethod
  def Encode(data):
    """Returns the result of C{serializer.DumpJson} for the given data.

    """
    return serializer.DumpJson(data)

  @staticmethod
  def Decode(data):
    """Returns the result of C{serializer.LoadJson} for the given data.

    """
    return serializer.LoadJson(data)
312

    
313

    
314
def WaitForSocketCondition(sock, event, timeout):
  """Blocks until the socket reports one of the awaited conditions.

  Besides the requested events, urgent data (POLLPRI) and the error
  conditions POLLNVAL, POLLHUP and POLLERR also end the wait.

  @type sock: socket
  @param sock: Socket to watch
  @type event: int
  @param event: ORed poll conditions to wait for (see select module)
  @type timeout: float or None
  @param timeout: Maximum time to wait in seconds
  @rtype: int or None
  @return: None if the timeout expired, otherwise the conditions which
      occurred on the socket

  """
  # Conditions which terminate the wait
  wanted = (select.POLLERR | select.POLLHUP | select.POLLNVAL |
            select.POLLPRI | event)

  # The poller object works with milliseconds
  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = timeout * 1000

  poller = select.poll()
  poller.register(sock, event)
  try:
    while True:
      # TODO: If the main thread receives a signal and we have no timeout, we
      # could wait forever. This should check a global "quit" flag or
      # something every so often.
      ready = poller.poll(poll_timeout)
      if not ready:
        # Nothing happened before the timeout expired
        return None

      for (_, mask) in ready:
        if mask & wanted:
          return mask
  finally:
    poller.unregister(sock)
350

    
351

    
352
def SocketOperation(sock, op, arg1, timeout):
  """Wrapper around socket functions.

  This function abstracts error handling for socket operations, especially
  for the complicated interaction with OpenSSL. Send, receive and handshake
  first wait for the socket to report the matching condition; whenever
  OpenSSL asks for more input or output, the operation is retried after
  polling for the condition it asked for.

  @type sock: socket
  @param sock: Socket for the operation
  @type op: int
  @param op: Operation to execute (SOCKOP_* constants)
  @type arg1: any
  @param arg1: Parameter for function (if needed): the data for send, the
      buffer size for receive, the "how" value for shutting down a non-SSL
      socket
  @type timeout: None or float
  @param timeout: Timeout in seconds or None; must be set for shutdown
  @return: Return value of socket function; an empty string for receive
      if the connection was closed; 0 if sending an empty string failed
      with an SSL syscall error; None for a handshake on a non-SSL socket
  @raise HttpSocketTimeout: If waiting for the socket timed out
  @raise HttpConnectionClosed: If the SSL connection was closed while
      sending
  @raise HttpSessionHandshakeUnexpectedEOF: If the peer closed the
      connection during the SSL handshake
  @raise socket.error: For all other socket and OpenSSL errors

  """
  # TODO: event_poll/event_check/override
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
    event_poll = select.POLLOUT

  elif op == SOCKOP_RECV:
    event_poll = select.POLLIN

  elif op == SOCKOP_SHUTDOWN:
    event_poll = None

    # The timeout is only used when OpenSSL requests polling for a condition.
    # It is not advisable to have no timeout for shutdown.
    assert timeout

  else:
    raise AssertionError("Invalid socket operation")

  # Handshake is only supported by SSL sockets
  if (op == SOCKOP_HANDSHAKE and
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
    return

  # No override by default
  event_override = 0

  while True:
    # Poll only for certain operations and when asked for by an override
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
      if event_override:
        wait_for_event = event_override
      else:
        wait_for_event = event_poll

      event = WaitForSocketCondition(sock, wait_for_event, timeout)
      if event is None:
        raise HttpSocketTimeout()

      # An error condition while receiving is reported like a closed
      # connection
      if (op == SOCKOP_RECV and
          event & (select.POLLNVAL | select.POLLHUP | select.POLLERR)):
        return ""

      # Something else than the awaited condition occurred, poll again
      if not event & wait_for_event:
        continue

    # Reset override
    event_override = 0

    # The inner handlers convert OpenSSL errors into socket.error, which
    # is why the socket.error handler has to wrap them
    try:
      try:
        if op == SOCKOP_SEND:
          return sock.send(arg1)

        elif op == SOCKOP_RECV:
          return sock.recv(arg1)

        elif op == SOCKOP_SHUTDOWN:
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
            # PyOpenSSL's shutdown() doesn't take arguments
            return sock.shutdown()
          else:
            return sock.shutdown(arg1)

        elif op == SOCKOP_HANDSHAKE:
          return sock.do_handshake()

      except OpenSSL.SSL.WantWriteError:
        # OpenSSL wants to write, poll for POLLOUT
        event_override = select.POLLOUT
        continue

      except OpenSSL.SSL.WantReadError:
        # OpenSSL wants to read, poll for POLLIN
        event_override = select.POLLIN | select.POLLPRI
        continue

      except OpenSSL.SSL.WantX509LookupError:
        # Retry the operation right away
        continue

      except OpenSSL.SSL.ZeroReturnError as err:
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
        # occurs if a closure alert has occurred in the protocol, i.e. the
        # connection has been closed cleanly. Note that this does not
        # necessarily mean that the transport layer (e.g. a socket) has been
        # closed.
        if op == SOCKOP_SEND:
          # Can happen during a renegotiation
          raise HttpConnectionClosed(err.args)
        elif op == SOCKOP_RECV:
          return ""

        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
        raise socket.error(err.args)

      except OpenSSL.SSL.SysCallError as err:
        if op == SOCKOP_SEND:
          # arg1 is the data when writing
          if err.args and err.args[0] == -1 and arg1 == "":
            # errors when writing empty strings are expected
            # and can be ignored
            return 0

        if err.args == (-1, _SSL_UNEXPECTED_EOF):
          if op == SOCKOP_RECV:
            return ""
          elif op == SOCKOP_HANDSHAKE:
            # Can happen if peer disconnects directly after the connection is
            # opened.
            raise HttpSessionHandshakeUnexpectedEOF(err.args)

        raise socket.error(err.args)

      except OpenSSL.SSL.Error as err:
        raise socket.error(err.args)

    except socket.error as err:
      if err.args and err.args[0] == errno.EAGAIN:
        # Ignore EAGAIN
        continue

      raise
489

    
490

    
491
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
  """Closes the connection.

  If the message reader says the peer will close the connection, and the
  shutdown is not forced, the function first waits for the peer to do so
  and returns without shutting down if that happened.

  @type sock: socket
  @param sock: Socket to be shut down
  @type close_timeout: float
  @param close_timeout: How long to wait for the peer to close
      the connection
  @type write_timeout: float
  @param write_timeout: Write timeout for shutdown
  @type msgreader: http.HttpMessageReader
  @param msgreader: Request message reader, used to determine whether
      peer should close connection
  @type force: bool
  @param force: Whether to forcibly close the connection without
      waiting for peer
  @raise HttpError: If the shutdown times out or fails with an error
      other than ENOTCONN

  """
  if msgreader and msgreader.peer_will_close and not force:
    # Wait for peer to close
    try:
      # Check whether it's actually closed; an empty read means it is
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
        return
    except (socket.error, HttpError, HttpSocketTimeout):
      # Ignore errors at this stage
      pass

  # Close the connection from our side
  try:
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
                    write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout while shutting down connection")
  except socket.error as err:
    # Ignore ENOTCONN
    if not (err.args and err.args[0] == errno.ENOTCONN):
      raise HttpError("Error while shutting down connection: %s" % err)
531

    
532

    
533
def Handshake(sock, write_timeout):
  """Shakes peer's hands.

  @type sock: socket
  @param sock: Socket on which the handshake is done
  @type write_timeout: float
  @param write_timeout: Write timeout for handshake
  @return: Return value of the handshake operation
  @raise HttpError: If the handshake times out or fails with a socket
      error

  """
  try:
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout during SSL handshake")
  except socket.error as err:
    raise HttpError("Error in SSL handshake: %s" % err)
548

    
549

    
550
def InitSsl():
  """Prepares the SSL infrastructure for use.

  Calling this function more than once is harmless.

  @raise EnvironmentError: If OpenSSL reports that its PRNG has not
      been seeded with enough entropy

  """
  prng_ready = OpenSSL.rand.status()
  if not prng_ready:
    raise EnvironmentError("OpenSSL could not collect enough entropy for"
                           " the PRNG")

  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
561

    
562

    
563
class HttpSslParams(object):
  """Holder for the PEM data of an SSL key and certificate.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Reads key and certificate from the given files.

    @type ssl_key_path: string
    @param ssl_key_path: Name of the file holding the SSL key in
        PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Name of the file holding the SSL certificate
        in PEM format

    """
    # Only the file contents are kept, not the paths
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)

  def GetKey(self):
    """Loads the stored PEM data as a private key.

    """
    crypto = OpenSSL.crypto
    return crypto.load_privatekey(crypto.FILETYPE_PEM, self.ssl_key_pem)

  def GetCertificate(self):
    """Loads the stored PEM data as a certificate.

    """
    crypto = OpenSSL.crypto
    return crypto.load_certificate(crypto.FILETYPE_PEM, self.ssl_cert_pem)
587

    
588

    
589
class HttpBase(object):
  """Common base of the HTTP server and client classes.

  """
  def __init__(self):
    # Nothing is known about SSL until a socket has been created
    self.using_ssl = None
    self._ssl_params = self._ssl_key = self._ssl_cert = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer):
    """Returns a new TCP socket, wrapped in SSL if parameters are given.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate; None disables SSL
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether the peer must present a certificate,
        which is then compared with our certificate
    @return: Plain socket without SSL, otherwise an OpenSSL connection
        wrapping the socket

    """
    self._ssl_params = ssl_params

    plain_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    # SSL is enabled by passing key and certificate
    use_ssl = ssl_params is not None
    self.using_ssl = use_ssl
    if not use_ssl:
      return plain_sock

    self._ssl_key = ssl_params.GetKey()
    self._ssl_cert = ssl_params.GetCertificate()

    ssl_mod = OpenSSL.SSL

    ctx = ssl_mod.Context(ssl_mod.SSLv23_METHOD)
    ctx.set_options(ssl_mod.OP_NO_SSLv2)

    ctx.use_privatekey(self._ssl_key)
    ctx.use_certificate(self._ssl_cert)
    ctx.check_privatekey()

    if ssl_verify_peer:
      verify_mode = (ssl_mod.VERIFY_PEER |
                     ssl_mod.VERIFY_FAIL_IF_NO_PEER_CERT)
      ctx.set_verify(verify_mode, self._SSLVerifyCallback)

    return ssl_mod.Connection(ctx, plain_sock)

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Checks the peer's certificate against our own.

    Only the SHA1 and MD5 fingerprints are compared, i.e. the peer has
    to use the very same certificate as this side.

    @return: Whether both fingerprints match

    """
    # some parameters are unused, but this is the API
    # pylint: disable-msg=W0613
    assert self._ssl_params, "SSL not initialized"

    own_cert = self._ssl_cert
    for algo in ("sha1", "md5"):
      if own_cert.digest(algo) != cert.digest(algo):
        return False

    return True
649

    
650

    
651
class HttpMessage(object):
  """Container for the parts of an HTTP message.

  """
  def __init__(self):
    # All parts are unset until somebody fills them in
    self.start_line = self.headers = None
    self.body = self.decoded_body = None
660

    
661

    
662
class HttpClientToServerStartLine(object):
  """Start line of an HTTP request (method, path and version).

  """
  def __init__(self, method, path, version):
    self.method = method
    self.path = path
    self.version = version

  def __str__(self):
    # Fields separated by single spaces, in the order they were given
    fields = (self.method, self.path, self.version)
    return "%s %s %s" % fields
673

    
674

    
675
class HttpServerToClientStartLine(object):
  """Start line of an HTTP response (version, code and reason).

  """
  def __init__(self, version, code, reason):
    self.version = version
    self.code = code
    self.reason = reason

  def __str__(self):
    # Fields separated by single spaces, in the order they were given
    fields = (self.version, self.code, self.reason)
    return "%s %s %s" % fields
686

    
687

    
688
class HttpMessageWriter(object):
  """Sends an HTTP message over a socket.

  """
  def __init__(self, sock, msg, write_timeout):
    """Prepares, serializes and sends the message.

    @type sock: socket
    @param sock: Socket the message is sent on
    @type msg: http.HttpMessage
    @param msg: Message to send
    @type write_timeout: float
    @param write_timeout: Timeout for each write to the socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)

    offset = 0
    while offset < total:
      # Hand at most SOCK_BUF_SIZE bytes to the socket per call
      chunk = payload[offset:(offset + SOCK_BUF_SIZE)]

      # Skip however many bytes were actually sent
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Sets the headers every message with a body must have.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    msg = self._msg
    if msg.body:
      msg.headers[HTTP_CONTENT_LENGTH] = len(msg.body)

  def _FormatMessage(self):
    """Builds the string which is sent over the wire.

    @rtype: string
    @return: Start line, headers and, if there is one, the body

    """
    msg = self._msg
    out = StringIO()

    # Start line comes first
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers are left out for HTTP/0.9
    if msg.start_line.version != HTTP_0_9:
      for (name, value) in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    out.write("\r\n")

    # The body is only written if the message may have one
    if self.HasMessageBody():
      out.write(msg.body)

    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Tells whether the message has a body to be sent.

    Subclasses may override this.

    """
    return bool(self._msg.body)
765

    
766

    
767
class HttpMessageReader(object):
  """Reads HTTP message from socket.

  The whole message is read and parsed by the constructor. Subclasses
  have to implement L{ParseStartLine} and can limit the accepted size of
  start line and headers via the C{*_LENGTH_MAX} class attributes.

  """
  # Length limits; None disables the respective check
  START_LINE_LENGTH_MAX = None
  HEADER_LENGTH_MAX = None

  # Parser state machine
  PS_START_LINE = "start-line"
  PS_HEADERS = "headers"
  PS_BODY = "entity-body"
  PS_COMPLETE = "complete"

  def __init__(self, sock, msg, read_timeout):
    """Reads an HTTP message from a socket.

    @type sock: socket
    @param sock: Socket to be read from
    @type msg: http.HttpMessage
    @param msg: Object for the read message
    @type read_timeout: float
    @param read_timeout: Read timeout for socket
    @raise HttpError: If the connection is closed before start line and
        headers were received, or if a length limit is exceeded

    """
    self.sock = sock
    self.msg = msg

    self.start_line_buffer = None
    self.header_buffer = StringIO()
    self.body_buffer = StringIO()
    self.parser_status = self.PS_START_LINE
    self.content_length = None
    self.peer_will_close = None

    buf = ""
    eof = False
    while self.parser_status != self.PS_COMPLETE:
      # TODO: Don't read more than necessary (Content-Length), otherwise
      # data might be lost and/or an error could occur
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)

      # An empty read means the connection was closed
      if data:
        buf += data
      else:
        eof = True

      # Do some parsing and error checking while more data arrives
      buf = self._ContinueParsing(buf, eof)

      # Must be done only after the buffer has been evaluated
      # TODO: Content-Length < len(data read) and connection closed
      if (eof and
          self.parser_status in (self.PS_START_LINE,
                                 self.PS_HEADERS)):
        raise HttpError("Connection closed prematurely")

    # Parse rest
    buf = self._ContinueParsing(buf, True)

    assert self.parser_status == self.PS_COMPLETE
    assert not buf, "Parser didn't read full response"

    msg.body = self.body_buffer.getvalue()

    # TODO: Content-type, error handling
    if msg.body:
      msg.decoded_body = HttpJsonConverter().Decode(msg.body)
    else:
      msg.decoded_body = None

    if msg.decoded_body:
      logging.debug("Message body: %s", msg.decoded_body)

  def _ContinueParsing(self, buf, eof):
    """Main function for HTTP message state machine.

    Consumes as much of the receive buffer as the current parser state
    allows and advances the state accordingly.

    @type buf: string
    @param buf: Receive buffer
    @type eof: bool
    @param eof: Whether we've reached EOF on the socket
    @rtype: string
    @return: Updated receive buffer
    @raise HttpError: If start line or headers exceed their length limit

    """
    # TODO: Use offset instead of slicing when possible
    if self.parser_status == self.PS_START_LINE:
      # Expect start line
      while True:
        idx = buf.find("\r\n")

        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
        # ignore any empty line(s) received where a Request-Line is expected.
        # In other words, if the server is reading the protocol stream at the
        # beginning of a message and receives a CRLF first, it should ignore
        # the CRLF."
        if idx == 0:
          # TODO: Limit number of CRLFs/empty lines for safety?
          # Drop the leading CRLF; keeping it would find it again forever
          buf = buf[2:]
          continue

        if idx > 0:
          self.start_line_buffer = buf[:idx]

          self._CheckStartLineLength(len(self.start_line_buffer))

          # Remove status line, including CRLF
          buf = buf[idx + 2:]

          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)

          self.parser_status = self.PS_HEADERS
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckStartLineLength(len(buf))

        break

    # TODO: Handle messages without headers
    if self.parser_status == self.PS_HEADERS:
      # Wait for header end
      idx = buf.find("\r\n\r\n")
      if idx >= 0:
        # Keep the CRLF terminating the last header line
        self.header_buffer.write(buf[:idx + 2])

        self._CheckHeaderLength(self.header_buffer.tell())

        # Remove headers, including CRLF
        buf = buf[idx + 4:]

        self._ParseHeaders()

        self.parser_status = self.PS_BODY
      else:
        # Check whether incoming data is getting too large, otherwise we just
        # fill our read buffer.
        self._CheckHeaderLength(len(buf))

    if self.parser_status == self.PS_BODY:
      # TODO: Implement max size for body_buffer
      self.body_buffer.write(buf)
      buf = ""

      # Check whether we've read everything
      #
      # RFC2616, section 4.4: "When a message-body is included with a message,
      # the transfer-length of that body is determined by one of the following
      # [...] 5. By the server closing the connection. (Closing the connection
      # cannot be used to indicate the end of a request body, since that would
      # leave no possibility for the server to send back a response.)"
      #
      # TODO: Error when buffer length > Content-Length header
      if (eof or
          self.content_length is None or
          self.body_buffer.tell() >= self.content_length):
        self.parser_status = self.PS_COMPLETE

    return buf

  def _CheckStartLineLength(self, length):
    """Limits the start line buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If a limit is set and the size exceeds it

    """
    if (self.START_LINE_LENGTH_MAX is not None and
        length > self.START_LINE_LENGTH_MAX):
      raise HttpError("Start line longer than %d chars" %
                       self.START_LINE_LENGTH_MAX)

  def _CheckHeaderLength(self, length):
    """Limits the header buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If a limit is set and the size exceeds it

    """
    if (self.HEADER_LENGTH_MAX is not None and
        length > self.HEADER_LENGTH_MAX):
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)

  def ParseStartLine(self, start_line):
    """Parses the start line of a message.

    Must be overridden by subclass. The return value is stored as the
    message's start line.

    @type start_line: string
    @param start_line: Start line string

    """
    raise NotImplementedError()

  def _WillPeerCloseConnection(self):
    """Evaluate whether peer will close the connection.

    @rtype: bool
    @return: Whether peer will close the connection

    """
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
    # for the sender to signal that the connection will be closed after
    # completion of the response. For example,
    #
    #        Connection: close
    #
    # in either the request or the response header fields indicates that the
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
    # current request/response is complete."

    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
    if hdr_connection:
      hdr_connection = hdr_connection.lower()

    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
    if self.msg.start_line.version == HTTP_1_1:
      # bool() keeps a missing header from leaking None to the caller
      return bool(hdr_connection and "close" in hdr_connection)

    # Some HTTP/1.0 implementations have support for persistent connections,
    # using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
      return False

    # At least Akamai returns a "Connection: Keep-Alive" header, which was
    # supposed to be sent by the client.
    if hdr_connection and "keep-alive" in hdr_connection:
      return False

    return True

  def _ParseHeaders(self):
    """Parses the headers.

    This function also adjusts internal variables based on header values.

    RFC2616, section 4.3: The presence of a message-body in a request is
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
    field in the request's message-headers.

    """
    # Parse headers
    self.header_buffer.seek(0, 0)
    self.msg.headers = mimetools.Message(self.header_buffer, 0)

    self.peer_will_close = self._WillPeerCloseConnection()

    # Do we have a Content-Length header?
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
    if hdr_content_length:
      # Values which are not a number or are negative are treated like a
      # missing header
      try:
        self.content_length = int(hdr_content_length)
      except ValueError:
        self.content_length = None
      if self.content_length is not None and self.content_length < 0:
        self.content_length = None

    # if the connection remains open and a content-length was not provided,
    # then assume that the connection WILL close.
    if self.content_length is None:
      self.peer_will_close = True