Restrict header count and size. URL-encode/decode specific headers that may contain...
[pithos] / pithos / api / util.py
index 1420714..223ffeb 100644 (file)
@@ -37,6 +37,7 @@ from traceback import format_exc
 from wsgiref.handlers import format_date_time
 from binascii import hexlify, unhexlify
 from datetime import datetime, tzinfo, timedelta
+from urllib import quote, unquote
 
 from django.conf import settings
 from django.http import HttpResponse
@@ -758,6 +759,20 @@ def hashmap_hash(request, hashmap):
         h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
     return hexlify(h[0])
 
+def update_request_headers(request):
+    # Handle URL-encoded keys and values.
+    meta = request.META
+    for k, v in meta.copy().iteritems():
+        if ((k.startswith('HTTP_X_ACCOUNT_META_') or k.startswith('HTTP_X_ACCOUNT_GROUP_') or
+             k.startswith('HTTP_X_CONTAINER_META_') or k.startswith('HTTP_X_OBJECT_META_') or
+             k in ('HTTP_X_OBJECT_MANIFEST', 'HTTP_X_OBJECT_SHARING',
+                   'HTTP_X_COPY_FROM', 'HTTP_X_MOVE_FROM',
+                   'HTTP_X_SOURCE_ACCOUNT', 'HTTP_X_SOURCE_OBJECT',
+                   'HTTP_DESTINATION_ACCOUNT', 'HTTP_DESTINATION')) and
+            ('%' in k or '%' in v)):
+            del(meta[k])
+            meta[unquote(k)] = unquote(v)
+
 def update_response_headers(request, response):
     if request.serialization == 'xml':
         response['Content-Type'] = 'application/xml; charset=UTF-8'
@@ -766,9 +781,20 @@ def update_response_headers(request, response):
     elif not response['Content-Type']:
         response['Content-Type'] = 'text/plain; charset=UTF-8'
     
-    if not response.has_header('Content-Length') and not (response.has_header('Content-Type') and response['Content-Type'].startswith('multipart/byteranges')):
+    if (not response.has_header('Content-Length') and
+        not (response.has_header('Content-Type') and
+             response['Content-Type'].startswith('multipart/byteranges'))):
         response['Content-Length'] = len(response.content)
     
+    # URL-encode unicode in headers.
+    meta = response.items()
+    for k, v in meta:
+        if (k.startswith('X-Account-Meta-') or k.startswith('X-Account-Group-') or
+            k.startswith('X-Container-Meta-') or k.startswith('X-Object-Meta-') or
+            k in ('X-Container-Object-Meta', 'X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By')):
+            del(response[k])
+            response[quote(k)] = quote(v)
+    
     if settings.TEST:
         response['Date'] = format_date_time(time())
 
@@ -824,6 +850,17 @@ def api_method(http_method=None, format_allowed=False, user_required=True):
                 if len(args) > 2 and len(args[2]) > 1024:
                     raise BadRequest('Object name too large.')
                 
+                # Format and check headers.
+                update_request_headers(request)
+                meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
+                if len(meta) > 90:
+                    raise BadRequest('Too many headers.')
+                for k, v in meta.iteritems():
+                    if len(k) > 128:
+                        raise BadRequest('Header name too large.')
+                    if len(v) > 256:
+                        raise BadRequest('Header value too large.')
+                
                 # Fill in custom request variables.
                 request.serialization = request_serialization(request, format_allowed)
                 request.backend = connect_backend()