Use URL-encoding/decoding for all headers.
author Antony Chazapis <chazapis@gmail.com>
Fri, 2 Dec 2011 10:55:06 +0000 (12:55 +0200)
committer Antony Chazapis <chazapis@gmail.com>
Fri, 2 Dec 2011 10:55:06 +0000 (12:55 +0200)
Refs #1511

docs/source/devguide.rst
pithos/api/functions.py
pithos/api/util.py

diff --git a/docs/source/devguide.rst b/docs/source/devguide.rst
index 55dbc1f..b94ecca 100644 (file)
@@ -27,6 +27,7 @@ Revision                   Description
 =========================  ================================
 0.8 (Dec 2, 2011)          Update allowed versioning values.
 \                          Change policy/meta formatting in JSON/XML replies.
+\                          Document that all non-ASCII characters in headers should be URL-encoded.
 0.7 (Nov 21, 2011)         Suggest upload/download methods using hashmaps.
 \                          Propose syncing algorithm.
 \                          Support cross-account object copy and move.
@@ -1074,6 +1075,7 @@ List of differences from the OOS API:
 
 Clarifications/suggestions:
 
+* All non-ASCII characters in headers should be URL-encoded.
 * Authentication is done by another system. The token is used in the same way, but it is obtained differently. The top level ``GET`` request is kept compatible with the OOS API and allows for guest/testing operations.
 * Some processing is done in the variable part of all ``X-*-Meta-*`` headers. If it includes underscores, they will be converted to dashes and the first letter of all intra-dash strings will be capitalized.
 * A ``GET`` reply for a level will include all headers of the corresponding ``HEAD`` request.
diff --git a/pithos/api/functions.py b/pithos/api/functions.py
index da27288..79d54ae 100644 (file)
@@ -39,7 +39,7 @@ from django.http import HttpResponse
 from django.template.loader import render_to_string
 from django.utils import simplejson as json
 from django.utils.http import parse_etags
-from django.utils.encoding import smart_unicode, smart_str
+from django.utils.encoding import smart_str
 from xml.dom import minidom
 
 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound, Conflict,
@@ -759,12 +759,12 @@ def object_write(request, v_account, v_container, v_object):
             meta = {}
         validate_matching_preconditions(request, meta)
     
-    copy_from = smart_unicode(request.META.get('HTTP_X_COPY_FROM'), strings_only=True)
-    move_from = smart_unicode(request.META.get('HTTP_X_MOVE_FROM'), strings_only=True)
+    copy_from = request.META.get('HTTP_X_COPY_FROM')
+    move_from = request.META.get('HTTP_X_MOVE_FROM')
     if copy_from or move_from:
         content_length = get_content_length(request) # Required by the API.
         
-        src_account = smart_unicode(request.META.get('HTTP_X_SOURCE_ACCOUNT'), strings_only=True)
+        src_account = request.META.get('HTTP_X_SOURCE_ACCOUNT')
         if not src_account:
             src_account = request.user_uniq
         if move_from:
@@ -912,10 +912,10 @@ def object_copy(request, v_account, v_container, v_object):
     #                       forbidden (403),
     #                       badRequest (400)
     
-    dest_account = smart_unicode(request.META.get('HTTP_DESTINATION_ACCOUNT'), strings_only=True)
+    dest_account = request.META.get('HTTP_DESTINATION_ACCOUNT')
     if not dest_account:
         dest_account = request.user_uniq
-    dest_path = smart_unicode(request.META.get('HTTP_DESTINATION'), strings_only=True)
+    dest_path = request.META.get('HTTP_DESTINATION')
     if not dest_path:
         raise BadRequest('Missing Destination header')
     try:
@@ -949,10 +949,10 @@ def object_move(request, v_account, v_container, v_object):
     #                       forbidden (403),
     #                       badRequest (400)
     
-    dest_account = smart_unicode(request.META.get('HTTP_DESTINATION_ACCOUNT'), strings_only=True)
+    dest_account = request.META.get('HTTP_DESTINATION_ACCOUNT')
     if not dest_account:
         dest_account = request.user_uniq
-    dest_path = smart_unicode(request.META.get('HTTP_DESTINATION'), strings_only=True)
+    dest_path = request.META.get('HTTP_DESTINATION')
     if not dest_path:
         raise BadRequest('Missing Destination header')
     try:
@@ -1073,12 +1073,10 @@ def object_update(request, v_account, v_container, v_object):
     elif offset > size:
         raise RangeNotSatisfiable('Supplied offset is beyond object limits')
     if src_object:
-        src_account = smart_unicode(request.META.get('HTTP_X_SOURCE_ACCOUNT'), strings_only=True)
+        src_account = request.META.get('HTTP_X_SOURCE_ACCOUNT')
         if not src_account:
             src_account = request.user_uniq
         src_container, src_name = split_container_object_string(src_object)
-        src_container = smart_unicode(src_container, strings_only=True)
-        src_name = smart_unicode(src_name, strings_only=True)
         src_version = request.META.get('HTTP_X_SOURCE_VERSION')
         try:
             src_size, src_hashmap = request.backend.get_object_hashmap(request.user_uniq,
diff --git a/pithos/api/util.py b/pithos/api/util.py
index 9a56b54..9d81530 100644 (file)
@@ -762,17 +762,23 @@ def hashmap_hash(request, hashmap):
 
 def update_request_headers(request):
     # Handle URL-encoded keys and values.
-    meta = request.META
-    for k, v in meta.copy().iteritems():
-        if ((k.startswith('HTTP_X_ACCOUNT_META_') or k.startswith('HTTP_X_ACCOUNT_GROUP_') or
-             k.startswith('HTTP_X_CONTAINER_META_') or k.startswith('HTTP_X_OBJECT_META_') or
-             k in ('HTTP_X_OBJECT_MANIFEST', 'HTTP_X_OBJECT_SHARING',
-                   'HTTP_X_COPY_FROM', 'HTTP_X_MOVE_FROM',
-                   'HTTP_X_SOURCE_ACCOUNT', 'HTTP_X_SOURCE_OBJECT',
-                   'HTTP_DESTINATION_ACCOUNT', 'HTTP_DESTINATION')) and
-            ('%' in k or '%' in v)):
-            del(meta[k])
-            meta[unquote(k)] = unquote(v)
+    # Handle URL-encoded keys and values.
+    meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
+    if len(meta) > 90:
+        raise BadRequest('Too many headers.')
+    for k, v in meta.iteritems():
+        if len(k) > 128:
+            raise BadRequest('Header name too large.')
+        if len(v) > 256:
+            raise BadRequest('Header value too large.')
+        try:
+            k.decode('ascii')
+            v.decode('ascii')
+        except UnicodeDecodeError:
+            raise BadRequest('Bad character in headers.')
+        if '%' in k or '%' in v:
+            del(request.META[k])
+            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
 
 def update_response_headers(request, response):
     if request.serialization == 'xml':
@@ -790,11 +796,10 @@ def update_response_headers(request, response):
     # URL-encode unicode in headers.
     meta = response.items()
     for k, v in meta:
-        if (k.startswith('X-Account-Meta-') or k.startswith('X-Account-Group-') or
-            k.startswith('X-Container-Meta-') or k.startswith('X-Object-Meta-') or
-            k in ('X-Container-Object-Meta', 'X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By')):
+        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
+            k.startswith('X-Object-') or k.startswith('Content-')):
             del(response[k])
-            response[quote(k)] = quote(v, safe='/=,:@')
+            response[quote(k)] = quote(v, safe='/=,:@; ')
     
     if settings.TEST:
         response['Date'] = format_date_time(time())
@@ -853,14 +858,6 @@ def api_method(http_method=None, format_allowed=False, user_required=True):
                 
                 # Format and check headers.
                 update_request_headers(request)
-                meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
-                if len(meta) > 90:
-                    raise BadRequest('Too many headers.')
-                for k, v in meta.iteritems():
-                    if len(k) > 128:
-                        raise BadRequest('Header name too large.')
-                    if len(v) > 256:
-                        raise BadRequest('Header value too large.')
                 
                 # Fill in custom request variables.
                 request.serialization = request_serialization(request, format_allowed)