Unified query for listing objects with properties.
author Antony Chazapis <chazapis@gmail.com>
Thu, 16 Feb 2012 00:54:07 +0000 (02:54 +0200)
committer Antony Chazapis <chazapis@gmail.com>
Thu, 16 Feb 2012 00:54:07 +0000 (02:54 +0200)
Refs #1948

pithos/api/functions.py
pithos/api/util.py
pithos/backends/base.py
pithos/backends/lib/sqlalchemy/node.py
pithos/backends/lib/sqlite/node.py
pithos/backends/modular.py

index 693abb3..8c94921 100644 (file)
@@ -326,7 +326,7 @@ def container_meta(request, v_account, v_container):
     try:
         meta = request.backend.get_container_meta(request.user_uniq, v_account,
                                                     v_container, 'pithos', until)
-        meta['object_meta'] = request.backend.list_object_meta(request.user_uniq,
+        meta['object_meta'] = request.backend.list_container_meta(request.user_uniq,
                                                 v_account, v_container, 'pithos', until)
         policy = request.backend.get_container_policy(request.user_uniq, v_account,
                                                         v_container)
@@ -463,7 +463,7 @@ def object_list(request, v_account, v_container):
     try:
         meta = request.backend.get_container_meta(request.user_uniq, v_account,
                                                     v_container, 'pithos', until)
-        meta['object_meta'] = request.backend.list_object_meta(request.user_uniq,
+        meta['object_meta'] = request.backend.list_container_meta(request.user_uniq,
                                                 v_account, v_container, 'pithos', until)
         policy = request.backend.get_container_policy(request.user_uniq, v_account,
                                                         v_container)
@@ -515,16 +515,16 @@ def object_list(request, v_account, v_container):
     if 'shared' in request.GET:
         shared = True
     
-    try:
-        objects = request.backend.list_objects(request.user_uniq, v_account,
-                                    v_container, prefix, delimiter, marker,
-                                    limit, virtual, 'pithos', keys, shared, until)
-    except NotAllowedError:
-        raise Forbidden('Not allowed')
-    except NameError:
-        raise ItemNotFound('Container does not exist')
-    
     if request.serialization == 'text':
+        try:
+            objects = request.backend.list_objects(request.user_uniq, v_account,
+                                        v_container, prefix, delimiter, marker,
+                                        limit, virtual, 'pithos', keys, shared, until)
+        except NotAllowedError:
+            raise Forbidden('Not allowed')
+        except NameError:
+            raise ItemNotFound('Container does not exist')
+        
         if len(objects) == 0:
             # The cloudfiles python bindings expect 200 if json/xml.
             response.status_code = 204
@@ -533,44 +533,79 @@ def object_list(request, v_account, v_container):
         response.content = '\n'.join([x[0] for x in objects]) + '\n'
         return response
     
+    try:
+        objects = request.backend.list_object_meta(request.user_uniq, v_account,
+                                    v_container, prefix, delimiter, marker,
+                                    limit, virtual, 'pithos', keys, shared, until)
+    except NotAllowedError:
+        raise Forbidden('Not allowed')
+    except NameError:
+        raise ItemNotFound('Container does not exist')
+    
+#     object_meta = []
+#     for x in objects:
+#         if x[1] is None:
+#             # Virtual objects/directories.
+#             object_meta.append({'subdir': x[0]})
+#         else:
+#             try:
+#                 meta = request.backend.get_object_meta(request.user_uniq, v_account,
+#                                                         v_container, x[0], 'pithos', x[1])
+#                 if until is None:
+#                     permissions = request.backend.get_object_permissions(
+#                                     request.user_uniq, v_account, v_container, x[0])
+#                     public = request.backend.get_object_public(request.user_uniq,
+#                                                 v_account, v_container, x[0])
+#                 else:
+#                     permissions = None
+#                     public = None
+#             except NotAllowedError:
+#                 raise Forbidden('Not allowed')
+#             except NameError:
+#                 pass
+#             else:
+#                 rename_meta_key(meta, 'hash', 'x_object_hash') # Will be replaced by checksum.
+#                 rename_meta_key(meta, 'checksum', 'hash')
+#                 rename_meta_key(meta, 'type', 'content_type')
+#                 rename_meta_key(meta, 'uuid', 'x_object_uuid')
+#                 rename_meta_key(meta, 'modified', 'last_modified')
+#                 rename_meta_key(meta, 'modified_by', 'x_object_modified_by')
+#                 rename_meta_key(meta, 'version', 'x_object_version')
+#                 rename_meta_key(meta, 'version_timestamp', 'x_object_version_timestamp')
+#                 m = dict([(k[14:], v) for k, v in meta.iteritems() if k.startswith('X-Object-Meta-')])
+#                 for k in m:
+#                     del(meta['X-Object-Meta-' + k])
+#                 if m:
+#                     meta['X-Object-Meta'] = printable_header_dict(m)
+#                 update_sharing_meta(request, permissions, v_account, v_container, x[0], meta)
+#                 update_public_meta(public, meta)
+#                 object_meta.append(printable_header_dict(meta))
+#     if request.serialization == 'xml':
+#         data = render_to_string('objects.xml', {'container': v_container, 'objects': object_meta})
+#     elif request.serialization  == 'json':
+#         data = json.dumps(object_meta, default=json_encode_decimal)
+#     response.status_code = 200
+#     response.content = data
+#     return response
+    
     object_meta = []
-    for x in objects:
-        if x[1] is None:
+    for meta in objects:
+        if len(meta) == 1:
             # Virtual objects/directories.
-            object_meta.append({'subdir': x[0]})
+            object_meta.append(meta)
         else:
-            try:
-                meta = request.backend.get_object_meta(request.user_uniq, v_account,
-                                                        v_container, x[0], 'pithos', x[1])
-                if until is None:
-                    permissions = request.backend.get_object_permissions(
-                                    request.user_uniq, v_account, v_container, x[0])
-                    public = request.backend.get_object_public(request.user_uniq,
-                                                v_account, v_container, x[0])
-                else:
-                    permissions = None
-                    public = None
-            except NotAllowedError:
-                raise Forbidden('Not allowed')
-            except NameError:
-                pass
+            rename_meta_key(meta, 'hash', 'x_object_hash') # Will be replaced by checksum.
+            rename_meta_key(meta, 'checksum', 'hash')
+            rename_meta_key(meta, 'type', 'content_type')
+            rename_meta_key(meta, 'uuid', 'x_object_uuid')
+            if until is not None and 'modified' in meta:
+                del(meta['modified'])
             else:
-                rename_meta_key(meta, 'hash', 'x_object_hash') # Will be replaced by checksum.
-                rename_meta_key(meta, 'checksum', 'hash')
-                rename_meta_key(meta, 'type', 'content_type')
-                rename_meta_key(meta, 'uuid', 'x_object_uuid')
                 rename_meta_key(meta, 'modified', 'last_modified')
-                rename_meta_key(meta, 'modified_by', 'x_object_modified_by')
-                rename_meta_key(meta, 'version', 'x_object_version')
-                rename_meta_key(meta, 'version_timestamp', 'x_object_version_timestamp')
-                m = dict([(k[14:], v) for k, v in meta.iteritems() if k.startswith('X-Object-Meta-')])
-                for k in m:
-                    del(meta['X-Object-Meta-' + k])
-                if m:
-                    meta['X-Object-Meta'] = printable_header_dict(m)
-                update_sharing_meta(request, permissions, v_account, v_container, x[0], meta)
-                update_public_meta(public, meta)
-                object_meta.append(printable_header_dict(meta))
+            rename_meta_key(meta, 'modified_by', 'x_object_modified_by')
+            rename_meta_key(meta, 'version', 'x_object_version')
+            rename_meta_key(meta, 'version_timestamp', 'x_object_version_timestamp')
+            object_meta.append(printable_header_dict(meta))
     if request.serialization == 'xml':
         data = render_to_string('objects.xml', {'container': v_container, 'objects': object_meta})
     elif request.serialization  == 'json':
index 64d4fb3..a0b432b 100644 (file)
@@ -104,7 +104,7 @@ def printable_header_dict(d):
     Format 'last_modified' timestamp.
     """
     
-    if 'last_modified' in d:
+    if 'last_modified' in d and d['last_modified']:
         d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
 
index a882e16..2f4119c 100644 (file)
@@ -185,6 +185,16 @@ class BaseBackend(object):
         """
         return []
     
+    def list_container_meta(self, user, account, container, domain, until=None):
+        """Return a list with all the container's object meta keys for the domain.
+        
+        Raises:
+            NotAllowedError: Operation not permitted
+            
+            NameError: Container does not exist
+        """
+        return []
+    
     def get_container_meta(self, user, account, container, domain, until=None):
         """Return a dictionary with the container metadata for the domain.
         
@@ -310,13 +320,11 @@ class BaseBackend(object):
         """
         return []
     
-    def list_object_meta(self, user, account, container, domain, until=None):
-        """Return a list with all the container's object meta keys for the domain.
+    def list_object_meta(self, user, account, container, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, domain=None, keys=[], shared=False, until=None, size_range=None):
+        """Return a list of object metadata dicts existing under a container.
         
-        Raises:
-            NotAllowedError: Operation not permitted
-            
-            NameError: Container does not exist
+        Same parameters with list_objects. Returned dicts have no user-defined
+        metadata and, if until is not None, a None 'modified' timestamp.
         """
         return []
     
index ec4d37b..d8d2b6b 100644 (file)
@@ -784,7 +784,8 @@ class Node(DBWorker):
     
     def latest_version_list(self, parent, prefix='', delimiter=None,
                             start='', limit=10000, before=inf,
-                            except_cluster=0, pathq=[], domain=None, filterq=[], sizeq=None):
+                            except_cluster=0, pathq=[], domain=None,
+                            filterq=[], sizeq=None, all_props=False):
         """Return a (list of (path, serial) tuples, list of common prefixes)
            for the current versions of the paths with the given parent,
            matching the following criteria.
@@ -831,6 +832,8 @@ class Node(DBWorker):
            will always match.
            
            Limit applies to the first list of tuples returned.
+           
+           If all_props is True, return all properties after path, not just serial.
         """
         
         if not start or start < prefix:
@@ -839,7 +842,14 @@ class Node(DBWorker):
         
         v = self.versions.alias('v')
         n = self.nodes.alias('n')
-        s = select([n.c.path, v.c.serial]).distinct()
+        if not all_props:
+            s = select([n.c.path, v.c.serial]).distinct()
+        else:
+            s = select([n.c.path,
+                        v.c.serial, v.c.node, v.c.hash,
+                        v.c.size, v.c.type, v.c.source,
+                        v.c.mtime, v.c.muser, v.c.uuid,
+                        v.c.checksum, v.c.cluster]).distinct()
         filtered = select([func.max(self.versions.c.serial)])
         if before != inf:
             filtered = filtered.where(self.versions.c.mtime < before)
@@ -910,7 +920,8 @@ class Node(DBWorker):
             props = rp.fetchone()
             if props is None:
                 break
-            path, serial = props
+            path = props[0]
+            serial = props[1]
             idx = path.find(delimiter, pfz)
             
             if idx < 0:
index 5a66112..e45a967 100644 (file)
@@ -714,7 +714,8 @@ class Node(DBWorker):
     
     def latest_version_list(self, parent, prefix='', delimiter=None,
                             start='', limit=10000, before=inf,
-                            except_cluster=0, pathq=[], domain=None, filterq=[], sizeq=None):
+                            except_cluster=0, pathq=[], domain=None,
+                            filterq=[], sizeq=None, all_props=False):
         """Return a (list of (path, serial) tuples, list of common prefixes)
            for the current versions of the paths with the given parent,
            matching the following criteria.
@@ -761,6 +762,8 @@ class Node(DBWorker):
            will always match.
            
            Limit applies to the first list of tuples returned.
+           
+           If all_props is True, return all properties after path, not just serial.
         """
         
         execute = self.execute
@@ -769,7 +772,7 @@ class Node(DBWorker):
             start = strprevling(prefix)
         nextling = strnextling(prefix)
         
-        q = ("select distinct n.path, v.serial "
+        q = ("select distinct n.path, %s "
              "from versions v, nodes n "
              "where v.serial = (select max(serial) "
                                "from versions "
@@ -780,6 +783,10 @@ class Node(DBWorker):
                             "where parent = ?) "
              "and n.node = v.node "
              "and n.path > ? and n.path < ?")
+        if not all_props:
+            q = q % "v.serial"
+        else:
+            q = q % "v.serial, v.node, v.hash, v.size, v.type, v.source, v.mtime, v.muser, v.uuid, v.checksum, v.cluster"
         args = [before, except_cluster, parent, start, nextling]
         
         subq, subargs = self._construct_paths(pathq)
@@ -819,7 +826,8 @@ class Node(DBWorker):
             props = fetchone()
             if props is None:
                 break
-            path, serial = props
+            path = props[0]
+            serial = props[1]
             idx = path.find(delimiter, pfz)
             
             if idx < 0:
index c8cd2d2..5961ad0 100644 (file)
@@ -296,7 +296,24 @@ class ModularBackend(BaseBackend):
             start, limit = self._list_limits(allowed, marker, limit)
             return allowed[start:start + limit]
         node = self.node.node_lookup(account)
-        return [x[0] for x in self._list_objects(node, account, '', '/', marker, limit, False, None, [], until)]
+        return [x[0] for x in self._list_object_properties(node, account, '', '/', marker, limit, False, None, [], until)]
+    
+    @backend_method
+    def list_container_meta(self, user, account, container, domain, until=None):
+        """Return a list with all the container's object meta keys for the domain."""
+        
+        logger.debug("list_container_meta: %s %s %s %s", account, container, domain, until)
+        allowed = []
+        if user != account:
+            if until:
+                raise NotAllowedError
+            allowed = self.permissions.access_list_paths(user, '/'.join((account, container)))
+            if not allowed:
+                raise NotAllowedError
+        path, node = self._lookup_container(account, container)
+        before = until if until is not None else inf
+        allowed = self._get_formatted_paths(allowed)
+        return self.node.latest_attribute_keys(node, domain, before, CLUSTER_DELETED, allowed)
     
     @backend_method
     def get_container_meta(self, user, account, container, domain, until=None):
@@ -405,11 +422,7 @@ class ModularBackend(BaseBackend):
         self.node.node_remove(node)
         self._report_size_change(user, account, -size, {'action': 'container delete'})
     
-    @backend_method
-    def list_objects(self, user, account, container, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, domain=None, keys=[], shared=False, until=None, size_range=None):
-        """Return a list of objects existing under a container."""
-        
-        logger.debug("list_objects: %s %s %s %s %s %s %s %s %s %s %s", account, container, prefix, delimiter, marker, limit, virtual, domain, keys, shared, until)
+    def _list_objects(self, user, account, container, prefix, delimiter, marker, limit, virtual, domain, keys, shared, until, size_range, all_props):
         allowed = []
         if user != account:
             if until:
@@ -424,24 +437,37 @@ class ModularBackend(BaseBackend):
                     return []
         path, node = self._lookup_container(account, container)
         allowed = self._get_formatted_paths(allowed)
-        return self._list_objects(node, path, prefix, delimiter, marker, limit, virtual, domain, keys, until, size_range, allowed)
+        return self._list_object_properties(node, path, prefix, delimiter, marker, limit, virtual, domain, keys, until, size_range, allowed, all_props)
     
     @backend_method
-    def list_object_meta(self, user, account, container, domain, until=None):
-        """Return a list with all the container's object meta keys for the domain."""
+    def list_objects(self, user, account, container, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, domain=None, keys=[], shared=False, until=None, size_range=None):
+        """Return a list of object (name, version_id) tuples existing under a container."""
         
-        logger.debug("list_object_meta: %s %s %s %s", account, container, domain, until)
-        allowed = []
-        if user != account:
-            if until:
-                raise NotAllowedError
-            allowed = self.permissions.access_list_paths(user, '/'.join((account, container)))
-            if not allowed:
-                raise NotAllowedError
-        path, node = self._lookup_container(account, container)
-        before = until if until is not None else inf
-        allowed = self._get_formatted_paths(allowed)
-        return self.node.latest_attribute_keys(node, domain, before, CLUSTER_DELETED, allowed)
+        logger.debug("list_objects: %s %s %s %s %s %s %s %s %s %s %s %s", account, container, prefix, delimiter, marker, limit, virtual, domain, keys, shared, until, size_range)
+        return self._list_objects(user, account, container, prefix, delimiter, marker, limit, virtual, domain, keys, shared, until, size_range, False)
+    
+    @backend_method
+    def list_object_meta(self, user, account, container, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, domain=None, keys=[], shared=False, until=None, size_range=None):
+        """Return a list of object metadata dicts existing under a container."""
+        
+        logger.debug("list_object_meta: %s %s %s %s %s %s %s %s %s %s %s %s", account, container, prefix, delimiter, marker, limit, virtual, domain, keys, shared, until, size_range)
+        props = self._list_objects(user, account, container, prefix, delimiter, marker, limit, virtual, domain, keys, shared, until, size_range, True)
+        objects = []
+        for p in props:
+            if len(p) == 2:
+                objects.append({'subdir': p[0]})
+            else:
+                objects.append({'name': p[0],
+                                'bytes': p[self.SIZE + 1],
+                                'type': p[self.TYPE + 1],
+                                'hash': p[self.HASH + 1],
+                                'version': p[self.SERIAL + 1],
+                                'version_timestamp': p[self.MTIME + 1],
+                                'modified': p[self.MTIME + 1] if until is None else None,
+                                'modified_by': p[self.MUSER + 1],
+                                'uuid': p[self.UUID + 1],
+                                'checksum': p[self.CHECKSUM + 1]})
+        return objects
     
     @backend_method
     def get_object_meta(self, user, account, container, name, domain, version=None):
@@ -466,7 +492,7 @@ class ModularBackend(BaseBackend):
         meta.update({'name': name,
                      'bytes': props[self.SIZE],
                      'type': props[self.TYPE],
-                     'hash':props[self.HASH],
+                     'hash': props[self.HASH],
                      'version': props[self.SERIAL],
                      'version_timestamp': props[self.MTIME],
                      'modified': modified,
@@ -894,7 +920,7 @@ class ModularBackend(BaseBackend):
             limit = 10000
         return start, limit
     
-    def _list_objects(self, parent, path, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, domain=None, keys=[], until=None, size_range=None, allowed=[]):
+    def _list_object_properties(self, parent, path, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, domain=None, keys=[], until=None, size_range=None, allowed=[], all_props=False):
         cont_prefix = path + '/'
         prefix = cont_prefix + prefix
         start = cont_prefix + marker if marker else None
@@ -902,10 +928,10 @@ class ModularBackend(BaseBackend):
         filterq = keys if domain else []
         sizeq = size_range
         
-        objects, prefixes = self.node.latest_version_list(parent, prefix, delimiter, start, limit, before, CLUSTER_DELETED, allowed, domain, filterq, sizeq)
+        objects, prefixes = self.node.latest_version_list(parent, prefix, delimiter, start, limit, before, CLUSTER_DELETED, allowed, domain, filterq, sizeq, all_props)
         objects.extend([(p, None) for p in prefixes] if virtual else [])
         objects.sort(key=lambda x: x[0])
-        objects = [(x[0][len(cont_prefix):], x[1]) for x in objects]
+        objects = [(x[0][len(cont_prefix):],) + x[1:] for x in objects]
         
         start, limit = self._list_limits([x[0] for x in objects], marker, limit)
         return objects[start:start + limit]