1 # Copyright 2011 GRNET S.A. All rights reserved.
3 # Redistribution and use in source and binary forms, with or
4 # without modification, are permitted provided that the following
7 # 1. Redistributions of source code must retain the above
8 # copyright notice, this list of conditions and the following
11 # 2. Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 # POSSIBILITY OF SUCH DAMAGE.
29 # The views and conclusions contained in the software and
30 # documentation are those of the authors and should not be
31 # interpreted as representing official policies, either expressed
32 # or implied, of GRNET S.A.
35 from sqlalchemy import Table, Integer, BigInteger, DECIMAL, Column, String, MetaData, ForeignKey
36 from sqlalchemy.types import Text
37 from sqlalchemy.schema import Index, Sequence
38 from sqlalchemy.sql import func, and_, or_, null, select, bindparam, text
39 from sqlalchemy.ext.compiler import compiles
40 from sqlalchemy.engine.reflection import Inspector
42 from dbworker import DBWorker
# Positional indices into a property tuple/row of the 'versions' table,
# matching the select order used throughout this module:
# (serial, node, hash, size, source, mtime, muser, cluster).
46 ( SERIAL, NODE, HASH, SIZE, SOURCE, MTIME, MUSER, CLUSTER ) = range(8)
# Helper for range queries over paths: computes the smallest string that
# sorts after every string prefixed by `prefix`.
51 def strnextling(prefix):
52 """Return the first unicode string
53 greater than but not starting with given prefix.
54 strnextling('hello') -> 'hellp'
57 ## all strings start with the null string,
58 ## therefore we have to approximate strnextling('')
59 ## with the last unicode character supported by python
60 ## 0x10ffff for wide (32-bit unicode) python builds
61 ## 0x00ffff for narrow (16-bit unicode) python builds
62 ## We will not autodetect. 0xffff is safe enough.
# NOTE(review): the actual implementation (original lines 63-70) is elided
# from this listing; only the docstring and rationale comments are visible.
# Counterpart of strnextling: computes a lower bound for range scans that
# excludes strings starting with `prefix`.
71 def strprevling(prefix):
72 """Return an approximation of the last unicode string
73 less than but not starting with given prefix.
74 strprevling(u'hello') -> u'helln\\xffff'
77 ## There is no prevling for the null string
82 #s += unichr(c-1) + unichr(0xffff)
# NOTE(review): the body (original lines 75-89, except the fragments above)
# is elided from this listing.
100 """Nodes store path organization and have multiple versions.
101 Versions store object history and have multiple attributes.
102 Attributes store metadata.
105 # TODO: Provide an interface for included and excluded clusters.
# Build the SQLAlchemy schema for the node store (tables: nodes, policy,
# statistics, versions, attributes), create any missing tables and the
# explicit-length path index, and make sure the root node row exists.
# NOTE(review): this listing elides several lines (numbering gaps), notably
# the `columns = []` resets before each table (orig. 112/127/138/152/169),
# some ForeignKey ondelete/onupdate options, the `path_length` binding
# (orig. 119), and the root-node existence check (orig. 194-196).
107 def __init__(self, **params):
108 DBWorker.__init__(self, **params)
109 metadata = MetaData()
# --- nodes table: tree structure; each node points at its parent ---
113 columns.append(Column('node', Integer, primary_key=True))
114 columns.append(Column('parent', Integer,
115 ForeignKey('nodes.node',
118 autoincrement=False))
# path column sized in bytes: 4 bytes per character (UTF-8 worst case),
# so the byte length is path_length * 4.
120 path_length_in_bytes = path_length * 4
121 columns.append(Column('path', Text(path_length_in_bytes), default='', nullable=False))
122 self.nodes = Table('nodes', metadata, *columns, mysql_engine='InnoDB')
123 # place an index on path
# index is instead created manually below, with an explicit length for MySQL
124 #Index('idx_nodes_path', self.nodes.c.path)
# --- policy table: (node, key) -> value pairs ---
128 columns.append(Column('node', Integer,
129 ForeignKey('nodes.node',
133 columns.append(Column('key', String(255), primary_key=True))
134 columns.append(Column('value', String(255)))
135 self.policies = Table('policy', metadata, *columns, mysql_engine='InnoDB')
137 #create statistics table
# per-(node, cluster) aggregates: object count, total size, last mtime
139 columns.append(Column('node', Integer,
140 ForeignKey('nodes.node',
144 columns.append(Column('population', Integer, nullable=False, default=0))
145 columns.append(Column('size', BigInteger, nullable=False, default=0))
146 columns.append(Column('mtime', DECIMAL(precision=16, scale=6)))
147 columns.append(Column('cluster', Integer, nullable=False, default=0,
148 primary_key=True, autoincrement=False))
149 self.statistics = Table('statistics', metadata, *columns, mysql_engine='InnoDB')
151 #create versions table
# column order here defines the (SERIAL..CLUSTER) property indices above
153 columns.append(Column('serial', Integer, primary_key=True))
154 columns.append(Column('node', Integer,
155 ForeignKey('nodes.node',
157 onupdate='CASCADE')))
158 columns.append(Column('hash', String(255)))
159 columns.append(Column('size', BigInteger, nullable=False, default=0))
160 columns.append(Column('source', Integer))
161 columns.append(Column('mtime', DECIMAL(precision=16, scale=6)))
162 columns.append(Column('muser', String(255), nullable=False, default=''))
163 columns.append(Column('cluster', Integer, nullable=False, default=0))
164 self.versions = Table('versions', metadata, *columns, mysql_engine='InnoDB')
165 Index('idx_versions_node_mtime', self.versions.c.node,
166 self.versions.c.mtime)
168 #create attributes table
# per-version metadata: (serial, key) -> value
170 columns.append(Column('serial', Integer,
171 ForeignKey('versions.serial',
175 columns.append(Column('key', String(255), primary_key=True))
176 columns.append(Column('value', String(255)))
177 self.attributes = Table('attributes', metadata, *columns, mysql_engine='InnoDB')
179 metadata.create_all(self.engine)
181 # the following code creates an index of specific length
182 # this can be accomplished in sqlalchemy >= 0.7.3
183 # providing mysql_length option during index creation
184 insp = Inspector.from_engine(self.engine)
185 indexes = [elem['name'] for elem in insp.get_indexes('nodes')]
186 if 'idx_nodes_path' not in indexes:
# MySQL cannot index an unbounded TEXT column, so a prefix length is given
187 explicit_length = '(%s)' %path_length_in_bytes if self.engine.name == 'mysql' else ''
188 s = text('CREATE INDEX idx_nodes_path ON nodes (path%s)' %explicit_length)
189 self.conn.execute(s).close()
# ensure the self-referencing root row exists (insert is presumably guarded
# by the elided check of `rp` at orig. lines 194-196 — confirm)
191 s = self.nodes.select().where(and_(self.nodes.c.node == ROOTNODE,
192 self.nodes.c.parent == ROOTNODE))
193 rp = self.conn.execute(s)
197 s = self.nodes.insert().values(node=ROOTNODE, parent=ROOTNODE)
200 def node_create(self, parent, path):
201 """Create a new node from the given properties.
202 Return the node identifier of the new node.
204 #TODO catch IntegrityError?
205 s = self.nodes.insert().values(parent=parent, path=path)
206 r = self.conn.execute(s)
# the auto-generated 'node' primary key of the inserted row
207 inserted_primary_key = r.inserted_primary_key[0]
# NOTE(review): orig. line 208 is elided here — likely the `r.close()`
# that the surrounding methods perform before returning; confirm.
209 return inserted_primary_key
211 def node_lookup(self, path):
212 """Lookup the current node of the given path.
213 Return None if the path is not found.
# NOTE(review): `.like(path)` makes any '%' or '_' inside `path` act as an
# SQL wildcard; if exact-path lookup is intended, the pattern should be
# escaped (or `==` used) — confirm against callers.
216 s = select([self.nodes.c.node], self.nodes.c.path.like(path))
217 r = self.conn.execute(s)
# (fetch of the row and the None/node return, orig. lines 218-222, elided)
224 def node_get_properties(self, node):
225 """Return the node's (parent, path).
226 Return None if the node is not found.
229 s = select([self.nodes.c.parent, self.nodes.c.path])
230 s = s.where(self.nodes.c.node == node)
231 r = self.conn.execute(s)
# (fetch and return of the (parent, path) row, orig. lines 232-234, elided)
236 def node_get_versions(self, node, keys=(), propnames=_propnames):
237 """Return the properties of all versions at node.
238 If keys is empty, return all properties in the order
239 (serial, node, hash, size, source, mtime, muser, cluster).
# select order matches the SERIAL..CLUSTER index constants at module level
242 s = select([self.versions.c.serial,
243 self.versions.c.node,
244 self.versions.c.hash,
245 self.versions.c.size,
246 self.versions.c.source,
247 self.versions.c.mtime,
248 self.versions.c.muser,
249 self.versions.c.cluster], self.versions.c.node == node)
250 s = s.order_by(self.versions.c.serial)
251 r = self.conn.execute(s)
# (fetchall/close and the keys-empty short-circuit, orig. 252-259, elided)
# project only the requested keys, using _propnames to map key -> column index
260 return [[p[propnames[k]] for k in keys if k in propnames] for p in rows]
262 def node_count_children(self, node):
263 """Return node's child count."""
# exclude ROOTNODE: the root row is its own parent, so counting it would
# inflate the root's child count
265 s = select([func.count(self.nodes.c.node)])
266 s = s.where(and_(self.nodes.c.parent == node,
267 self.nodes.c.node != ROOTNODE))
268 r = self.conn.execute(s)
# (fetch of the scalar count and return, orig. lines 269-271, elided)
273 def node_purge_children(self, parent, before=inf, cluster=0):
274 """Delete all versions with the specified
275 parent and cluster, and return
276 the serials of versions deleted.
277 Clears out nodes with no remaining versions.
# c1: all direct children of `parent`
280 c1 = select([self.nodes.c.node],
281 self.nodes.c.parent == parent)
282 where_clause = and_(self.versions.c.node.in_(c1),
283 self.versions.c.cluster == cluster)
# first pass: aggregate count and total size of the versions to be purged
284 s = select([func.count(self.versions.c.serial),
285 func.sum(self.versions.c.size)])
286 s = s.where(where_clause)
# (the `before != inf` guard around this filter, orig. 287, is elided)
288 s = s.where(self.versions.c.mtime <= before)
289 r = self.conn.execute(s)
# precedence: parses as `row[0], (-row[1] if row[1] else 0)` — the negated
# size (or 0 when SUM is NULL, i.e. nothing matched); `mtime` is bound in
# the elided orig. line 295 — presumably time() — confirm.
294 nr, size = row[0], -row[1] if row[1] else 0
296 self.statistics_update(parent, -nr, size, mtime, cluster)
297 self.statistics_update_ancestors(parent, -nr, size, mtime, cluster)
# collect the serials before deleting, so they can be returned to the caller
299 s = select([self.versions.c.serial])
300 s = s.where(where_clause)
301 r = self.conn.execute(s)
302 serials = [row[SERIAL] for row in r.fetchall()]
306 s = self.versions.delete().where(where_clause)
307 r = self.conn.execute(s)
# remove child nodes left with zero versions (correlated count subquery)
311 s = select([self.nodes.c.node],
312 and_(self.nodes.c.parent == parent,
313 select([func.count(self.versions.c.serial)],
314 self.versions.c.node == self.nodes.c.node).as_scalar() == 0))
315 rp = self.conn.execute(s)
316 nodes = [r[0] for r in rp.fetchall()]
318 s = self.nodes.delete().where(self.nodes.c.node.in_(nodes))
319 self.conn.execute(s).close()
323 def node_purge(self, node, before=inf, cluster=0):
324 """Delete all versions with the specified
325 node and cluster, and return
326 the serials of versions deleted.
327 Clears out the node if it has no remaining versions.
# aggregate count and total size of the versions about to be purged
331 s = select([func.count(self.versions.c.serial),
332 func.sum(self.versions.c.size)])
333 where_clause = and_(self.versions.c.node == node,
334 self.versions.c.cluster == cluster)
335 s = s.where(where_clause)
# (the `before != inf` guard, orig. 336, is elided)
337 s = s.where(self.versions.c.mtime <= before)
338 r = self.conn.execute(s)
# NOTE(review): unlike node_purge_children, `size` is not NULL-guarded here;
# the elided orig. lines 341-344 presumably handle the empty case and bind
# `mtime` — confirm.
340 nr, size = row[0], row[1]
345 self.statistics_update_ancestors(node, -nr, -size, mtime, cluster)
347 s = select([self.versions.c.serial])
348 s = s.where(where_clause)
349 r = self.conn.execute(s)
# NOTE(review): the comprehension variable `r` shadows the result proxy `r`
# it iterates — works, but confusing; compare `row` used in node_purge_children.
350 serials = [r[SERIAL] for r in r.fetchall()]
354 s = self.versions.delete().where(where_clause)
355 r = self.conn.execute(s)
# remove the node itself if no versions remain (correlated count subquery);
# `nodes` is bound from this result in the elided orig. lines 364-365
359 s = select([self.nodes.c.node],
360 and_(self.nodes.c.node == node,
361 select([func.count(self.versions.c.serial)],
362 self.versions.c.node == self.nodes.c.node).as_scalar() == 0))
363 r = self.conn.execute(s)
366 s = self.nodes.delete().where(self.nodes.c.node.in_(nodes))
367 self.conn.execute(s).close()
371 def node_remove(self, node):
372 """Remove the node specified.
373 Return false if the node has children or is not found.
# refuse to remove a non-leaf node
376 if self.node_count_children(node):
# roll the node's per-cluster statistics out of all ancestors before the
# delete cascades away its versions; `mtime` is bound in an elided line
# (presumably time() — confirm)
380 s = select([func.count(self.versions.c.serial),
381 func.sum(self.versions.c.size),
382 self.versions.c.cluster])
383 s = s.where(self.versions.c.node == node)
384 s = s.group_by(self.versions.c.cluster)
385 r = self.conn.execute(s)
386 for population, size, cluster in r.fetchall():
387 self.statistics_update_ancestors(node, -population, -size, mtime, cluster)
390 s = self.nodes.delete().where(self.nodes.c.node == node)
391 self.conn.execute(s).close()
# Return the node's policy as a {key: value} dict.
394 def policy_get(self, node):
395 s = select([self.policies.c.key, self.policies.c.value],
396 self.policies.c.node==node)
397 r = self.conn.execute(s)
398 d = dict(r.fetchall())
# (result-proxy close and `return d`, orig. lines 399-400, elided)
# Upsert the given {key: value} policy entries for the node:
# try UPDATE first, INSERT when no row matched.
402 def policy_set(self, node, policy):
404 for k, v in policy.iteritems():
405 s = self.policies.update().where(and_(self.policies.c.node == node,
406 self.policies.c.key == k))
407 s = s.values(value = v)
408 rp = self.conn.execute(s)
# (the rowcount check deciding whether to fall through to INSERT,
# orig. lines 409-410, is elided — presumably `if rp.rowcount == 0:`)
411 s = self.policies.insert()
412 values = {'node':node, 'key':k, 'value':v}
413 r = self.conn.execute(s, values)
416 def statistics_get(self, node, cluster=0):
417 """Return population, total size and last mtime
418 for all versions under node that belong to the cluster.
421 s = select([self.statistics.c.population,
422 self.statistics.c.size,
423 self.statistics.c.mtime])
424 s = s.where(and_(self.statistics.c.node == node,
425 self.statistics.c.cluster == cluster))
426 r = self.conn.execute(s)
# (fetch of the single row and return, orig. lines 427-429, elided)
431 def statistics_update(self, node, population, size, mtime, cluster=0):
432 """Update the statistics of the given node.
433 Statistics keep track the population, total
434 size of objects and mtime in the node's namespace.
435 May be zero or positive or negative numbers.
# read the current aggregates, add the deltas, then upsert
437 s = select([self.statistics.c.population, self.statistics.c.size],
438 and_(self.statistics.c.node == node,
439 self.statistics.c.cluster == cluster))
440 rp = self.conn.execute(s)
# default to zeros when no statistics row exists yet for (node, cluster)
444 prepopulation, presize = (0, 0)
446 prepopulation, presize = r
447 population += prepopulation
# NOTE(review): the matching `size += presize` (orig. line 448) is elided
# from this listing — confirm it exists; without it sizes would be clobbered.
# update the existing row...
452 u = self.statistics.update().where(and_(self.statistics.c.node==node,
453 self.statistics.c.cluster==cluster))
454 u = u.values(population=population, size=size, mtime=mtime)
455 rp = self.conn.execute(u)
# ...or insert a fresh one (the rowcount branch, orig. 456-457, is elided)
458 ins = self.statistics.insert()
459 ins = ins.values(node=node, population=population, size=size,
460 mtime=mtime, cluster=cluster)
461 self.conn.execute(ins).close()
463 def statistics_update_ancestors(self, node, population, size, mtime, cluster=0):
464 """Update the statistics of the given node's parent.
465 Then recursively update all parents up to the root.
466 Population is not recursive.
# (the walk-up loop header and the parent extraction/termination test,
# orig. lines 467-475, are elided from this listing)
472 props = self.node_get_properties(node)
476 self.statistics_update(parent, population, size, mtime, cluster)
478 population = 0 # Population isn't recursive
480 def statistics_latest(self, node, before=inf, except_cluster=0):
481 """Return population, total size and last mtime
482 for all latest versions under node that
483 do not belong to the cluster.
# node's own (parent, path); the path is needed for the subtree LIKE scan
487 props = self.node_get_properties(node)
492 # The latest version.
493 s = select([self.versions.c.serial,
494 self.versions.c.node,
495 self.versions.c.hash,
496 self.versions.c.size,
497 self.versions.c.source,
498 self.versions.c.mtime,
499 self.versions.c.muser,
500 self.versions.c.cluster])
# scalar subquery: max serial of this node's versions (optionally < before)
501 filtered = select([func.max(self.versions.c.serial)],
502 self.versions.c.node == node)
504 filtered = filtered.where(self.versions.c.mtime < before)
505 s = s.where(and_(self.versions.c.cluster != except_cluster,
506 self.versions.c.serial == filtered))
507 r = self.conn.execute(s)
# NOTE(review): the elided orig. lines 508-513 presumably rebind `props` to
# the fetched latest-version row and bind `mtime`; otherwise props[SIZE]
# below would index the (parent, path) pair from node_get_properties — confirm.
514 # First level, just under node (get population).
515 v = self.versions.alias('v')
516 s = select([func.count(v.c.serial),
518 func.max(v.c.mtime)])
519 c1 = select([func.max(self.versions.c.serial)])
521 c1 = c1.where(self.versions.c.mtime < before)
# c2: direct children of node (used in an elided `v.c.node.in_(c2)` clause)
522 c2 = select([self.nodes.c.node], self.nodes.c.parent == node)
523 s = s.where(and_(v.c.serial == c1.where(self.versions.c.node == v.c.node),
524 v.c.cluster != except_cluster,
526 rp = self.conn.execute(s)
# fold the children's latest mtime into the running maximum
532 mtime = max(mtime, r[2])
536 # All children (get size and mtime).
537 # XXX: This is why the full path is stored.
538 s = select([func.count(v.c.serial),
540 func.max(v.c.mtime)])
541 c1 = select([func.max(self.versions.c.serial)],
542 self.versions.c.node == v.c.node)
544 c1 = c1.where(self.versions.c.mtime < before)
# subtree scan by path prefix rather than by recursive parent traversal
545 c2 = select([self.nodes.c.node], self.nodes.c.path.like(path + '%'))
546 s = s.where(and_(v.c.serial == c1,
547 v.c.cluster != except_cluster,
549 rp = self.conn.execute(s)
# subtract the node's own latest-version size: the subtree sum includes it
554 size = r[1] - props[SIZE]
555 mtime = max(mtime, r[2])
556 return (count, size, mtime)
558 def version_create(self, node, hash, size, source, muser, cluster=0):
559 """Create a new version from the given properties.
560 Return the (serial, mtime) of the new version.
# `mtime` is bound in the elided orig. lines 562-563 (presumably time())
564 s = self.versions.insert().values(node=node, hash=hash, size=size, source=source,
565 mtime=mtime, muser=muser, cluster=cluster)
566 serial = self.conn.execute(s).inserted_primary_key[0]
# keep the ancestor aggregates in sync with the new version
567 self.statistics_update_ancestors(node, 1, size, mtime, cluster)
# (the `return serial, mtime`, orig. lines 568, elided)
570 def version_lookup(self, node, before=inf, cluster=0):
571 """Lookup the current version of the given node.
572 Return a list with its properties:
573 (serial, node, hash, size, source, mtime, muser, cluster)
574 or None if the current version is not found in the given cluster.
577 v = self.versions.alias('v')
578 s = select([v.c.serial, v.c.node, v.c.hash, v.c.size,
579 v.c.source, v.c.mtime, v.c.muser, v.c.cluster])
# scalar subquery: the max serial for this node, optionally bounded by
# `before` (the `before != inf` guard, orig. 582, is elided)
580 c = select([func.max(self.versions.c.serial)],
581 self.versions.c.node == node)
583 c = c.where(self.versions.c.mtime < before)
584 s = s.where(and_(v.c.serial == c,
585 v.c.cluster == cluster))
586 r = self.conn.execute(s)
# (fetch of the row and the None/properties return, orig. 587-591, elided)
593 def version_get_properties(self, serial, keys=(), propnames=_propnames):
594 """Return a sequence of values for the properties of
595 the version specified by serial and the keys, in the order given.
596 If keys is empty, return all properties in the order
597 (serial, node, hash, size, source, mtime, muser, cluster).
600 v = self.versions.alias()
601 s = select([v.c.serial, v.c.node, v.c.hash, v.c.size,
602 v.c.source, v.c.mtime, v.c.muser, v.c.cluster], v.c.serial == serial)
603 rp = self.conn.execute(s)
# (fetch, None-check, and keys-empty short-circuit, orig. 604-610, elided)
# map the requested keys to column indices via _propnames
611 return [r[propnames[k]] for k in keys if k in propnames]
613 def version_recluster(self, serial, cluster):
614 """Move the version into another cluster."""
616 props = self.version_get_properties(serial)
# (None-check and node/size/mtime extraction, orig. 617-620, elided)
621 oldcluster = props[CLUSTER]
# nothing to do when already in the target cluster
622 if cluster == oldcluster:
# move the version's contribution between the two clusters' statistics
626 self.statistics_update_ancestors(node, -1, -size, mtime, oldcluster)
627 self.statistics_update_ancestors(node, 1, size, mtime, cluster)
629 s = self.versions.update()
630 s = s.where(self.versions.c.serial == serial)
631 s = s.values(cluster = cluster)
632 self.conn.execute(s).close()
634 def version_remove(self, serial):
635 """Remove the serial specified."""
# NOTE(review): this calls node_get_properties(serial), which per its own
# docstring returns only (parent, path) — yet props[CLUSTER] (index 7) is
# read below. The sibling version_recluster uses version_get_properties for
# the same purpose; this looks like it should too — confirm and fix.
637 props = self.node_get_properties(serial)
# (None-check and node/size/mtime extraction, orig. 638-641 and 643-644,
# elided from this listing)
642 cluster = props[CLUSTER]
# remove the version's contribution from all ancestor statistics
645 self.statistics_update_ancestors(node, -1, -size, mtime, cluster)
647 s = self.versions.delete().where(self.versions.c.serial == serial)
648 self.conn.execute(s).close()
651 def attribute_get(self, serial, keys=()):
652 """Return a list of (key, value) pairs of the version specified by serial.
653 If keys is empty, return all attributes.
654 Otherwise, return only those specified.
# (the `if keys:` / `else:` branch headers, orig. 656-657 and 662, elided)
# keys given: restrict the select to those keys
658 attrs = self.attributes.alias()
659 s = select([attrs.c.key, attrs.c.value])
660 s = s.where(and_(attrs.c.key.in_(keys),
661 attrs.c.serial == serial))
# no keys: return every attribute of the version
663 attrs = self.attributes.alias()
664 s = select([attrs.c.key, attrs.c.value])
665 s = s.where(attrs.c.serial == serial)
666 r = self.conn.execute(s)
# (fetchall/close and return of the pairs, orig. 667-669, elided)
671 def attribute_set(self, serial, items):
672 """Set the attributes of the version specified by serial.
673 Receive attributes as an iterable of (key, value) pairs.
# (the `for k, v in items:` loop header, orig. 675-677, is elided; the
# statements below are per-pair upserts: UPDATE, then INSERT if no match)
678 s = self.attributes.update()
679 s = s.where(and_(self.attributes.c.serial == serial,
680 self.attributes.c.key == k))
681 s = s.values(value = v)
682 rp = self.conn.execute(s)
# (rowcount check guarding the INSERT, orig. 683-684, elided)
685 s = self.attributes.insert()
686 s = s.values(serial=serial, key=k, value=v)
687 self.conn.execute(s).close()
689 def attribute_del(self, serial, keys=()):
690 """Delete attributes of the version specified by serial.
691 If keys is empty, delete all attributes.
692 Otherwise delete those specified.
# (the `if keys:` branch header and per-key loop, orig. 694-697, elided)
696 #TODO more efficient way to do this?
# keys given: one DELETE per key (could be a single IN-delete — see TODO)
698 s = self.attributes.delete()
699 s = s.where(and_(self.attributes.c.serial == serial,
700 self.attributes.c.key == key))
701 self.conn.execute(s).close()
# no keys: wipe every attribute of the version
703 s = self.attributes.delete()
704 s = s.where(self.attributes.c.serial == serial)
705 self.conn.execute(s).close()
# Copy all attributes of version `source` onto version `dest` (upsert).
707 def attribute_copy(self, source, dest):
# `dest` is selected as a literal column so each fetched row is already
# shaped as (dest_serial, key, value)
708 s = select([dest, self.attributes.c.key, self.attributes.c.value],
709 self.attributes.c.serial == source)
710 rp = self.conn.execute(s)
711 attributes = rp.fetchall()
# NOTE(review): the loop rebinds the `dest` parameter to the row's first
# column (same value, but shadowing the argument is confusing)
713 for dest, k, v in attributes:
# update an existing (dest, k) attribute...
715 s = self.attributes.update().where(and_(
716 self.attributes.c.serial == dest,
717 self.attributes.c.key == k))
718 rp = self.conn.execute(s, value=v)
# ...or insert it (rowcount check, orig. 719-720, elided)
721 s = self.attributes.insert()
722 values = {'serial':dest, 'key':k, 'value':v}
723 self.conn.execute(s, values).close()
# NOTE(review): mutable default argument `pathq=[]` — shared across calls;
# safe only as long as it is never mutated, but `pathq=None` would be safer.
725 def latest_attribute_keys(self, parent, before=inf, except_cluster=0, pathq=[]):
726 """Return a list with all keys pairs defined
727 for all latest versions under parent that
728 do not belong to the cluster.
731 # TODO: Use another table to store before=inf results.
732 a = self.attributes.alias('a')
733 v = self.versions.alias('v')
734 n = self.nodes.alias('n')
735 s = select([a.c.key]).distinct()
# correlated scalar subquery: per-node max serial, optionally bounded by
# `before` (the `before != inf` guard, orig. 737, is elided)
736 filtered = select([func.max(self.versions.c.serial)])
738 filtered = filtered.where(self.versions.c.mtime < before)
739 s = s.where(v.c.serial == filtered.where(self.versions.c.node == v.c.node))
740 s = s.where(v.c.cluster != except_cluster)
# restrict to direct children of `parent`
741 s = s.where(v.c.node.in_(select([self.nodes.c.node],
742 self.nodes.c.parent == parent)))
# join attributes and nodes to the selected latest versions
743 s = s.where(a.c.serial == v.c.serial)
744 s = s.where(n.c.node == v.c.node)
# OR together one LIKE-prefix clause per entry of pathq (loop header and
# `conj = []` init, orig. 745-746 and 748, elided)
747 conj.append(n.c.path.like(x + '%'))
749 s = s.where(or_(*conj))
750 rp = self.conn.execute(s)
# (fetchall/close, orig. 751-752, elided)
753 return [r[0] for r in rows]
# NOTE(review): mutable default argument `pathq=[]` — shared across calls;
# safe only as long as it is never mutated, but `pathq=None` would be safer.
755 def latest_version_list(self, parent, prefix='', delimiter=None,
756 start='', limit=10000, before=inf,
757 except_cluster=0, pathq=[], filterq=None):
758 """Return a (list of (path, serial) tuples, list of common prefixes)
759 for the current versions of the paths with the given parent,
760 matching the following criteria.
762 The property tuple for a version is returned if all
763 of these conditions are true:
769 c. path starts with prefix (and paths in pathq)
771 d. version is the max up to before
773 e. version is not in cluster
775 f. the path does not have the delimiter occurring
776 after the prefix, or ends with the delimiter
778 g. serial matches the attribute filter query.
780 A filter query is a comma-separated list of
781 terms in one of these three forms:
784 an attribute with this key must exist
787 an attribute with this key must not exist
790 the attribute with this key satisfies the value
791 where ?op is one of ==, != <=, >=, <, >.
793 The list of common prefixes includes the prefixes
794 matching up to the first delimiter after prefix,
795 and are reported only once, as "virtual directories".
796 The delimiter is included in the prefixes.
798 If arguments are None, then the corresponding matching rule
801 Limit applies to the first list of tuples returned.
# normalize the pagination cursor: never start before the prefix range
804 if not start or start < prefix:
805 start = strprevling(prefix)
# upper bound of the prefix range for the path BETWEEN-style scan below
806 nextling = strnextling(prefix)
808 a = self.attributes.alias('a')
809 v = self.versions.alias('v')
810 n = self.nodes.alias('n')
811 s = select([n.c.path, v.c.serial]).distinct()
# per-node latest serial, optionally bounded by `before` (the
# `before != inf` guard, orig. 813, is elided)
812 filtered = select([func.max(self.versions.c.serial)])
814 filtered = filtered.where(self.versions.c.mtime < before)
815 s = s.where(v.c.serial == filtered.where(self.versions.c.node == v.c.node))
816 s = s.where(v.c.cluster != except_cluster)
817 s = s.where(v.c.node.in_(select([self.nodes.c.node],
818 self.nodes.c.parent == parent)))
# join against attributes only when a filter query is given (the guard,
# orig. 819/821, is elided)
820 s = s.where(a.c.serial == v.c.serial)
822 s = s.where(n.c.node == v.c.node)
# `start` is a bindparam so the same compiled statement can be re-executed
# with a new cursor while paging past virtual directories below
823 s = s.where(and_(n.c.path > bindparam('start'), n.c.path < nextling))
# OR-combined LIKE clause per pathq entry (loop and `conj = []`, elided)
826 conj.append(n.c.path.like(x + '%'))
829 s = s.where(or_(*conj))
# NOTE(review): only the key-existence form of the filter query is visible
# here; the !key and key?op=value forms described in the docstring are in
# elided lines (if implemented) — confirm.
832 s = s.where(a.c.key.in_(filterq.split(',')))
834 s = s.order_by(n.c.path)
# fast path with no delimiter (guard elided): fetch up to `limit` rows
838 rp = self.conn.execute(s, start=start)
# delimiter handling: accumulate matches and "virtual directory" prefixes
847 pappend = prefixes.append
849 mappend = matches.append
851 rp = self.conn.execute(s, start=start)
853 props = rp.fetchone()
# find the first delimiter after the prefix (pfz = len(prefix), elided)
857 idx = path.find(delimiter, pfz)
# path ends exactly at the delimiter: report it as a match, then also as
# a prefix (dz = len(delimiter), elided)
866 if idx + dz == len(path):
869 continue # Get one more, in case there is a path.
# skip the rest of this virtual directory by restarting the query just
# past the common prefix pf
875 rp = self.conn.execute(s, start=strnextling(pf)) # New start.
878 return matches, prefixes