root / pithos / lib / hashfiler / noder.py @ 44e0514f
History | View | Annotate | Download (19.8 kB)
1 |
|
---|---|
2 |
from sqlite3 import connect, IntegrityError |
3 |
from time import time |
4 |
|
5 |
ROOTNODE = 0
|
6 |
|
7 |
( SERIAL, PARENT, PATH, |
8 |
SIZE, POPULATION, POPSIZE, |
9 |
SOURCE, MTIME, CLUSTER ) = range(9) |
10 |
|
11 |
inf = float('inf') |
12 |
|
13 |
def strnextling(prefix): |
14 |
"""return the first unicode string
|
15 |
greater than but not starting with given prefix.
|
16 |
strnextling('hello') -> 'hellp'
|
17 |
"""
|
18 |
if not prefix: |
19 |
## all strings start with the null string,
|
20 |
## therefore we have to approximate strnextling('')
|
21 |
## with the last unicode character supported by python
|
22 |
## 0x10ffff for wide (32-bit unicode) python builds
|
23 |
## 0x00ffff for narrow (16-bit unicode) python builds
|
24 |
## We will not autodetect. 0xffff is safe enough.
|
25 |
return unichr(0xffff) |
26 |
s = prefix[:-1]
|
27 |
c = ord(prefix[-1]) |
28 |
if c >= 0xffff: |
29 |
raise RuntimeError |
30 |
s += unichr(c+1) |
31 |
return s
|
32 |
|
33 |
def strprevling(prefix): |
34 |
"""return an approximation of the last unicode string
|
35 |
less than but not starting with given prefix.
|
36 |
strprevling(u'hello') -> u'helln\\xffff'
|
37 |
"""
|
38 |
if not prefix: |
39 |
## There is no prevling for the null string
|
40 |
return prefix
|
41 |
s = prefix[:-1]
|
42 |
c = ord(prefix[-1]) |
43 |
if c > 0: |
44 |
s += unichr(c-1) + unichr(0xffff) |
45 |
return s
|
46 |
|
47 |
import re |
48 |
_regexfilter = re.compile('(!?)\s*([\w-]+)\s*(=|!=|<=|>=|<|>)?\s*(.*)$', re.UNICODE)
|
49 |
|
50 |
_propnames = { |
51 |
'serial' : 0, |
52 |
'parent' : 1, |
53 |
'path' : 2, |
54 |
'size' : 3, |
55 |
'population': 4, |
56 |
'popsize' : 5, |
57 |
'source' : 6, |
58 |
'mtime' : 7, |
59 |
'cluster' : 8, |
60 |
} |
61 |
|
62 |
_mutables = ('source', 'mtime', 'cluster') |
63 |
_mutablepropnames = dict((k, _propnames[k]) for k in _mutables) |
64 |
|
65 |
class Noder(object): |
66 |
|
67 |
def __init__(self, **params): |
68 |
self.params = params
|
69 |
self.noder_init()
|
70 |
|
71 |
def noder_init(self): |
72 |
execute = self.execute
|
73 |
executemany = self.executemany
|
74 |
|
75 |
execute(""" create table if not exists nodes
|
76 |
( serial integer not null,
|
77 |
parent integer not null default 0,
|
78 |
path text not null default '',
|
79 |
size integer not null default 0,
|
80 |
population integer not null default 0,
|
81 |
popsize integer not null default 0,
|
82 |
source integer,
|
83 |
mtime integer,
|
84 |
cluster integer not null default 0,
|
85 |
primary key (serial)
|
86 |
foreign key (parent)
|
87 |
references nodes(serial)
|
88 |
on update cascade
|
89 |
on delete cascade ) """)
|
90 |
|
91 |
execute(""" create index if not exists
|
92 |
paths on nodes(cluster, parent, path) """)
|
93 |
|
94 |
execute(""" create unique index if not exists
|
95 |
mtimes on nodes(mtime) """)
|
96 |
|
97 |
execute(""" create table if not exists attributes
|
98 |
( serial integer,
|
99 |
key text,
|
100 |
value text,
|
101 |
primary key (serial, key, value)
|
102 |
foreign key (serial)
|
103 |
references nodes(serial)
|
104 |
on update cascade
|
105 |
on delete cascade ) """)
|
106 |
|
107 |
#execute(""" create index if not exists keys
|
108 |
# on attributes(key, value, serial) """)
|
109 |
|
110 |
execute(""" pragma foreign_keys = 1 """)
|
111 |
|
112 |
q = "insert or ignore into nodes(serial, parent) values (?, ?)"
|
113 |
vals = ((ROOTNODE, ROOTNODE),) |
114 |
executemany(q, vals) |
115 |
|
116 |
def node_create(self, parent, path, size, source, cluster=0): |
117 |
"""Create a new node from the given properties.
|
118 |
Return the (serial, mtime) of the new node.
|
119 |
"""
|
120 |
serial = self.alloc_serial()
|
121 |
mtime = time() |
122 |
props = (serial, parent, path, size, 0, 0, source, mtime, cluster) |
123 |
q = "insert into nodes values (?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
124 |
self.execute(q, props)
|
125 |
self.node_update_ancestors(parent, 1, size) |
126 |
return serial, mtime
|
127 |
|
128 |
def node_resize(self, serial, size): |
129 |
props = self.node_get_properties(serial)
|
130 |
if props is None: |
131 |
return 0 |
132 |
q = "update nodes set size = ? where serial = ?"
|
133 |
self.execute(q, (size, serial))
|
134 |
delta = size - props[3]
|
135 |
parent = props[1]
|
136 |
self.node_update_ancestors(parent, 0, delta) |
137 |
return 1 |
138 |
|
139 |
def node_remove(self, serial, recursive=0): |
140 |
"""Remove the node specified by serial.
|
141 |
Return false if the node was not found,
|
142 |
or true if found and removed.
|
143 |
"""
|
144 |
node_get_properties = self.node_get_properties
|
145 |
props = node_get_properties(serial) |
146 |
size = props[3]
|
147 |
pop = props[4]
|
148 |
popsize = props[5]
|
149 |
if pop and not recursive: |
150 |
return 0 |
151 |
|
152 |
q = "delete from nodes where serial = ?"
|
153 |
self.execute(q, (serial,))
|
154 |
self.node_update_ancestors(props[1], -pop-1, -size-popsize) |
155 |
return 1 |
156 |
|
157 |
def node_get_properties(self, serial, keys=(), propnames=_propnames): |
158 |
"""Return a sequence of values for the properties of
|
159 |
the node specified by serial and the keys, in the order given.
|
160 |
If keys is empty, return all properties in the order
|
161 |
(serial, parent, path, size, population, popsize,
|
162 |
source, mtime, cluster).
|
163 |
"""
|
164 |
q = "select * from nodes where serial = ?"
|
165 |
self.execute(q, (serial,))
|
166 |
r = self.fetchone()
|
167 |
if r is None: |
168 |
return r
|
169 |
|
170 |
if not keys: |
171 |
return r
|
172 |
|
173 |
return [r[propnames[k]] for k in keys if k in propnames] |
174 |
|
175 |
def node_set_properties(self, serial, items, propnames=_mutablepropnames): |
176 |
"""Set the properties of a node specified by the node serial and
|
177 |
the items iterable of (name, value) pairs.
|
178 |
Mutable properties are %s.
|
179 |
Invalid property names and 'serial' are not set.
|
180 |
""" % (_mutables,)
|
181 |
if not items: |
182 |
return
|
183 |
|
184 |
keys, vals = zip(*items)
|
185 |
keystr = ','.join(("%s = ?" % k) for k in keys if k in propnames) |
186 |
if not keystr: |
187 |
return
|
188 |
q = "update nodes set %s where serial = ?" % keystr
|
189 |
vals += (serial,) |
190 |
self.execute(q, vals)
|
191 |
|
192 |
def node_update_ancestors(self, serial, population, popsize): |
193 |
"""Update the population properties of the node specified by serial.
|
194 |
Population properties keep track the population and totalsize
|
195 |
of objects in the node's namespace.
|
196 |
May be zero or positive or negative numbers.
|
197 |
"""
|
198 |
qu = ("update nodes set population = population + ?,"
|
199 |
"popsize = popsize + ? "
|
200 |
"where serial = ?")
|
201 |
qp = "select parent from nodes where serial = ?"
|
202 |
execute = self.execute
|
203 |
fetchone = self.fetchone
|
204 |
while 1: |
205 |
execute(qu, (population, popsize, serial)) |
206 |
population = 0 # Population isn't recursive |
207 |
execute(qp, (serial,)) |
208 |
r = fetchone() |
209 |
if r is None: |
210 |
break
|
211 |
serial = r[0]
|
212 |
if serial < 3: |
213 |
break
|
214 |
|
215 |
def node_lookup(self, parent, path, before=inf, cluster=0): |
216 |
"""Lookup the current version of the given path
|
217 |
within the parent's namespace. Return a list with its properties:
|
218 |
(serial, parent, path, size, population, popsize,
|
219 |
source, mtime, cluster)
|
220 |
or None if the version is not found.
|
221 |
"""
|
222 |
q =("select max(serial), parent, path, size, "
|
223 |
"population, popsize, source, mtime, cluster "
|
224 |
"from nodes "
|
225 |
"where parent = ? and cluster = ? and path = ? and mtime < ?")
|
226 |
self.execute(q, (parent, cluster, path, before))
|
227 |
props = self.fetchone()
|
228 |
if props is not None and props[SERIAL] is not None: |
229 |
return props
|
230 |
|
231 |
return None |
232 |
|
233 |
def parse_filters(self, filterq): |
234 |
preterms = filterq.split(',')
|
235 |
included = [] |
236 |
excluded = [] |
237 |
opers = [] |
238 |
match = _regexfilter.match |
239 |
for term in preterms: |
240 |
m = match(term) |
241 |
if m is None: |
242 |
continue
|
243 |
neg, key, op, value = m.groups() |
244 |
if neg:
|
245 |
excluded.append(key) |
246 |
elif not value: |
247 |
included.append(key) |
248 |
elif op:
|
249 |
opers.append((key, op, value)) |
250 |
|
251 |
return included, excluded, opers
|
252 |
|
253 |
def construct_filters(self, filterq): |
254 |
subqlist = [] |
255 |
append = subqlist.append |
256 |
included, excluded, opers = self.parse_filters(filterq)
|
257 |
args = [] |
258 |
|
259 |
if included:
|
260 |
subq = "key in ("
|
261 |
subq += ','.join(('?' for x in included)) + ")" |
262 |
args += included |
263 |
append(subq) |
264 |
|
265 |
if excluded:
|
266 |
subq = "key not in ("
|
267 |
subq += ','.join(('?' for x in exluded)) + ")" |
268 |
args += excluded |
269 |
append(subq) |
270 |
|
271 |
if opers:
|
272 |
t = (("(key = %s and value %s %s)" % (k, o, v)) for k, o, v in opers) |
273 |
subq = "(" + ' or '.join(t) + ")" |
274 |
args += opers |
275 |
|
276 |
if not subqlist: |
277 |
return None, None |
278 |
|
279 |
subq = " and serial in (select serial from attributes where "
|
280 |
subq += ' and '.join(subqlist)
|
281 |
subq += ")"
|
282 |
|
283 |
return subq, args
|
284 |
|
285 |
def node_list(self, parent, prefix='', |
286 |
start='', delimiter=None, |
287 |
after=0.0, before=inf,
|
288 |
filterq=None, versions=0, |
289 |
cluster=0, limit=10000): |
290 |
"""Return (a list of property tuples, a list of common prefixes)
|
291 |
for the current versions of the paths with the given parent,
|
292 |
matching the following criteria.
|
293 |
|
294 |
The property tuple for a version is returned if all
|
295 |
of these conditions are true:
|
296 |
|
297 |
a. parent (and cluster) matches
|
298 |
|
299 |
b. path > start
|
300 |
|
301 |
c. path starts with prefix
|
302 |
|
303 |
d. i [versions=true] version is in (after, before)
|
304 |
ii [versions=false] version is the max in (after, before)
|
305 |
|
306 |
e. the path does not have the delimiter occuring
|
307 |
after the prefix.
|
308 |
|
309 |
f. serial matches the attribute filter query.
|
310 |
|
311 |
A filter query is a comma-separated list of
|
312 |
terms in one of these three forms:
|
313 |
|
314 |
key
|
315 |
an attribute with this key must exist
|
316 |
|
317 |
!key
|
318 |
an attribute with this key must not exist
|
319 |
|
320 |
key ?op value
|
321 |
the attribute with this key satisfies the value
|
322 |
where ?op is one of ==, != <=, >=, <, >.
|
323 |
|
324 |
matching up to the first delimiter after prefix,
|
325 |
and are reported only once, as "virtual directories".
|
326 |
The delimiter is included in the prefixes.
|
327 |
Prefixes do appear from (e) even if no paths would match in (f).
|
328 |
|
329 |
If arguments are None, then the corresponding matching rule
|
330 |
will always match.
|
331 |
"""
|
332 |
execute = self.execute
|
333 |
|
334 |
if start < prefix:
|
335 |
start = strprevling(prefix) |
336 |
|
337 |
nextling = strnextling(prefix) |
338 |
|
339 |
q = ("select * from nodes "
|
340 |
"where parent = ? and path > ? and path < ? "
|
341 |
"and mtime > ? and mtime < ? and cluster = ?")
|
342 |
args = [parent, start, nextling, after, before, cluster] |
343 |
|
344 |
if filterq:
|
345 |
subq, subargs = self.construct_filters(filterq)
|
346 |
if subq is not None: |
347 |
q += subq |
348 |
args += subargs |
349 |
q += " order by path"
|
350 |
|
351 |
if delimiter is None: |
352 |
q += " limit ?"
|
353 |
args.append(limit) |
354 |
execute(q, args) |
355 |
return self.fetchall(), () |
356 |
|
357 |
pfz = len(prefix)
|
358 |
dz = len(delimiter)
|
359 |
count = 0
|
360 |
fetchone = self.fetchone
|
361 |
prefixes = [] |
362 |
pappend = prefixes.append |
363 |
matches = [] |
364 |
mappend = matches.append |
365 |
|
366 |
execute(q, args) |
367 |
while 1: |
368 |
props = fetchone() |
369 |
if props is None or props[0] is None: |
370 |
break
|
371 |
path = props[2]
|
372 |
idx = path.find(delimiter, pfz) |
373 |
if idx < 0: |
374 |
mappend(props) |
375 |
count += 1
|
376 |
if count >= limit:
|
377 |
break
|
378 |
continue
|
379 |
|
380 |
pf = path[:idx + dz] |
381 |
pappend(pf) |
382 |
count += 1
|
383 |
## XXX: if we break here due to limit,
|
384 |
## but a path would also be matched below,
|
385 |
## the path match would be lost since the
|
386 |
## next call with start=path would skip both of them.
|
387 |
## In this case, it is impossible to obey the limit,
|
388 |
## therefore we will break later, at limit + 1.
|
389 |
if idx + dz == len(path): |
390 |
mappend(props) |
391 |
count += 1
|
392 |
|
393 |
if count >= limit:
|
394 |
break
|
395 |
|
396 |
args[1] = strnextling(pf) # new start |
397 |
execute(q, args) |
398 |
|
399 |
return matches, prefixes
|
400 |
|
401 |
def node_delete(self, parent, prefix, |
402 |
start='', delimiter=None, |
403 |
after=0.0, before=inf,
|
404 |
filterq=None, versions=0, |
405 |
cluster=0, limit=10000): |
406 |
"""Delete the matching version for each
|
407 |
of the matching paths in the parent's namespace.
|
408 |
Return empty if nothing is deleted, else return matches.
|
409 |
The paths matching are those that would
|
410 |
be returned by .node_list() with the same arguments.
|
411 |
Note that only paths are deleted, not prefixes.
|
412 |
|
413 |
"""
|
414 |
r = self.node_list(parent, prefix,
|
415 |
start=start, delimiter=delimiter, |
416 |
after=after, before=before, |
417 |
filterq=filterq, versions=versions, |
418 |
cluster=cluster, limit=limit) |
419 |
matches, prefixes = r |
420 |
if not matches: |
421 |
return ()
|
422 |
|
423 |
q = "delete from nodes where serial = ?"
|
424 |
self.executemany(q, ((props[0],) for props in matches)) |
425 |
return matches
|
426 |
|
427 |
def node_purge(self, parent, path, after=0, before=inf, cluster=0): |
428 |
"""Delete all nodes with the specified
|
429 |
parent, cluster and path, and return
|
430 |
the serials of nodes deleted.
|
431 |
"""
|
432 |
execute = self.execute
|
433 |
q = ("select count(serial), total(size), "
|
434 |
"total(population), total(popsize) "
|
435 |
"from nodes "
|
436 |
"where parent = ? and cluster = ? "
|
437 |
"and path = ? and mtime between ? and ?")
|
438 |
args = (parent, cluster, path, after, before) |
439 |
execute(q, args) |
440 |
nr, size, pop, popsize = self.fetchone()
|
441 |
if not nr: |
442 |
return ()
|
443 |
self.node_update_ancestors(parent, -pop-nr, -size-popsize)
|
444 |
q = ("select serial from nodes "
|
445 |
"where parent = ? and cluster = ? "
|
446 |
"and path = ? and mtime between ? and ?")
|
447 |
execute(q, args) |
448 |
serials = [r[0] for r in self.fetchall()] |
449 |
q = ("delete from nodes where "
|
450 |
"parent = ? and cluster = ? "
|
451 |
"and path = ? and mtime between ? and ?")
|
452 |
execute(q, args) |
453 |
return serials
|
454 |
|
455 |
def node_move(self, source, parent, path): |
456 |
"""Move the source node into another path,
|
457 |
possibly, in another parent's namespace.
|
458 |
The node is moved with its namespace.
|
459 |
"""
|
460 |
props = self.node_get_properties(source)
|
461 |
|
462 |
oldparent = props[PARENT] |
463 |
size = props[SIZE] |
464 |
population = props[POPULATION] |
465 |
|
466 |
sizedelta = size + popsize |
467 |
popdelta = population + 1
|
468 |
node_update_ancestors = self.node_update_ancestors
|
469 |
node_update_ancestors(oldparent, -popdelta, -sizedelta) |
470 |
node_update_ancestors(parent, popdelta, sizedelta) |
471 |
|
472 |
q = "update nodes set parent = ?, path = ? where serial = ?"
|
473 |
self.execute(q, (parent, path, source))
|
474 |
|
475 |
def node_copy(self, source, parent=None, path=None): |
476 |
"""Copy the node specified by serial into
|
477 |
a new node under parent and path.
|
478 |
Return the serial of the new node.
|
479 |
The namespace of the node is not copied --
|
480 |
only the node itself is.
|
481 |
"""
|
482 |
newserial = self.alloc_serial()
|
483 |
props = self.node_get_properties(source)
|
484 |
if parent is None: |
485 |
parent = props[1]
|
486 |
if path is None: |
487 |
path = props[2]
|
488 |
size = props[3]
|
489 |
cluster = props[8]
|
490 |
return self.node_create(parent, path, size, source, cluster=cluster) |
491 |
|
492 |
def node_increment(self, serial, size=None): |
493 |
mtime = time() |
494 |
if size is not None: |
495 |
q = "update nodes set mtime = ?, size = ? where serial = ?"
|
496 |
args = (mtime, size, serial) |
497 |
else:
|
498 |
q = "update nodes set mtime = ? where serial = ?"
|
499 |
args = (mtime, serial) |
500 |
self.execute(q, args)
|
501 |
return mtime
|
502 |
|
503 |
def node_attr_get(self, serial, keys=()): |
504 |
"""Return a list of (key, value) pairs of the node specified by serial.
|
505 |
If keys is empty, return all attributes.
|
506 |
Othwerise, return only those specified.
|
507 |
"""
|
508 |
execute = self.execute
|
509 |
if keys:
|
510 |
marks = ','.join('?' for k in keys) |
511 |
q = ("select key, value from attributes "
|
512 |
"where key in (%s) and serial = ?" % (marks,))
|
513 |
execute(q, keys + (serial,)) |
514 |
else:
|
515 |
q = "select key, value from attributes where serial = ?"
|
516 |
execute(q, (serial,)) |
517 |
|
518 |
return self.fetchall() |
519 |
|
520 |
def node_attr_set(self, serial, items): |
521 |
"""Set the attributes of the node specified by serial.
|
522 |
Receive attributes as an iterable of (key, value) pairs.
|
523 |
"""
|
524 |
q = ("insert or replace into attributes (serial, key, value) "
|
525 |
"values (?, ?, ?)")
|
526 |
self.executemany(q, ((serial, k, v) for k, v in items)) |
527 |
|
528 |
def node_attr_del(self, serial, keys=()): |
529 |
"""Delete attributes of the node specified by serial.
|
530 |
If keys is empty, delete all attributes.
|
531 |
Otherwise delete those specified.
|
532 |
"""
|
533 |
if keys:
|
534 |
q = "delete from attributes where serial = ? and key = ?"
|
535 |
self.executemany(q, ((serial, key) for key in keys)) |
536 |
else:
|
537 |
q = "delete from attributes where serial = ?"
|
538 |
self.execute(q, (serial,))
|
539 |
|
540 |
def node_attr_create_key(self, serial, key): |
541 |
"""Create and return a key with the given name
|
542 |
in the namespace of the parent node.
|
543 |
"""
|
544 |
key = self.alloc_serial()
|
545 |
q = "insert or ignore into keys values (?, ?)"
|
546 |
self.execute(q, (serial, key))
|
547 |
return key
|
548 |
|
549 |
def node_attr_list_keys(self, parent): |
550 |
"""Return a list with all keys pairs defined
|
551 |
for the namespace of the node specified.
|
552 |
"""
|
553 |
#q = "select key from keys where serial = ?"
|
554 |
q = ("select distinct key from attributes a, nodes n "
|
555 |
"where a.serial = n.serial and n.parent = ?")
|
556 |
self.execute(q, (parent,))
|
557 |
return [r[0] for r in self.fetchall()] |
558 |
|
559 |
def node_attr_purge_key(self, parent, key, cluster=0): |
560 |
"""Delete the key given and
|
561 |
purge all attributes set with this key from all objects.
|
562 |
"""
|
563 |
#q = "delete from keys where key = ?"
|
564 |
q = ("delete from attributes where key = ? and serial in "
|
565 |
"(select serial from nodes where parent = ? and cluster = ?)")
|
566 |
self.execute(q, (key, parent, cluster))
|
567 |
|
568 |
def node_attr_copy(self, source, dest): |
569 |
q = ("insert or replace into attributes "
|
570 |
"select ?, key, value from attributes "
|
571 |
"where serial = ?")
|
572 |
self.execute(q, (dest, source))
|
573 |
|