Statistics
| Branch: | Tag: | Revision:

root / pithos / lib / hashfiler / noder.py @ 44e0514f

History | View | Annotate | Download (19.8 kB)

1

    
2
from sqlite3 import connect, IntegrityError
3
from time import time
4

    
5
ROOTNODE  = 0
6

    
7
( SERIAL, PARENT, PATH,
8
  SIZE, POPULATION, POPSIZE,
9
  SOURCE, MTIME, CLUSTER ) = range(9)
10

    
11
inf = float('inf')
12

    
13
def strnextling(prefix):
14
    """return the first unicode string
15
       greater than but not starting with given prefix.
16
       strnextling('hello') -> 'hellp'
17
    """
18
    if not prefix:
19
        ## all strings start with the null string,
20
        ## therefore we have to approximate strnextling('')
21
        ## with the last unicode character supported by python
22
        ## 0x10ffff for wide (32-bit unicode) python builds
23
        ## 0x00ffff for narrow (16-bit unicode) python builds
24
        ## We will not autodetect. 0xffff is safe enough.
25
        return unichr(0xffff)
26
    s = prefix[:-1]
27
    c = ord(prefix[-1])
28
    if c >= 0xffff:
29
        raise RuntimeError
30
    s += unichr(c+1)
31
    return s
32

    
33
def strprevling(prefix):
34
    """return an approximation of the last unicode string
35
       less than but not starting with given prefix.
36
       strprevling(u'hello') -> u'helln\\xffff'
37
    """
38
    if not prefix:
39
        ## There is no prevling for the null string
40
        return prefix
41
    s = prefix[:-1]
42
    c = ord(prefix[-1])
43
    if c > 0:
44
        s += unichr(c-1) + unichr(0xffff)
45
    return s
46

    
47
import re
48
_regexfilter = re.compile('(!?)\s*([\w-]+)\s*(=|!=|<=|>=|<|>)?\s*(.*)$', re.UNICODE)
49

    
50
_propnames = {
51
    'serial'    : 0,
52
    'parent'    : 1,
53
    'path'      : 2,
54
    'size'      : 3,
55
    'population': 4,
56
    'popsize'   : 5,
57
    'source'    : 6,
58
    'mtime'     : 7,
59
    'cluster'   : 8,
60
}
61

    
62
_mutables = ('source', 'mtime', 'cluster')
63
_mutablepropnames = dict((k, _propnames[k]) for k in _mutables)
64

    
65
class Noder(object):
66

    
67
    def __init__(self, **params):
68
        self.params = params
69
        self.noder_init()
70

    
71
    def noder_init(self):
72
        execute = self.execute
73
        executemany = self.executemany
74

    
75
        execute(""" create table if not exists nodes
76
                          ( serial     integer not null,
77
                            parent     integer not null default 0,
78
                            path       text    not null default '',
79
                            size       integer not null default 0,
80
                            population integer not null default 0,
81
                            popsize    integer not null default 0,
82
                            source     integer,
83
                            mtime      integer,
84
                            cluster    integer not null default 0,
85
                            primary key (serial)
86
                            foreign key (parent)
87
                            references nodes(serial)
88
                            on update cascade
89
                            on delete cascade ) """)
90

    
91
        execute(""" create index if not exists
92
                    paths on nodes(cluster, parent, path) """)
93

    
94
        execute(""" create unique index if not exists
95
                    mtimes on nodes(mtime) """)
96

    
97
        execute(""" create table if not exists attributes
98
                          ( serial integer,
99
                            key    text,
100
                            value  text,
101
                            primary key (serial, key, value)
102
                            foreign key (serial)
103
                            references nodes(serial)
104
                            on update cascade
105
                            on delete cascade ) """)
106

    
107
        #execute(""" create index if not exists keys 
108
        #                    on attributes(key, value, serial) """)
109

    
110
        execute(""" pragma foreign_keys = 1 """)
111

    
112
        q = "insert or ignore into nodes(serial, parent) values (?, ?)"
113
        vals = ((ROOTNODE, ROOTNODE),)
114
        executemany(q, vals)
115

    
116
    def node_create(self, parent, path, size, source, cluster=0):
117
        """Create a new node from the given properties.
118
           Return the (serial, mtime) of the new node.
119
        """
120
        serial = self.alloc_serial()
121
        mtime = time()
122
        props = (serial, parent, path, size, 0, 0, source, mtime, cluster)
123
        q = "insert into nodes values (?, ?, ?, ?, ?, ?, ?, ?, ?)"
124
        self.execute(q, props)
125
        self.node_update_ancestors(parent, 1, size)
126
        return serial, mtime
127

    
128
    def node_resize(self, serial, size):
129
        props = self.node_get_properties(serial)
130
        if props is None:
131
            return 0
132
        q = "update nodes set size = ? where serial = ?"
133
        self.execute(q, (size, serial))
134
        delta = size - props[3]
135
        parent = props[1]
136
        self.node_update_ancestors(parent, 0, delta)
137
        return 1
138

    
139
    def node_remove(self, serial, recursive=0):
140
        """Remove the node specified by serial.
141
           Return false if the node was not found,
142
           or true if found and removed.
143
        """
144
        node_get_properties = self.node_get_properties
145
        props = node_get_properties(serial)
146
        size = props[3]
147
        pop = props[4]
148
        popsize = props[5]
149
        if pop and not recursive:
150
            return 0
151

    
152
        q = "delete from nodes where serial = ?"
153
        self.execute(q, (serial,))
154
        self.node_update_ancestors(props[1], -pop-1, -size-popsize)
155
        return 1
156

    
157
    def node_get_properties(self, serial, keys=(), propnames=_propnames):
158
        """Return a sequence of values for the properties of
159
           the node specified by serial and the keys, in the order given.
160
           If keys is empty, return all properties in the order
161
           (serial, parent, path, size, population, popsize,
162
            source, mtime, cluster).
163
        """
164
        q = "select * from nodes where serial = ?"
165
        self.execute(q, (serial,))
166
        r = self.fetchone()
167
        if r is None:
168
            return r
169

    
170
        if not keys:
171
            return r
172

    
173
        return [r[propnames[k]] for k in keys if k in propnames]
174

    
175
    def node_set_properties(self, serial, items, propnames=_mutablepropnames):
176
        """Set the properties of a node specified by the node serial and
177
           the items iterable of (name, value) pairs.
178
           Mutable properties are %s.
179
           Invalid property names and 'serial' are not set.
180
        """ % (_mutables,)
181
        if not items:
182
            return
183

    
184
        keys, vals = zip(*items)
185
        keystr = ','.join(("%s = ?" % k) for k in keys if k in propnames)
186
        if not keystr:
187
            return
188
        q = "update nodes set %s where serial = ?" % keystr
189
        vals += (serial,)
190
        self.execute(q, vals)
191

    
192
    def node_update_ancestors(self, serial, population, popsize):
193
        """Update the population properties of the node specified by serial.
194
           Population properties keep track the population and totalsize
195
           of objects in the node's namespace.
196
           May be zero or positive or negative numbers.
197
        """
198
        qu = ("update nodes set population = population + ?,"
199
                               "popsize    = popsize    + ? "
200
              "where serial = ?")
201
        qp = "select parent from nodes where serial = ?"
202
        execute = self.execute
203
        fetchone = self.fetchone
204
        while 1:
205
            execute(qu, (population, popsize, serial))
206
            population = 0 # Population isn't recursive
207
            execute(qp, (serial,))
208
            r = fetchone()
209
            if r is None:
210
                break
211
            serial = r[0]
212
            if serial < 3:
213
                break
214

    
215
    def node_lookup(self, parent, path, before=inf, cluster=0):
216
        """Lookup the current version of the given path
217
           within the parent's namespace. Return a list with its properties:
218
           (serial, parent, path, size, population, popsize,
219
            source, mtime, cluster)
220
           or None if the version is not found.
221
        """
222
        q =("select max(serial), parent, path, size, "
223
                   "population, popsize, source, mtime, cluster "
224
            "from nodes "
225
            "where parent = ? and cluster = ? and path = ? and mtime < ?")
226
        self.execute(q, (parent, cluster, path, before))
227
        props = self.fetchone()
228
        if props is not None and props[SERIAL] is not None:
229
            return props
230

    
231
        return None
232

    
233
    def parse_filters(self, filterq):
234
        preterms = filterq.split(',')
235
        included = []
236
        excluded = []
237
        opers = []
238
        match = _regexfilter.match
239
        for term in preterms:
240
            m = match(term)
241
            if m is None:
242
                continue
243
            neg, key, op, value = m.groups()
244
            if neg:
245
                excluded.append(key)
246
            elif not value:
247
                included.append(key)
248
            elif op:
249
                opers.append((key, op, value))
250
        
251
        return included, excluded, opers
252

    
253
    def construct_filters(self, filterq):
254
        subqlist = []
255
        append = subqlist.append
256
        included, excluded, opers = self.parse_filters(filterq)
257
        args = []
258

    
259
        if included:
260
            subq = "key in ("
261
            subq += ','.join(('?' for x in included)) + ")"
262
            args += included
263
            append(subq)
264

    
265
        if excluded:
266
            subq = "key not in ("
267
            subq += ','.join(('?' for x in exluded)) + ")"
268
            args += excluded
269
            append(subq)
270

    
271
        if opers:
272
            t = (("(key = %s and value %s %s)" % (k, o, v)) for k, o, v in opers)
273
            subq = "(" + ' or '.join(t) + ")"
274
            args += opers
275

    
276
        if not subqlist:
277
            return None, None
278

    
279
        subq = " and serial in (select serial from attributes where "
280
        subq += ' and '.join(subqlist)
281
        subq += ")"
282

    
283
        return subq, args
284

    
285
    def node_list(self, parent, prefix='',
286
                   start='', delimiter=None,
287
                   after=0.0, before=inf,
288
                   filterq=None, versions=0,
289
                   cluster=0, limit=10000):
290
        """Return (a list of property tuples, a list of common prefixes)
291
           for the current versions of the paths with the given parent,
292
           matching the following criteria.
293

294
           The property tuple for a version is returned if all
295
           of these conditions are true:
296

297
                a. parent (and cluster) matches
298

299
                b. path > start
300

301
                c. path starts with prefix
302

303
                d. i  [versions=true]  version is in (after, before)
304
                   ii [versions=false] version is the max in (after, before)
305

306
                e. the path does not have the delimiter occuring
307
                   after the prefix.
308

309
                f. serial matches the attribute filter query.
310

311
                   A filter query is a comma-separated list of
312
                   terms in one of these three forms:
313

314
                   key
315
                       an attribute with this key must exist
316

317
                   !key
318
                       an attribute with this key must not exist
319

320
                   key ?op value
321
                       the attribute with this key satisfies the value
322
                       where ?op is one of ==, != <=, >=, <, >.
323

324
           matching up to the first delimiter after prefix,
325
           and are reported only once, as "virtual directories".
326
           The delimiter is included in the prefixes.
327
           Prefixes do appear from (e) even if no paths would match in (f).
328

329
           If arguments are None, then the corresponding matching rule
330
           will always match.
331
        """
332
        execute = self.execute
333

    
334
        if start < prefix:
335
            start = strprevling(prefix)
336

    
337
        nextling = strnextling(prefix)
338

    
339
        q = ("select * from nodes "
340
             "where parent = ? and path > ? and path < ? "
341
             "and mtime > ? and mtime < ? and cluster = ?")
342
        args = [parent, start, nextling, after, before, cluster]
343

    
344
        if filterq:
345
            subq, subargs = self.construct_filters(filterq)
346
            if subq is not None:
347
                q += subq
348
                args += subargs
349
        q += " order by path"
350

    
351
        if delimiter is None:
352
            q += " limit ?"
353
            args.append(limit)
354
            execute(q, args)
355
            return self.fetchall(), ()
356

    
357
        pfz = len(prefix)
358
        dz = len(delimiter)
359
        count = 0
360
        fetchone = self.fetchone
361
        prefixes = []
362
        pappend = prefixes.append
363
        matches = []
364
        mappend = matches.append
365
        
366
        execute(q, args)
367
        while 1:
368
            props = fetchone()
369
            if props is None or props[0] is None:
370
                break
371
            path = props[2]
372
            idx = path.find(delimiter, pfz)
373
            if idx < 0:
374
                mappend(props)
375
                count += 1
376
                if count >= limit:
377
                    break
378
                continue
379

    
380
            pf = path[:idx + dz]
381
            pappend(pf)
382
            count += 1
383
            ## XXX: if we break here due to limit,
384
            ##      but a path would also be matched below,
385
            ##      the path match would be lost since the
386
            ##      next call with start=path would skip both of them.
387
            ##      In this case, it is impossible to obey the limit,
388
            ##      therefore we will break later, at limit + 1.
389
            if idx + dz == len(path):
390
                mappend(props)
391
                count += 1
392

    
393
            if count >= limit: 
394
                break
395

    
396
            args[1] = strnextling(pf) # new start
397
            execute(q, args)
398

    
399
        return matches, prefixes
400

    
401
    def node_delete(self, parent, prefix,
402
                    start='', delimiter=None,
403
                    after=0.0, before=inf,
404
                    filterq=None, versions=0,
405
                    cluster=0, limit=10000):
406
        """Delete the matching version for each
407
           of the matching paths in the parent's namespace.
408
           Return empty if nothing is deleted, else return matches.
409
           The paths matching are those that would
410
           be returned by .node_list() with the same arguments.
411
           Note that only paths are deleted, not prefixes.
412

413
        """
414
        r = self.node_list(parent, prefix,
415
                           start=start, delimiter=delimiter,
416
                           after=after, before=before,
417
                           filterq=filterq, versions=versions,
418
                           cluster=cluster, limit=limit)
419
        matches, prefixes = r
420
        if not matches:
421
            return ()
422

    
423
        q = "delete from nodes where serial = ?"
424
        self.executemany(q, ((props[0],) for props in matches))
425
        return matches
426

    
427
    def node_purge(self, parent, path, after=0, before=inf, cluster=0):
428
        """Delete all nodes with the specified
429
           parent, cluster and path, and return
430
           the serials of nodes deleted.
431
        """
432
        execute = self.execute
433
        q = ("select count(serial), total(size), "
434
                    "total(population), total(popsize) "
435
             "from nodes "
436
             "where parent = ? and cluster = ? "
437
             "and path = ? and mtime between ? and ?")
438
        args = (parent, cluster, path, after, before)
439
        execute(q, args)
440
        nr, size, pop, popsize = self.fetchone()
441
        if not nr:
442
            return ()
443
        self.node_update_ancestors(parent, -pop-nr, -size-popsize)
444
        q = ("select serial from nodes "
445
             "where parent = ? and cluster = ? "
446
             "and path = ? and mtime between ? and ?")
447
        execute(q, args)
448
        serials = [r[0] for r in self.fetchall()]
449
        q = ("delete from nodes where "
450
             "parent = ? and cluster = ? "
451
             "and path = ? and mtime between ? and ?")
452
        execute(q, args)
453
        return serials
454

    
455
    def node_move(self, source, parent, path):
456
        """Move the source node into another path,
457
           possibly, in another parent's namespace.
458
           The node is moved with its namespace.
459
        """
460
        props = self.node_get_properties(source)
461

    
462
        oldparent = props[PARENT]
463
        size = props[SIZE]
464
        population = props[POPULATION]
465

    
466
        sizedelta = size + popsize
467
        popdelta = population + 1
468
        node_update_ancestors = self.node_update_ancestors
469
        node_update_ancestors(oldparent, -popdelta, -sizedelta)
470
        node_update_ancestors(parent, popdelta, sizedelta)
471

    
472
        q = "update nodes set parent = ?, path = ? where serial = ?"
473
        self.execute(q, (parent, path, source))
474

    
475
    def node_copy(self, source, parent=None, path=None):
476
        """Copy the node specified by serial into
477
           a new node under parent and path.
478
           Return the serial of the new node.
479
           The namespace of the node is not copied --
480
           only the node itself is.
481
        """
482
        newserial = self.alloc_serial()
483
        props = self.node_get_properties(source)
484
        if parent is None:
485
            parent = props[1]
486
        if path is None:
487
            path = props[2]
488
        size = props[3]
489
        cluster = props[8]
490
        return self.node_create(parent, path, size, source, cluster=cluster)
491

    
492
    def node_increment(self, serial, size=None):
493
        mtime = time()
494
        if size is not None:
495
            q = "update nodes set mtime = ?, size = ? where serial = ?"
496
            args = (mtime, size, serial)
497
        else:
498
            q = "update nodes set mtime = ? where serial = ?"
499
            args = (mtime, serial)
500
        self.execute(q, args)
501
        return mtime
502

    
503
    def node_attr_get(self, serial, keys=()):
504
        """Return a list of (key, value) pairs of the node specified by serial.
505
           If keys is empty, return all attributes.
506
           Othwerise, return only those specified.
507
        """
508
        execute = self.execute
509
        if keys:
510
            marks = ','.join('?' for k in keys)
511
            q = ("select key, value from attributes "
512
                 "where key in (%s) and serial = ?" % (marks,))
513
            execute(q, keys + (serial,))
514
        else:
515
            q = "select key, value from attributes where serial = ?"
516
            execute(q, (serial,))
517

    
518
        return self.fetchall()
519

    
520
    def node_attr_set(self, serial, items):
521
        """Set the attributes of the node specified by serial.
522
           Receive attributes as an iterable of (key, value) pairs.
523
        """
524
        q = ("insert or replace into attributes (serial, key, value) "
525
             "values (?, ?, ?)")
526
        self.executemany(q, ((serial, k, v) for k, v in items))
527

    
528
    def node_attr_del(self, serial, keys=()):
529
        """Delete attributes of the node specified by serial.
530
           If keys is empty, delete all attributes.
531
           Otherwise delete those specified.
532
        """
533
        if keys:
534
            q = "delete from attributes where serial = ? and key = ?"
535
            self.executemany(q, ((serial, key) for key in keys))
536
        else:
537
            q = "delete from attributes where serial = ?"
538
            self.execute(q, (serial,))
539

    
540
    def node_attr_create_key(self, serial, key):
541
        """Create and return a key with the given name
542
           in the namespace of the parent node.
543
        """
544
        key = self.alloc_serial()
545
        q = "insert or ignore into keys values (?, ?)"
546
        self.execute(q, (serial, key))
547
        return key
548

    
549
    def node_attr_list_keys(self, parent):
550
        """Return a list with all keys pairs defined
551
           for the namespace of the node specified.
552
        """
553
        #q = "select key from keys where serial = ?"
554
        q = ("select distinct key from attributes a, nodes n "
555
             "where a.serial = n.serial and n.parent = ?")
556
        self.execute(q, (parent,))
557
        return [r[0] for r in self.fetchall()]
558

    
559
    def node_attr_purge_key(self, parent, key, cluster=0):
560
        """Delete the key given and
561
           purge all attributes set with this key from all objects.
562
        """
563
        #q = "delete from keys where key = ?"
564
        q = ("delete from attributes where key = ? and serial in "
565
             "(select serial from nodes where parent = ? and cluster = ?)")
566
        self.execute(q, (key, parent, cluster))
567

    
568
    def node_attr_copy(self, source, dest):
569
        q = ("insert or replace into attributes "
570
             "select ?, key, value from attributes "
571
             "where serial = ?")
572
        self.execute(q, (dest, source))
573