Revision 2715ade4 snf-pithos-tools/pithos/tools/sync.py

b/snf-pithos-tools/pithos/tools/sync.py
1 1
#!/usr/bin/env python
2 2

  
3 3
# Copyright 2011-2012 GRNET S.A. All rights reserved.
4
# 
4
#
5 5
# Redistribution and use in source and binary forms, with or
6 6
# without modification, are permitted provided that the following
7 7
# conditions are met:
8
# 
8
#
9 9
#   1. Redistributions of source code must retain the above
10 10
#      copyright notice, this list of conditions and the following
11 11
#      disclaimer.
12
# 
12
#
13 13
#   2. Redistributions in binary form must reproduce the above
14 14
#      copyright notice, this list of conditions and the following
15 15
#      disclaimer in the documentation and/or other materials
16 16
#      provided with the distribution.
17
# 
17
#
18 18
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
19 19
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 20
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
......
27 27
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 28
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 29
# POSSIBILITY OF SUCH DAMAGE.
30
# 
30
#
31 31
# The views and conclusions contained in the software and
32 32
# documentation are those of the authors and should not be
33 33
# interpreted as representing official policies, either expressed
......
75 75
        self.deleted_dirs = set()
76 76

  
77 77
        _makedirs(self.trashdir)
78
        
78

  
79 79
        dbpath = join(SETTINGS_DIR, 'sync.db')
80 80
        self.conn = sqlite3.connect(dbpath)
81 81
        self.conn.execute(SQL_CREATE_FILES_TABLE)
82 82
        self.conn.commit()
83
    
83

  
84 84
    def current_hash(self, path):
85 85
        """Return the hash of the file as it exists now in the filesystem"""
86
        
86

  
87 87
        fullpath = join(self.syncdir, path)
88 88
        if fullpath in self.deleted_dirs:
89 89
            return 'DEL'
......
92 92
        if isdir(fullpath):
93 93
            return 'DIR'
94 94
        return merkle(fullpath)
95
    
95

  
96 96
    def delete_inactive(self, timestamp):
97 97
        sql = 'DELETE FROM files WHERE timestamp != ?'
98 98
        self.conn.execute(sql, (timestamp,))
99 99
        self.conn.commit()
100
    
100

  
101 101
    def download(self, path, hash):
102 102
        fullpath = join(self.syncdir, path)
103 103
        if hash == 'DEL':
......
112 112
            else:
113 113
                print 'Downloading %s...' % path
114 114
                download(client, self.container, path, fullpath)
115
        
115

  
116 116
        current = self.current_hash(path)
117 117
        assert current == hash, "Downloaded file does not match hash"
118 118
        self.save(path, hash)
119
    
119

  
120 120
    def empty_trash(self):
121 121
        for filename in os.listdir(self.trashdir):
122 122
            path = join(self.trashdir, filename)
123 123
            os.remove(path)
124
    
124

  
125 125
    def find_hash(self, hash):
126 126
        sql = 'SELECT path FROM files WHERE hash = ?'
127 127
        ret = self.conn.execute(sql, (hash,)).fetchone()
128 128
        if ret:
129 129
            return join(self.syncdir, ret[0])
130
        
130

  
131 131
        if hash in os.listdir(self.trashdir):
132 132
            return join(self.trashdir, hash)
133
        
133

  
134 134
        return None
135
    
135

  
136 136
    def previous_hash(self, path):
137 137
        """Return the hash of the file according to the previous sync with
138 138
           the server. Return DEL if not such entry exists."""
139
        
139

  
140 140
        sql = 'SELECT hash FROM files WHERE path = ?'
141 141
        ret = self.conn.execute(sql, (path,)).fetchone()
142 142
        return ret[0] if ret else 'DEL'
143
    
143

  
144 144
    def remote_hash(self, path):
145 145
        """Return the hash of the file according to the server"""
146
        
146

  
147 147
        try:
148 148
            meta = client.retrieve_object_metadata(self.container, path)
149 149
        except Fault:
......
152 152
            return 'DIR'
153 153
        else:
154 154
            return meta['x-object-hash']
155
    
155

  
156 156
    def remove_deleted_dirs(self):
157 157
        for path in sorted(self.deleted_dirs, key=len, reverse=True):
158 158
            os.rmdir(path)
159 159
            self.deleted_dirs.remove(path)
160
    
160

  
161 161
    def resolve_conflict(self, path, hash):
162 162
        """Resolve a sync conflict by renaming the local file and downloading
163 163
           the remote one."""
164
        
164

  
165 165
        fullpath = join(self.syncdir, path)
166 166
        resolved = fullpath + '.local'
167 167
        i = 0
168 168
        while exists(resolved):
169 169
            i += 1
170 170
            resolved = fullpath + '.local%d' % i
171
        
171

  
172 172
        os.rename(fullpath, resolved)
173 173
        self.download(path, hash)
174
    
174

  
175 175
    def rmdir(self, path):
176 176
        """Remove a dir or mark for deletion if non-empty
177
        
177

  
178 178
        If a dir is empty delete it and check if any of its parents should be
179 179
        deleted too. Else mark it for later deletion.
180 180
        """
181
        
181

  
182 182
        fullpath = join(self.syncdir, path)
183 183
        if not exists(fullpath):
184 184
            return
185
        
185

  
186 186
        if os.listdir(fullpath):
187 187
            # Directory not empty
188 188
            self.deleted_dirs.add(fullpath)
189 189
            return
190
        
190

  
191 191
        os.rmdir(fullpath)
192 192
        self.deleted_dirs.discard(fullpath)
193
        
193

  
194 194
        parent = dirname(fullpath)
195 195
        while parent in self.deleted_dirs:
196 196
            os.rmdir(parent)
197 197
            self.deleted_dirs.remove(parent)
198 198
            parent = dirname(parent)
199
    
199

  
200 200
    def save(self, path, hash):
201 201
        """Save the hash value of a file. This value will be later returned
202 202
           by `previous_hash`."""
203
        
203

  
204 204
        sql = 'INSERT OR REPLACE INTO files (path, hash) VALUES (?, ?)'
205 205
        self.conn.execute(sql, (path, hash))
206 206
        self.conn.commit()
207
    
207

  
208 208
    def touch(self, path, now):
209 209
        sql = 'UPDATE files SET timestamp = ? WHERE path = ?'
210 210
        self.conn.execute(sql, (now, path))
211 211
        self.conn.commit()
212
    
212

  
213 213
    def trash(self, path):
214 214
        """Move a file to trash or delete it if it's a directory"""
215
        
215

  
216 216
        fullpath = join(self.syncdir, path)
217 217
        if not exists(fullpath):
218 218
            return
219
        
219

  
220 220
        if isfile(fullpath):
221 221
            hash = merkle(fullpath)
222 222
            trashpath = join(self.trashdir, hash)
223 223
            os.rename(fullpath, trashpath)
224 224
        else:
225 225
            self.rmdir(path)
226
    
226

  
227 227
    def upload(self, path, hash):
228 228
        fullpath = join(self.syncdir, path)
229 229
        if hash == 'DEL':
......
236 236
                prefix += '/'
237 237
            print 'Uploading %s...' % path
238 238
            upload(client, fullpath, self.container, prefix, name)
239
        
239

  
240 240
        remote = self.remote_hash(path)
241 241
        assert remote == hash, "Uploaded file does not match hash"
242 242
        self.save(path, hash)
......
246 246
    previous = state.previous_hash(path)
247 247
    current = state.current_hash(path)
248 248
    remote = state.remote_hash(path)
249
    
249

  
250 250
    if current == previous:
251 251
        # No local changes, download any remote changes
252 252
        if remote != previous:
......
266 266
def walk(dir, container):
267 267
    """Iterates on the files of the hierarchy created by merging the files
268 268
       in `dir` and the objects in `container`."""
269
    
269

  
270 270
    pending = ['']
271
    
271

  
272 272
    while pending:
273 273
        dirs = set()
274 274
        files = set()
......
277 277
            continue
278 278
        if root:
279 279
            yield root
280
        
280

  
281 281
        dirpath = join(dir, root)
282 282
        if exists(dirpath):
283 283
            for filename in os.listdir(dirpath):
......
286 286
                    dirs.add(path)
287 287
                else:
288 288
                    files.add(path)
289
        
289

  
290 290
        for object in client.list_objects(container, format='json',
291
                prefix=root, delimiter='/'):
291
                                          prefix=root, delimiter='/'):
292 292
            if 'subdir' in object:
293 293
                continue
294 294
            name = object['name']
......
296 296
                dirs.add(name)
297 297
            else:
298 298
                files.add(name)
299
        
299

  
300 300
        pending += sorted(dirs)
301 301
        for path in files:
302 302
            yield path
......
306 306
    if len(sys.argv) != 2:
307 307
        print 'syntax: %s <dir>' % sys.argv[0]
308 308
        sys.exit(1)
309
    
309

  
310 310
    syncdir = sys.argv[1]
311
    
311

  
312 312
    _makedirs(SETTINGS_DIR)
313 313
    container = os.environ.get('PITHOS_SYNC_CONTAINER', DEFAULT_CONTAINER)
314 314
    client.create_container(container)
315
    
315

  
316 316
    state = State(syncdir, container)
317
    
317

  
318 318
    now = int(time())
319 319
    for path in walk(syncdir, container):
320 320
        print 'Syncing', path
321 321
        sync(path, state)
322 322
        state.touch(path, now)
323
    
323

  
324 324
    state.delete_inactive(now)
325 325
    state.empty_trash()
326 326
    state.remove_deleted_dirs()

Also available in: Unified diff