Revision 2715ade4 snf-pithos-tools/pithos/tools/sync.py
b/snf-pithos-tools/pithos/tools/sync.py | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python |
2 | 2 |
|
3 | 3 |
# Copyright 2011-2012 GRNET S.A. All rights reserved. |
4 |
#
|
|
4 |
# |
|
5 | 5 |
# Redistribution and use in source and binary forms, with or |
6 | 6 |
# without modification, are permitted provided that the following |
7 | 7 |
# conditions are met: |
8 |
#
|
|
8 |
# |
|
9 | 9 |
# 1. Redistributions of source code must retain the above |
10 | 10 |
# copyright notice, this list of conditions and the following |
11 | 11 |
# disclaimer. |
12 |
#
|
|
12 |
# |
|
13 | 13 |
# 2. Redistributions in binary form must reproduce the above |
14 | 14 |
# copyright notice, this list of conditions and the following |
15 | 15 |
# disclaimer in the documentation and/or other materials |
16 | 16 |
# provided with the distribution. |
17 |
#
|
|
17 |
# |
|
18 | 18 |
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS |
19 | 19 |
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | 20 |
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
... | ... | |
27 | 27 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 | 28 |
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | 29 |
# POSSIBILITY OF SUCH DAMAGE. |
30 |
#
|
|
30 |
# |
|
31 | 31 |
# The views and conclusions contained in the software and |
32 | 32 |
# documentation are those of the authors and should not be |
33 | 33 |
# interpreted as representing official policies, either expressed |
... | ... | |
75 | 75 |
self.deleted_dirs = set() |
76 | 76 |
|
77 | 77 |
_makedirs(self.trashdir) |
78 |
|
|
78 |
|
|
79 | 79 |
dbpath = join(SETTINGS_DIR, 'sync.db') |
80 | 80 |
self.conn = sqlite3.connect(dbpath) |
81 | 81 |
self.conn.execute(SQL_CREATE_FILES_TABLE) |
82 | 82 |
self.conn.commit() |
83 |
|
|
83 |
|
|
84 | 84 |
def current_hash(self, path): |
85 | 85 |
"""Return the hash of the file as it exists now in the filesystem""" |
86 |
|
|
86 |
|
|
87 | 87 |
fullpath = join(self.syncdir, path) |
88 | 88 |
if fullpath in self.deleted_dirs: |
89 | 89 |
return 'DEL' |
... | ... | |
92 | 92 |
if isdir(fullpath): |
93 | 93 |
return 'DIR' |
94 | 94 |
return merkle(fullpath) |
95 |
|
|
95 |
|
|
96 | 96 |
def delete_inactive(self, timestamp): |
97 | 97 |
sql = 'DELETE FROM files WHERE timestamp != ?' |
98 | 98 |
self.conn.execute(sql, (timestamp,)) |
99 | 99 |
self.conn.commit() |
100 |
|
|
100 |
|
|
101 | 101 |
def download(self, path, hash): |
102 | 102 |
fullpath = join(self.syncdir, path) |
103 | 103 |
if hash == 'DEL': |
... | ... | |
112 | 112 |
else: |
113 | 113 |
print 'Downloading %s...' % path |
114 | 114 |
download(client, self.container, path, fullpath) |
115 |
|
|
115 |
|
|
116 | 116 |
current = self.current_hash(path) |
117 | 117 |
assert current == hash, "Downloaded file does not match hash" |
118 | 118 |
self.save(path, hash) |
119 |
|
|
119 |
|
|
120 | 120 |
def empty_trash(self): |
121 | 121 |
for filename in os.listdir(self.trashdir): |
122 | 122 |
path = join(self.trashdir, filename) |
123 | 123 |
os.remove(path) |
124 |
|
|
124 |
|
|
125 | 125 |
def find_hash(self, hash): |
126 | 126 |
sql = 'SELECT path FROM files WHERE hash = ?' |
127 | 127 |
ret = self.conn.execute(sql, (hash,)).fetchone() |
128 | 128 |
if ret: |
129 | 129 |
return join(self.syncdir, ret[0]) |
130 |
|
|
130 |
|
|
131 | 131 |
if hash in os.listdir(self.trashdir): |
132 | 132 |
return join(self.trashdir, hash) |
133 |
|
|
133 |
|
|
134 | 134 |
return None |
135 |
|
|
135 |
|
|
136 | 136 |
def previous_hash(self, path): |
137 | 137 |
"""Return the hash of the file according to the previous sync with |
138 | 138 |
the server. Return DEL if not such entry exists.""" |
139 |
|
|
139 |
|
|
140 | 140 |
sql = 'SELECT hash FROM files WHERE path = ?' |
141 | 141 |
ret = self.conn.execute(sql, (path,)).fetchone() |
142 | 142 |
return ret[0] if ret else 'DEL' |
143 |
|
|
143 |
|
|
144 | 144 |
def remote_hash(self, path): |
145 | 145 |
"""Return the hash of the file according to the server""" |
146 |
|
|
146 |
|
|
147 | 147 |
try: |
148 | 148 |
meta = client.retrieve_object_metadata(self.container, path) |
149 | 149 |
except Fault: |
... | ... | |
152 | 152 |
return 'DIR' |
153 | 153 |
else: |
154 | 154 |
return meta['x-object-hash'] |
155 |
|
|
155 |
|
|
156 | 156 |
def remove_deleted_dirs(self): |
157 | 157 |
for path in sorted(self.deleted_dirs, key=len, reverse=True): |
158 | 158 |
os.rmdir(path) |
159 | 159 |
self.deleted_dirs.remove(path) |
160 |
|
|
160 |
|
|
161 | 161 |
def resolve_conflict(self, path, hash): |
162 | 162 |
"""Resolve a sync conflict by renaming the local file and downloading |
163 | 163 |
the remote one.""" |
164 |
|
|
164 |
|
|
165 | 165 |
fullpath = join(self.syncdir, path) |
166 | 166 |
resolved = fullpath + '.local' |
167 | 167 |
i = 0 |
168 | 168 |
while exists(resolved): |
169 | 169 |
i += 1 |
170 | 170 |
resolved = fullpath + '.local%d' % i |
171 |
|
|
171 |
|
|
172 | 172 |
os.rename(fullpath, resolved) |
173 | 173 |
self.download(path, hash) |
174 |
|
|
174 |
|
|
175 | 175 |
def rmdir(self, path): |
176 | 176 |
"""Remove a dir or mark for deletion if non-empty |
177 |
|
|
177 |
|
|
178 | 178 |
If a dir is empty delete it and check if any of its parents should be |
179 | 179 |
deleted too. Else mark it for later deletion. |
180 | 180 |
""" |
181 |
|
|
181 |
|
|
182 | 182 |
fullpath = join(self.syncdir, path) |
183 | 183 |
if not exists(fullpath): |
184 | 184 |
return |
185 |
|
|
185 |
|
|
186 | 186 |
if os.listdir(fullpath): |
187 | 187 |
# Directory not empty |
188 | 188 |
self.deleted_dirs.add(fullpath) |
189 | 189 |
return |
190 |
|
|
190 |
|
|
191 | 191 |
os.rmdir(fullpath) |
192 | 192 |
self.deleted_dirs.discard(fullpath) |
193 |
|
|
193 |
|
|
194 | 194 |
parent = dirname(fullpath) |
195 | 195 |
while parent in self.deleted_dirs: |
196 | 196 |
os.rmdir(parent) |
197 | 197 |
self.deleted_dirs.remove(parent) |
198 | 198 |
parent = dirname(parent) |
199 |
|
|
199 |
|
|
200 | 200 |
def save(self, path, hash): |
201 | 201 |
"""Save the hash value of a file. This value will be later returned |
202 | 202 |
by `previous_hash`.""" |
203 |
|
|
203 |
|
|
204 | 204 |
sql = 'INSERT OR REPLACE INTO files (path, hash) VALUES (?, ?)' |
205 | 205 |
self.conn.execute(sql, (path, hash)) |
206 | 206 |
self.conn.commit() |
207 |
|
|
207 |
|
|
208 | 208 |
def touch(self, path, now): |
209 | 209 |
sql = 'UPDATE files SET timestamp = ? WHERE path = ?' |
210 | 210 |
self.conn.execute(sql, (now, path)) |
211 | 211 |
self.conn.commit() |
212 |
|
|
212 |
|
|
213 | 213 |
def trash(self, path): |
214 | 214 |
"""Move a file to trash or delete it if it's a directory""" |
215 |
|
|
215 |
|
|
216 | 216 |
fullpath = join(self.syncdir, path) |
217 | 217 |
if not exists(fullpath): |
218 | 218 |
return |
219 |
|
|
219 |
|
|
220 | 220 |
if isfile(fullpath): |
221 | 221 |
hash = merkle(fullpath) |
222 | 222 |
trashpath = join(self.trashdir, hash) |
223 | 223 |
os.rename(fullpath, trashpath) |
224 | 224 |
else: |
225 | 225 |
self.rmdir(path) |
226 |
|
|
226 |
|
|
227 | 227 |
def upload(self, path, hash): |
228 | 228 |
fullpath = join(self.syncdir, path) |
229 | 229 |
if hash == 'DEL': |
... | ... | |
236 | 236 |
prefix += '/' |
237 | 237 |
print 'Uploading %s...' % path |
238 | 238 |
upload(client, fullpath, self.container, prefix, name) |
239 |
|
|
239 |
|
|
240 | 240 |
remote = self.remote_hash(path) |
241 | 241 |
assert remote == hash, "Uploaded file does not match hash" |
242 | 242 |
self.save(path, hash) |
... | ... | |
246 | 246 |
previous = state.previous_hash(path) |
247 | 247 |
current = state.current_hash(path) |
248 | 248 |
remote = state.remote_hash(path) |
249 |
|
|
249 |
|
|
250 | 250 |
if current == previous: |
251 | 251 |
# No local changes, download any remote changes |
252 | 252 |
if remote != previous: |
... | ... | |
266 | 266 |
def walk(dir, container): |
267 | 267 |
"""Iterates on the files of the hierarchy created by merging the files |
268 | 268 |
in `dir` and the objects in `container`.""" |
269 |
|
|
269 |
|
|
270 | 270 |
pending = [''] |
271 |
|
|
271 |
|
|
272 | 272 |
while pending: |
273 | 273 |
dirs = set() |
274 | 274 |
files = set() |
... | ... | |
277 | 277 |
continue |
278 | 278 |
if root: |
279 | 279 |
yield root |
280 |
|
|
280 |
|
|
281 | 281 |
dirpath = join(dir, root) |
282 | 282 |
if exists(dirpath): |
283 | 283 |
for filename in os.listdir(dirpath): |
... | ... | |
286 | 286 |
dirs.add(path) |
287 | 287 |
else: |
288 | 288 |
files.add(path) |
289 |
|
|
289 |
|
|
290 | 290 |
for object in client.list_objects(container, format='json', |
291 |
prefix=root, delimiter='/'): |
|
291 |
prefix=root, delimiter='/'):
|
|
292 | 292 |
if 'subdir' in object: |
293 | 293 |
continue |
294 | 294 |
name = object['name'] |
... | ... | |
296 | 296 |
dirs.add(name) |
297 | 297 |
else: |
298 | 298 |
files.add(name) |
299 |
|
|
299 |
|
|
300 | 300 |
pending += sorted(dirs) |
301 | 301 |
for path in files: |
302 | 302 |
yield path |
... | ... | |
306 | 306 |
if len(sys.argv) != 2: |
307 | 307 |
print 'syntax: %s <dir>' % sys.argv[0] |
308 | 308 |
sys.exit(1) |
309 |
|
|
309 |
|
|
310 | 310 |
syncdir = sys.argv[1] |
311 |
|
|
311 |
|
|
312 | 312 |
_makedirs(SETTINGS_DIR) |
313 | 313 |
container = os.environ.get('PITHOS_SYNC_CONTAINER', DEFAULT_CONTAINER) |
314 | 314 |
client.create_container(container) |
315 |
|
|
315 |
|
|
316 | 316 |
state = State(syncdir, container) |
317 |
|
|
317 |
|
|
318 | 318 |
now = int(time()) |
319 | 319 |
for path in walk(syncdir, container): |
320 | 320 |
print 'Syncing', path |
321 | 321 |
sync(path, state) |
322 | 322 |
state.touch(path, now) |
323 |
|
|
323 |
|
|
324 | 324 |
state.delete_inactive(now) |
325 | 325 |
state.empty_trash() |
326 | 326 |
state.remove_deleted_dirs() |
Also available in: Unified diff