2 using System.Collections.Concurrent;
\r
3 using System.Collections.Generic;
\r
4 using System.Diagnostics;
\r
7 using System.Reflection;
\r
8 using System.Threading;
\r
9 using System.Threading.Tasks;
\r
10 using Pithos.Interfaces;
\r
11 using Pithos.Network;
\r
14 namespace Pithos.Core.Agents
\r
17 /// Creates a set of state tuples from the local files, cloud files and state database entries
\r
// Logger for this class; the declaring type is looked up through reflection on the current method.
21 private static readonly ILog Log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
\r
// String form of Guid.Empty; MergeCloudFiles compares object UUIDs against it before indexing tuples by ID.
23 private readonly string _emptyGuid = Guid.Empty.ToString();
\r
// Passed to Signature.CalculateTreeHash in RecalculateTreehash.
25 private readonly CancellationToken _token;
\r
// Used to flag conflicts (SetFileState), to read BlockSize/BlockHash and to persist hashes (UpdateFileHashes).
27 private readonly IStatusKeeper _statusKeeper;
\r
// Receives the hashing progress notifications created in RecalculateTreehash.
28 private readonly IStatusNotification _statusNotification;
\r
// Source of HashingParallelism for the tree hash calculation.
29 private readonly IPithosSettings _settings;
\r
/// <summary>
/// Creates a builder that keeps the supplied collaborators for later use.
/// </summary>
/// <param name="token">Cancellation token intended for hash calculation</param>
/// <param name="statusKeeper">Status keeper stored in _statusKeeper</param>
/// <param name="statusNotification">Notification sink stored in _statusNotification</param>
/// <param name="settings">Settings stored in _settings</param>
// NOTE(review): no assignment of token to _token is visible in this excerpt - confirm it exists,
// otherwise _token keeps its default value.
31 public TupleBuilder(CancellationToken token, IStatusKeeper statusKeeper, IStatusNotification statusNotification, IPithosSettings settings)
\r
34 _statusKeeper = statusKeeper;
\r
35 _statusNotification = statusNotification;
\r
36 _settings = settings;
\r
40 /// Creates a list of Sync tuples by merging local and cloud files with the locally stored state
\r
42 /// <param name="infos">Cloud objects as (path, ObjectInfo) pairs; forwarded to MergeCloudFiles, which uses Item1 as the path key</param>
\r
43 /// <param name="files">Local file system entries; forwarded to CreateTuplesFromFiles</param>
\r
44 /// <param name="states">Stored file states; forwarded to MergeLocalStates</param>
\r
45 /// <param name="moves">Not referenced by the visible body of this method (NOTE(review): confirm whether it is still needed)</param>
\r
46 /// <returns>The merged tuples, without the "broken" ones (state without checksum, cloud object present, local file missing); those are flagged as conflicts through the status keeper instead</returns>
\r
47 public IEnumerable<StateTuple> MergeSources(IEnumerable<Tuple<string, ObjectInfo>> infos, IEnumerable<FileSystemInfo> files, List<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves)
\r
// All merge steps below work on this single map, keyed by file path
49 var tuplesByPath = new Dictionary<string, StateTuple>();
\r
50 //Fill the tuples with the local files
\r
51 CreateTuplesFromFiles(files, tuplesByPath);
\r
53 //Merge the file tuples with the local states, creating new tuples for states that have no matching files
\r
54 MergeLocalStates(states, tuplesByPath);
\r
// Attach the cloud ObjectInfo to matching tuples, creating tuples for objects that have none
56 MergeCloudFiles(infos, tuplesByPath);
\r
// Collapse FROM/TO tuple pairs that represent a local move into a single tuple
58 DetectLocalMoves(tuplesByPath);
\r
60 var tuples = tuplesByPath.Values;
\r
61 //Sync algorithm fallout: There are multiple ways we can reach a situation where a state without a checksum exists
\r
62 //1: The application stopped/crashed while downloading a file. The file's entry was created when the download started. When the application restarts,
\r
63 // it finds no local file, a server file and a null state -> C: NULL L: NULL but exists, S: Some
\r
64 // It can be fixed by NOT creating a local state if the file doesn't already exist, or adding extra info to mark this as a result of an upload
\r
65 //2: A new file is added but the app stops/crashes after uploading finishes but before the entry gets updated and the user deletes the file. The file's entry was created. When the app restarts,
\r
66 // it finds no local file, a server file and a null state -> C: NULL L: NULL but exists, S: Some
\r
// Broken tuple: a state exists but has no checksum, a cloud object exists, and the local file is missing
69 var brokenTuples =( from tuple in tuples
\r
70 where tuple.FileState != null && tuple.FileState.Checksum == null
\r
71 && tuple.ObjectInfo != null && (tuple.FileInfo == null || !tuple.FileInfo.Exists)
\r
72 select tuple).ToList();
\r
// ToList() takes a snapshot, so the result is independent of later changes to tuplesByPath
75 var actualTuples = tuples.Except(brokenTuples).ToList();
\r
// Debug.Assert is only evaluated in builds that define DEBUG
76 Debug.Assert(actualTuples.All(t => t.HashesValid()));
\r
// Broken tuples are not returned; they are reported as conflicts instead
78 foreach (var tuple in brokenTuples)
\r
80 _statusKeeper.SetFileState(tuple.FileState.FilePath,
\r
81 FileStatus.Conflict, FileOverlayStatus.Conflict, "FileState without checksum encountered for server object missing from disk");
\r
84 return actualTuples;
\r
92 /// <param name="tuplesByPath">Path-keyed tuple map; FROM entries are removed from it and TO tuples are updated in place</param>
\r
94 /// Local moves will appear as two tuples with the same hashes in different locations:
\r
95 /// - The FROM tuple will have a NULL C, and a filled L and S value
\r
96 /// - The TO tuple will only have a C hash value equal to FROM's and empty L,S
\r
97 /// The FROM tuple should be removed and the TO tuple should be updated to reflect that it's a MOVE operation.
\r
98 /// This should happen only if there are EXACTLY two tuples with the same hash
\r
100 private void DetectLocalMoves(Dictionary<string, StateTuple> tuplesByPath)
\r
// A tuple counts as new when it has a C value and either has no L value or its state refers to a different path.
// NOTE(review): the result for a tuple without FileState depends on how NullSafe handles null - confirm.
102 Func<StateTuple, bool> isNew = t => t.C != null
\r
103 && (t.L == null || t.NullSafe(tp => tp.FileState).NullSafe( s => !s.FilePath.Equals(t.FilePath)))
\r
// The four queries below are deferred: each one re-reads tuplesByPath.Values whenever it is enumerated
105 //Newly created files are candidate TOs
\r
106 var fileCreates = tuplesByPath.Values.Where(t=> t.FileInfo is FileInfo && isNew(t));
\r
107 var folderCreates = tuplesByPath.Values.Where(t => t.FileInfo is DirectoryInfo && isNew(t));
\r
108 //Newly deleted files are candidate FROMs
\r
// A delete candidate has no C value, has an L value, and its L equals its S
109 var fileDeletes = tuplesByPath.Values.Where(t =>t.NullSafe(t1=>t1.FileState).NullSafe(s=>!s.IsFolder) && t.C == null && t.L != null && t.L==t.S);
\r
110 var folderDeletes = tuplesByPath.Values.Where(t => t.NullSafe(t1 => t1.FileState).NullSafe(s => s.IsFolder) && t.C == null && t.L != null && t.L == t.S);
\r
// Pair every created file with the deleted file whose L equals its C, but only if exactly one such delete exists.
// froms is enumerated twice per candidate (Count, then Single). The list is materialized before the map is modified.
// NOTE(review): nothing here stops two created files with the same C from pairing with the same FROM - confirm
// this matches the "EXACTLY two tuples" rule stated above.
112 var moves = (from tuple in fileCreates
\r
113 let froms = fileDeletes.Where(d => d.L == tuple.C)
\r
114 where froms.Count() == 1
\r
115 select new {To = tuple, From = froms.Single()}).ToList();
\r
120 foreach (var move in moves)
\r
122 //Remove the old tuple
\r
123 var fromTuple = move.From;
\r
124 var toTuple = move.To;
\r
126 ReplaceTupleForMove(tuplesByPath, fromTuple, toTuple);
\r
127 //Detect folder moves from moved files
\r
130 //Can't create ObjectInfo, FileState for a directory if we don't store directories in the database
\r
131 //Find a folderCreate that matches the TO and a folderDelete that matches the FROM
\r
// SingleOrDefault throws InvalidOperationException when more than one folder matches.
// Both queries are evaluated against the map as already modified by earlier iterations.
132 var toFolder = folderCreates.SingleOrDefault(fd => move.To.FilePath.IsAtOrDirectlyBelow(fd.FilePath));
\r
133 var fromFolder = folderDeletes.SingleOrDefault(fd => move.From.FilePath.IsAtOrDirectlyBelow(fd.FilePath));
\r
134 if (fromFolder !=null && toFolder!= null)
\r
135 ReplaceTupleForMove(tuplesByPath,fromFolder,toFolder);
\r
136 //Folders may not be stored in states
\r
// NOTE(review): whether this check is an else-branch of the one above is not visible in this excerpt - confirm.
138 if (toFolder != null)
\r
// The old folder path is taken to be the directory that contained the moved file
140 var fromPath = Path.GetDirectoryName(fromTuple.FilePath);
\r
141 tuplesByPath.Remove(fromPath);
\r
142 toFolder.OldFullPath = fromPath;
\r
143 toFolder.NewFullPath = toFolder.FilePath;
\r
// NOTE(review): this takes the TO folder's own L, whereas ReplaceTupleForMove uses the FROM tuple's L - confirm intent.
144 toFolder.OldChecksum = toFolder.L;
\r
// The folder tuple receives the moved FILE's state and object info
145 toFolder.FileState = fromTuple.FileState;
\r
146 toFolder.ObjectInfo = fromTuple.ObjectInfo;
\r
/// <summary>
/// Turns a FROM/TO tuple pair into a single move tuple: the FROM entry is removed from the map
/// and the TO tuple takes over FROM's path, L value, state and object info.
/// </summary>
/// <param name="tuplesByPath">Path-keyed tuple map; the entry stored under fromTuple.FilePath is removed</param>
/// <param name="fromTuple">Tuple at the old location</param>
/// <param name="toTuple">Tuple at the new location; modified in place</param>
155 private static void ReplaceTupleForMove(Dictionary<string, StateTuple> tuplesByPath, StateTuple fromTuple, StateTuple toTuple)
\r
157 tuplesByPath.Remove(fromTuple.FilePath);
\r
158 //Update the new tuple with information needed for the Move
\r
159 toTuple.OldFullPath = fromTuple.FilePath;
\r
160 toTuple.NewFullPath = toTuple.FilePath;
\r
// The old checksum is the FROM tuple's L value
161 toTuple.OldChecksum = fromTuple.L;
\r
// Any FileState/ObjectInfo previously set on the TO tuple is overwritten
162 toTuple.FileState = fromTuple.FileState;
\r
163 toTuple.ObjectInfo = fromTuple.ObjectInfo;
\r
/// <summary>
/// Creates one tuple per local file system entry and stores it in the map under the tuple's FilePath.
/// </summary>
/// <param name="files">Local file system entries to create tuples for</param>
/// <param name="tuplesByPath">Map that receives the tuples; an existing entry with the same path is overwritten</param>
166 private static void CreateTuplesFromFiles(IEnumerable<FileSystemInfo> files, Dictionary<string, StateTuple> tuplesByPath)
\r
168 foreach (var info in files)
\r
170 var tuple = new StateTuple(info);
\r
171 //Is this the target of a move event?
\r
// NOTE(review): moves, states and moveArg are not declared by the visible signature or body, so the
// move handling below is presumably commented out on lines not shown in this excerpt - confirm.
173 moves.Values.FirstOrDefault(
\r
174 arg => info.FullName.Equals(arg.FullPath, StringComparison.InvariantCultureIgnoreCase)
\r
175 || info.FullName.IsAtOrBelow(arg.FullPath));
\r
176 if (moveArg != null)
\r
178 tuple.NewFullPath = info.FullName;
\r
179 var relativePath = info.AsRelativeTo(moveArg.FullPath);
\r
180 tuple.OldFullPath = Path.Combine(moveArg.OldFullPath, relativePath);
\r
181 tuple.OldChecksum = states.FirstOrDefault(
\r
182 st => st.FilePath.Equals(tuple.OldFullPath, StringComparison.InvariantCultureIgnoreCase))
\r
183 .NullSafe(st => st.Checksum);
\r
186 tuplesByPath[tuple.FilePath] = tuple;
\r
/// <summary>
/// Attaches the stored states to the tuples with the same path, creates tuples for states without one,
/// and finally updates the hashes of the tuples that still have no state.
/// </summary>
/// <param name="states">Stored file states to merge</param>
/// <param name="tuplesByPath">Path-keyed tuple map; updated in place</param>
190 private void MergeLocalStates(IEnumerable<FileState> states, Dictionary<string, StateTuple> tuplesByPath)
\r
192 //For files that have state
\r
193 foreach (var state in states)
\r
195 StateTuple hashTuple;
\r
// A tuple already exists for this path: attach the state and refresh the hashes
198 if (tuplesByPath.TryGetValue(state.FilePath, out hashTuple))
\r
200 hashTuple.FileState = state;
\r
201 UpdateHashes(hashTuple);
\r
// NOTE(review): the block comment opened on the next line has no visible terminator in this excerpt,
// so where the disabled code ends cannot be determined from here - confirm against the full file.
203 /* else if (moves.ContainsKey(state.FilePath) &&
\r
204 tuplesByPath.TryGetValue(moves[state.FilePath].FullPath, out hashTuple))
\r
206 hashTuple.FileState = state;
\r
207 UpdateHashes(hashTuple);
\r
// No tuple for this path: build one from the state's path
211 var fsInfo = FileInfoExtensions.FromPath(state.FilePath);
\r
212 hashTuple = new StateTuple { FileInfo = fsInfo, FileState = state };
\r
214 //Is the source of a moved item?
\r
// NOTE(review): moves and moveArg are not declared by the visible signature or body - presumably disabled code.
216 moves.Values.FirstOrDefault(
\r
217 arg => state.FilePath.Equals(arg.OldFullPath, StringComparison.InvariantCultureIgnoreCase)
\r
218 || state.FilePath.IsAtOrBelow(arg.OldFullPath));
\r
219 if (moveArg != null)
\r
221 var relativePath = state.FilePath.AsRelativeTo(moveArg.OldFullPath);
\r
222 hashTuple.NewFullPath = Path.Combine(moveArg.FullPath, relativePath);
\r
223 hashTuple.OldFullPath = state.FilePath;
\r
224 //Do we have the old MD5?
\r
225 //hashTuple.OldMD5 = state.LastMD5;
\r
229 tuplesByPath[state.FilePath] = hashTuple;
\r
232 //for files that don't have state
\r
// ToArray() takes a snapshot of the stateless tuples before any hashing starts
233 var statelessTuples = tuplesByPath.Values.Where(t => t.FileState == null).ToArray();
\r
234 //If there are too many stateless tuples, update them in parallel
\r
// NOTE(review): above 20 tuples UpdateHashes runs on parallel workers and may reach _statusKeeper and
// _statusNotification concurrently; their thread safety cannot be confirmed from this file.
235 if (statelessTuples.Length > 20)
\r
236 Parallel.ForEach(statelessTuples, UpdateHashes);
\r
// Presumably the else-branch (the else keyword is not visible in this excerpt): sequential update
238 statelessTuples.ApplyAction(UpdateHashes);
\r
243 /// Update the tuple with the file's hashes, avoiding calculation if the file is unchanged
\r
245 /// <param name="hashTuple">Tuple whose Merkle value is set; its Locked flag is set instead when an IOException occurs</param>
\r
247 /// The function first checks the file's size and last write date to see if there are any changes. If there are none,
\r
248 /// the file's stored hashes are used.
\r
249 /// Otherwise, MD5 is calculated first to ensure there are no changes. If MD5 is different, the Merkle hash is calculated
/// NOTE(review): the MD5 step described above is commented out in the visible body; the tree hash is
/// recalculated directly whenever the checks below fail.
\r
251 private void UpdateHashes(StateTuple hashTuple)
\r
// Values recorded in the stored state, if there is one
256 var state = hashTuple.NullSafe(s => s.FileState);
\r
257 var storedHash = state.NullSafe(s => s.Checksum);
\r
258 var storedHashes = state.NullSafe(s => s.Hashes);
\r
259 //var storedMD5 = state.NullSafe(s => s.LastMD5);
\r
// Without a stored date, DateTime.MinValue is used
260 var storedDate = state.NullSafe(s => s.LastWriteDate) ?? DateTime.MinValue;
\r
261 var storedLength = state.NullSafe(s => s.LastLength);
\r
263 //var md5Hash = Signature.MD5_EMPTY;
\r
// Entries that are not a FileInfo keep the empty tree hash
264 var merkle = TreeHash.Empty;
\r
266 if (hashTuple.FileInfo is FileInfo)
\r
268 var file = (FileInfo)hashTuple.FileInfo.WithProperCapitalization();
\r
270 //Attributes unchanged?
\r
271 //LastWriteTime is only accurate to the second
\r
// NOTE(review): the difference is signed, so a stored date LATER than the file's write time also counts
// as unchanged as long as the length matches - confirm this is intended.
272 var unchangedAttributes = file.LastWriteTime - storedDate < TimeSpan.FromSeconds(1)
\r
273 && storedLength == file.Length;
\r
275 //Attributes appear unchanged but the file length doesn't match the stored hash ?
\r
// True when exactly one of "file is empty" and "stored hash is the empty hash" holds
276 var nonEmptyMismatch = unchangedAttributes &&
\r
277 (file.Length == 0 ^ storedHash == Signature.MERKLE_EMPTY);
\r
279 //Missing hashes for NON-EMPTY hash ?
\r
280 var missingHashes = storedHash != Signature.MERKLE_EMPTY &&
\r
281 String.IsNullOrWhiteSpace(storedHashes);
\r
283 //Unchanged attributes but changed MD5
\r
284 //Short-circuiting ensures MD5 is computed only if the attributes are changed
\r
286 //var md5Mismatch = (!unchangedAttributes && file.ComputeShortHash(StatusNotification) != storedMD5);
\r
289 //If the attributes are unchanged but the Merkle doesn't match the size,
\r
290 //or the attributes and the MD5 hash have changed,
\r
291 //or the hashes are missing but the tophash is NOT empty, we need to recalculate
\r
293 //Otherwise we load the hashes from state
\r
294 if (!unchangedAttributes || nonEmptyMismatch || missingHashes)
\r
295 merkle = RecalculateTreehash(file);
\r
// Presumably the else-branch (the else keyword is not visible in this excerpt): reuse the stored hashes
298 merkle = TreeHash.Parse(hashTuple.FileState.Hashes);
\r
299 //merkle.MD5 = storedMD5;
\r
303 //md5Hash = merkle.MD5;
\r
305 //hashTuple.MD5 = md5Hash;
\r
306 //Setting Merkle also updates C
\r
307 hashTuple.Merkle = merkle;
\r
// An IOException marks the tuple as locked instead of propagating to the caller
309 catch (IOException)
\r
311 hashTuple.Locked = true;
\r
316 /// Recalculate a file's treehash and update the stored hashes through the status keeper
/// NOTE(review): no MD5 calculation is visible in this method body.
\r
318 /// <param name="file">The file to hash; its Name appears in the progress notifications</param>
\r
319 /// <returns>Presumably the calculated tree hash (the return statement is not visible in this excerpt)</returns>
\r
320 private TreeHash RecalculateTreehash(FileInfo file)
\r
// Each progress report is forwarded as a status notification; {0:p} formats the value as a percentage
322 var progress = new Progress<HashProgress>(d => _statusNotification.Notify(
\r
323 new StatusNotification(String.Format("Hashing {0:p} of {1}", d.Percentage, file.Name))));
\r
// Block size and block hash come from the status keeper, the parallelism from settings; _token is passed along
324 var merkle = Signature.CalculateTreeHash(file, _statusKeeper.BlockSize, _statusKeeper.BlockHash,
\r
325 _settings.HashingParallelism, _token, progress);
\r
// Hand the new hashes to the status keeper
327 _statusKeeper.UpdateFileHashes(file, merkle);
\r
/// <summary>
/// Attaches cloud object information to the tuples, matching first by object ID and then by path,
/// and creates new tuples for objects that match neither.
/// </summary>
/// <param name="infos">Cloud objects as (path, ObjectInfo) pairs</param>
/// <param name="tuplesByPath">Path-keyed tuple map; updated in place</param>
331 private void MergeCloudFiles(IEnumerable<Tuple<string, ObjectInfo>> infos, Dictionary<string, StateTuple> tuplesByPath)
\r
// Index the tuples whose state has an ObjectID by that ID.
// NOTE(review): ToDictionary throws ArgumentException if two states share an ObjectID - confirm IDs are unique.
333 var tuplesById = tuplesByPath.Values
\r
334 .Where(tuple => tuple.FileState != null && tuple.FileState.ObjectID != null)
\r
335 .ToDictionary(tuple => tuple.FileState.ObjectID, tuple => tuple); //new Dictionary<Guid, StateTuple>();
\r
337 foreach (var info in infos)
\r
339 StateTuple hashTuple;
\r
340 var filePath = info.Item1;
\r
341 var objectInfo = info.Item2;
\r
342 var objectId = objectInfo.UUID;
\r
// First choice: match by object ID, skipping the empty GUID
344 if (objectId != _emptyGuid && tuplesById.TryGetValue(objectId, out hashTuple))
\r
346 //This will handle renamed objects
\r
347 hashTuple.ObjectInfo = objectInfo;
\r
// Second choice: match by path
349 else if (tuplesByPath.TryGetValue(filePath, out hashTuple))
\r
351 hashTuple.ObjectInfo = objectInfo;
\r
// No match: create a tuple for the cloud object and register it under its path
355 var fsInfo = FileInfoExtensions.FromPath(filePath);
\r
356 hashTuple = new StateTuple { FileInfo = fsInfo, ObjectInfo = objectInfo };
\r
357 tuplesByPath[filePath] = hashTuple;
\r
// Register the tuple by ID as well, unless the ID is the empty GUID
359 if (objectInfo.UUID != _emptyGuid)
\r
360 tuplesById[objectInfo.UUID] = hashTuple;
\r