root / trunk / Pithos.Core / Agents / TupleBuilder.cs @ 23877270
History | View | Annotate | Download (15 kB)
1 |
using System; |
---|---|
2 |
using System.Collections.Concurrent; |
3 |
using System.Collections.Generic; |
4 |
using System.Diagnostics; |
5 |
using System.IO; |
6 |
using System.Linq; |
7 |
using System.Reflection; |
8 |
using System.Threading; |
9 |
using System.Threading.Tasks; |
10 |
using Pithos.Interfaces; |
11 |
using Pithos.Network; |
12 |
using log4net; |
13 |
|
14 |
namespace Pithos.Core.Agents |
15 |
{ |
16 |
/// <summary> |
17 |
/// Creates a set of state tuples from the local files, cloud files and state database entries |
18 |
/// </summary> |
19 |
class TupleBuilder |
20 |
{ |
21 |
private static readonly ILog Log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); |
22 |
|
23 |
private readonly string _emptyGuid = Guid.Empty.ToString(); |
24 |
|
25 |
private readonly CancellationToken _token; |
26 |
|
27 |
private readonly IStatusKeeper _statusKeeper; |
28 |
private readonly IStatusNotification _statusNotification; |
29 |
private readonly IPithosSettings _settings; |
30 |
|
31 |
/// <summary>
/// Creates a tuple builder bound to the supplied cancellation token and collaborators.
/// </summary>
/// <param name="token">Cancellation token; later handed to the tree hash calculation.</param>
/// <param name="statusKeeper">Used to record file states and freshly calculated hashes.</param>
/// <param name="statusNotification">Receives hashing progress notifications.</param>
/// <param name="settings">Supplies the hashing parallelism setting.</param>
public TupleBuilder(CancellationToken token, IStatusKeeper statusKeeper, IStatusNotification statusNotification, IPithosSettings settings)
{
    this._settings = settings;
    this._statusNotification = statusNotification;
    this._statusKeeper = statusKeeper;
    this._token = token;
}
38 |
|
39 |
/// <summary>
/// Builds the list of sync tuples by combining the local files, the stored file states
/// and the cloud objects, keyed by file path.
/// </summary>
/// <param name="infos">Pairs of file path and cloud object info</param>
/// <param name="files">The local file system entries</param>
/// <param name="states">The locally stored file states</param>
/// <param name="moves">The pending local move events</param>
/// <returns>
/// All merged tuples except the "broken" ones, ie. tuples whose state has no checksum while
/// a server object exists and the local file is missing. Broken tuples are marked as conflicts.
/// </returns>
public IEnumerable<StateTuple> MergeSources(IEnumerable<Tuple<string, ObjectInfo>> infos, IEnumerable<FileSystemInfo> files, List<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves)
{
    var tuplesByPath = new Dictionary<string, StateTuple>();

    //Step 1: one tuple per local file
    CreateTuplesFromFiles(files, states, moves, tuplesByPath);
    //Step 2: attach the stored states, adding tuples for states that have no matching file
    MergeLocalStates(states, moves, tuplesByPath);
    //Step 3: attach the cloud objects, adding tuples for objects that have no local counterpart
    MergeCloudFiles(infos, tuplesByPath);
    //Step 4: collapse delete/create pairs into moves
    DetectLocalMoves(tuplesByPath);

    var allTuples = tuplesByPath.Values;

    //Sync algorithm fallout: there are several ways to end up with a state that has no checksum
    //1: The application stopped/crashed while downloading a file. The file's entry was created when
    //   the download started. After a restart there is no local file, a server file and a null state
    //   -> C: NULL L: NULL but exists, S: Some
    //   Possible fixes: do NOT create a local state if the file doesn't already exist, or store
    //   extra info that marks the entry as the result of an upload
    //2: A new file is added but the application stops/crashes after the upload finishes and before the
    //   entry is updated, and the user then deletes the file. After a restart there is no local file,
    //   a server file and a null state -> C: NULL L: NULL but exists, S: Some
    var orphanedTuples = allTuples
        .Where(t => t.FileState != null && t.FileState.Checksum == null)
        .Where(t => t.ObjectInfo != null)
        .Where(t => t.FileInfo == null || !t.FileInfo.Exists)
        .ToList();

    var usableTuples = allTuples.Except(orphanedTuples).ToList();
    Debug.Assert(usableTuples.All(t => t.HashesValid()));

    //Flag every broken entry as a conflict instead of trying to sync it
    foreach (var orphan in orphanedTuples)
    {
        _statusKeeper.SetFileState(
            orphan.FileState.FilePath,
            FileStatus.Conflict,
            FileOverlayStatus.Conflict,
            "FileState without checksum encountered for server object missing from disk");
    }

    return usableTuples;
}
86 |
|
87 |
|
88 |
|
89 |
/// <summary>
/// Detects files that were moved locally and collapses each delete/create pair into a single MOVE tuple.
/// </summary>
/// <param name="tuplesByPath">
/// All tuples, keyed by path. The FROM tuple of every detected move is removed from the dictionary
/// and the matching TO tuple is updated in place.
/// </param>
/// <remarks>
/// Local moves will appear as two tuples with the same hashes in different locations:
/// - The FROM tuple will have a NULL C, and a filled L and S value
/// - The TO tuple will only have a C hash value equal to FROM's and empty L,S
/// The FROM tuple should be removed and the TO tuple should be updated to reflect that it's a MOVE operation.
/// This should happen only if there are EXACTLY two tuples with the same hash: one FROM and one TO.
/// If several new files share the hash of a deleted file (eg. the file was both moved and copied),
/// none of them is treated as a move, so that two tuples never end up sharing the same state.
/// </remarks>
private void DetectLocalMoves(Dictionary<string, StateTuple> tuplesByPath)
{
    //Newly created files are candidate TOs
    //Materialized up front because the dictionary is modified further down
    var creates = tuplesByPath.Values
        .Where(t => t.FileInfo is FileInfo && t.C != null && t.L == null && t.S == null)
        .ToList();
    if (creates.Count == 0)
        return;

    //Newly deleted files are candidate FROMs
    //Materialized once so the filter isn't re-evaluated for every candidate TO
    var deletes = tuplesByPath.Values
        .Where(t => t.NullSafe(t1 => t1.FileState).NullSafe(s => !s.IsFolder) && t.C == null && t.L != null && t.L == t.S)
        .ToList();
    if (deletes.Count == 0)
        return;

    //Take(2) is enough to tell "exactly one" from "more than one"
    var moves = (from tuple in creates
                 let froms = deletes.Where(d => d.L == tuple.C).Take(2).ToList()
                 where froms.Count == 1
                       //The hash must be unique among the TOs as well
                       && creates.Where(c => c.C == tuple.C).Take(2).Count() == 1
                 select new { To = tuple, From = froms[0] }).ToList();

    foreach (var move in moves)
    {
        tuplesByPath.Remove(move.From.FilePath);
        move.To.OldFullPath = move.From.FilePath;
        move.To.NewFullPath = move.To.FilePath;
        move.To.OldChecksum = move.From.L;
        //The TO tuple takes over the state and cloud object of the file that was moved
        move.To.FileState = move.From.FileState;
        move.To.ObjectInfo = move.From.ObjectInfo;
    }
}
126 |
|
127 |
/// <summary>
/// Creates one tuple for each local file system entry and stores it in <paramref name="tuplesByPath"/>
/// under the tuple's FilePath.
/// </summary>
/// <param name="files">The local file system entries</param>
/// <param name="states">The stored states, used to look up the checksum of a moved file's old location</param>
/// <param name="moves">The pending move events. An entry that is at or below a move's FullPath is treated as the target of that move</param>
/// <param name="tuplesByPath">The dictionary that receives the new tuples. Existing entries with the same path are overwritten</param>
private static void CreateTuplesFromFiles(IEnumerable<FileSystemInfo> files, List<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves, Dictionary<string, StateTuple> tuplesByPath)
{
    //ConcurrentDictionary.Values builds a new snapshot copy on every access.
    //Take the snapshot once, on first use, instead of once per file
    ICollection<MovedEventArgs> moveEvents = null;

    foreach (var info in files)
    {
        moveEvents = moveEvents ?? moves.Values;

        var tuple = new StateTuple(info);
        //Is this the target of a move event?
        var moveArg =
            moveEvents.FirstOrDefault(
                arg => info.FullName.Equals(arg.FullPath, StringComparison.InvariantCultureIgnoreCase)
                       || info.FullName.IsAtOrBelow(arg.FullPath));
        if (moveArg != null)
        {
            tuple.NewFullPath = info.FullName;
            //Rebuild the old location from the entry's position relative to the move target
            var relativePath = info.AsRelativeTo(moveArg.FullPath);
            var oldFullPath = Path.Combine(moveArg.OldFullPath, relativePath);
            tuple.OldFullPath = oldFullPath;
            //The old checksum stays null if no state is stored for the old location
            tuple.OldChecksum = states.FirstOrDefault(
                    st => st.FilePath.Equals(oldFullPath, StringComparison.InvariantCultureIgnoreCase))
                .NullSafe(st => st.Checksum);
        }

        tuplesByPath[tuple.FilePath] = tuple;
    }
}
150 |
|
151 |
/// <summary>
/// Attaches the stored states to the tuples created from the local files and creates new tuples
/// for states that have no matching file. Finally, calculates the hashes of tuples without a state.
/// </summary>
/// <param name="states">The locally stored file states</param>
/// <param name="moves">The pending move events. Looked up by the state's file path to find the new location of a moved file</param>
/// <param name="tuplesByPath">The tuples to update, keyed by path</param>
private void MergeLocalStates(IEnumerable<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves, Dictionary<string, StateTuple> tuplesByPath)
{
    //ConcurrentDictionary.Values builds a new snapshot copy on every access.
    //Take the snapshot once, on first use, instead of once per state
    ICollection<MovedEventArgs> moveEvents = null;

    //For files that have state
    foreach (var state in states)
    {
        StateTuple hashTuple;
        MovedEventArgs movedTo;

        if (tuplesByPath.TryGetValue(state.FilePath, out hashTuple))
        {
            hashTuple.FileState = state;
            UpdateHashes(hashTuple);
        }
        //TryGetValue instead of ContainsKey + indexer: the move entry may be removed
        //by another thread between the two calls, making the indexer throw
        else if (moves.TryGetValue(state.FilePath, out movedTo) &&
                 tuplesByPath.TryGetValue(movedTo.FullPath, out hashTuple))
        {
            //The file was moved: attach the state to the tuple at the new location
            hashTuple.FileState = state;
            UpdateHashes(hashTuple);
        }
        else
        {
            //No tuple at the state's path or at its move target: create a tuple from the stored path
            var fsInfo = FileInfoExtensions.FromPath(state.FilePath);
            hashTuple = new StateTuple { FileInfo = fsInfo, FileState = state };

            //Is the source of a moved item?
            moveEvents = moveEvents ?? moves.Values;
            var moveArg =
                moveEvents.FirstOrDefault(
                    arg => state.FilePath.Equals(arg.OldFullPath, StringComparison.InvariantCultureIgnoreCase)
                           || state.FilePath.IsAtOrBelow(arg.OldFullPath));
            if (moveArg != null)
            {
                var relativePath = state.FilePath.AsRelativeTo(moveArg.OldFullPath);
                hashTuple.NewFullPath = Path.Combine(moveArg.FullPath, relativePath);
                hashTuple.OldFullPath = state.FilePath;
            }

            tuplesByPath[state.FilePath] = hashTuple;
        }
    }

    //for files that don't have state
    var statelessTuples = tuplesByPath.Values.Where(t => t.FileState == null).ToArray();
    //If there are too many stateless tuples, update them in parallel
    if (statelessTuples.Length > 20)
        Parallel.ForEach(statelessTuples, UpdateHashes);
    else
        statelessTuples.ApplyAction(UpdateHashes);
}
201 |
|
202 |
|
203 |
/// <summary>
/// Update the tuple with the file's tree hash, avoiding calculation if the file is unchanged
/// </summary>
/// <param name="hashTuple">The tuple to update. Its Merkle property is set, or Locked is set if the file can't be read</param>
/// <remarks>
/// The function compares the file's size and last write date with the stored values.
/// If they match, the stored size agrees with the stored top hash and the stored block hashes
/// are available, the stored hashes are parsed and reused. Otherwise the tree hash is recalculated.
/// Tuples that don't refer to a file (eg. folders or missing entries) receive the empty tree hash.
/// An IOException raised while inspecting or hashing the file marks the tuple as Locked.
/// </remarks>
private void UpdateHashes(StateTuple hashTuple)
{
    try
    {
        var state = hashTuple.NullSafe(s => s.FileState);
        var storedHash = state.NullSafe(s => s.Checksum);
        var storedHashes = state.NullSafe(s => s.Hashes);
        var storedDate = state.NullSafe(s => s.LastWriteDate) ?? DateTime.MinValue;
        var storedLength = state.NullSafe(s => s.LastLength);

        var merkle = TreeHash.Empty;

        if (hashTuple.FileInfo is FileInfo)
        {
            var file = (FileInfo)hashTuple.FileInfo.WithProperCapitalization();

            //Attributes unchanged?
            //LastWriteTime is only accurate to the second.
            //The ABSOLUTE difference is checked: a file whose timestamp moved backwards
            //(eg. replaced by an older copy of the same size) must count as changed too.
            //NOTE(review): assumes the stored date is the file's last write time - confirm
            var timestampDrift = (file.LastWriteTime - storedDate).Duration();
            var unchangedAttributes = timestampDrift < TimeSpan.FromSeconds(1)
                                      && storedLength == file.Length;

            //Attributes appear unchanged but the file length doesn't match the stored hash ?
            //ie. exactly one of "file is empty" and "stored hash is the empty hash" holds
            var nonEmptyMismatch = unchangedAttributes &&
                                   ((file.Length == 0) ^ (storedHash == Signature.MERKLE_EMPTY));

            //Missing hashes for NON-EMPTY hash ?
            var missingHashes = storedHash != Signature.MERKLE_EMPTY &&
                                String.IsNullOrWhiteSpace(storedHashes);

            //If the attributes have changed,
            //or they are unchanged but the top hash doesn't match the size,
            //or the hashes are missing but the top hash is NOT empty, we need to recalculate
            //
            //Otherwise we load the hashes from state
            if (!unchangedAttributes || nonEmptyMismatch || missingHashes)
                merkle = RecalculateTreehash(file);
            else
                merkle = TreeHash.Parse(hashTuple.FileState.Hashes);
        }

        //Setting Merkle also updates C
        hashTuple.Merkle = merkle;
    }
    catch (IOException)
    {
        //The file couldn't be read: flag the tuple as locked instead of failing the whole merge
        hashTuple.Locked = true;
    }
}
275 |
|
276 |
/// <summary>
/// Calculates the tree hash of a file, reporting progress through the status notification,
/// and passes the result to the status keeper
/// </summary>
/// <param name="file">The file to hash</param>
/// <returns>The newly calculated tree hash</returns>
private TreeHash RecalculateTreehash(FileInfo file)
{
    //Each progress report becomes a status notification that names the file
    Action<HashProgress> report = step =>
    {
        var message = String.Format("Hashing {0:p} of {1}", step.Percentage, file.Name);
        _statusNotification.Notify(new StatusNotification(message));
    };
    var progress = new Progress<HashProgress>(report);

    var treeHash = Signature.CalculateTreeHash(
        file,
        _statusKeeper.BlockSize,
        _statusKeeper.BlockHash,
        _settings.HashingParallelism,
        _token,
        progress);

    //Hand the fresh hashes to the status keeper
    _statusKeeper.UpdateFileHashes(file, treeHash);
    return treeHash;
}
291 |
|
292 |
/// <summary>
/// Attaches the cloud objects to the existing tuples, matching first by object ID and then by path.
/// Objects without a matching tuple get a new tuple.
/// </summary>
/// <param name="infos">Pairs of file path and cloud object info</param>
/// <param name="tuplesByPath">The tuples to update, keyed by path. New tuples are added under the object's file path</param>
/// <remarks>
/// Null, empty and all-zero (Guid.Empty) IDs are not identifiers and are never used for matching.
/// ToDictionary still throws an ArgumentException if two stored states share the same genuine object ID.
/// </remarks>
private void MergeCloudFiles(IEnumerable<Tuple<string, ObjectInfo>> infos, Dictionary<string, StateTuple> tuplesByPath)
{
    //Index the tuples that carry a usable object ID.
    //Placeholder IDs are skipped: several states may share them, which would make ToDictionary throw
    var tuplesById = tuplesByPath.Values
        .Where(tuple => tuple.FileState != null && IsUsableObjectId(tuple.FileState.ObjectID))
        .ToDictionary(tuple => tuple.FileState.ObjectID, tuple => tuple);

    foreach (var info in infos)
    {
        StateTuple hashTuple;
        var filePath = info.Item1;
        var objectInfo = info.Item2;
        var objectId = objectInfo.UUID;
        //A null ID must never reach the dictionary, a null key throws
        var hasObjectId = IsUsableObjectId(objectId);

        if (hasObjectId && tuplesById.TryGetValue(objectId, out hashTuple))
        {
            //This will handle renamed objects
            hashTuple.ObjectInfo = objectInfo;
        }
        else if (tuplesByPath.TryGetValue(filePath, out hashTuple))
        {
            hashTuple.ObjectInfo = objectInfo;
        }
        else
        {
            //The object exists only on the server: create a tuple for its local path
            var fsInfo = FileInfoExtensions.FromPath(filePath);
            hashTuple = new StateTuple { FileInfo = fsInfo, ObjectInfo = objectInfo };
            tuplesByPath[filePath] = hashTuple;

            if (hasObjectId)
                tuplesById[objectId] = hashTuple;
        }
    }
}

/// <summary>
/// Returns true if the ID can identify an object, ie. it is not null, empty or the all-zero GUID
/// </summary>
private bool IsUsableObjectId(string objectId)
{
    return !String.IsNullOrEmpty(objectId) && objectId != _emptyGuid;
}
325 |
|
326 |
|
327 |
} |
328 |
} |