Statistics
| Branch: | Revision:

root / trunk / Pithos.Core / Agents / TupleBuilder.cs @ 23877270

History | View | Annotate | Download (15 kB)

1
using System;
2
using System.Collections.Concurrent;
3
using System.Collections.Generic;
4
using System.Diagnostics;
5
using System.IO;
6
using System.Linq;
7
using System.Reflection;
8
using System.Threading;
9
using System.Threading.Tasks;
10
using Pithos.Interfaces;
11
using Pithos.Network;
12
using log4net;
13

    
14
namespace Pithos.Core.Agents
15
{
16
    /// <summary>
17
    /// Creates a set of state tuples from the local files, cloud files and state database entries
18
    /// </summary>
19
    class TupleBuilder
20
    {
21
        //Logger named after the type that declares this member
        private static readonly ILog Log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

        //String form of Guid.Empty. Cloud objects that carry this UUID are never matched by ID
        private readonly string _emptyGuid = Guid.Empty.ToString();

        //Handed to the tree hash calculation
        private readonly CancellationToken _token;

        //Supplies block size/hash settings and receives state and hash updates
        private readonly IStatusKeeper _statusKeeper;
        //Receives hashing progress notifications
        private readonly IStatusNotification _statusNotification;
        //Supplies the hashing parallelism setting
        private readonly IPithosSettings _settings;

        /// <summary>
        /// Creates a new tuple builder that works with the supplied collaborators
        /// </summary>
        /// <param name="token">Cancellation token passed to the tree hash calculation</param>
        /// <param name="statusKeeper">Store used to read block settings and to record file states and hashes</param>
        /// <param name="statusNotification">Sink for hashing progress notifications</param>
        /// <param name="settings">Application settings, read for the hashing parallelism</param>
        public TupleBuilder(CancellationToken token, IStatusKeeper statusKeeper, IStatusNotification statusNotification, IPithosSettings settings)
        {
            this._settings = settings;
            this._statusNotification = statusNotification;
            this._statusKeeper = statusKeeper;
            this._token = token;
        }
38

    
39
        /// <summary>
        /// Builds the list of Sync tuples by combining the local files, the cloud objects and the locally stored states
        /// </summary>
        /// <param name="infos">Cloud objects, each paired with the file path it is merged under</param>
        /// <param name="files">The local files and folders</param>
        /// <param name="states">The locally stored file states</param>
        /// <param name="moves">The recorded move events</param>
        /// <returns>
        /// All merged tuples except the "broken" ones, ie tuples that have a state without a checksum and a cloud object
        /// but no existing local file. The broken tuples are marked as conflicts through the status keeper instead.
        /// </returns>
        public IEnumerable<StateTuple> MergeSources(IEnumerable<Tuple<string, ObjectInfo>> infos, IEnumerable<FileSystemInfo> files, List<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves)
        {
            var byPath = new Dictionary<string, StateTuple>();

            //Start with one tuple per local file
            CreateTuplesFromFiles(files, states, moves, byPath);
            //Attach the stored states, adding tuples for states that have no matching file
            MergeLocalStates(states, moves, byPath);
            //Attach the cloud objects
            MergeCloudFiles(infos, byPath);
            //Collapse FROM/TO pairs into single move tuples
            DetectLocalMoves(byPath);

            var allTuples = byPath.Values;

            //Sync algorithm fallout: a state without a checksum can be reached in more than one way
            //1: The application stopped/crashed while downloading a file. The state entry was created when the download
            //   started, so after a restart there is no local file, a server file and a state without a checksum
            //   -> C: NULL, L: NULL but exists, S: Some
            //   Possible fix: do NOT create a local state if the file doesn't exist yet, or add extra info that marks
            //   the entry as the result of a transfer
            //2: A new file was added, the app stopped/crashed after the upload finished but before the entry was updated,
            //   and the user deleted the file. After a restart the situation is the same as in case 1
            //   -> C: NULL, L: NULL but exists, S: Some
            Func<StateTuple, bool> isBroken = candidate =>
                candidate.FileState != null
                && candidate.FileState.Checksum == null
                && candidate.ObjectInfo != null
                && (candidate.FileInfo == null || !candidate.FileInfo.Exists);

            var broken = allTuples.Where(isBroken).ToList();
            var healthy = allTuples.Except(broken).ToList();
            Debug.Assert(healthy.All(t => t.HashesValid()));

            //Broken tuples are not returned for syncing, they are flagged as conflicts
            broken.ForEach(item => _statusKeeper.SetFileState(
                item.FileState.FilePath,
                FileStatus.Conflict,
                FileOverlayStatus.Conflict,
                "FileState without checksum encountered for server object missing from disk"));

            return healthy;
        }
86

    
87

    
88
        
89
        /// <summary>
        /// Detects local moves and collapses each FROM/TO tuple pair into a single move tuple
        /// </summary>
        /// <param name="tuplesByPath">
        /// The tuples keyed by file path. The FROM tuple of every detected move is removed from the dictionary
        /// and the matching TO tuple is updated in place
        /// </param>
        /// <remarks>
        /// Local moves will appear as two tuples with the same hashes in different locations:
        /// - The FROM tuple will have a NULL C, and a filled L and S value
        /// - The TO tuple will only have a C hash value equal to FROM's and empty L,S
        /// The FROM tuple should be removed and the TO tuple should be updated to reflect that it's a MOVE operation.
        /// This should happen only if there are EXACTLY two tuples with the same hash: one FROM and one TO.
        /// If several candidate FROMs or several candidate TOs share a hash the match is ambiguous and
        /// all the tuples involved are left untouched.
        /// </remarks>
        private void DetectLocalMoves(Dictionary<string, StateTuple> tuplesByPath)
        {
            //Both candidate lists are materialized once, so they are not re-evaluated for every
            //candidate pair and are not affected by the removals below

            //Newly created files are candidate TOs
            var creates = tuplesByPath.Values
                .Where(t => t.FileInfo is FileInfo && t.C != null && t.L == null && t.S == null)
                .ToList();
            //Newly deleted files are candidate FROMs
            var deletes = tuplesByPath.Values
                .Where(t => t.NullSafe(t1 => t1.FileState).NullSafe(s => !s.IsFolder) && t.C == null && t.L != null && t.L == t.S)
                .ToList();

            var moves = (from tuple in creates
                         //Two matches are enough to know that the FROM is ambiguous
                         let froms = deletes.Where(d => d.L == tuple.C).Take(2).ToList()
                         where froms.Count == 1
                               //The TO must be unique as well, otherwise a single FROM would be
                               //"moved" to more than one target
                               && creates.Count(c => c.C == tuple.C) == 1
                         select new { To = tuple, From = froms[0] }).ToList();

            foreach (var move in moves)
            {
                //The TO tuple takes over the identity (state and cloud object) of the FROM tuple
                tuplesByPath.Remove(move.From.FilePath);
                move.To.OldFullPath = move.From.FilePath;
                move.To.NewFullPath = move.To.FilePath;
                move.To.OldChecksum = move.From.L;
                move.To.FileState = move.From.FileState;
                move.To.ObjectInfo = move.From.ObjectInfo;
            }
        }
126

    
127
        /// <summary>
        /// Creates one tuple per local file or folder and stores it in the dictionary under the tuple's file path
        /// </summary>
        /// <param name="files">The local files and folders</param>
        /// <param name="states">The stored states, searched for the checksum of a moved item's old path</param>
        /// <param name="moves">The recorded move events</param>
        /// <param name="tuplesByPath">Receives the new tuples. An existing entry with the same path is overwritten</param>
        private static void CreateTuplesFromFiles(IEnumerable<FileSystemInfo> files, List<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves, Dictionary<string, StateTuple> tuplesByPath)
        {
            foreach (var entry in files)
            {
                var fileTuple = new StateTuple(entry);

                //Look for the first move event whose target is this entry or one of its parent folders
                MovedEventArgs targetMove = null;
                foreach (var candidate in moves.Values)
                {
                    if (entry.FullName.Equals(candidate.FullPath, StringComparison.InvariantCultureIgnoreCase)
                        || entry.FullName.IsAtOrBelow(candidate.FullPath))
                    {
                        targetMove = candidate;
                        break;
                    }
                }

                if (targetMove != null)
                {
                    fileTuple.NewFullPath = entry.FullName;
                    //Rebuild the old location from the position of the entry relative to the move target
                    var pathBelowTarget = entry.AsRelativeTo(targetMove.FullPath);
                    fileTuple.OldFullPath = Path.Combine(targetMove.OldFullPath, pathBelowTarget);

                    //The old checksum comes from the state stored for the old location, if there is one
                    var oldState = states.FirstOrDefault(
                        stored => stored.FilePath.Equals(fileTuple.OldFullPath, StringComparison.InvariantCultureIgnoreCase));
                    fileTuple.OldChecksum = oldState.NullSafe(stored => stored.Checksum);
                }

                tuplesByPath[fileTuple.FilePath] = fileTuple;
            }
        }
150

    
151
        /// <summary>
        /// Merges the stored states into the tuples and makes sure every tuple has its hashes updated
        /// </summary>
        /// <param name="states">The locally stored file states</param>
        /// <param name="moves">The recorded move events, looked up by a state's file path</param>
        /// <param name="tuplesByPath">
        /// The tuples keyed by file path. Matching tuples receive their state, while states without
        /// a matching tuple get a new tuple stored under the state's file path
        /// </param>
        /// <remarks>
        /// A state is attached to the tuple found at its own path or, failing that, to the tuple found at the target
        /// path of the move event registered for the state's path. Tuples that still have no state at the end
        /// get their hashes updated as well, in parallel if there are many of them.
        /// </remarks>
        private void MergeLocalStates(IEnumerable<FileState> states, ConcurrentDictionary<string, MovedEventArgs> moves, Dictionary<string, StateTuple> tuplesByPath)
        {
            //Above this number of stateless tuples the hashes are updated in parallel
            const int parallelThreshold = 20;

            //For files that have state
            foreach (var state in states)
            {
                StateTuple hashTuple;
                MovedEventArgs movedTo;

                //moves is a concurrent dictionary: a single TryGetValue is used instead of ContainsKey
                //followed by the indexer, which would throw if the entry was removed between the two calls
                var hasTuple = tuplesByPath.TryGetValue(state.FilePath, out hashTuple)
                               || (moves.TryGetValue(state.FilePath, out movedTo)
                                   && tuplesByPath.TryGetValue(movedTo.FullPath, out hashTuple));

                if (hasTuple)
                {
                    hashTuple.FileState = state;
                    UpdateHashes(hashTuple);
                }
                else
                {
                    var fsInfo = FileInfoExtensions.FromPath(state.FilePath);
                    hashTuple = new StateTuple { FileInfo = fsInfo, FileState = state };

                    //Is the source of a moved item?
                    var moveArg =
                        moves.Values.FirstOrDefault(
                            arg => state.FilePath.Equals(arg.OldFullPath, StringComparison.InvariantCultureIgnoreCase)
                                   || state.FilePath.IsAtOrBelow(arg.OldFullPath));
                    if (moveArg != null)
                    {
                        //Rebuild the new location from the position of the state relative to the move source.
                        //The old checksum is not set here
                        var relativePath = state.FilePath.AsRelativeTo(moveArg.OldFullPath);
                        hashTuple.NewFullPath = Path.Combine(moveArg.FullPath, relativePath);
                        hashTuple.OldFullPath = state.FilePath;
                    }

                    tuplesByPath[state.FilePath] = hashTuple;
                }
            }

            //For files that don't have state
            var statelessTuples = tuplesByPath.Values.Where(t => t.FileState == null).ToArray();
            //If there are too many stateless tuples, update them in parallel
            if (statelessTuples.Length > parallelThreshold)
            {
                Parallel.ForEach(statelessTuples, UpdateHashes);
            }
            else
            {
                statelessTuples.ApplyAction(UpdateHashes);
            }
        }
201

    
202

    
203
        /// <summary>
        /// Update the tuple with the file's tree hash, avoiding calculation if the file appears unchanged
        /// </summary>
        /// <param name="hashTuple">The tuple to update. Its Merkle property is set, or its Locked flag if the file can't be read</param>
        /// <remarks>
        /// The function compares the file's size and last write date with the values stored in the tuple's state.
        /// The tree hash is recalculated when:
        /// - the size or the last write date differ from the stored values (in EITHER direction), or
        /// - the attributes match but exactly one of "the file is empty" and "the stored hash is the empty hash" holds, or
        /// - the stored hash is not the empty hash but no stored hashes are available.
        /// Otherwise the hashes stored in the state are parsed and reused.
        /// Tuples that don't refer to a file get the empty tree hash.
        /// An IOException raised while processing the file marks the tuple as Locked.
        /// </remarks>
        private void UpdateHashes(StateTuple hashTuple)
        {
            try
            {
                var state = hashTuple.NullSafe(s => s.FileState);
                var storedHash = state.NullSafe(s => s.Checksum);
                var storedHashes = state.NullSafe(s => s.Hashes);
                var storedDate = state.NullSafe(s => s.LastWriteDate) ?? DateTime.MinValue;
                var storedLength = state.NullSafe(s => s.LastLength);

                var merkle = TreeHash.Empty;

                if (hashTuple.FileInfo is FileInfo)
                {
                    var file = (FileInfo)hashTuple.FileInfo.WithProperCapitalization();

                    //Attributes unchanged?
                    //LastWriteTime is only accurate to the second.
                    //The ABSOLUTE difference is checked: a file whose write time moved backwards
                    //(eg. an older copy with the same size was restored) has changed as well
                    var unchangedAttributes = (file.LastWriteTime - storedDate).Duration() < TimeSpan.FromSeconds(1)
                        && storedLength == file.Length;

                    //Attributes appear unchanged but the file length doesn't match the stored hash ?
                    var nonEmptyMismatch = unchangedAttributes &&
                        (file.Length == 0 ^ storedHash == Signature.MERKLE_EMPTY);

                    //Missing hashes for NON-EMPTY hash ?
                    var missingHashes = storedHash != Signature.MERKLE_EMPTY &&
                        String.IsNullOrWhiteSpace(storedHashes);

                    //If the attributes have changed,
                    //or the attributes are unchanged but the stored hash doesn't match the size,
                    //or the hashes are missing but the tophash is NOT empty, we need to recalculate
                    //
                    //Otherwise we load the hashes from state
                    if (!unchangedAttributes || nonEmptyMismatch || missingHashes)
                    {
                        merkle = RecalculateTreehash(file);
                    }
                    else
                    {
                        merkle = TreeHash.Parse(hashTuple.FileState.Hashes);
                    }
                }

                //Setting Merkle also updates C
                hashTuple.Merkle = merkle;
            }
            catch (IOException)
            {
                //The file couldn't be read, eg because another process is using it
                hashTuple.Locked = true;
            }
        }
275

    
276
        /// <summary>
        /// Calculates the tree hash of a file from scratch and hands the result to the status keeper
        /// </summary>
        /// <param name="file">The file to hash</param>
        /// <returns>The newly calculated tree hash</returns>
        /// <remarks>
        /// Hashing progress is reported through the status notification service
        /// </remarks>
        private TreeHash RecalculateTreehash(FileInfo file)
        {
            //Every progress report is turned into a status notification
            var progressReporter = new Progress<HashProgress>(report =>
            {
                var message = String.Format("Hashing {0:p} of {1}", report.Percentage, file.Name);
                _statusNotification.Notify(new StatusNotification(message));
            });

            var blockSize = _statusKeeper.BlockSize;
            var blockHash = _statusKeeper.BlockHash;
            var parallelism = _settings.HashingParallelism;

            var treeHash = Signature.CalculateTreeHash(file, blockSize, blockHash, parallelism, _token, progressReporter);

            //Pass the fresh hashes to the status keeper
            _statusKeeper.UpdateFileHashes(file, treeHash);
            return treeHash;
        }
291

    
292
        /// <summary>
        /// Merges the cloud objects into the tuples, creating new tuples for objects that have no matching tuple
        /// </summary>
        /// <param name="infos">Cloud objects, each paired with the file path it is merged under</param>
        /// <param name="tuplesByPath">
        /// The tuples keyed by file path. Matching tuples receive their cloud object, while objects without
        /// a matching tuple get a new tuple stored under the object's file path
        /// </param>
        /// <remarks>
        /// A cloud object is matched first by ID (the object's UUID against the ObjectID of the stored state),
        /// which handles renamed objects, and then by path. The empty GUID is never used for ID matching.
        /// </remarks>
        private void MergeCloudFiles(IEnumerable<Tuple<string, ObjectInfo>> infos, Dictionary<string, StateTuple> tuplesByPath)
        {
            //Index the tuples that have a usable object ID.
            //The empty GUID is excluded because it is never looked up below and can appear in more than one state.
            //Should several states share the same ID, the first tuple wins instead of failing
            //the whole merge with a duplicate key exception
            var tuplesById = tuplesByPath.Values
                .Where(tuple => tuple.FileState != null
                                && tuple.FileState.ObjectID != null
                                && tuple.FileState.ObjectID != _emptyGuid)
                .GroupBy(tuple => tuple.FileState.ObjectID)
                .ToDictionary(group => group.Key, group => group.First());

            foreach (var info in infos)
            {
                StateTuple hashTuple;
                var filePath = info.Item1;
                var objectInfo = info.Item2;
                var objectId = objectInfo.UUID;

                if (objectId != _emptyGuid && tuplesById.TryGetValue(objectId, out hashTuple))
                {
                    //This will handle renamed objects
                    hashTuple.ObjectInfo = objectInfo;
                }
                else if (tuplesByPath.TryGetValue(filePath, out hashTuple))
                {
                    hashTuple.ObjectInfo = objectInfo;
                }
                else
                {
                    //No tuple matches by ID or by path, so a new one is created
                    var fsInfo = FileInfoExtensions.FromPath(filePath);
                    hashTuple = new StateTuple { FileInfo = fsInfo, ObjectInfo = objectInfo };
                    tuplesByPath[filePath] = hashTuple;

                    if (objectId != _emptyGuid)
                    {
                        tuplesById[objectId] = hashTuple;
                    }
                }
            }
        }
325

    
326

    
327
    }
328
}