Synch seems OK. Identified problem with poll differencer
[pithos-ms-client] / trunk / Pithos.Core / Agents / PollAgent.cs
1 #region\r
2 /* -----------------------------------------------------------------------\r
3  * <copyright file="PollAgent.cs" company="GRNet">\r
4  * \r
5  * Copyright 2011-2012 GRNET S.A. All rights reserved.\r
6  *\r
7  * Redistribution and use in source and binary forms, with or\r
8  * without modification, are permitted provided that the following\r
9  * conditions are met:\r
10  *\r
11  *   1. Redistributions of source code must retain the above\r
12  *      copyright notice, this list of conditions and the following\r
13  *      disclaimer.\r
14  *\r
15  *   2. Redistributions in binary form must reproduce the above\r
16  *      copyright notice, this list of conditions and the following\r
17  *      disclaimer in the documentation and/or other materials\r
18  *      provided with the distribution.\r
19  *\r
20  *\r
21  * THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS\r
22  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\r
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\r
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR\r
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\r
28  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\r
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\r
31  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\r
32  * POSSIBILITY OF SUCH DAMAGE.\r
33  *\r
34  * The views and conclusions contained in the software and\r
35  * documentation are those of the authors and should not be\r
36  * interpreted as representing official policies, either expressed\r
37  * or implied, of GRNET S.A.\r
38  * </copyright>\r
39  * -----------------------------------------------------------------------\r
40  */\r
41 #endregion\r
42 \r
43 using System.Collections.Concurrent;\r
44 using System.ComponentModel.Composition;\r
45 using System.Diagnostics;\r
46 using System.Diagnostics.Contracts;\r
47 using System.IO;\r
48 using System.Threading;\r
49 using System.Threading.Tasks;\r
50 using System.Threading.Tasks.Dataflow;\r
51 using Castle.ActiveRecord;\r
52 using Pithos.Interfaces;\r
53 using Pithos.Network;\r
54 using log4net;\r
55 \r
56 namespace Pithos.Core.Agents\r
57 {\r
58     using System;\r
59     using System.Collections.Generic;\r
60     using System.Linq;\r
61     using System.Text;\r
62 \r
63     /// <summary>\r
64     /// PollAgent periodically polls the server to detect object changes. The agent retrieves a listing of all\r
65     /// objects and compares it with a previously cached version to detect differences. \r
66     /// New files are downloaded, missing files are deleted from the local file system and common files are compared\r
67     /// to determine the appropriate action\r
68     /// </summary>\r
69     [Export]\r
70     public class PollAgent\r
71     {\r
72         private static readonly ILog Log = LogManager.GetLogger("PollAgent");\r
73 \r
74         [System.ComponentModel.Composition.Import]\r
75         public IStatusKeeper StatusKeeper { get; set; }\r
76 \r
77         [System.ComponentModel.Composition.Import]\r
78         public IPithosSettings Settings { get; set; }\r
79 \r
80         [System.ComponentModel.Composition.Import]\r
81         public NetworkAgent NetworkAgent { get; set; }\r
82 \r
83         public IStatusNotification StatusNotification { get; set; }\r
84 \r
85         private bool _firstPoll = true;\r
86 \r
87         //The Sync Event signals a manual synchronisation\r
88         private readonly AsyncManualResetEvent _syncEvent = new AsyncManualResetEvent();\r
89 \r
90         private ConcurrentDictionary<string, DateTime> _lastSeen = new ConcurrentDictionary<string, DateTime>();\r
91         private readonly ConcurrentBag<AccountInfo> _accounts = new ConcurrentBag<AccountInfo>();\r
92 \r
93 \r
94         /// <summary>\r
95         /// Start a manual synchronization\r
96         /// </summary>\r
97         public void SynchNow()\r
98         {            \r
99             _syncEvent.Set();\r
100         }\r
101 \r
102         /// <summary>\r
103         /// Remote files are polled periodically. Any changes are processed\r
104         /// </summary>\r
105         /// <param name="since"></param>\r
106         /// <returns></returns>\r
107         public async Task PollRemoteFiles(DateTime? since = null)\r
108         {\r
109             Debug.Assert(Thread.CurrentThread.IsBackground, "Polling Ended up in the main thread!");\r
110 \r
111             UpdateStatus(PithosStatus.Syncing);\r
112             StatusNotification.Notify(new PollNotification());\r
113 \r
114             using (log4net.ThreadContext.Stacks["Retrieve Remote"].Push("All accounts"))\r
115             {\r
116                 //If this poll fails, we will retry with the same since value\r
117                 var nextSince = since;\r
118                 try\r
119                 {\r
120                     //Next time we will check for all changes since the current check minus 1 second\r
121                     //This is done to ensure there are no discrepancies due to clock differences\r
122                     var current = DateTime.Now.AddSeconds(-1);\r
123 \r
124                     var tasks = from accountInfo in _accounts\r
125                                 select ProcessAccountFiles(accountInfo, since);\r
126 \r
127                     await TaskEx.WhenAll(tasks.ToList());\r
128 \r
129                     _firstPoll = false;\r
130                     //Reschedule the poll with the current timestamp as a "since" value\r
131                     nextSince = current;\r
132                 }\r
133                 catch (Exception ex)\r
134                 {\r
135                     Log.ErrorFormat("Error while processing accounts\r\n{0}", ex);\r
136                     //In case of failure retry with the same "since" value\r
137                 }\r
138 \r
139                 UpdateStatus(PithosStatus.InSynch);\r
140                 //The multiple try blocks are required because we can't have an await call\r
141                 //inside a finally block\r
142                 //TODO: Find a more elegant solution for reschedulling in the event of an exception\r
143                 try\r
144                 {\r
145                     //Wait for the polling interval to pass or the Sync event to be signalled\r
146                     nextSince = await WaitForScheduledOrManualPoll(nextSince);\r
147                 }\r
148                 finally\r
149                 {\r
150                     //Ensure polling is scheduled even in case of error\r
151                     TaskEx.Run(() => PollRemoteFiles(nextSince));                        \r
152                 }\r
153             }\r
154         }\r
155 \r
156         /// <summary>\r
157         /// Wait for the polling period to expire or a manual sync request\r
158         /// </summary>\r
159         /// <param name="since"></param>\r
160         /// <returns></returns>\r
161         private async Task<DateTime?> WaitForScheduledOrManualPoll(DateTime? since)\r
162         {\r
163             var sync = _syncEvent.WaitAsync();\r
164             var wait = TaskEx.Delay(TimeSpan.FromSeconds(Settings.PollingInterval), NetworkAgent.CancellationToken);\r
165             var signaledTask = await TaskEx.WhenAny(sync, wait);\r
166 \r
167             //Wait for network processing to finish before polling\r
168             var pauseTask=NetworkAgent.ProceedEvent.WaitAsync();\r
169             await TaskEx.WhenAll(signaledTask, pauseTask);\r
170 \r
171             //If polling is signalled by SynchNow, ignore the since tag\r
172             if (sync.IsCompleted)\r
173             {\r
174                 //TODO: Must convert to AutoReset\r
175                 _syncEvent.Reset();\r
176                 return null;\r
177             }\r
178             return since;\r
179         }\r
180 \r
181         public async Task ProcessAccountFiles(AccountInfo accountInfo, DateTime? since = null)\r
182         {\r
183             if (accountInfo == null)\r
184                 throw new ArgumentNullException("accountInfo");\r
185             if (String.IsNullOrWhiteSpace(accountInfo.AccountPath))\r
186                 throw new ArgumentException("The AccountInfo.AccountPath is empty", "accountInfo");\r
187             Contract.EndContractBlock();\r
188 \r
189 \r
190             using (log4net.ThreadContext.Stacks["Retrieve Remote"].Push(accountInfo.UserName))\r
191             {\r
192                 await NetworkAgent.GetDeleteAwaiter();\r
193 \r
194                 Log.Info("Scheduled");\r
195                 var client = new CloudFilesClient(accountInfo);\r
196 \r
197                 var containers = client.ListContainers(accountInfo.UserName);\r
198 \r
199 \r
200                 CreateContainerFolders(accountInfo, containers);\r
201 \r
202                 try\r
203                 {\r
204                     //Wait for any deletions to finish\r
205                     await NetworkAgent.GetDeleteAwaiter();\r
206                     //Get the poll time now. We may miss some deletions but it's better to keep a file that was deleted\r
207                     //than delete a file that was created while we were executing the poll                    \r
208                     var pollTime = DateTime.Now;\r
209 \r
210                     //Get the list of server objects changed since the last check\r
211                     //The name of the container is passed as state in order to create a dictionary of tasks in a subsequent step\r
212                     var listObjects = (from container in containers\r
213                                        select Task<IList<ObjectInfo>>.Factory.StartNew(_ =>\r
214                                              client.ListObjects(accountInfo.UserName, container.Name, since), container.Name)).ToList();\r
215                     //BUG: Can't detect difference between no changes or no objects\r
216                     //ListObjects returns nothing if there are no changes since the last check time (since value)                    \r
217                     //TODO: Must detect the difference between no server objects an\r
218                     var listShared = Task<IList<ObjectInfo>>.Factory.StartNew(_ => client.ListSharedObjects(since), "shared");\r
219                     listObjects.Add(listShared);\r
220                     var listTasks = await Task.Factory.WhenAll(listObjects.ToArray());\r
221 \r
222                     using (log4net.ThreadContext.Stacks["SCHEDULE"].Push("Process Results"))\r
223                     {\r
224                         var dict = listTasks.ToDictionary(t => t.AsyncState);\r
225 \r
226                         //Get all non-trash objects. Remember, the container name is stored in AsyncState\r
227                         var remoteObjects = from objectList in listTasks\r
228                                             where (string)objectList.AsyncState != "trash"\r
229                                             from obj in objectList.Result\r
230                                             select obj;\r
231 \r
232                         var trashObjects = dict["trash"].Result;\r
233                         var sharedObjects = dict["shared"].Result;\r
234 \r
235                         //DON'T process trashed files\r
236                         //If some files are deleted and added again to a folder, they will be deleted\r
237                         //even though they are new.\r
238                         //We would have to check file dates and hashes to ensure that a trashed file\r
239                         //can be deleted safely from the local hard drive.\r
240                         /*\r
241                         //Items with the same name, hash may be both in the container and the trash\r
242                         //Don't delete items that exist in the container\r
243                         var realTrash = from trash in trashObjects\r
244                                         where\r
245                                             !remoteObjects.Any(\r
246                                                 info => info.Name == trash.Name && info.Hash == trash.Hash)\r
247                                         select trash;\r
248                         ProcessTrashedFiles(accountInfo, realTrash);\r
249 */\r
250 \r
251                         var cleanRemotes = (from info in remoteObjects.Union(sharedObjects)\r
252                                             let name = info.Name\r
253                                             where !name.EndsWith(".ignore", StringComparison.InvariantCultureIgnoreCase) &&\r
254                                                   !name.StartsWith(FolderConstants.CacheFolder + "/",\r
255                                                                    StringComparison.InvariantCultureIgnoreCase)\r
256                                             select info).ToList();\r
257 \r
258                         var differencer = _differencer.PostSnapshot(accountInfo, cleanRemotes);\r
259 \r
260                         ProcessDeletedFiles(accountInfo, differencer.Deleted.FilterDirectlyBelow(SelectiveUris), pollTime);\r
261 \r
262                         // @@@ NEED To add previous state here as well, To compare with previous hash\r
263 \r
264                         \r
265 \r
266                         //Create a list of actions from the remote files\r
267                         var allActions = ChangesToActions(accountInfo, differencer.Changed.FilterDirectlyBelow(SelectiveUris))\r
268                                         .Union(\r
269                                         CreatesToActions(accountInfo, differencer.Created.FilterDirectlyBelow(SelectiveUris)));\r
270 \r
271                         //And remove those that are already being processed by the agent\r
272                         var distinctActions = allActions\r
273                             .Except(NetworkAgent.GetEnumerable(), new PithosMonitor.LocalFileComparer())\r
274                             .ToList();\r
275 \r
276                         //Queue all the actions\r
277                         foreach (var message in distinctActions)\r
278                         {\r
279                             NetworkAgent.Post(message);\r
280                         }\r
281 \r
282                         Log.Info("[LISTENER] End Processing");\r
283                     }\r
284                 }\r
285                 catch (Exception ex)\r
286                 {\r
287                     Log.ErrorFormat("[FAIL] ListObjects for{0} in ProcessRemoteFiles with {1}", accountInfo.UserName, ex);\r
288                     return;\r
289                 }\r
290 \r
291                 Log.Info("[LISTENER] Finished");\r
292 \r
293             }\r
294         }\r
295 \r
296         AccountsDifferencer _differencer = new AccountsDifferencer();\r
297         private List<Uri> _selectiveUris=new List<Uri>();\r
298 \r
299         /// <summary>\r
300         /// Deletes local files that are not found in the list of cloud files\r
301         /// </summary>\r
302         /// <param name="accountInfo"></param>\r
303         /// <param name="cloudFiles"></param>\r
304         /// <param name="pollTime"></param>\r
305         private void ProcessDeletedFiles(AccountInfo accountInfo, IEnumerable<ObjectInfo> cloudFiles, DateTime pollTime)\r
306         {\r
307             if (accountInfo == null)\r
308                 throw new ArgumentNullException("accountInfo");\r
309             if (String.IsNullOrWhiteSpace(accountInfo.AccountPath))\r
310                 throw new ArgumentException("The AccountInfo.AccountPath is empty", "accountInfo");\r
311             if (cloudFiles == null)\r
312                 throw new ArgumentNullException("cloudFiles");\r
313             Contract.EndContractBlock();\r
314 \r
315             //On the first run\r
316             if (_firstPoll)\r
317             {\r
318                 //Only consider files that are not being modified, ie they are in the Unchanged state            \r
319                 var deleteCandidates = FileState.Queryable.Where(state =>\r
320                     state.FilePath.StartsWith(accountInfo.AccountPath)\r
321                     && state.FileStatus == FileStatus.Unchanged).ToList();\r
322 \r
323 \r
324                 //TODO: filesToDelete must take into account the Others container            \r
325                 var filesToDelete = (from deleteCandidate in deleteCandidates\r
326                                      let localFile = FileInfoExtensions.FromPath(deleteCandidate.FilePath)\r
327                                      let relativeFilePath = localFile.AsRelativeTo(accountInfo.AccountPath)\r
328                                      where\r
329                                          !cloudFiles.Any(r => r.RelativeUrlToFilePath(accountInfo.UserName) == relativeFilePath)\r
330                                      select localFile).ToList();\r
331 \r
332 \r
333 \r
334                 //Set the status of missing files to Conflict\r
335                 foreach (var item in filesToDelete)\r
336                 {\r
337                     //Try to acquire a gate on the file, to take into account files that have been dequeued\r
338                     //and are being processed\r
339                     using (var gate = NetworkGate.Acquire(item.FullName, NetworkOperation.Deleting))\r
340                     {\r
341                         if (gate.Failed)\r
342                             continue;\r
343                         StatusKeeper.SetFileState(item.FullName, FileStatus.Conflict, FileOverlayStatus.Deleted);\r
344                     }\r
345                 }\r
346                 UpdateStatus(PithosStatus.HasConflicts);\r
347                 StatusNotification.NotifyConflicts(filesToDelete, String.Format("{0} local files are missing from Pithos, possibly because they were deleted", filesToDelete.Count));\r
348                 StatusNotification.NotifyForFiles(filesToDelete, String.Format("{0} files were deleted", filesToDelete.Count), TraceLevel.Info);\r
349             }\r
350             else\r
351             {\r
352                 var deletedFiles = new List<FileSystemInfo>();\r
353                 foreach (var objectInfo in cloudFiles)\r
354                 {\r
355                     var relativePath = objectInfo.RelativeUrlToFilePath(accountInfo.UserName);\r
356                     var item = FileAgent.GetFileAgent(accountInfo).GetFileSystemInfo(relativePath);\r
357                     if (item.Exists)\r
358                     {\r
359                         if ((item.Attributes & FileAttributes.ReadOnly) == FileAttributes.ReadOnly)\r
360                         {\r
361                             item.Attributes = item.Attributes & ~FileAttributes.ReadOnly;\r
362 \r
363                         }\r
364                         item.Delete();\r
365                         DateTime lastDate;\r
366                         _lastSeen.TryRemove(item.FullName, out lastDate);\r
367                         deletedFiles.Add(item);\r
368                     }\r
369                     StatusKeeper.SetFileState(item.FullName, FileStatus.Deleted, FileOverlayStatus.Deleted);\r
370                 }\r
371                 StatusNotification.NotifyForFiles(deletedFiles, String.Format("{0} files were deleted", deletedFiles.Count), TraceLevel.Info);\r
372             }\r
373 \r
374         }\r
375 \r
376         //Creates an appropriate action for each server file\r
377         private IEnumerable<CloudAction> ChangesToActions(AccountInfo accountInfo, IEnumerable<ObjectInfo> changes)\r
378         {\r
379             if (changes == null)\r
380                 throw new ArgumentNullException();\r
381             Contract.EndContractBlock();\r
382             var fileAgent = FileAgent.GetFileAgent(accountInfo);\r
383 \r
384             //In order to avoid multiple iterations over the files, we iterate only once\r
385             //over the remote files\r
386             foreach (var objectInfo in changes)\r
387             {\r
388                 var relativePath = objectInfo.RelativeUrlToFilePath(accountInfo.UserName);\r
389                 //and remove any matching objects from the list, adding them to the commonObjects list\r
390                 if (fileAgent.Exists(relativePath))\r
391                 {\r
392                     //If a directory object already exists, we don't need to perform any other action                    \r
393                     var localFile = fileAgent.GetFileSystemInfo(relativePath);\r
394                     if (objectInfo.Content_Type == @"application/directory" && localFile is DirectoryInfo)\r
395                         continue;\r
396                     using (new SessionScope(FlushAction.Never))\r
397                     {\r
398                         var state = StatusKeeper.GetStateByFilePath(localFile.FullName);\r
399                         _lastSeen[localFile.FullName] = DateTime.Now;\r
400                         //Common files should be checked on a per-case basis to detect differences, which is newer\r
401 \r
402                         yield return new CloudAction(accountInfo, CloudActionType.MustSynch,\r
403                                                      localFile, objectInfo, state, accountInfo.BlockSize,\r
404                                                      accountInfo.BlockHash);\r
405                     }\r
406                 }\r
407                 else\r
408                 {\r
409                     //Remote files should be downloaded\r
410                     yield return new CloudDownloadAction(accountInfo, objectInfo);\r
411                 }\r
412             }\r
413         }\r
414 \r
415         private IEnumerable<CloudAction> CreatesToActions(AccountInfo accountInfo, IEnumerable<ObjectInfo> creates)\r
416         {\r
417             if (creates == null)\r
418                 throw new ArgumentNullException();\r
419             Contract.EndContractBlock();\r
420             var fileAgent = FileAgent.GetFileAgent(accountInfo);\r
421 \r
422             //In order to avoid multiple iterations over the files, we iterate only once\r
423             //over the remote files\r
424             foreach (var objectInfo in creates)\r
425             {\r
426                 var relativePath = objectInfo.RelativeUrlToFilePath(accountInfo.UserName);\r
427                 //and remove any matching objects from the list, adding them to the commonObjects list\r
428                 if (fileAgent.Exists(relativePath))\r
429                 {\r
430                     //If the object already exists, we probably have a conflict\r
431                     //If a directory object already exists, we don't need to perform any other action                    \r
432                     var localFile = fileAgent.GetFileSystemInfo(relativePath);\r
433                     StatusKeeper.SetFileState(localFile.FullName, FileStatus.Conflict, FileOverlayStatus.Conflict);\r
434                 }\r
435                 else\r
436                 {\r
437                     //Remote files should be downloaded\r
438                     yield return new CloudDownloadAction(accountInfo, objectInfo);\r
439                 }\r
440             }\r
441         }\r
442 \r
443         private void ProcessTrashedFiles(AccountInfo accountInfo, IEnumerable<ObjectInfo> trashObjects)\r
444         {\r
445             var fileAgent = FileAgent.GetFileAgent(accountInfo);\r
446             foreach (var trashObject in trashObjects)\r
447             {\r
448                 var barePath = trashObject.RelativeUrlToFilePath(accountInfo.UserName);\r
449                 //HACK: Assume only the "pithos" container is used. Must find out what happens when\r
450                 //deleting a file from a different container\r
451                 var relativePath = Path.Combine("pithos", barePath);\r
452                 fileAgent.Delete(relativePath);\r
453             }\r
454         }\r
455 \r
456         /// <summary>\r
457         /// Notify the UI to update the visual status\r
458         /// </summary>\r
459         /// <param name="status"></param>\r
460         private void UpdateStatus(PithosStatus status)\r
461         {\r
462             try\r
463             {\r
464                 StatusKeeper.SetPithosStatus(status);\r
465                 StatusNotification.Notify(new Notification());\r
466             }\r
467             catch (Exception exc)\r
468             {\r
469                 //Failure is not critical, just log it\r
470                 Log.Warn("Error while updating status", exc);\r
471             }\r
472         }\r
473 \r
474         private static void CreateContainerFolders(AccountInfo accountInfo, IEnumerable<ContainerInfo> containers)\r
475         {\r
476             var containerPaths = from container in containers\r
477                                  let containerPath = Path.Combine(accountInfo.AccountPath, container.Name)\r
478                                  where container.Name != FolderConstants.TrashContainer && !Directory.Exists(containerPath)\r
479                                  select containerPath;\r
480 \r
481             foreach (var path in containerPaths)\r
482             {\r
483                 Directory.CreateDirectory(path);\r
484             }\r
485         }\r
486 \r
487         public void SetSyncUris(Uri[] uris)\r
488         {            \r
489             SelectiveUris=uris.ToList();\r
490         }\r
491 \r
492         protected List<Uri> SelectiveUris\r
493         {\r
494             get { return _selectiveUris;}\r
495             set { _selectiveUris = value; }\r
496         }\r
497 \r
498         public void AddAccount(AccountInfo accountInfo)\r
499         {\r
500             if (!_accounts.Contains(accountInfo))\r
501                 _accounts.Add(accountInfo);\r
502         }\r
503     }\r
504 }\r