Modified SnapshotDifferencer to detect NoModificationInfo entries.
[pithos-ms-client] / trunk / Pithos.Core / Agents / PollAgent.cs
1 #region\r
2 /* -----------------------------------------------------------------------\r
3  * <copyright file="PollAgent.cs" company="GRNet">\r
4  * \r
5  * Copyright 2011-2012 GRNET S.A. All rights reserved.\r
6  *\r
7  * Redistribution and use in source and binary forms, with or\r
8  * without modification, are permitted provided that the following\r
9  * conditions are met:\r
10  *\r
11  *   1. Redistributions of source code must retain the above\r
12  *      copyright notice, this list of conditions and the following\r
13  *      disclaimer.\r
14  *\r
15  *   2. Redistributions in binary form must reproduce the above\r
16  *      copyright notice, this list of conditions and the following\r
17  *      disclaimer in the documentation and/or other materials\r
18  *      provided with the distribution.\r
19  *\r
20  *\r
21  * THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS\r
22  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\r
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\r
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR\r
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\r
28  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\r
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\r
31  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\r
32  * POSSIBILITY OF SUCH DAMAGE.\r
33  *\r
34  * The views and conclusions contained in the software and\r
35  * documentation are those of the authors and should not be\r
36  * interpreted as representing official policies, either expressed\r
37  * or implied, of GRNET S.A.\r
38  * </copyright>\r
39  * -----------------------------------------------------------------------\r
40  */\r
41 #endregion\r
42 \r
43 using System.Collections.Concurrent;\r
44 using System.ComponentModel.Composition;\r
45 using System.Diagnostics;\r
46 using System.Diagnostics.Contracts;\r
47 using System.IO;\r
48 using System.Threading;\r
49 using System.Threading.Tasks;\r
50 using System.Threading.Tasks.Dataflow;\r
51 using Castle.ActiveRecord;\r
52 using Pithos.Interfaces;\r
53 using Pithos.Network;\r
54 using log4net;\r
55 \r
56 namespace Pithos.Core.Agents\r
57 {\r
58     using System;\r
59     using System.Collections.Generic;\r
60     using System.Linq;\r
61     using System.Text;\r
62 \r
63     /// <summary>\r
64     /// PollAgent periodically polls the server to detect object changes. The agent retrieves a listing of all\r
65     /// objects and compares it with a previously cached version to detect differences. \r
66     /// New files are downloaded, missing files are deleted from the local file system and common files are compared\r
67     /// to determine the appropriate action\r
68     /// </summary>\r
69     [Export]\r
70     public class PollAgent\r
71     {\r
72         private static readonly ILog Log = LogManager.GetLogger("PollAgent");\r
73 \r
74         [System.ComponentModel.Composition.Import]\r
75         public IStatusKeeper StatusKeeper { get; set; }\r
76 \r
77         [System.ComponentModel.Composition.Import]\r
78         public IPithosSettings Settings { get; set; }\r
79 \r
80         [System.ComponentModel.Composition.Import]\r
81         public NetworkAgent NetworkAgent { get; set; }\r
82 \r
83         public IStatusNotification StatusNotification { get; set; }\r
84 \r
85         private bool _firstPoll = true;\r
86 \r
87         //The Sync Event signals a manual synchronisation\r
88         private readonly AsyncManualResetEvent _syncEvent = new AsyncManualResetEvent();\r
89 \r
90         private ConcurrentDictionary<string, DateTime> _lastSeen = new ConcurrentDictionary<string, DateTime>();\r
91         private readonly ConcurrentBag<AccountInfo> _accounts = new ConcurrentBag<AccountInfo>();\r
92 \r
93 \r
94         /// <summary>\r
95         /// Start a manual synchronization\r
96         /// </summary>\r
97         public void SynchNow()\r
98         {            \r
99             _syncEvent.Set();\r
100         }\r
101 \r
102         /// <summary>\r
103         /// Remote files are polled periodically. Any changes are processed\r
104         /// </summary>\r
105         /// <param name="since"></param>\r
106         /// <returns></returns>\r
107         public async Task PollRemoteFiles(DateTime? since = null)\r
108         {\r
109             Debug.Assert(Thread.CurrentThread.IsBackground, "Polling Ended up in the main thread!");\r
110 \r
111             UpdateStatus(PithosStatus.Syncing);\r
112             StatusNotification.Notify(new PollNotification());\r
113 \r
114             using (log4net.ThreadContext.Stacks["Retrieve Remote"].Push("All accounts"))\r
115             {\r
116                 //If this poll fails, we will retry with the same since value\r
117                 var nextSince = since;\r
118                 try\r
119                 {\r
120                     //Next time we will check for all changes since the current check minus 1 second\r
121                     //This is done to ensure there are no discrepancies due to clock differences\r
122                     var current = DateTime.Now.AddSeconds(-1);\r
123 \r
124                     var tasks = from accountInfo in _accounts\r
125                                 select ProcessAccountFiles(accountInfo, since);\r
126 \r
127                     await TaskEx.WhenAll(tasks.ToList());\r
128 \r
129                     _firstPoll = false;\r
130                     //Reschedule the poll with the current timestamp as a "since" value\r
131                     nextSince = current;\r
132                 }\r
133                 catch (Exception ex)\r
134                 {\r
135                     Log.ErrorFormat("Error while processing accounts\r\n{0}", ex);\r
136                     //In case of failure retry with the same "since" value\r
137                 }\r
138 \r
139                 UpdateStatus(PithosStatus.InSynch);\r
140                 //The multiple try blocks are required because we can't have an await call\r
141                 //inside a finally block\r
142                 //TODO: Find a more elegant solution for reschedulling in the event of an exception\r
143                 try\r
144                 {\r
145                     //Wait for the polling interval to pass or the Sync event to be signalled\r
146                     nextSince = await WaitForScheduledOrManualPoll(nextSince);\r
147                 }\r
148                 finally\r
149                 {\r
150                     //Ensure polling is scheduled even in case of error\r
151                     TaskEx.Run(() => PollRemoteFiles(nextSince));                        \r
152                 }\r
153             }\r
154         }\r
155 \r
156         /// <summary>\r
157         /// Wait for the polling period to expire or a manual sync request\r
158         /// </summary>\r
159         /// <param name="since"></param>\r
160         /// <returns></returns>\r
161         private async Task<DateTime?> WaitForScheduledOrManualPoll(DateTime? since)\r
162         {\r
163             var sync = _syncEvent.WaitAsync();\r
164             var wait = TaskEx.Delay(TimeSpan.FromSeconds(Settings.PollingInterval), NetworkAgent.CancellationToken);\r
165             var signaledTask = await TaskEx.WhenAny(sync, wait);\r
166 \r
167             //Wait for network processing to finish before polling\r
168             var pauseTask=NetworkAgent.ProceedEvent.WaitAsync();\r
169             await TaskEx.WhenAll(signaledTask, pauseTask);\r
170 \r
171             //If polling is signalled by SynchNow, ignore the since tag\r
172             if (sync.IsCompleted)\r
173             {\r
174                 //TODO: Must convert to AutoReset\r
175                 _syncEvent.Reset();\r
176                 return null;\r
177             }\r
178             return since;\r
179         }\r
180 \r
181         public async Task ProcessAccountFiles(AccountInfo accountInfo, DateTime? since = null)\r
182         {\r
183             if (accountInfo == null)\r
184                 throw new ArgumentNullException("accountInfo");\r
185             if (String.IsNullOrWhiteSpace(accountInfo.AccountPath))\r
186                 throw new ArgumentException("The AccountInfo.AccountPath is empty", "accountInfo");\r
187             Contract.EndContractBlock();\r
188 \r
189 \r
190             using (log4net.ThreadContext.Stacks["Retrieve Remote"].Push(accountInfo.UserName))\r
191             {\r
192                 await NetworkAgent.GetDeleteAwaiter();\r
193 \r
194                 Log.Info("Scheduled");\r
195                 var client = new CloudFilesClient(accountInfo);\r
196 \r
197                 //We don't need to check the trash container\r
198                 var containers = client.ListContainers(accountInfo.UserName).Where(c=>c.Name!="trash");\r
199 \r
200 \r
201                 CreateContainerFolders(accountInfo, containers);\r
202 \r
203                 try\r
204                 {\r
205                     //Wait for any deletions to finish\r
206                     await NetworkAgent.GetDeleteAwaiter();\r
207                     //Get the poll time now. We may miss some deletions but it's better to keep a file that was deleted\r
208                     //than delete a file that was created while we were executing the poll                    \r
209                     var pollTime = DateTime.Now;\r
210 \r
211                     //Get the list of server objects changed since the last check\r
212                     //The name of the container is passed as state in order to create a dictionary of tasks in a subsequent step\r
213                     var listObjects = (from container in containers\r
214                                        select Task<IList<ObjectInfo>>.Factory.StartNew(_ =>\r
215                                              client.ListObjects(accountInfo.UserName, container.Name, since), container.Name)).ToList();\r
216                     //BUG: Can't detect difference between no changes or no objects\r
217                     //ListObjects returns nothing if there are no changes since the last check time (since value)                    \r
218                     //TODO: Must detect the difference between no server objects and no change\r
219 \r
220                     //NOTE: One option is to "mark" all result lists with their container name, or \r
221                     //rather the url of the container\r
222                     //Another option \r
223 \r
224                     var listShared = Task<IList<ObjectInfo>>.Factory.StartNew(_ => \r
225                         client.ListSharedObjects(since), "shared");\r
226                     listObjects.Add(listShared);\r
227                     var listTasks = await Task.Factory.WhenAll(listObjects.ToArray());\r
228 \r
229                     using (log4net.ThreadContext.Stacks["SCHEDULE"].Push("Process Results"))\r
230                     {\r
231                         var dict = listTasks.ToDictionary(t => t.AsyncState);\r
232 \r
233                         //Get all non-trash objects. Remember, the container name is stored in AsyncState\r
234                         var remoteObjects = from objectList in listTasks\r
235                                             where (string)objectList.AsyncState != "trash"\r
236                                             from obj in objectList.Result\r
237                                             select obj;\r
238 \r
239                         var sharedObjects = dict["shared"].Result;\r
240 \r
241                         //DON'T process trashed files\r
242                         //If some files are deleted and added again to a folder, they will be deleted\r
243                         //even though they are new.\r
244                         //We would have to check file dates and hashes to ensure that a trashed file\r
245                         //can be deleted safely from the local hard drive.\r
246                         /*\r
247                         //Items with the same name, hash may be both in the container and the trash\r
248                         //Don't delete items that exist in the container\r
249                         var realTrash = from trash in trashObjects\r
250                                         where\r
251                                             !remoteObjects.Any(\r
252                                                 info => info.Name == trash.Name && info.Hash == trash.Hash)\r
253                                         select trash;\r
254                         ProcessTrashedFiles(accountInfo, realTrash);\r
255 */\r
256 \r
257                         var cleanRemotes = (from info in remoteObjects.Union(sharedObjects)\r
258                                             let name = info.Name\r
259                                             where !name.EndsWith(".ignore", StringComparison.InvariantCultureIgnoreCase) &&\r
260                                                   !name.StartsWith(FolderConstants.CacheFolder + "/",\r
261                                                                    StringComparison.InvariantCultureIgnoreCase)\r
262                                             select info).ToList();\r
263 \r
264                         var differencer = _differencer.PostSnapshot(accountInfo, cleanRemotes);\r
265 \r
266                         ProcessDeletedFiles(accountInfo, differencer.Deleted.FilterDirectlyBelow(SelectiveUris), pollTime);\r
267 \r
268                         // @@@ NEED To add previous state here as well, To compare with previous hash\r
269 \r
270                         \r
271 \r
272                         //Create a list of actions from the remote files\r
273                         var allActions = ChangesToActions(accountInfo, differencer.Changed.FilterDirectlyBelow(SelectiveUris))\r
274                                         .Union(\r
275                                         CreatesToActions(accountInfo, differencer.Created.FilterDirectlyBelow(SelectiveUris)));\r
276 \r
277                         //And remove those that are already being processed by the agent\r
278                         var distinctActions = allActions\r
279                             .Except(NetworkAgent.GetEnumerable(), new PithosMonitor.LocalFileComparer())\r
280                             .ToList();\r
281 \r
282                         //Queue all the actions\r
283                         foreach (var message in distinctActions)\r
284                         {\r
285                             NetworkAgent.Post(message);\r
286                         }\r
287 \r
288                         Log.Info("[LISTENER] End Processing");\r
289                     }\r
290                 }\r
291                 catch (Exception ex)\r
292                 {\r
293                     Log.ErrorFormat("[FAIL] ListObjects for{0} in ProcessRemoteFiles with {1}", accountInfo.UserName, ex);\r
294                     return;\r
295                 }\r
296 \r
297                 Log.Info("[LISTENER] Finished");\r
298 \r
299             }\r
300         }\r
301 \r
302         AccountsDifferencer _differencer = new AccountsDifferencer();\r
303         private List<Uri> _selectiveUris=new List<Uri>();\r
304 \r
305         /// <summary>\r
306         /// Deletes local files that are not found in the list of cloud files\r
307         /// </summary>\r
308         /// <param name="accountInfo"></param>\r
309         /// <param name="cloudFiles"></param>\r
310         /// <param name="pollTime"></param>\r
311         private void ProcessDeletedFiles(AccountInfo accountInfo, IEnumerable<ObjectInfo> cloudFiles, DateTime pollTime)\r
312         {\r
313             if (accountInfo == null)\r
314                 throw new ArgumentNullException("accountInfo");\r
315             if (String.IsNullOrWhiteSpace(accountInfo.AccountPath))\r
316                 throw new ArgumentException("The AccountInfo.AccountPath is empty", "accountInfo");\r
317             if (cloudFiles == null)\r
318                 throw new ArgumentNullException("cloudFiles");\r
319             Contract.EndContractBlock();\r
320 \r
321             //On the first run\r
322             if (_firstPoll)\r
323             {\r
324                 //Only consider files that are not being modified, ie they are in the Unchanged state            \r
325                 var deleteCandidates = FileState.Queryable.Where(state =>\r
326                     state.FilePath.StartsWith(accountInfo.AccountPath)\r
327                     && state.FileStatus == FileStatus.Unchanged).ToList();\r
328 \r
329 \r
330                 //TODO: filesToDelete must take into account the Others container            \r
331                 var filesToDelete = (from deleteCandidate in deleteCandidates\r
332                                      let localFile = FileInfoExtensions.FromPath(deleteCandidate.FilePath)\r
333                                      let relativeFilePath = localFile.AsRelativeTo(accountInfo.AccountPath)\r
334                                      where\r
335                                          !cloudFiles.Any(r => r.RelativeUrlToFilePath(accountInfo.UserName) == relativeFilePath)\r
336                                      select localFile).ToList();\r
337 \r
338 \r
339 \r
340                 //Set the status of missing files to Conflict\r
341                 foreach (var item in filesToDelete)\r
342                 {\r
343                     //Try to acquire a gate on the file, to take into account files that have been dequeued\r
344                     //and are being processed\r
345                     using (var gate = NetworkGate.Acquire(item.FullName, NetworkOperation.Deleting))\r
346                     {\r
347                         if (gate.Failed)\r
348                             continue;\r
349                         StatusKeeper.SetFileState(item.FullName, FileStatus.Conflict, FileOverlayStatus.Deleted);\r
350                     }\r
351                 }\r
352                 UpdateStatus(PithosStatus.HasConflicts);\r
353                 StatusNotification.NotifyConflicts(filesToDelete, String.Format("{0} local files are missing from Pithos, possibly because they were deleted", filesToDelete.Count));\r
354                 StatusNotification.NotifyForFiles(filesToDelete, String.Format("{0} files were deleted", filesToDelete.Count), TraceLevel.Info);\r
355             }\r
356             else\r
357             {\r
358                 var deletedFiles = new List<FileSystemInfo>();\r
359                 foreach (var objectInfo in cloudFiles)\r
360                 {\r
361                     var relativePath = objectInfo.RelativeUrlToFilePath(accountInfo.UserName);\r
362                     var item = FileAgent.GetFileAgent(accountInfo).GetFileSystemInfo(relativePath);\r
363                     if (item.Exists)\r
364                     {\r
365                         if ((item.Attributes & FileAttributes.ReadOnly) == FileAttributes.ReadOnly)\r
366                         {\r
367                             item.Attributes = item.Attributes & ~FileAttributes.ReadOnly;\r
368 \r
369                         }\r
370                         item.Delete();\r
371                         DateTime lastDate;\r
372                         _lastSeen.TryRemove(item.FullName, out lastDate);\r
373                         deletedFiles.Add(item);\r
374                     }\r
375                     StatusKeeper.SetFileState(item.FullName, FileStatus.Deleted, FileOverlayStatus.Deleted);\r
376                 }\r
377                 StatusNotification.NotifyForFiles(deletedFiles, String.Format("{0} files were deleted", deletedFiles.Count), TraceLevel.Info);\r
378             }\r
379 \r
380         }\r
381 \r
382         //Creates an appropriate action for each server file\r
383         private IEnumerable<CloudAction> ChangesToActions(AccountInfo accountInfo, IEnumerable<ObjectInfo> changes)\r
384         {\r
385             if (changes == null)\r
386                 throw new ArgumentNullException();\r
387             Contract.EndContractBlock();\r
388             var fileAgent = FileAgent.GetFileAgent(accountInfo);\r
389 \r
390             //In order to avoid multiple iterations over the files, we iterate only once\r
391             //over the remote files\r
392             foreach (var objectInfo in changes)\r
393             {\r
394                 var relativePath = objectInfo.RelativeUrlToFilePath(accountInfo.UserName);\r
395                 //and remove any matching objects from the list, adding them to the commonObjects list\r
396                 if (fileAgent.Exists(relativePath))\r
397                 {\r
398                     //If a directory object already exists, we don't need to perform any other action                    \r
399                     var localFile = fileAgent.GetFileSystemInfo(relativePath);\r
400                     if (objectInfo.Content_Type == @"application/directory" && localFile is DirectoryInfo)\r
401                         continue;\r
402                     using (new SessionScope(FlushAction.Never))\r
403                     {\r
404                         var state = StatusKeeper.GetStateByFilePath(localFile.FullName);\r
405                         _lastSeen[localFile.FullName] = DateTime.Now;\r
406                         //Common files should be checked on a per-case basis to detect differences, which is newer\r
407 \r
408                         yield return new CloudAction(accountInfo, CloudActionType.MustSynch,\r
409                                                      localFile, objectInfo, state, accountInfo.BlockSize,\r
410                                                      accountInfo.BlockHash);\r
411                     }\r
412                 }\r
413                 else\r
414                 {\r
415                     //Remote files should be downloaded\r
416                     yield return new CloudDownloadAction(accountInfo, objectInfo);\r
417                 }\r
418             }\r
419         }\r
420 \r
421         private IEnumerable<CloudAction> CreatesToActions(AccountInfo accountInfo, IEnumerable<ObjectInfo> creates)\r
422         {\r
423             if (creates == null)\r
424                 throw new ArgumentNullException();\r
425             Contract.EndContractBlock();\r
426             var fileAgent = FileAgent.GetFileAgent(accountInfo);\r
427 \r
428             //In order to avoid multiple iterations over the files, we iterate only once\r
429             //over the remote files\r
430             foreach (var objectInfo in creates)\r
431             {\r
432                 var relativePath = objectInfo.RelativeUrlToFilePath(accountInfo.UserName);\r
433                 //and remove any matching objects from the list, adding them to the commonObjects list\r
434                 if (fileAgent.Exists(relativePath))\r
435                 {\r
436                     //If the object already exists, we probably have a conflict\r
437                     //If a directory object already exists, we don't need to perform any other action                    \r
438                     var localFile = fileAgent.GetFileSystemInfo(relativePath);\r
439                     StatusKeeper.SetFileState(localFile.FullName, FileStatus.Conflict, FileOverlayStatus.Conflict);\r
440                 }\r
441                 else\r
442                 {\r
443                     //Remote files should be downloaded\r
444                     yield return new CloudDownloadAction(accountInfo, objectInfo);\r
445                 }\r
446             }\r
447         }\r
448 \r
449         private void ProcessTrashedFiles(AccountInfo accountInfo, IEnumerable<ObjectInfo> trashObjects)\r
450         {\r
451             var fileAgent = FileAgent.GetFileAgent(accountInfo);\r
452             foreach (var trashObject in trashObjects)\r
453             {\r
454                 var barePath = trashObject.RelativeUrlToFilePath(accountInfo.UserName);\r
455                 //HACK: Assume only the "pithos" container is used. Must find out what happens when\r
456                 //deleting a file from a different container\r
457                 var relativePath = Path.Combine("pithos", barePath);\r
458                 fileAgent.Delete(relativePath);\r
459             }\r
460         }\r
461 \r
462         /// <summary>\r
463         /// Notify the UI to update the visual status\r
464         /// </summary>\r
465         /// <param name="status"></param>\r
466         private void UpdateStatus(PithosStatus status)\r
467         {\r
468             try\r
469             {\r
470                 StatusKeeper.SetPithosStatus(status);\r
471                 StatusNotification.Notify(new Notification());\r
472             }\r
473             catch (Exception exc)\r
474             {\r
475                 //Failure is not critical, just log it\r
476                 Log.Warn("Error while updating status", exc);\r
477             }\r
478         }\r
479 \r
480         private static void CreateContainerFolders(AccountInfo accountInfo, IEnumerable<ContainerInfo> containers)\r
481         {\r
482             var containerPaths = from container in containers\r
483                                  let containerPath = Path.Combine(accountInfo.AccountPath, container.Name)\r
484                                  where container.Name != FolderConstants.TrashContainer && !Directory.Exists(containerPath)\r
485                                  select containerPath;\r
486 \r
487             foreach (var path in containerPaths)\r
488             {\r
489                 Directory.CreateDirectory(path);\r
490             }\r
491         }\r
492 \r
493         public void SetSyncUris(Uri[] uris)\r
494         {            \r
495             SelectiveUris=uris.ToList();\r
496         }\r
497 \r
498         protected List<Uri> SelectiveUris\r
499         {\r
500             get { return _selectiveUris;}\r
501             set { _selectiveUris = value; }\r
502         }\r
503 \r
504         public void AddAccount(AccountInfo accountInfo)\r
505         {\r
506             if (!_accounts.Contains(accountInfo))\r
507                 _accounts.Add(accountInfo);\r
508         }\r
509     }\r
510 }\r