Added BlockUpdater.cs to perform block updates in a separate class. Will include...
[pithos-ms-client] / trunk / Pithos.Network / Signature.cs
1 using System;
2 using System.Collections.Concurrent;
3 using System.Collections.Generic;
4 using System.Diagnostics.Contracts;
5 using System.IO;
6 using System.Runtime.Remoting.Metadata.W3cXsd2001;
7 using System.Security.Cryptography;
8 using System.Text;
9 using System.Threading.Tasks;
10 using System.Linq;
11
12 namespace Pithos.Network
13 {
14     public static class Signature
15     {
16         public static string CalculateMD5(FileInfo info)
17         {
18             if (info==null)
19                 throw new ArgumentNullException("info");
20             Contract.EndContractBlock();
21
22             return CalculateMD5(info.FullName);
23         }
24
25         public static string CalculateMD5(string path)
26         {
27             if (String.IsNullOrWhiteSpace(path))
28                 throw new ArgumentNullException("path");
29             Contract.EndContractBlock();
30
31             //DON'T calculate hashes for folders
32             if (Directory.Exists(path))
33                 return "";
34
35             string hash;
36             using (var hasher = MD5.Create())
37             using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, true))
38             {
39                 var hashBytes = hasher.ComputeHash(stream);
40                 hash = hashBytes.ToHashString();
41             }
42             return hash;
43         }
44
45 /*
46         public static string BytesToString(byte[] hashBytes)
47         {
48             var shb=new SoapHexBinary(hashBytes);
49             return shb.ToString();
50             
51         }
52
53
54         public static byte[] StringToBytes(string hash)
55         {
56             var shb=SoapHexBinary.Parse(hash);
57             return shb.Value;
58         }
59 */
60
61         public static byte[] ToBytes(this string hash)
62         {
63             var shb = SoapHexBinary.Parse(hash);
64             return shb.Value;
65         }
66
67         public static string ToHashString(this byte[] hashBytes)
68         {
69             var shb = new SoapHexBinary(hashBytes);
70             return shb.ToString().ToLower();
71         }
72
73         public static TreeHash CalculateTreeHash(FileInfo fileInfo, int blockSize, string algorithm)
74         {
75             if (fileInfo==null)
76                 throw new ArgumentNullException("fileInfo");
77             if (blockSize <= 0)
78                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
79             if (String.IsNullOrWhiteSpace(algorithm))
80                 throw new ArgumentNullException("algorithm");
81             Contract.EndContractBlock();
82
83             return CalculateTreeHash(fileInfo.FullName, blockSize, algorithm);
84         }
85
86         /// <summary>
87         /// Calculates a file's tree hash synchronously, using the specified block size
88         /// </summary>
89         /// <param name="filePath">Path to an existing file</param>
90         /// <param name="blockSize">Block size used to calculate leaf hashes</param>
91         /// <param name="algorithm"></param>
92         /// <returns>A <see cref="TreeHash"/> with the block hashes and top hash</returns>
93         public static TreeHash CalculateTreeHash(string filePath, int blockSize, string algorithm)
94         {
95             if (String.IsNullOrWhiteSpace(filePath))
96                 throw new ArgumentNullException("filePath");
97             if (blockSize<=0)
98                 throw new ArgumentOutOfRangeException("blockSize","blockSize must be a value greater than zero ");
99             if (String.IsNullOrWhiteSpace(algorithm))
100                 throw new ArgumentNullException("algorithm");
101             Contract.EndContractBlock();
102
103             //DON'T calculate hashes for folders
104             if (Directory.Exists(filePath))
105                 return null;
106
107
108             var list = new List<byte[]>();
109             using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, false))
110             using (var hasher = HashAlgorithm.Create(algorithm))
111             {
112                 var buffer = new byte[blockSize];
113                 int read;
114                 while ((read=stream.Read(buffer, 0, blockSize)) > 0)
115                 {
116                     //This code was added for compatibility with the way Pithos calculates the last hash
117                     //We calculate the hash only up to the last non-null byte
118                     //TODO: Remove if the server starts using the full block instead of the trimmed block
119                     var lastByteIndex=Array.FindLastIndex(buffer,read-1, aByte => aByte != 0);
120                     
121                     var hash = hasher.ComputeHash(buffer, 0, lastByteIndex+1);
122                     list.Add(hash);                    
123                 }
124                 return new TreeHash(algorithm) { Hashes = list,                    
125                     BlockSize = blockSize, 
126                     Bytes = stream.Length};
127             }            
128         }
129         
130         public static Task<TreeHash> CalculateTreeHashAsync(FileInfo fileInfo, int blockSize, string algorithm)
131         {
132             if (fileInfo == null)
133                 throw new ArgumentNullException("fileInfo");
134             if (blockSize <= 0)
135                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
136             if (String.IsNullOrWhiteSpace(algorithm))
137                 throw new ArgumentNullException("algorithm");
138             Contract.EndContractBlock();
139
140             return CalculateTreeHashAsync(fileInfo.FullName, blockSize, algorithm);
141         }
142
143
144         public static Task<TreeHash> CalculateTreeHashAsync(string filePath, int blockSize,string algorithm)
145         {
146             if (String.IsNullOrWhiteSpace(filePath))
147                 throw new ArgumentNullException("filePath");
148             if (blockSize <= 0)
149                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
150             if (String.IsNullOrWhiteSpace(algorithm))
151                 throw new ArgumentNullException("algorithm");
152             Contract.EndContractBlock();
153
154             //DON'T calculate hashes for folders
155             if (Directory.Exists(filePath))
156                 return Task.Factory.StartNew(()=>new TreeHash(algorithm));
157             //The hash of a non-existent file is the empty hash
158             if (!File.Exists(filePath))
159                 return Task.Factory.StartNew(()=>new TreeHash(algorithm));
160
161             //Calculate the hash of all blocks using a blockhash iterator
162             var treeHash =Iterate<TreeHash>(BlockHashIterator(filePath, blockSize, algorithm));
163             
164             return treeHash;
165         }
166
167         
168         private static IEnumerable<Task> BlockHashIterator(string filePath, int blockSize, string algorithm)
169         {
170             using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, true))
171             {
172                 //Calculate the blocks asyncrhonously
173                 var hashes= CalculateBlockHashesAsync(stream, blockSize, algorithm);
174                 yield return hashes;
175                 
176                 //And then proceed with creating and returning a TreeHash
177                 var length = stream.Length;                
178                 var list = hashes.Result.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
179
180                 var treeHash = new TreeHash(algorithm)
181                 {
182                     Bytes = length, 
183                     BlockSize = blockSize, 
184                     Hashes = list
185                 };
186
187                 yield return Task.Factory.FromResult(treeHash);
188             }
189
190         }
191
192         //The Task Iterator style allows the execution of a sequence of patterns using
193         //iterator syntax.
194         //This particular variation returns the result of the last task, if there is one
195         public static Task<TResult> Iterate<TResult>(IEnumerable<Task> asyncIterator)
196         {
197             if (asyncIterator == null) 
198                 throw new ArgumentNullException("asyncIterator");
199             var enumerator = asyncIterator.GetEnumerator();
200             if (enumerator == null)
201                 throw new InvalidOperationException("Invalid enumerable - GetEnumerator returned null");
202             
203             var tcs = new TaskCompletionSource<TResult>();
204             tcs.Task.ContinueWith(_ => enumerator.Dispose(), TaskContinuationOptions.ExecuteSynchronously);
205             
206             Action<Task> recursiveBody = null;
207             recursiveBody = delegate
208             {
209                 try
210                 {
211                     if (enumerator.MoveNext())
212                         enumerator.Current.ContinueWith(recursiveBody,
213                                                         TaskContinuationOptions.ExecuteSynchronously);
214                     else
215                     {
216                         var lastTask = enumerator.Current as Task<TResult>;
217                         var result = (lastTask !=null ) 
218                             ? lastTask.Result
219                             : default(TResult);
220
221                         tcs.TrySetResult(result);
222                     }
223                 }
224                 catch (Exception exc)
225                 {
226                     tcs.TrySetException(exc);
227                 }
228             };
229             recursiveBody(null);
230             return tcs.Task;
231         }
232
233
234         /*  public static byte[] CalculateTopHash(IEnumerable<byte[]> hashMap, string algorithm)
235         {
236             if (hashMap == null)
237                 throw new ArgumentNullException("hashMap");
238             if (String.IsNullOrWhiteSpace(algorithm))
239                 throw new ArgumentNullException("algorithm");
240             Contract.EndContractBlock();
241
242             var hashCount = hashMap.Count();
243             if (hashCount == 0)
244                 return null;
245             using (var hasher = HashAlgorithm.Create(algorithm))
246             {
247                 var i = 0;
248                 var count = hashCount;
249                 foreach (var block in hashMap)
250                 {
251                     if (i++ != count - 1)
252                         hasher.TransformBlock(block, 0, block.Length, null, 0);
253                     else
254                         hasher.TransformFinalBlock(block, 0, block.Length);
255                 }
256
257                 var finalHash = hasher.Hash;
258
259                 return finalHash;
260             }
261         }*/
262
263         public static byte[] CalculateTopHash(IList<byte[]> hashMap, string algorithm)
264         {
265             if (hashMap == null)
266                 throw new ArgumentNullException("hashMap");
267             if (String.IsNullOrWhiteSpace(algorithm))
268                 throw new ArgumentNullException("algorithm");
269             Contract.EndContractBlock();            
270
271             var hashCount = hashMap.Count;
272             //The tophash of an empty hashmap is an empty array
273             if (hashCount == 0)
274                 return new byte[0];
275             //The tophash of a one-item hashmap is the hash itself
276             if (hashCount == 1)
277                 return hashMap[0];
278
279             //Calculate the required number of leaf nodes
280             var leafs =(int)Math.Pow(2, Math.Ceiling(Math.Log(hashCount,2)));
281             //The size of all nodes is the same and equal to the size of the input hashes
282             var hashSize = hashMap[0].Length;
283
284             //If the hashmap containes fewer nodes than the required leaf count, we need to fill
285             //the rest with empty blocks
286             byte[] empty=null;            
287             if (hashCount < leafs)
288                 empty = new byte[hashSize];
289
290             //New hashes will be stored in a dictionary keyed by their step to preserve order
291             var newHashes=new ConcurrentDictionary<int, byte[]>();            
292             
293             Parallel.For(0, leafs/2,
294                 (step, state) =>
295                 {
296                     using (var hasher = HashAlgorithm.Create(algorithm))
297                     {
298                         var i = step*2;
299                         var block1 = i <= hashCount - 1 ? hashMap[i] : empty;
300                         var block2 = i <= hashCount - 2 ? hashMap[i + 1] : empty;
301
302                         hasher.TransformBlock(block1, 0, block1.Length, null, 0);
303                         hasher.TransformFinalBlock(block2, 0, block2.Length);
304
305                         var finalHash = hasher.Hash;
306                         //Store the final value in its proper place
307                         newHashes[step] = finalHash;
308                     }
309                 });
310 /*
311             Parallel.For(0, leafs/2,
312                 () => HashAlgorithm.Create(algorithm),
313                 (step, state, hasher) =>
314                 {
315                                  
316                     var i = step*2;
317                     var block1 = i <= hashCount - 1 ? hashMap[i]: empty;
318                     var block2 = i <= hashCount - 2 ? hashMap[i + 1] : empty;
319
320                     hasher.TransformBlock(block1, 0, block1.Length, null, 0);
321                     hasher.TransformFinalBlock(block2, 0, block2.Length);
322                     
323                     var finalHash = hasher.Hash;
324                     //Store the final value in its proper place
325                     newHashes[step] = finalHash;
326                                         
327                     return hasher;
328                 },
329                 hasher => hasher.Dispose());
330 */
331             //Extract the hashes to a list ordered by their step 
332             var hashes = newHashes.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
333             return CalculateTopHash(hashes, algorithm);                   
334         }
335
336         
337
338       /*  public static string CalculateTopHash(string hashString, string algorithm)
339         {
340             if (String.IsNullOrWhiteSpace(algorithm))
341                 throw new ArgumentNullException("algorithm");
342             Contract.EndContractBlock();
343             if (String.IsNullOrWhiteSpace(hashString))
344                 return String.Empty;
345
346             using (var hasher = HashAlgorithm.Create(algorithm))
347             {
348                 var bytes=Encoding.ASCII.GetBytes(hashString.ToLower());
349                 var hash=hasher.ComputeHash(bytes);
350                 return hash.ToHashString();
351             }
352         }*/
353
354         private static Task<ConcurrentDictionary<int, byte[]>> CalculateBlockHashesAsync(FileStream stream, int blockSize, string algorithm, ConcurrentDictionary<int, byte[]> hashes=null, int index = 0)
355         {
356             if (stream==null)
357                 throw new ArgumentNullException("stream");
358             if (String.IsNullOrWhiteSpace(algorithm))
359                 throw new ArgumentNullException("algorithm");
360             if (blockSize <= 0)
361                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
362             if (index< 0)
363                 throw new ArgumentOutOfRangeException("index", "index must be a non-negative value");
364             Contract.EndContractBlock();
365
366
367             if (hashes==null)
368                 hashes= new ConcurrentDictionary<int, byte[]>();
369
370             var buffer = new byte[blockSize];
371             return stream.ReadAsync(buffer, 0, blockSize).ContinueWith(t =>
372             {
373                 var read = t.Result;
374
375                 var nextTask = read == blockSize
376                                     ? CalculateBlockHashesAsync(stream, blockSize, algorithm, hashes, index + 1) 
377                                     : Task.Factory.StartNew(() => hashes);
378                 if (read>0)
379                     using (var hasher = HashAlgorithm.Create(algorithm))
380                     {
381                         //This code was added for compatibility with the way Pithos calculates the last hash
382                         //We calculate the hash only up to the last non-null byte
383                         //TODO: Remove if the server starts using the full block instead of the trimmed block
384                         var lastByteIndex = Array.FindLastIndex(buffer, read - 1, aByte => aByte != 0);
385
386                         var hash = hasher.ComputeHash(buffer, 0, lastByteIndex+1);
387                         hashes[index]=hash;
388                     }
389                 return nextTask;
390             }).Unwrap();
391         }
392
393     
394
395         public static byte[] CalculateHash(byte[] buffer,string algorithm)
396         {
397             if (buffer == null)
398                 throw new ArgumentNullException("buffer");
399             if (String.IsNullOrWhiteSpace(algorithm))
400                 throw new ArgumentNullException("algorithm");
401             Contract.EndContractBlock();
402
403             using (var hasher = HashAlgorithm.Create(algorithm))
404             {
405                 var hash = hasher.ComputeHash(buffer, 0, buffer.Length);
406                 return hash;
407             }        
408         }
409     }
410 }
411
412
413