Uploading and downloading with hashes
[pithos-ms-client] / trunk / Pithos.Network / Signature.cs
1 using System;
2 using System.Collections.Concurrent;
3 using System.Collections.Generic;
4 using System.Diagnostics.Contracts;
5 using System.IO;
6 using System.Runtime.Remoting.Metadata.W3cXsd2001;
7 using System.Security.Cryptography;
8 using System.Text;
9 using System.Threading.Tasks;
10 using System.Linq;
11
12 namespace Pithos.Network
13 {
14     public static class Signature
15     {
16         public static string CalculateMD5(FileInfo info)
17         {
18             if (info==null)
19                 throw new ArgumentNullException("info");
20             Contract.EndContractBlock();
21
22             return CalculateMD5(info.FullName);
23         }
24
25         public static string CalculateMD5(string path)
26         {
27             if (String.IsNullOrWhiteSpace(path))
28                 throw new ArgumentNullException("path");
29             Contract.EndContractBlock();
30
31             //DON'T calculate hashes for folders
32             if (Directory.Exists(path))
33                 return "";
34
35             string hash;
36             using (var hasher = MD5.Create())
37             using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, true))
38             {
39                 var hashBytes = hasher.ComputeHash(stream);
40                 hash = hashBytes.ToHashString();
41             }
42             return hash;
43         }
44
45 /*
46         public static string BytesToString(byte[] hashBytes)
47         {
48             var shb=new SoapHexBinary(hashBytes);
49             return shb.ToString();
50             
51         }
52
53
54         public static byte[] StringToBytes(string hash)
55         {
56             var shb=SoapHexBinary.Parse(hash);
57             return shb.Value;
58         }
59 */
60
61         public static byte[] ToBytes(this string hash)
62         {
63             var shb = SoapHexBinary.Parse(hash);
64             return shb.Value;
65         }
66
67         public static string ToHashString(this byte[] hashBytes)
68         {
69             var shb = new SoapHexBinary(hashBytes);
70             return shb.ToString().ToLower();
71         }
72
73         public static TreeHash CalculateTreeHash(FileInfo fileInfo, int blockSize, string algorithm)
74         {
75             if (fileInfo==null)
76                 throw new ArgumentNullException("fileInfo");
77             if (blockSize <= 0)
78                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
79             if (String.IsNullOrWhiteSpace(algorithm))
80                 throw new ArgumentNullException("algorithm");
81             Contract.EndContractBlock();
82
83             return CalculateTreeHash(fileInfo.FullName, blockSize, algorithm);
84         }
85
86         /// <summary>
87         /// Calculates a file's tree hash synchronously, using the specified block size
88         /// </summary>
89         /// <param name="filePath">Path to an existing file</param>
90         /// <param name="blockSize">Block size used to calculate leaf hashes</param>
91         /// <param name="algorithm"></param>
92         /// <returns>A <see cref="TreeHash"/> with the block hashes and top hash</returns>
93         public static TreeHash CalculateTreeHash(string filePath, int blockSize, string algorithm)
94         {
95             if (String.IsNullOrWhiteSpace(filePath))
96                 throw new ArgumentNullException("filePath");
97             if (blockSize<=0)
98                 throw new ArgumentOutOfRangeException("blockSize","blockSize must be a value greater than zero ");
99             if (String.IsNullOrWhiteSpace(algorithm))
100                 throw new ArgumentNullException("algorithm");
101             Contract.EndContractBlock();
102
103             //DON'T calculate hashes for folders
104             if (Directory.Exists(filePath))
105                 return null;
106
107
108             var list = new List<byte[]>();
109             using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, false))
110             using (var hasher = HashAlgorithm.Create(algorithm))
111             {
112                 var buffer = new byte[blockSize];
113                 int read;
114                 while ((read=stream.Read(buffer, 0, blockSize)) > 0)
115                 {
116                     //This code was added for compatibility with the way Pithos calculates the last hash
117                     //We calculate the hash only up to the last non-null byte
118                     //TODO: Remove if the server starts using the full block instead of the trimmed block
119                     var lastByteIndex=Array.FindLastIndex(buffer,read-1, aByte => aByte != 0);
120                     
121                     var hash = hasher.ComputeHash(buffer, 0, lastByteIndex+1);
122                     list.Add(hash);                    
123                 }
124                 return new TreeHash(algorithm) { Hashes = list,                    
125                     BlockSize = blockSize, 
126                     Bytes = stream.Length};
127             }            
128         }
129         
130         public static Task<TreeHash> CalculateTreeHashAsync(FileInfo fileInfo, int blockSize, string algorithm)
131         {
132             if (fileInfo == null)
133                 throw new ArgumentNullException("fileInfo");
134             if (blockSize <= 0)
135                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
136             if (String.IsNullOrWhiteSpace(algorithm))
137                 throw new ArgumentNullException("algorithm");
138             Contract.EndContractBlock();
139
140             return CalculateTreeHashAsync(fileInfo.FullName, blockSize, algorithm);
141         }
142
143
144         public static Task<TreeHash> CalculateTreeHashAsync(string filePath, int blockSize,string algorithm)
145         {
146             if (String.IsNullOrWhiteSpace(filePath))
147                 throw new ArgumentNullException("filePath");
148             if (blockSize <= 0)
149                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
150             if (String.IsNullOrWhiteSpace(algorithm))
151                 throw new ArgumentNullException("algorithm");
152             Contract.EndContractBlock();
153
154             //DON'T calculate hashes for folders
155             if (Directory.Exists(filePath))
156                 return Task.Factory.StartNew(()=>new TreeHash(algorithm));
157
158             //Calculate the hash of all blocks using a blockhash iterator
159             var treeHash =Iterate<TreeHash>(BlockHashIterator(filePath, blockSize, algorithm));
160             
161             return treeHash;
162         }
163
164         
165         private static IEnumerable<Task> BlockHashIterator(string filePath, int blockSize, string algorithm)
166         {
167             using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, true))
168             {
169                 //Calculate the blocks asyncrhonously
170                 var hashes= CalculateBlockHashesAsync(stream, blockSize, algorithm);
171                 yield return hashes;
172                 
173                 //And then proceed with creating and returning a TreeHash
174                 var length = stream.Length;                
175                 var list = hashes.Result.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
176
177                 var treeHash = new TreeHash(algorithm)
178                 {
179                     Bytes = length, 
180                     BlockSize = blockSize, 
181                     Hashes = list
182                 };
183
184                 yield return Task.Factory.FromResult(treeHash);
185             }
186
187         }
188
189         //The Task Iterator style allows the execution of a sequence of patterns using
190         //iterator syntax.
191         //This particular variation returns the result of the last task, if there is one
192         public static Task<TResult> Iterate<TResult>(IEnumerable<Task> asyncIterator)
193         {
194             if (asyncIterator == null) 
195                 throw new ArgumentNullException("asyncIterator");
196             var enumerator = asyncIterator.GetEnumerator();
197             if (enumerator == null)
198                 throw new InvalidOperationException("Invalid enumerable - GetEnumerator returned null");
199             
200             var tcs = new TaskCompletionSource<TResult>();
201             tcs.Task.ContinueWith(_ => enumerator.Dispose(), TaskContinuationOptions.ExecuteSynchronously);
202             
203             Action<Task> recursiveBody = null;
204             recursiveBody = delegate
205             {
206                 try
207                 {
208                     if (enumerator.MoveNext())
209                         enumerator.Current.ContinueWith(recursiveBody,
210                                                         TaskContinuationOptions.ExecuteSynchronously);
211                     else
212                     {
213                         var lastTask = enumerator.Current as Task<TResult>;
214                         var result = (lastTask !=null ) 
215                             ? lastTask.Result
216                             : default(TResult);
217
218                         tcs.TrySetResult(result);
219                     }
220                 }
221                 catch (Exception exc)
222                 {
223                     tcs.TrySetException(exc);
224                 }
225             };
226             recursiveBody(null);
227             return tcs.Task;
228         }
229
230
231         /*  public static byte[] CalculateTopHash(IEnumerable<byte[]> hashMap, string algorithm)
232         {
233             if (hashMap == null)
234                 throw new ArgumentNullException("hashMap");
235             if (String.IsNullOrWhiteSpace(algorithm))
236                 throw new ArgumentNullException("algorithm");
237             Contract.EndContractBlock();
238
239             var hashCount = hashMap.Count();
240             if (hashCount == 0)
241                 return null;
242             using (var hasher = HashAlgorithm.Create(algorithm))
243             {
244                 var i = 0;
245                 var count = hashCount;
246                 foreach (var block in hashMap)
247                 {
248                     if (i++ != count - 1)
249                         hasher.TransformBlock(block, 0, block.Length, null, 0);
250                     else
251                         hasher.TransformFinalBlock(block, 0, block.Length);
252                 }
253
254                 var finalHash = hasher.Hash;
255
256                 return finalHash;
257             }
258         }*/
259
260         public static byte[] CalculateTopHash(IList<byte[]> hashMap, string algorithm)
261         {
262             if (hashMap == null)
263                 throw new ArgumentNullException("hashMap");
264             if (String.IsNullOrWhiteSpace(algorithm))
265                 throw new ArgumentNullException("algorithm");
266             Contract.EndContractBlock();            
267
268             var hashCount = hashMap.Count;
269             //The tophash of an empty hashmap is an empty array
270             if (hashCount == 0)
271                 return new byte[0];
272             //The tophash of a one-item hashmap is the hash itself
273             if (hashCount == 1)
274                 return hashMap[0];
275
276             //Calculate the required number of leaf nodes
277             var leafs =(int)Math.Pow(2, Math.Ceiling(Math.Log(hashCount,2)));
278             //The size of all nodes is the same and equal to the size of the input hashes
279             var hashSize = hashMap[0].Length;
280
281             //If the hashmap containes fewer nodes than the required leaf count, we need to fill
282             //the rest with empty blocks
283             byte[] empty=null;            
284             if (hashCount < leafs)
285                 empty = new byte[hashSize];
286
287             //New hashes will be stored in a dictionary keyed by their step to preserve order
288             var newHashes=new ConcurrentDictionary<int, byte[]>();            
289             
290             Parallel.For(0, leafs/2,
291                 (step, state) =>
292                 {
293                     using (var hasher = HashAlgorithm.Create(algorithm))
294                     {
295                         var i = step*2;
296                         var block1 = i <= hashCount - 1 ? hashMap[i] : empty;
297                         var block2 = i <= hashCount - 2 ? hashMap[i + 1] : empty;
298
299                         hasher.TransformBlock(block1, 0, block1.Length, null, 0);
300                         hasher.TransformFinalBlock(block2, 0, block2.Length);
301
302                         var finalHash = hasher.Hash;
303                         //Store the final value in its proper place
304                         newHashes[step] = finalHash;
305                     }
306                 });
307 /*
308             Parallel.For(0, leafs/2,
309                 () => HashAlgorithm.Create(algorithm),
310                 (step, state, hasher) =>
311                 {
312                                  
313                     var i = step*2;
314                     var block1 = i <= hashCount - 1 ? hashMap[i]: empty;
315                     var block2 = i <= hashCount - 2 ? hashMap[i + 1] : empty;
316
317                     hasher.TransformBlock(block1, 0, block1.Length, null, 0);
318                     hasher.TransformFinalBlock(block2, 0, block2.Length);
319                     
320                     var finalHash = hasher.Hash;
321                     //Store the final value in its proper place
322                     newHashes[step] = finalHash;
323                                         
324                     return hasher;
325                 },
326                 hasher => hasher.Dispose());
327 */
328             //Extract the hashes to a list ordered by their step 
329             var hashes = newHashes.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
330             return CalculateTopHash(hashes, algorithm);                   
331         }
332
333         
334
335       /*  public static string CalculateTopHash(string hashString, string algorithm)
336         {
337             if (String.IsNullOrWhiteSpace(algorithm))
338                 throw new ArgumentNullException("algorithm");
339             Contract.EndContractBlock();
340             if (String.IsNullOrWhiteSpace(hashString))
341                 return String.Empty;
342
343             using (var hasher = HashAlgorithm.Create(algorithm))
344             {
345                 var bytes=Encoding.ASCII.GetBytes(hashString.ToLower());
346                 var hash=hasher.ComputeHash(bytes);
347                 return hash.ToHashString();
348             }
349         }*/
350
351         private static Task<ConcurrentDictionary<int, byte[]>> CalculateBlockHashesAsync(FileStream stream, int blockSize, string algorithm, ConcurrentDictionary<int, byte[]> hashes=null, int index = 0)
352         {
353             if (stream==null)
354                 throw new ArgumentNullException("stream");
355             if (String.IsNullOrWhiteSpace(algorithm))
356                 throw new ArgumentNullException("algorithm");
357             if (blockSize <= 0)
358                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
359             if (index< 0)
360                 throw new ArgumentOutOfRangeException("index", "index must be a non-negative value");
361             Contract.EndContractBlock();
362
363
364             if (hashes==null)
365                 hashes= new ConcurrentDictionary<int, byte[]>();
366
367             var buffer = new byte[blockSize];
368             return stream.ReadAsync(buffer, 0, blockSize).ContinueWith(t =>
369             {
370                 var read = t.Result;
371
372                 var nextTask = read == blockSize
373                                     ? CalculateBlockHashesAsync(stream, blockSize, algorithm, hashes, index + 1) 
374                                     : Task.Factory.StartNew(() => hashes);
375                 if (read>0)
376                     using (var hasher = HashAlgorithm.Create(algorithm))
377                     {
378                         //This code was added for compatibility with the way Pithos calculates the last hash
379                         //We calculate the hash only up to the last non-null byte
380                         //TODO: Remove if the server starts using the full block instead of the trimmed block
381                         var lastByteIndex = Array.FindLastIndex(buffer, read - 1, aByte => aByte != 0);
382
383                         var hash = hasher.ComputeHash(buffer, 0, lastByteIndex+1);
384                         hashes[index]=hash;
385                     }
386                 return nextTask;
387             }).Unwrap();
388         }
389
390     
391
392         public static byte[] CalculateHash(byte[] buffer,string algorithm)
393         {
394             if (buffer == null)
395                 throw new ArgumentNullException("buffer");
396             if (String.IsNullOrWhiteSpace(algorithm))
397                 throw new ArgumentNullException("algorithm");
398             Contract.EndContractBlock();
399
400             using (var hasher = HashAlgorithm.Create(algorithm))
401             {
402                 var hash = hasher.ComputeHash(buffer, 0, buffer.Length);
403                 return hash;
404             }        
405         }
406     }
407 }
408
409
410