Reduced buffer size while hashing to 16K
[pithos-ms-client] / trunk / Pithos.Network / Signature.cs
1 #region
2 /* -----------------------------------------------------------------------
3  * <copyright file="Signature.cs" company="GRNet">
4  * 
5  * Copyright 2011-2012 GRNET S.A. All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or
8  * without modification, are permitted provided that the following
9  * conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above
12  *      copyright notice, this list of conditions and the following
13  *      disclaimer.
14  *
15  *   2. Redistributions in binary form must reproduce the above
16  *      copyright notice, this list of conditions and the following
17  *      disclaimer in the documentation and/or other materials
18  *      provided with the distribution.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
22  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
28  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  *
34  * The views and conclusions contained in the software and
35  * documentation are those of the authors and should not be
36  * interpreted as representing official policies, either expressed
37  * or implied, of GRNET S.A.
38  * </copyright>
39  * -----------------------------------------------------------------------
40  */
41 #endregion
42 using System;
43 using System.Collections.Concurrent;
44 using System.Collections.Generic;
45 using System.Diagnostics.Contracts;
46 using System.IO;
47 using System.Reflection;
48 using System.Runtime.Remoting.Metadata.W3cXsd2001;
49 using System.Security.Cryptography;
50 using System.Threading.Tasks;
51 using System.Linq;
52
namespace Pithos.Network
{
    /// <summary>
    /// Helpers for calculating MD5 digests, block hashes and tree (top) hashes of files
    /// and byte buffers, plus conversions between hash bytes and hex strings.
    /// </summary>
    public static class Signature
    {
        private static readonly log4net.ILog Log = log4net.LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

        /// <summary>Buffer size, in bytes, passed to the <see cref="FileStream"/>s opened for hashing.</summary>
        public const int BufferSize = 16384;

        /// <summary>Lower-case hex MD5 digest of empty input.</summary>
        public const string MD5_EMPTY = "d41d8cd98f00b204e9800998ecf8427e";

        /// <summary>
        /// Calculates the MD5 digest of a file system entry.
        /// </summary>
        /// <param name="info">The file or directory to hash</param>
        /// <returns>
        /// <see cref="MD5_EMPTY"/> for a directory, otherwise the lower-case hex MD5 of the file's contents
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is null</exception>
        /// <exception cref="ArgumentException">The entry's full name is empty</exception>
        public static string CalculateMD5(FileSystemInfo info)
        {
            if (info == null)
                throw new ArgumentNullException("info");
            if (String.IsNullOrWhiteSpace(info.FullName))
                throw new ArgumentException("info.FullName is empty", "info");
            Contract.EndContractBlock();

            if (info is DirectoryInfo)
                return MD5_EMPTY;

            return CalculateMD5(info.FullName);
        }

        /// <summary>
        /// Calculates the MD5 digest of the file at the specified path.
        /// </summary>
        /// <param name="path">Path to the file</param>
        /// <returns>
        /// An empty string if <paramref name="path"/> is an existing directory,
        /// otherwise the lower-case hex MD5 of the file's contents
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="path"/> is null, empty or whitespace</exception>
        public static string CalculateMD5(string path)
        {
            if (String.IsNullOrWhiteSpace(path))
                throw new ArgumentNullException("path");
            Contract.EndContractBlock();

            //DON'T calculate hashes for folders
            //NOTE(review): this overload returns "" for folders while the FileSystemInfo overload
            //returns MD5_EMPTY. Left as-is because callers may depend on it - confirm before unifying.
            if (Directory.Exists(path))
                return "";

            using (var hasher = MD5.Create())
            using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, BufferSize, true))
            {
                return hasher.ComputeHash(stream).ToHashString();
            }
        }

        /// <summary>
        /// Converts a hex string to the bytes it represents.
        /// </summary>
        /// <param name="hash">Hex-encoded hash</param>
        /// <returns>The decoded bytes</returns>
        public static byte[] ToBytes(this string hash)
        {
            return SoapHexBinary.Parse(hash).Value;
        }

        /// <summary>
        /// Converts hash bytes to a lower-case hex string.
        /// </summary>
        /// <param name="hashBytes">The bytes to encode</param>
        /// <returns>The lower-case hex representation of <paramref name="hashBytes"/></returns>
        public static string ToHashString(this byte[] hashBytes)
        {
            //Hex digits lower-case identically in every culture; the invariant variant makes that explicit
            return new SoapHexBinary(hashBytes).ToString().ToLowerInvariant();
        }

        /// <summary>
        /// Calculates the tree hash of a file system entry, using the specified block size.
        /// </summary>
        /// <param name="fileInfo">The entry to hash. It is refreshed before its state is checked</param>
        /// <param name="blockSize">Block size used to calculate leaf hashes. Must be greater than zero</param>
        /// <param name="algorithm">Name of the hash algorithm used for the blocks</param>
        /// <param name="progress">Progress sink handed to the block hashing routine</param>
        /// <returns>
        /// <see cref="TreeHash.Empty"/> for a directory or a non-existent entry,
        /// otherwise the tree hash of the file
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fileInfo"/> is null or <paramref name="algorithm"/> is null, empty or whitespace
        /// </exception>
        /// <exception cref="ArgumentException">The entry's full name is empty</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is zero or negative</exception>
        public static TreeHash CalculateTreeHash(FileSystemInfo fileInfo, int blockSize, string algorithm, IProgress<double> progress)
        {
            if (fileInfo == null)
                throw new ArgumentNullException("fileInfo");
            if (String.IsNullOrWhiteSpace(fileInfo.FullName))
                throw new ArgumentException("fileInfo.FullName is empty", "fileInfo");
            if (blockSize <= 0)
                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
            if (String.IsNullOrWhiteSpace(algorithm))
                throw new ArgumentNullException("algorithm");
            Contract.EndContractBlock();

            //Pick up the entry's current state instead of whatever was cached when it was created
            fileInfo.Refresh();
            if (fileInfo is DirectoryInfo || !fileInfo.Exists)
                return TreeHash.Empty;

            return CalculateTreeHash(fileInfo.FullName, blockSize, algorithm, progress);
        }

        /// <summary>
        /// Calculates a file's tree hash synchronously, using the specified block size
        /// </summary>
        /// <param name="filePath">Path to an existing file</param>
        /// <param name="blockSize">Block size used to calculate leaf hashes</param>
        /// <param name="algorithm">Name of the hash algorithm used for the blocks</param>
        /// <param name="progress">Progress sink handed to the block hashing routine</param>
        /// <returns>A <see cref="TreeHash"/> with the block hashes and top hash</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="filePath"/> or <paramref name="algorithm"/> is null, empty or whitespace
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is zero or negative</exception>
        public static TreeHash CalculateTreeHash(string filePath, int blockSize, string algorithm, IProgress<double> progress)
        {
            if (String.IsNullOrWhiteSpace(filePath))
                throw new ArgumentNullException("filePath");
            if (blockSize <= 0)
                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
            if (String.IsNullOrWhiteSpace(algorithm))
                throw new ArgumentNullException("algorithm");
            Contract.EndContractBlock();

            //Same calculation as the "Async" overload, with a parallelism of 1
            return CalculateTreeHashAsync(filePath, blockSize, algorithm, 1, progress);
        }

        /// <summary>
        /// Calculates a file's tree hash with the requested degree of parallelism.
        /// Forwards to the overload that accepts a path.
        /// </summary>
        /// <param name="fileInfo">The file to hash</param>
        /// <param name="blockSize">Block size used to calculate leaf hashes. Must be greater than zero</param>
        /// <param name="algorithm">Name of the hash algorithm used for the blocks</param>
        /// <param name="parallelism">Degree of parallelism handed to the block hashing routine</param>
        /// <param name="progress">Progress sink handed to the block hashing routine</param>
        /// <returns>A <see cref="TreeHash"/> for the file</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fileInfo"/> is null, its full name is empty,
        /// or <paramref name="algorithm"/> is null, empty or whitespace
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is zero or negative</exception>
        public static TreeHash CalculateTreeHashAsync(FileInfo fileInfo, int blockSize, string algorithm, byte parallelism, IProgress<double> progress)
        {
            if (fileInfo == null)
                throw new ArgumentNullException("fileInfo");
            //The two-string ArgumentNullException constructor takes (paramName, message), in that order
            if (String.IsNullOrWhiteSpace(fileInfo.FullName))
                throw new ArgumentNullException("fileInfo", "fileInfo.FullName is empty");
            if (blockSize <= 0)
                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
            if (String.IsNullOrWhiteSpace(algorithm))
                throw new ArgumentNullException("algorithm");
            Contract.EndContractBlock();

            return CalculateTreeHashAsync(fileInfo.FullName, blockSize, algorithm, parallelism, progress);
        }

        /// <summary>
        /// Calculates a file's tree hash with the requested degree of parallelism.
        /// Despite its name, this method blocks until the block hashes and the MD5 are available.
        /// </summary>
        /// <param name="filePath">Path to the file to hash</param>
        /// <param name="blockSize">Block size used to calculate leaf hashes. Must be greater than zero</param>
        /// <param name="algorithm">Name of the hash algorithm used for the blocks</param>
        /// <param name="parallelism">Degree of parallelism handed to the block hashing routine</param>
        /// <param name="progress">Progress sink handed to the block hashing routine</param>
        /// <returns>
        /// An empty <see cref="TreeHash"/> for a directory or a non-existent file, otherwise a
        /// <see cref="TreeHash"/> carrying the file length, block size, ordered block hashes and MD5
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="filePath"/> or <paramref name="algorithm"/> is null, empty or whitespace
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is zero or negative</exception>
        /// <exception cref="ArgumentException">The path contains a ".pithos.cache" folder</exception>
        public static TreeHash CalculateTreeHashAsync(string filePath, int blockSize, string algorithm, int parallelism, IProgress<double> progress)
        {
            if (String.IsNullOrWhiteSpace(filePath))
                throw new ArgumentNullException("filePath");
            if (blockSize <= 0)
                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
            if (String.IsNullOrWhiteSpace(algorithm))
                throw new ArgumentNullException("algorithm");
            Contract.EndContractBlock();

            if (Log.IsDebugEnabled)
                Log.DebugFormat("Calc Signature [{0}]", filePath);

            //Split on both separators: splitting on '/' alone never matches a backslash-separated path
            var segments = filePath.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
            if (segments.Contains(".pithos.cache"))
                throw new ArgumentException(String.Format("Trying to hash file from the cache folder: [{0}]", filePath));

            //DON'T calculate hashes for folders
            if (Directory.Exists(filePath))
                return new TreeHash(algorithm);
            //The hash of a non-existent file is the empty hash
            if (!File.Exists(filePath))
                return new TreeHash(algorithm);

            //Calculate the hash of all blocks using a blockhash iterator
            using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, BufferSize, true))
            {
                //Each block is also posted to the MD5 calculator, whose result is collected below
                var md5 = new MD5BlockCalculator();
                Action<long, byte[], int> postAction = md5.PostBlock;

                //Block until all block hashes are available
                var hashes = BlockHashAlgorithms.CalculateBlockHashesInPlacePFor(stream, blockSize, algorithm, parallelism, postAction, progress).Result;

                //And then proceed with creating and returning a TreeHash
                var length = stream.Length;
                var list = hashes.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();

                var treeHash = new TreeHash(algorithm)
                {
                    Bytes = length,
                    BlockSize = blockSize,
                    Hashes = list,
                };

                treeHash.MD5 = md5.GetHash().Result;

                return treeHash;
            }
        }

        /// <summary>
        /// Calculates the top hash of a list of block hashes.
        /// Adjacent hashes are paired and each pair is hashed as one concatenated input;
        /// this is repeated on the resulting hashes until a single hash remains.
        /// If the number of hashes is not a power of two, the list is treated as if it were
        /// padded up to the next power of two with zero-filled blocks of the same size as the first hash.
        /// </summary>
        /// <param name="hashMap">The block hashes, in order</param>
        /// <param name="algorithm">Name of the hash algorithm used to combine pairs</param>
        /// <returns>
        /// An empty array for an empty list, the single hash itself for a one-item list,
        /// otherwise the combined top hash
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="hashMap"/> is null or <paramref name="algorithm"/> is null, empty or whitespace
        /// </exception>
        /// <exception cref="ArgumentException">
        /// More than one hash was supplied and <paramref name="algorithm"/> is not a known hash algorithm
        /// </exception>
        public static byte[] CalculateTopHash(IList<byte[]> hashMap, string algorithm)
        {
            if (hashMap == null)
                throw new ArgumentNullException("hashMap");
            if (String.IsNullOrWhiteSpace(algorithm))
                throw new ArgumentNullException("algorithm");
            Contract.EndContractBlock();

            var hashCount = hashMap.Count;
            //The tophash of an empty hashmap is an empty array
            if (hashCount == 0)
                return new byte[0];
            //The tophash of a one-item hashmap is the hash itself
            if (hashCount == 1)
                return hashMap[0];

            //Fail fast, outside the parallel loop, so that an unknown algorithm
            //surfaces as a plain ArgumentException instead of an AggregateException
            using (CreateHasher(algorithm))
            {
            }

            //Calculate the required number of leaf nodes
            var leafs = NextPowerOfTwo(hashCount);
            //The size of all nodes is the same and equal to the size of the input hashes
            var hashSize = hashMap[0].Length;

            //If the hashmap contains fewer nodes than the required leaf count, we need to fill
            //the rest with empty blocks
            var empty = hashCount < leafs ? new byte[hashSize] : null;

            //Every step writes only its own slot, so a plain array preserves order without locking
            var parents = new byte[leafs / 2][];

            Parallel.For(0, parents.Length, step =>
            {
                using (var hasher = CreateHasher(algorithm))
                {
                    var i = step * 2;
                    var block1 = i < hashCount ? hashMap[i] : empty;
                    var block2 = i + 1 < hashCount ? hashMap[i + 1] : empty;

                    //Hash the two blocks as a single, concatenated input
                    hasher.TransformBlock(block1, 0, block1.Length, null, 0);
                    hasher.TransformFinalBlock(block2, 0, block2.Length);

                    parents[step] = hasher.Hash;
                }
            });

            //Repeat with the parent level until a single hash remains
            return CalculateTopHash(parents, algorithm);
        }

        /// <summary>
        /// Calculates the hash of a buffer with the specified algorithm.
        /// </summary>
        /// <param name="buffer">The bytes to hash</param>
        /// <param name="algorithm">Name of the hash algorithm</param>
        /// <returns>The hash of the entire buffer</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="buffer"/> is null or <paramref name="algorithm"/> is null, empty or whitespace
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="algorithm"/> is not a known hash algorithm</exception>
        public static byte[] CalculateHash(byte[] buffer, string algorithm)
        {
            if (buffer == null)
                throw new ArgumentNullException("buffer");
            if (String.IsNullOrWhiteSpace(algorithm))
                throw new ArgumentNullException("algorithm");
            Contract.EndContractBlock();

            using (var hasher = CreateHasher(algorithm))
            {
                return hasher.ComputeHash(buffer, 0, buffer.Length);
            }
        }

        /// <summary>
        /// Creates the named hash algorithm, throwing instead of returning null for unknown names.
        /// </summary>
        private static HashAlgorithm CreateHasher(string algorithm)
        {
            var hasher = HashAlgorithm.Create(algorithm);
            if (hasher == null)
                throw new ArgumentException(String.Format("Unknown hash algorithm: [{0}]", algorithm), "algorithm");
            return hasher;
        }

        /// <summary>
        /// Returns the smallest power of two that is greater than or equal to <paramref name="value"/>,
        /// using integer arithmetic only so that no floating-point rounding is involved.
        /// </summary>
        private static int NextPowerOfTwo(int value)
        {
            var power = 1;
            while (power < value)
                power = checked(power * 2);
            return power;
        }
    }
}
305
306
307