Fixes to hashing
[pithos-ms-client] / trunk / Pithos.Network / Signature.cs
1 #region
2 /* -----------------------------------------------------------------------
3  * <copyright file="Signature.cs" company="GRNet">
4  * 
5  * Copyright 2011-2012 GRNET S.A. All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or
8  * without modification, are permitted provided that the following
9  * conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above
12  *      copyright notice, this list of conditions and the following
13  *      disclaimer.
14  *
15  *   2. Redistributions in binary form must reproduce the above
16  *      copyright notice, this list of conditions and the following
17  *      disclaimer in the documentation and/or other materials
18  *      provided with the distribution.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
22  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
28  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  *
34  * The views and conclusions contained in the software and
35  * documentation are those of the authors and should not be
36  * interpreted as representing official policies, either expressed
37  * or implied, of GRNET S.A.
38  * </copyright>
39  * -----------------------------------------------------------------------
40  */
41 #endregion
42 using System;
43 using System.Collections.Concurrent;
44 using System.Collections.Generic;
45 using System.Diagnostics.Contracts;
46 using System.IO;
47 using System.Reflection;
48 using System.Runtime.Remoting.Metadata.W3cXsd2001;
49 using System.Security.Cryptography;
50 using System.Threading.Tasks;
51 using System.Linq;
52
53 namespace Pithos.Network
54 {
55     public static class Signature
56     {
57         private static readonly log4net.ILog Log = log4net.LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
58
59         public static string CalculateMD5(FileInfo info)
60         {
61             if (info==null)
62                 throw new ArgumentNullException("info");
63             if (String.IsNullOrWhiteSpace(info.FullName))
64                 throw new ArgumentException("info.FullName is empty","info");
65             Contract.EndContractBlock();
66
67             return CalculateMD5(info.FullName);
68         }
69
70         public static string CalculateMD5(string path)
71         {
72             if (String.IsNullOrWhiteSpace(path))
73                 throw new ArgumentNullException("path");
74             Contract.EndContractBlock();
75
76             //DON'T calculate hashes for folders
77             if (Directory.Exists(path))
78                 return "";
79
80             string hash;
81             using (var hasher = MD5.Create())
82             using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 65536, true))
83             {
84                 var hashBytes = hasher.ComputeHash(stream);
85                 hash = hashBytes.ToHashString();
86             }
87             return hash;
88         }
89
90 /*
91         public static string BytesToString(byte[] hashBytes)
92         {
93             var shb=new SoapHexBinary(hashBytes);
94             return shb.ToString();
95             
96         }
97
98
99         public static byte[] StringToBytes(string hash)
100         {
101             var shb=SoapHexBinary.Parse(hash);
102             return shb.Value;
103         }
104 */
105
106         public static byte[] ToBytes(this string hash)
107         {
108             var shb = SoapHexBinary.Parse(hash);
109             return shb.Value;
110         }
111
112         public static string ToHashString(this byte[] hashBytes)
113         {
114             var shb = new SoapHexBinary(hashBytes);
115             return shb.ToString().ToLower();
116         }
117
118         public static TreeHash CalculateTreeHash(FileSystemInfo fileInfo, int blockSize, string algorithm)
119         {
120             if (fileInfo == null)
121                 throw new ArgumentNullException("fileInfo");
122             if (String.IsNullOrWhiteSpace(fileInfo.FullName))
123                 throw new ArgumentException("fileInfo.FullName is empty", "fileInfo");
124             if (blockSize <= 0)
125                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
126             if (String.IsNullOrWhiteSpace(algorithm))
127                 throw new ArgumentNullException("algorithm");
128             Contract.EndContractBlock();
129
130             if (fileInfo is DirectoryInfo || !fileInfo.Exists)
131                 return TreeHash.Empty;
132
133             return CalculateTreeHash(fileInfo.FullName, blockSize, algorithm);
134         }
135
136         /// <summary>
137         /// Calculates a file's tree hash synchronously, using the specified block size
138         /// </summary>
139         /// <param name="filePath">Path to an existing file</param>
140         /// <param name="blockSize">Block size used to calculate leaf hashes</param>
141         /// <param name="algorithm"></param>
142         /// <returns>A <see cref="TreeHash"/> with the block hashes and top hash</returns>
143         public static TreeHash CalculateTreeHash(string filePath, int blockSize, string algorithm)
144         {
145             if (String.IsNullOrWhiteSpace(filePath))
146                 throw new ArgumentNullException("filePath");
147             if (blockSize<=0)
148                 throw new ArgumentOutOfRangeException("blockSize","blockSize must be a value greater than zero ");
149             if (String.IsNullOrWhiteSpace(algorithm))
150                 throw new ArgumentNullException("algorithm");
151             Contract.EndContractBlock();
152
153             var hash=CalculateTreeHashAsync(filePath, blockSize, algorithm, 2);
154             return hash.Result;
155         }
156         
157         public static async Task<TreeHash> CalculateTreeHashAsync(FileInfo fileInfo, int blockSize, string algorithm, byte parallelism)
158         {
159             if (fileInfo == null)
160                 throw new ArgumentNullException("fileInfo");
161             if (String.IsNullOrWhiteSpace(fileInfo.FullName))
162                 throw new ArgumentNullException("fileInfo.FullName is empty","fileInfo");
163             if (blockSize <= 0)
164                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
165             if (String.IsNullOrWhiteSpace(algorithm))
166                 throw new ArgumentNullException("algorithm");
167             Contract.EndContractBlock();
168             
169             return await CalculateTreeHashAsync(fileInfo.FullName, blockSize, algorithm, parallelism);
170         }
171
172
173         public static async Task<TreeHash> CalculateTreeHashAsync(string filePath, int blockSize,string algorithm, int parallelism)
174         {
175             if (String.IsNullOrWhiteSpace(filePath))
176                 throw new ArgumentNullException("filePath");
177             if (blockSize <= 0)
178                 throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
179             if (String.IsNullOrWhiteSpace(algorithm))
180                 throw new ArgumentNullException("algorithm");
181             Contract.EndContractBlock();
182
183             if (Log.IsDebugEnabled)
184                 Log.DebugFormat("Calc Signature [{0}]",filePath);
185
186             //DON'T calculate hashes for folders
187             if (Directory.Exists(filePath))
188                 return new TreeHash(algorithm);
189             //The hash of a non-existent file is the empty hash
190             if (!File.Exists(filePath))
191                 return new TreeHash(algorithm);
192
193             //Calculate the hash of all blocks using a blockhash iterator
194             using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, true))
195             {
196                 //Calculate the blocks asyncrhonously
197                 var hashes = await BlockHashAlgorithms.CalculateBlockHashesInPlacePFor(stream, blockSize, algorithm, parallelism);                
198
199                 //And then proceed with creating and returning a TreeHash
200                 var length = stream.Length;
201                 var list = hashes.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
202
203                 var treeHash = new TreeHash(algorithm)
204                 {
205                     Bytes = length,
206                     BlockSize = blockSize,
207                     Hashes = list,
208                 };
209
210                 string fileHash;
211                 var hasher = HashAlgorithm.Create("MD5");
212                 stream.Position = 0;
213                 treeHash.MD5= hasher.ComputeHash(stream).ToHashString();
214
215                 return treeHash;
216             }
217         }
218
219         
220         public static byte[] CalculateTopHash(IList<byte[]> hashMap, string algorithm)
221         {
222             if (hashMap == null)
223                 throw new ArgumentNullException("hashMap");
224             if (String.IsNullOrWhiteSpace(algorithm))
225                 throw new ArgumentNullException("algorithm");
226             Contract.EndContractBlock();            
227
228             var hashCount = hashMap.Count;
229             //The tophash of an empty hashmap is an empty array
230             if (hashCount == 0)
231                 return new byte[0];
232             //The tophash of a one-item hashmap is the hash itself
233             if (hashCount == 1)
234                 return hashMap[0];
235
236             //Calculate the required number of leaf nodes
237             var leafs =(int)Math.Pow(2, Math.Ceiling(Math.Log(hashCount,2)));
238             //The size of all nodes is the same and equal to the size of the input hashes
239             var hashSize = hashMap[0].Length;
240
241             //If the hashmap containes fewer nodes than the required leaf count, we need to fill
242             //the rest with empty blocks
243             byte[] empty=null;            
244             if (hashCount < leafs)
245                 empty = new byte[hashSize];
246
247             //New hashes will be stored in a dictionary keyed by their step to preserve order
248             var newHashes=new ConcurrentDictionary<int, byte[]>();            
249             
250             Parallel.For(0, leafs/2,
251                 (step, state) =>
252                 {
253                     using (var hasher = HashAlgorithm.Create(algorithm))
254                     {
255                         var i = step*2;
256                         var block1 = i <= hashCount - 1 ? hashMap[i] : empty;
257                         var block2 = i <= hashCount - 2 ? hashMap[i + 1] : empty;
258
259                         hasher.TransformBlock(block1, 0, block1.Length, null, 0);
260                         hasher.TransformFinalBlock(block2, 0, block2.Length);
261
262                         var finalHash = hasher.Hash;
263                         //Store the final value in its proper place
264                         newHashes[step] = finalHash;
265                     }
266                 });
267
268             //Extract the hashes to a list ordered by their step 
269             var hashes = newHashes.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
270             return CalculateTopHash(hashes, algorithm);                   
271         }        
272     
273
274         public static byte[] CalculateHash(byte[] buffer,string algorithm)
275         {
276             if (buffer == null)
277                 throw new ArgumentNullException("buffer");
278             if (String.IsNullOrWhiteSpace(algorithm))
279                 throw new ArgumentNullException("algorithm");
280             Contract.EndContractBlock();
281
282             using (var hasher = HashAlgorithm.Create(algorithm))
283             {
284                 var hash = hasher.ComputeHash(buffer, 0, buffer.Length);
285                 return hash;
286             }        
287         }
288     }
289 }
290
291
292