New SQLite version
[pithos-ms-client] / trunk / Pithos.Network / Signature.cs
index 97dce8b..c8faac8 100644 (file)
@@ -1,11 +1,52 @@
+#region
+/* -----------------------------------------------------------------------
+ * <copyright file="Signature.cs" company="GRNet">
+ * 
+ * Copyright 2011-2012 GRNET S.A. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and
+ * documentation are those of the authors and should not be
+ * interpreted as representing official policies, either expressed
+ * or implied, of GRNET S.A.
+ * </copyright>
+ * -----------------------------------------------------------------------
+ */
+#endregion
 using System;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Diagnostics.Contracts;
 using System.IO;
+using System.Reflection;
 using System.Runtime.Remoting.Metadata.W3cXsd2001;
 using System.Security.Cryptography;
-using System.Text;
 using System.Threading.Tasks;
 using System.Linq;
 
@@ -13,6 +54,19 @@ namespace Pithos.Network
 {
     public static class Signature
     {
+        private static readonly log4net.ILog Log = log4net.LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
+
+        public static string CalculateMD5(FileInfo info)
+        {
+            if (info==null)
+                throw new ArgumentNullException("info");
+            if (String.IsNullOrWhiteSpace(info.FullName))
+                throw new ArgumentException("info.FullName is empty","info");
+            Contract.EndContractBlock();
+            //Convenience overload: the hash is calculated by the path-based overload from the file's full path
+            return CalculateMD5(info.FullName);
+        }
+
         public static string CalculateMD5(string path)
         {
             if (String.IsNullOrWhiteSpace(path))
@@ -25,7 +79,7 @@ namespace Pithos.Network
 
             string hash;
             using (var hasher = MD5.Create())
-            using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 4096, true))
+            using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 65536, true))
             {
                 var hashBytes = hasher.ComputeHash(stream);
                 hash = hashBytes.ToHashString();
@@ -58,9 +112,26 @@ namespace Pithos.Network
         public static string ToHashString(this byte[] hashBytes)
         {
             var shb = new SoapHexBinary(hashBytes);
-            return shb.ToString();
+            return shb.ToString().ToLower();
         }
 
+        public static TreeHash CalculateTreeHash(FileSystemInfo fileInfo, int blockSize, string algorithm)
+        {
+            if (fileInfo == null)
+                throw new ArgumentNullException("fileInfo");
+            if (String.IsNullOrWhiteSpace(fileInfo.FullName))
+                throw new ArgumentException("fileInfo.FullName is empty", "fileInfo");
+            if (blockSize <= 0)
+                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
+            if (String.IsNullOrWhiteSpace(algorithm))
+                throw new ArgumentNullException("algorithm");
+            Contract.EndContractBlock();
+            //Directories and entries that do not exist are not hashed; TreeHash.Empty is returned for them
+            if (fileInfo is DirectoryInfo || !fileInfo.Exists)
+                return TreeHash.Empty;
+            //Everything else is hashed by the path-based overload, using the entry's full path
+            return CalculateTreeHash(fileInfo.FullName, blockSize, algorithm);
+        }
 
         /// <summary>
         /// Calculates a file's tree hash synchronously, using the specified block size
@@ -79,30 +150,27 @@ namespace Pithos.Network
                 throw new ArgumentNullException("algorithm");
             Contract.EndContractBlock();
 
-            //DON'T calculate hashes for folders
-            if (Directory.Exists(filePath))
-                return null;
-
-
-            var list = new List<byte[]>();
-            using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, false))
-            using (var hasher = HashAlgorithm.Create(algorithm))
-            {
-                int read;
-                var buffer = new byte[blockSize];            
-                while ((read = stream.Read(buffer, 0, blockSize)) > 0)
-                {
-                    var hash = hasher.ComputeHash(buffer, 0, read);
-                    list.Add(hash);
-                }
-                return new TreeHash(algorithm) { Hashes = list,                    
-                    BlockSize = blockSize, 
-                    Bytes = stream.Length};
-            }            
+            var hash=CalculateTreeHashAsync(filePath, blockSize, algorithm, 2);
+            return hash.Result;
+        }
+        
+        public static async Task<TreeHash> CalculateTreeHashAsync(FileInfo fileInfo, int blockSize, string algorithm, byte parallelism)
+        {
+            if (fileInfo == null)
+                throw new ArgumentNullException("fileInfo");
+            if (String.IsNullOrWhiteSpace(fileInfo.FullName))
+                throw new ArgumentNullException("fileInfo", "fileInfo.FullName is empty"); //Argument order is (paramName, message)
+            if (blockSize <= 0)
+                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
+            if (String.IsNullOrWhiteSpace(algorithm))
+                throw new ArgumentNullException("algorithm");
+            Contract.EndContractBlock(); //The method is async, so the exceptions above surface when the returned task is awaited
+            //Hash the file through the path-based overload; parallelism is passed on unchanged (byte widens to int)
+            return await CalculateTreeHashAsync(fileInfo.FullName, blockSize, algorithm, parallelism);
         }
 
 
-        public static Task<TreeHash> CalculateTreeHashAsync(string filePath, int blockSize,string algorithm)
+        public static async Task<TreeHash> CalculateTreeHashAsync(string filePath, int blockSize,string algorithm, int parallelism)
         {
             if (String.IsNullOrWhiteSpace(filePath))
                 throw new ArgumentNullException("filePath");
@@ -112,137 +180,90 @@ namespace Pithos.Network
                 throw new ArgumentNullException("algorithm");
             Contract.EndContractBlock();
 
+            if (Log.IsDebugEnabled)
+                Log.DebugFormat("Calc Signature [{0}]",filePath);
+
             //DON'T calculate hashes for folders
             if (Directory.Exists(filePath))
-                return Task.Factory.StartNew(()=>new TreeHash(algorithm));
-
-
-            var hashes = new ConcurrentDictionary<int, byte[]>();
-            var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, true);
+                return new TreeHash(algorithm);
+            //The hash of a non-existent file is the empty hash
+            if (!File.Exists(filePath))
+                return new TreeHash(algorithm);
 
-            return CalculateBlockHashesAsync(stream, blockSize, algorithm,hashes)
-                .ContinueWith(t => {
-                                        var length = stream.Length;
-                                       stream.Close();
-                                       var list= t.Result.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();                                       
-                                       return new TreeHash(algorithm) { Hashes = list,                                           
-                                           BlockSize = blockSize, 
-                                           Bytes = length };
-                });
-        }
+            //Calculate the hash of all blocks using a blockhash iterator
+            using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, blockSize, true))
+            {
+                //Calculate the blocks asynchronously
+                var hashes = await BlockHashAlgorithms.CalculateBlockHashesInPlacePFor(stream, blockSize, algorithm, parallelism);                
 
-      /*  public static byte[] CalculateTopHash(IEnumerable<byte[]> hashMap, string algorithm)
-        {
-            if (hashMap == null)
-                throw new ArgumentNullException("hashMap");
-            if (String.IsNullOrWhiteSpace(algorithm))
-                throw new ArgumentNullException("algorithm");
-            Contract.EndContractBlock();
+                //And then proceed with creating and returning a TreeHash
+                var length = stream.Length;
+                var list = hashes.OrderBy(pair => pair.Key).Select(pair => pair.Value).ToList();
 
-            var hashCount = hashMap.Count();
-            if (hashCount == 0)
-                return null;
-            using (var hasher = HashAlgorithm.Create(algorithm))
-            {
-                var i = 0;
-                var count = hashCount;
-                foreach (var block in hashMap)
+                var treeHash = new TreeHash(algorithm)
                 {
-                    if (i++ != count - 1)
-                        hasher.TransformBlock(block, 0, block.Length, null, 0);
-                    else
-                        hasher.TransformFinalBlock(block, 0, block.Length);
-                }
-
-                var finalHash = hasher.Hash;
+                    Bytes = length,
+                    BlockSize = blockSize,
+                    Hashes = list
+                };
 
-                return finalHash;
+                return treeHash;
             }
-        }*/
+        }
+
         
-        public static byte[] CalculateTopHash(IEnumerable<byte[]> hashMap, string algorithm)
+        public static byte[] CalculateTopHash(IList<byte[]> hashMap, string algorithm) //Reduces the hashes pairwise, level by level, until one top hash remains
         {
             if (hashMap == null)
                 throw new ArgumentNullException("hashMap");
             if (String.IsNullOrWhiteSpace(algorithm))
                 throw new ArgumentNullException("algorithm");
-            Contract.EndContractBlock();
+            Contract.EndContractBlock();            
 
-            var hashCount = hashMap.Count();
+            var hashCount = hashMap.Count;
+            //The tophash of an empty hashmap is an empty array
             if (hashCount == 0)
-                return null;
+                return new byte[0];
+            //The tophash of a one-item hashmap is the hash itself
             if (hashCount == 1)
-                return hashMap.First();
-
-            var newHashes=new List<byte[]>();
-            var leafs =Math.Pow(2, Math.Log(hashCount,2)+1);
-            for (int i = 0; i < leafs;i+=2 )
-            {
-                using (var hasher = HashAlgorithm.Create(algorithm))
-                {
-                    var block1 = i >hashCount - 1 ? new byte[hashMap.First().Length] : hashMap.ElementAt(i);
-                    var block2 = i>hashCount-2 ? new byte[block1.Length] : hashMap.ElementAt(i+1);
-                    
-                    hasher.TransformBlock(block1, 0, block1.Length, null, 0);
-                    hasher.TransformFinalBlock(block2, 0, block2.Length);                        
-                    var finalHash = hasher.Hash;
-                    newHashes.Add(finalHash);
-                }                    
-            }
-            return CalculateTopHash(newHashes, algorithm);                   
-        }
-
-        
-
-        public static string CalculateTopHash(string hashString, string algorithm)
-        {
-            if (String.IsNullOrWhiteSpace(algorithm))
-                throw new ArgumentNullException("algorithm");
-            Contract.EndContractBlock();
-            if (String.IsNullOrWhiteSpace(hashString))
-                return String.Empty;
-
-            using (var hasher = HashAlgorithm.Create(algorithm))
-            {
-                var bytes=Encoding.ASCII.GetBytes(hashString.ToLower());
-                var hash=hasher.ComputeHash(bytes);
-                return hash.ToHashString();
-            }
-        }
-
-        private static Task<ConcurrentDictionary<int, byte[]>> CalculateBlockHashesAsync(FileStream stream, int blockSize, string algorithm, ConcurrentDictionary<int, byte[]> hashes, int index = 0)
-        {
-            if (stream==null)
-                throw new ArgumentNullException("stream");
-            if (hashes==null)
-                throw new ArgumentNullException("hashes");
-            if (String.IsNullOrWhiteSpace(algorithm))
-                throw new ArgumentNullException("algorithm");
-            if (blockSize <= 0)
-                throw new ArgumentOutOfRangeException("blockSize", "blockSize must be a value greater than zero ");
-            if (index< 0)
-                throw new ArgumentOutOfRangeException("index", "index must be a non-negative value");
-            Contract.EndContractBlock();
+                return hashMap[0];
 
+            //Leaf count is the smallest power of two >= hashCount; integer doubling avoids Math.Log rounding errors
+            var leafs = 1;
+            while (leafs < hashCount) leafs <<= 1;
+            var hashSize = hashMap[0].Length; //The size of all nodes is the same and equal to the size of the input hashes
 
-            var buffer = new byte[blockSize];
-            return stream.ReadAsync(buffer, 0, blockSize).ContinueWith(t =>
-            {
-                var read = t.Result;
-
-                var nextTask = read == blockSize
-                                    ? CalculateBlockHashesAsync(stream, blockSize, algorithm, hashes, index + 1) 
-                                    : Task.Factory.StartNew(() => hashes);
+            //If the hashmap contains fewer nodes than the required leaf count, we need to fill
+            //the rest with zero-filled blocks of the same size
+            byte[] empty=null;
+            if (hashCount < leafs)
+                empty = new byte[hashSize];
 
-                using (var hasher = HashAlgorithm.Create(algorithm))
+            //Each step writes only its own slot of this array, so the results stay in step order without sorting
+            var newHashes = new byte[leafs/2][];
+            //Hash every pair of adjacent nodes in parallel to produce the next level of the tree
+            Parallel.For(0, leafs/2,
+                (step, state) =>
                 {
-                    var hash = hasher.ComputeHash(buffer, 0, read);
-                    hashes[index]=hash;
-                }
-                return nextTask;
-            }).Unwrap();
-        }
+                    using (var hasher = HashAlgorithm.Create(algorithm))
+                    {
+                        var i = step*2;
+                        var block1 = i <= hashCount - 1 ? hashMap[i] : empty;
+                        var block2 = i <= hashCount - 2 ? hashMap[i + 1] : empty;
+                        //Feed both nodes to the hasher so the result is the hash of their concatenation
+                        hasher.TransformBlock(block1, 0, block1.Length, null, 0);
+                        hasher.TransformFinalBlock(block2, 0, block2.Length);
+
+                        var finalHash = hasher.Hash;
+                        //Store the final value in its proper place
+                        newHashes[step] = finalHash;
+                    }
+                });
 
+            //The array is already in step order, so it becomes the hashmap of the next level.
+            //Recurse until a single hash, the top hash, remains
+            return CalculateTopHash(newHashes, algorithm);
+        }
     
 
         public static byte[] CalculateHash(byte[] buffer,string algorithm)