1 // BZip2OutputStream.cs
3 // Copyright (C) 2001 Mike Krueger
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 // Linking this library statically or dynamically with other modules is
20 // making a combined work based on this library. Thus, the terms and
21 // conditions of the GNU General Public License cover the whole
24 // As a special exception, the copyright holders of this library give you
25 // permission to link this library with independent modules to produce an
26 // executable, regardless of the license terms of these independent
27 // modules, and to copy and distribute the resulting executable under
28 // terms of your choice, provided that you also meet, for each linked
29 // independent module, the terms and conditions of the license of that
30 // module. An independent module is a module which is not derived from
31 // or based on this library. If you modify this library, you may extend
32 // this exception to your version of the library, but you are not
33 // obligated to do so. If you do not wish to do so, delete this
34 // exception statement from your version.
38 using ICSharpCode.SharpZipLib.Silverlight.BZip2;
39 using ICSharpCode.SharpZipLib.Silverlight.Checksums;
41 namespace ICSharpCode.SharpZipLib.Silverlight.BZip2
44 /// An output stream that compresses into the BZip2 format
45 /// including file header chars into another stream.
47 public class BZip2OutputStream : Stream
50 const int SETMASK = (1 << 21);
51 const int CLEARMASK = (~SETMASK);
52 const int GREATER_ICOST = 15;
53 const int LESSER_ICOST = 0;
54 const int SMALL_THRESH = 20;
55 const int DEPTH_THRESH = 10;
58 If you are ever unlucky/improbable enough
59 to get a stack overflow whilst sorting,
60 increase the following constant and try
61 again. In practice I have never seen the
62 stack go above 27 elems, so the following
63 limit seems very generous.
65 const int QSORT_STACK_SIZE = 1000;
68 Knuth's increments seem to work better
69 than Incerpi-Sedgewick here. Possibly
70 because the number of elems to sort is
71 usually small, typically <= 20.
73 readonly int[] increments = new int[] {
74 1, 4, 13, 40, 121, 364, 1093, 3280,
75 9841, 29524, 88573, 265720,
82 /// Construct a default output stream with maximum block size
84 /// <param name="stream">The stream to write BZip data onto.</param>
85 public BZip2OutputStream(Stream stream) : this(stream, 9)
90 /// Initialise a new instance of the <see cref="BZip2OutputStream"></see>
91 /// for the specified stream, using the given blocksize.
93 /// <param name="stream">The stream to write compressed data to.</param>
94 /// <param name="blockSize">The block size to use.</param>
96 /// Valid block sizes are in the range 1..9, with 1 giving
97 /// the lowest compression and 9 the highest.
99 public BZip2OutputStream(Stream stream, int blockSize)
111 blockSize100k = blockSize;
112 AllocateCompressStructures();
120 /// Ensures that resources are freed and other cleanup operations
121 /// are performed when the garbage collector reclaims the BZip2OutputStream.
130 /// Get/set flag indicating ownership of underlying stream.
131 /// When the flag is true <see cref="Close"></see> will close the underlying stream also.
133 public bool IsStreamOwner
135 get { return isStreamOwner; }
136 set { isStreamOwner = value; }
140 #region Stream overrides
142 /// Gets a value indicating whether the current stream supports reading
144 public override bool CanRead
152 /// Gets a value indicating whether the current stream supports seeking
154 public override bool CanSeek {
161 /// Gets a value indicating whether the current stream supports writing
163 public override bool CanWrite {
165 return baseStream.CanWrite;
170 /// Gets the length in bytes of the stream
172 public override long Length {
174 return baseStream.Length;
179 /// Gets or sets the current position of this stream.
181 public override long Position {
183 return baseStream.Position;
186 throw new NotSupportedException("BZip2OutputStream position cannot be set");
191 /// Sets the current position of this stream to the given value.
193 /// <param name="offset">The point relative to the offset from which to being seeking.</param>
194 /// <param name="origin">The reference point from which to begin seeking.</param>
195 /// <returns>The new position in the stream.</returns>
196 public override long Seek(long offset, SeekOrigin origin)
198 throw new NotSupportedException("BZip2OutputStream Seek not supported");
202 /// Sets the length of this stream to the given value.
204 /// <param name="value">The new stream length.</param>
205 public override void SetLength(long value)
207 throw new NotSupportedException("BZip2OutputStream SetLength not supported");
211 /// Read a byte from the stream advancing the position.
213 /// <returns>The byte read cast to an int; -1 if end of stream.</returns>
214 public override int ReadByte()
216 throw new NotSupportedException("BZip2OutputStream ReadByte not supported");
220 /// Read a block of bytes
222 /// <param name="buffer">The buffer to read into.</param>
223 /// <param name="offset">The offset in the buffer to start storing data at.</param>
224 /// <param name="count">The maximum number of bytes to read.</param>
225 /// <returns>The total number of bytes read. This might be less than the number of bytes
226 /// requested if that number of bytes are not currently available, or zero
227 /// if the end of the stream is reached.</returns>
228 public override int Read(byte[] buffer, int offset, int count)
230 throw new NotSupportedException("BZip2OutputStream Read not supported");
234 /// Write a block of bytes to the stream
236 /// <param name="buffer">The buffer containing data to write.</param>
237 /// <param name="offset">The offset of the first byte to write.</param>
238 /// <param name="count">The number of bytes to write.</param>
239 public override void Write(byte[] buffer, int offset, int count)
241 if ( buffer == null ) {
242 throw new ArgumentNullException("buffer");
247 throw new ArgumentOutOfRangeException("offset");
252 throw new ArgumentOutOfRangeException("count");
255 if ( buffer.Length - offset < count )
257 throw new ArgumentException("Offset/count out of range");
260 for (int i = 0; i < count; ++i) {
261 WriteByte(buffer[offset + i]);
266 /// Write a byte to the stream.
268 /// <param name="value">The byte to write to the stream.</param>
269 public override void WriteByte(byte value)
271 int b = (256 + value) % 256;
272 if (currentChar != -1) {
273 if (currentChar == b) {
275 if (runLength > 254) {
292 /// End the current block and end compression.
293 /// Close the stream and free any resources
295 public override void Close()
298 GC.SuppressFinalize(this);
305 for (int i = 0; i < 256; i++) {
307 seqToUnseq[nInUse] = (char)i;
308 unseqToSeq[i] = (char)nInUse;
315 /// Get the number of bytes written to output.
319 if (last < allowableBlockSize) {
320 inUse[currentChar] = true;
321 for (int i = 0; i < runLength; i++) {
322 mCrc.Update(currentChar);
328 block[last + 1] = (byte)currentChar;
332 block[last + 1] = (byte)currentChar;
334 block[last + 1] = (byte)currentChar;
338 block[last + 1] = (byte)currentChar;
340 block[last + 1] = (byte)currentChar;
342 block[last + 1] = (byte)currentChar;
345 inUse[runLength - 4] = true;
347 block[last + 1] = (byte)currentChar;
349 block[last + 1] = (byte)currentChar;
351 block[last + 1] = (byte)currentChar;
353 block[last + 1] = (byte)currentChar;
355 block[last + 1] = (byte)(runLength - 4);
366 /// Get the number of bytes written to the output.
368 public int BytesWritten
370 get { return bytesOut; }
374 /// Releases the unmanaged resources used by the <see cref="BZip2OutputStream"/> and optionally releases the managed resources.
376 /// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
377 override protected void Dispose(bool disposing)
380 base.Dispose(disposing);
396 if ( IsStreamOwner ) {
404 /// Flush output buffers
406 public override void Flush()
414 nBlocksRandomised = 0;
416 /*--- Write header `magic' bytes indicating file-format == huffmanised,
417 followed by a digit indicating blockSize100k.
424 BsPutUChar('0' + blockSize100k);
434 for (int i = 0; i < 256; i++) {
438 /*--- 20 is just a paranoia constant ---*/
439 allowableBlockSize = BZip2Constants.baseBlockSize * blockSize100k - 20;
444 if (last < 0) { // dont do anything for empty files, (makes empty files compatible with original Bzip)
448 blockCRC = unchecked((uint)mCrc.Value);
449 combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
450 combinedCRC ^= blockCRC;
452 /*-- sort the block and establish position of original string --*/
453 DoReversibleTransformation();
456 A 6-byte block header, the value chosen arbitrarily
457 as 0x314159265359 :-). A 32 bit value does not really
458 give a strong enough guarantee that the value will not
459 appear by chance in the compressed datastream. Worst-case
460 probability of this event, for a 900k block, is about
461 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
462 For a compressed file of size 100Gb -- about 100000 blocks --
463 only a 48-bit marker will do. NB: normal compression/
464 decompression do *not* rely on these statistical properties.
465 They are only important when trying to recover blocks from
475 /*-- Now the block's CRC, so it is in a known place. --*/
477 BsPutint((int)blockCRC);
480 /*-- Now a single bit indicating randomisation. --*/
481 if (blockRandomised) {
488 /*-- Finally, block's contents proper. --*/
489 MoveToFrontCodeAndSend();
492 void EndCompression()
495 Now another magic 48-bit number, 0x177245385090, to
496 indicate the end of the last block. (sqrt(pi), if
497 you want to know. I did want to use e, but it contains
498 too much repetition -- 27 18 28 18 28 46 -- for me
499 to feel statistically comfortable. Call me paranoid.)
509 BsPutint((int)combinedCRC);
512 BsFinishedWithStream();
515 void BsSetStream(Stream stream)
523 void BsFinishedWithStream()
527 int ch = (bsBuff >> 24);
528 baseStream.WriteByte((byte)ch); // write 8-bit
535 void BsW(int n, int v)
537 while (bsLive >= 8) {
538 int ch = (bsBuff >> 24);
539 unchecked{baseStream.WriteByte((byte)ch);} // write 8-bit
544 bsBuff |= (v << (32 - bsLive - n));
548 void BsPutUChar(int c)
555 BsW(8, (u >> 24) & 0xFF);
556 BsW(8, (u >> 16) & 0xFF);
557 BsW(8, (u >> 8) & 0xFF);
561 void BsPutIntVS(int numBits, int c)
568 char[][] len = new char[BZip2Constants.N_GROUPS][];
569 for (int i = 0; i < BZip2Constants.N_GROUPS; ++i) {
570 len[i] = new char[BZip2Constants.MAX_ALPHA_SIZE];
573 int gs, ge, totc, bt, bc, iter;
574 int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
577 alphaSize = nInUse + 2;
578 for (int t = 0; t < BZip2Constants.N_GROUPS; t++) {
579 for (int v = 0; v < alphaSize; v++) {
580 len[t][v] = (char)GREATER_ICOST;
584 /*--- Decide how many coding tables to use ---*/
591 } else if (nMTF < 600) {
593 } else if (nMTF < 1200) {
595 } else if (nMTF < 2400) {
601 /*--- Generate an initial set of coding tables ---*/
606 int tFreq = remF / nPart;
609 while (aFreq < tFreq && ge < alphaSize - 1) {
611 aFreq += mtfFreq[ge];
614 if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1)) {
615 aFreq -= mtfFreq[ge];
619 for (int v = 0; v < alphaSize; v++) {
620 if (v >= gs && v <= ge) {
621 len[nPart - 1][v] = (char)LESSER_ICOST;
623 len[nPart - 1][v] = (char)GREATER_ICOST;
632 int[][] rfreq = new int[BZip2Constants.N_GROUPS][];
633 for (int i = 0; i < BZip2Constants.N_GROUPS; ++i) {
634 rfreq[i] = new int[BZip2Constants.MAX_ALPHA_SIZE];
637 int[] fave = new int[BZip2Constants.N_GROUPS];
638 short[] cost = new short[BZip2Constants.N_GROUPS];
640 Iterate up to N_ITERS times to improve the tables.
642 for (iter = 0; iter < BZip2Constants.N_ITERS; ++iter) {
643 for (int t = 0; t < nGroups; ++t) {
647 for (int t = 0; t < nGroups; ++t) {
648 for (int v = 0; v < alphaSize; ++v) {
657 /*--- Set group start & end marks. --*/
661 ge = gs + BZip2Constants.G_SIZE - 1;
667 Calculate the cost of this group as coded
668 by each of the coding tables.
670 for (int t = 0; t < nGroups; t++) {
675 short cost0, cost1, cost2, cost3, cost4, cost5;
676 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
677 for (int i = gs; i <= ge; ++i) {
678 short icv = szptr[i];
679 cost0 += (short)len[0][icv];
680 cost1 += (short)len[1][icv];
681 cost2 += (short)len[2][icv];
682 cost3 += (short)len[3][icv];
683 cost4 += (short)len[4][icv];
684 cost5 += (short)len[5][icv];
693 for (int i = gs; i <= ge; ++i) {
694 short icv = szptr[i];
695 for (int t = 0; t < nGroups; t++) {
696 cost[t] += (short)len[t][icv];
702 Find the coding table which is best for this group,
703 and record its identity in the selector table.
707 for (int t = 0; t < nGroups; ++t) {
715 selector[nSelectors] = (char)bt;
719 Increment the symbol frequencies for the selected table.
721 for (int i = gs; i <= ge; ++i) {
722 ++rfreq[bt][szptr[i]];
729 Recompute the tables based on the accumulated frequencies.
731 for (int t = 0; t < nGroups; ++t) {
732 HbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
740 if (!(nGroups < 8)) {
744 if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZip2Constants.G_SIZE)))) {
748 /*--- Compute MTF values for the selectors. ---*/
749 char[] pos = new char[BZip2Constants.N_GROUPS];
750 char ll_i, tmp2, tmp;
752 for (int i = 0; i < nGroups; i++) {
756 for (int i = 0; i < nSelectors; i++) {
760 while (ll_i != tmp) {
767 selectorMtf[i] = (char)j;
770 int[][] code = new int[BZip2Constants.N_GROUPS][];
772 for (int i = 0; i < BZip2Constants.N_GROUPS; ++i) {
773 code[i] = new int[BZip2Constants.MAX_ALPHA_SIZE];
776 /*--- Assign actual codes for the tables. --*/
777 for (int t = 0; t < nGroups; t++) {
780 for (int i = 0; i < alphaSize; i++) {
781 if (len[t][i] > maxLen) {
784 if (len[t][i] < minLen) {
794 HbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
797 /*--- Transmit the mapping table. ---*/
798 bool[] inUse16 = new bool[16];
799 for (int i = 0; i < 16; ++i) {
801 for (int j = 0; j < 16; ++j) {
802 if (inUse[i * 16 + j]) {
808 for (int i = 0; i < 16; ++i) {
816 for (int i = 0; i < 16; ++i) {
818 for (int j = 0; j < 16; ++j) {
819 if (inUse[i * 16 + j]) {
828 /*--- Now the selectors. ---*/
831 for (int i = 0; i < nSelectors; ++i) {
832 for (int j = 0; j < selectorMtf[i]; ++j) {
838 /*--- Now the coding tables. ---*/
839 for (int t = 0; t < nGroups; ++t) {
840 int curr = len[t][0];
842 for (int i = 0; i < alphaSize; ++i) {
843 while (curr < len[t][i]) {
847 while (curr > len[t][i]) {
855 /*--- And finally, the block data proper ---*/
862 ge = gs + BZip2Constants.G_SIZE - 1;
867 for (int i = gs; i <= ge; i++) {
868 BsW(len[selector[selCtr]][szptr[i]], code[selector[selCtr]][szptr[i]]);
874 if (!(selCtr == nSelectors)) {
879 void MoveToFrontCodeAndSend ()
881 BsPutIntVS(24, origPtr);
886 void SimpleSort(int lo, int hi, int d)
888 int i, j, h, bigN, hp;
897 while (increments[hp] < bigN) {
902 for (; hp >= 0; hp--) {
912 while (FullGtU(zptr[j-h]+d, v+d)) {
915 if (j <= (lo + h - 1))
927 while (FullGtU ( zptr[j-h]+d, v+d )) {
930 if (j <= (lo + h - 1)) {
943 while (FullGtU ( zptr[j-h]+d, v+d)) {
946 if (j <= (lo + h - 1)) {
953 if (workDone > workLimit && firstAttempt) {
960 void Vswap(int p1, int p2, int n )
973 void QSort3(int loSt, int hiSt, int dSt)
975 int unLo, unHi, ltLo, gtHi, med, n, m;
977 StackElement[] stack = new StackElement[QSORT_STACK_SIZE];
978 for (int count = 0; count < QSORT_STACK_SIZE; count++) {
979 stack[count] = new StackElement();
990 if (sp >= QSORT_STACK_SIZE) {
999 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
1000 SimpleSort(lo, hi, d);
1001 if (workDone > workLimit && firstAttempt) {
1007 med = Med3(block[zptr[lo] + d + 1],
1008 block[zptr[hi ] + d + 1],
1009 block[zptr[(lo + hi) >> 1] + d + 1]);
1019 n = ((int)block[zptr[unLo]+d + 1]) - med;
1023 zptr[unLo] = zptr[ltLo];
1039 n = ((int)block[zptr[unHi]+d + 1]) - med;
1043 zptr[unHi] = zptr[gtHi];
1060 int temp = zptr[unLo];
1061 zptr[unLo] = zptr[unHi];
1076 n = ((ltLo-lo) < (unLo-ltLo)) ? (ltLo-lo) : (unLo-ltLo);
1077 Vswap(lo, unLo-n, n);
1078 m = ((hi-gtHi) < (gtHi-unHi)) ? (hi-gtHi) : (gtHi-unHi);
1079 Vswap(unLo, hi-m+1, m);
1081 n = lo + unLo - ltLo - 1;
1082 m = hi - (gtHi - unHi) + 1;
1089 stack[sp].ll = n + 1;
1090 stack[sp].hh = m - 1;
1104 int[] runningOrder = new int[256];
1105 int[] copy = new int[256];
1106 bool[] bigDone = new bool[256];
1111 In the various block-sized structures, live data runs
1112 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
1113 set up the overshoot area for block.
1116 // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
1117 for (i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
1118 block[last + i + 2] = block[(i % (last + 1)) + 1];
1120 for (i = 0; i <= last + BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
1124 block[0] = (byte)(block[last + 1]);
1128 Use simpleSort(), since the full sorting mechanism
1129 has quite a large constant overhead.
1131 for (i = 0; i <= last; i++) {
1134 firstAttempt = false;
1135 workDone = workLimit = 0;
1136 SimpleSort(0, last, 0);
1139 for (i = 0; i <= 255; i++) {
1142 for (i = 0; i <= 65536; i++) {
1147 for (i = 0; i <= last; i++) {
1149 ftab[(c1 << 8) + c2]++;
1153 for (i = 1; i <= 65536; i++) {
1154 ftab[i] += ftab[i - 1];
1158 for (i = 0; i < last; i++) {
1166 j = ((block[last + 1]) << 8) + (block[1]);
1168 zptr[ftab[j]] = last;
1171 Now ftab contains the first loc of every small bucket.
1172 Calculate the running order, from smallest to largest
1176 for (i = 0; i <= 255; i++) {
1177 runningOrder[i] = i;
1187 for (i = h; i <= 255; i++) {
1188 vv = runningOrder[i];
1190 while ((ftab[((runningOrder[j-h])+1) << 8] - ftab[(runningOrder[j-h]) << 8]) > (ftab[((vv)+1) << 8] - ftab[(vv) << 8])) {
1191 runningOrder[j] = runningOrder[j-h];
1197 runningOrder[j] = vv;
1202 The main sorting loop.
1204 for (i = 0; i <= 255; i++) {
1207 Process big buckets, starting with the least full.
1209 ss = runningOrder[i];
1212 Complete the big bucket [ss] by quicksorting
1213 any unsorted small buckets [ss, j]. Hopefully
1214 previous pointer-scanning phases have already
1215 completed many of the small buckets [ss, j], so
1216 we don't have to sort them at all.
1218 for (j = 0; j <= 255; j++) {
1220 if(!((ftab[sb] & SETMASK) == SETMASK)) {
1221 int lo = ftab[sb] & CLEARMASK;
1222 int hi = (ftab[sb+1] & CLEARMASK) - 1;
1225 numQSorted += (hi - lo + 1);
1226 if (workDone > workLimit && firstAttempt) {
1230 ftab[sb] |= SETMASK;
1235 The ss big bucket is now done. Record this fact,
1236 and update the quadrant descriptors. Remember to
1237 update quadrants in the overshoot area too, if
1238 necessary. The "if (i < 255)" test merely skips
1239 this updating for the last bucket processed, since
1240 updating for the last bucket is pointless.
1245 int bbStart = ftab[ss << 8] & CLEARMASK;
1246 int bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1249 while ((bbSize >> shifts) > 65534) {
1253 for (j = 0; j < bbSize; j++) {
1254 int a2update = zptr[bbStart + j];
1255 int qVal = (j >> shifts);
1256 quadrant[a2update] = qVal;
1257 if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
1258 quadrant[a2update + last + 1] = qVal;
1262 if (!(((bbSize-1) >> shifts) <= 65535)) {
1268 Now scan this big bucket so as to synthesise the
1269 sorted order for small buckets [t, ss] for all t != ss.
1271 for (j = 0; j <= 255; j++) {
1272 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
1275 for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss+1) << 8] & CLEARMASK); j++) {
1276 c1 = block[zptr[j]];
1278 zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
1283 for (j = 0; j <= 255; j++) {
1284 ftab[(j << 8) + ss] |= SETMASK;
1290 void RandomiseBlock()
1295 for (i = 0; i < 256; i++) {
1299 for (i = 0; i <= last; i++) {
1301 rNToGo = (int)BZip2Constants.rNums[rTPos];
1308 block[i + 1] ^= (byte)((rNToGo == 1) ? 1 : 0);
1309 // handle 16 bit signed numbers
1310 block[i + 1] &= 0xFF;
1312 inUse[block[i + 1]] = true;
1316 void DoReversibleTransformation()
1318 workLimit = workFactor * last;
1320 blockRandomised = false;
1321 firstAttempt = true;
1325 if (workDone > workLimit && firstAttempt) {
1327 workLimit = workDone = 0;
1328 blockRandomised = true;
1329 firstAttempt = false;
1334 for (int i = 0; i <= last; i++) {
1341 if (origPtr == -1) {
1346 bool FullGtU(int i1, int i2)
1471 void AllocateCompressStructures()
1473 int n = BZip2Constants.baseBlockSize * blockSize100k;
1474 block = new byte[(n + 1 + BZip2Constants.NUM_OVERSHOOT_BYTES)];
1475 quadrant = new int[(n + BZip2Constants.NUM_OVERSHOOT_BYTES)];
1477 ftab = new int[65537];
1479 if (block == null || quadrant == null || zptr == null || ftab == null) {
1480 // int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537;
1481 // compressOutOfMemory ( totalDraw, n );
1485 The back end needs a place to store the MTF values
1486 whilst it calculates the coding tables. We could
1487 put them in the zptr array. However, these values
1488 will fit in a short, so we overlay szptr at the
1489 start of zptr, in the hope of reducing the number
1490 of cache misses induced by the multiple traversals
1491 of the MTF values when calculating coding tables.
1492 Seems to improve compression speed by about 1%.
1497 szptr = new short[2 * n];
1500 void GenerateMTFValues()
1502 char[] yy = new char[256];
1513 for (i = 0; i <= EOB; i++) {
1519 for (i = 0; i < nInUse; i++) {
1524 for (i = 0; i <= last; i++) {
1527 ll_i = unseqToSeq[block[zptr[i]]];
1531 while (ll_i != tmp) {
1545 switch (zPend % 2) {
1547 szptr[wr] = (short)BZip2Constants.RUNA;
1549 mtfFreq[BZip2Constants.RUNA]++;
1552 szptr[wr] = (short)BZip2Constants.RUNB;
1554 mtfFreq[BZip2Constants.RUNB]++;
1560 zPend = (zPend - 2) / 2;
1564 szptr[wr] = (short)(j + 1);
1573 switch (zPend % 2) {
1575 szptr[wr] = (short)BZip2Constants.RUNA;
1577 mtfFreq[BZip2Constants.RUNA]++;
1580 szptr[wr] = (short)BZip2Constants.RUNB;
1582 mtfFreq[BZip2Constants.RUNB]++;
1588 zPend = (zPend - 2) / 2;
1592 szptr[wr] = (short)EOB;
1601 throw new BZip2Exception("BZip2 output stream panic");
1604 static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen)
1607 Nodes and heap entries run from 1. Entry 0
1608 for both the heap and nodes is a sentinel.
1610 int nNodes, nHeap, n1, n2, j, k;
1613 int[] heap = new int[BZip2Constants.MAX_ALPHA_SIZE + 2];
1614 int[] weight = new int[BZip2Constants.MAX_ALPHA_SIZE * 2];
1615 int[] parent = new int[BZip2Constants.MAX_ALPHA_SIZE * 2];
1617 for (int i = 0; i < alphaSize; ++i)
1619 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
1631 for (int i = 1; i <= alphaSize; ++i)
1638 while (weight[tmp] < weight[heap[zz >> 1]])
1640 heap[zz] = heap[zz >> 1];
1645 if (!(nHeap < (BZip2Constants.MAX_ALPHA_SIZE+2)))
1653 heap[1] = heap[nHeap];
1665 if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]])
1669 if (weight[tmp] < weight[heap[yy]])
1674 heap[zz] = heap[yy];
1679 heap[1] = heap[nHeap];
1692 if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]])
1696 if (weight[tmp] < weight[heap[yy]])
1700 heap[zz] = heap[yy];
1705 parent[n1] = parent[n2] = nNodes;
1707 weight[nNodes] = (int)((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00)) |
1708 (int)(1 + (((weight[n1] & 0x000000ff) > (weight[n2] & 0x000000ff)) ? (weight[n1] & 0x000000ff) : (weight[n2] & 0x000000ff)));
1710 parent[nNodes] = -1;
1712 heap[nHeap] = nNodes;
1716 while (weight[tmp] < weight[heap[zz >> 1]])
1718 heap[zz] = heap[zz >> 1];
1723 if (!(nNodes < (BZip2Constants.MAX_ALPHA_SIZE * 2)))
1729 for (int i = 1; i <= alphaSize; ++i)
1733 while (parent[k] >= 0)
1738 len[i - 1] = (char)j;
1750 for (int i = 1; i < alphaSize; ++i)
1759 static void HbAssignCodes (int[] code, char[] length, int minLen, int maxLen, int alphaSize)
1762 for (int n = minLen; n <= maxLen; ++n)
1764 for (int i = 0; i < alphaSize; ++i)
1776 static byte Med3(byte a, byte b, byte c )
1805 #region Instance Fields
1806 bool isStreamOwner = true;
1809 index of the last char in the block, so
1810 the block size == last + 1.
1815 index in zptr[] of original string after sorting.
1820 always: in the range 0 .. 9.
1821 The current block size is 100000 * this number.
1825 bool blockRandomised;
1830 IChecksum mCrc = new StrangeCRC();
1832 bool[] inUse = new bool[256];
1835 char[] seqToUnseq = new char[256];
1836 char[] unseqToSeq = new char[256];
1838 char[] selector = new char[BZip2Constants.MAX_SELECTORS];
1839 char[] selectorMtf = new char[BZip2Constants.MAX_SELECTORS];
1849 int[] mtfFreq = new int[BZip2Constants.MAX_ALPHA_SIZE];
1852 * Used when sorting. If too many long comparisons
1853 * happen, we stop sorting, randomise the block
1854 * slightly, and try again.
1860 int nBlocksRandomised;
1862 int currentChar = -1;
1864 uint blockCRC, combinedCRC;
1865 int allowableBlockSize;
1872 /* This file was derived from a file containing this license:
1874 * This file is a part of bzip2 and/or libbzip2, a program and
1875 * library for lossless, block-sorting data compression.
1877 * Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
1879 * Redistribution and use in source and binary forms, with or without
1880 * modification, are permitted provided that the following conditions
1883 * 1. Redistributions of source code must retain the above copyright
1884 * notice, this list of conditions and the following disclaimer.
1886 * 2. The origin of this software must not be misrepresented; you must
1887 * not claim that you wrote the original software. If you use this
1888 * software in a product, an acknowledgment in the product
1889 * documentation would be appreciated but is not required.
1891 * 3. Altered source versions must be plainly marked as such, and must
1892 * not be misrepresented as being the original software.
1894 * 4. The name of the author may not be used to endorse or promote
1895 * products derived from this software without specific prior written
1898 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
1899 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1900 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1901 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
1902 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1903 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
1904 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1905 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1906 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
1907 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1908 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1910 * Java version ported by Keiron Liddle, Aftex Software <keiron@aftexsw.com> 1999-2001