From 9922008d657c72235c402343fd88847b044014ad Mon Sep 17 00:00:00 2001 From: Stefan Bodewig Date: Fri, 27 Mar 2009 13:49:42 +0000 Subject: [PATCH] Improve bzip2 performance, submitted by Rodrigo Schmidt, PR 24798 git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@759138 13f79535-47bb-0310-9956-ffa450edef68 --- CONTRIBUTORS | 1 + contributors.xml | 4 + .../tools/bzip2/CBZip2OutputStream.java | 2838 ++++++++++------- 3 files changed, 1652 insertions(+), 1191 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 4326f26bf..913a0a5e9 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -252,6 +252,7 @@ Roberto Scaramuzzi Robin Green Rob Oxspring Rob van Oostrum +Rodrigo Schmidt Roger Vaughn Roman Ivashin Ronen Mashal diff --git a/contributors.xml b/contributors.xml index 299d1db5d..efa9113ec 100644 --- a/contributors.xml +++ b/contributors.xml @@ -1028,6 +1028,10 @@ Rob van Oostrum + + Rodrigo + Schmidt + Roger Vaughn diff --git a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java index ac2f510b3..bc84844ad 100644 --- a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java +++ b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java @@ -31,337 +31,681 @@ import java.io.IOException; * An output stream that compresses into the BZip2 format (without the file * header chars) into another stream. * - * TODO: Update to BZip2 1.0.1 + *

+ * The compression requires large amounts of memory. Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * CBZip2OutputStream to release the allocated memory. + *

+ * + *

You can shrink the amount of allocated memory and maybe raise + * the compression speed by choosing a lower blocksize, which in turn + * may cause a lower compression ratio. You can avoid unnecessary + * memory allocation by avoiding using a blocksize which is bigger + * than the size of the input.

+ * + *

You can compute the memory usage for compressing by the + * following formula:

+ * + *
+ * <code>400k + (9 * blocksize)</code>.
+ * 
+ * + *

To get the memory required for decompression by {@link + * CBZip2InputStream CBZip2InputStream} use

+ * + *
+ * <code>65k + (5 * blocksize)</code>.
+ * 
+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Memory usage by blocksize
Blocksize Compression
+ * memory usage
Decompression
+ * memory usage
100k1300k565k
200k2200k1065k
300k3100k1565k
400k4000k2065k
500k4900k2565k
600k5800k3065k
700k6700k3565k
800k7600k4065k
900k8500k4565k
+ * + *

+ * For decompression CBZip2InputStream allocates less memory if the + * bzipped input is smaller than one block. + *

+ * + *

+ * Instances of this class are not threadsafe. + *

+ * + *

+ * TODO: Update to BZip2 1.0.1 + *

+ * */ -public class CBZip2OutputStream extends OutputStream implements BZip2Constants { +public class CBZip2OutputStream extends OutputStream + implements BZip2Constants { + + /** + * The minimum supported blocksize == 1. + */ + public static final int MIN_BLOCKSIZE = 1; + + /** + * The maximum supported blocksize == 9. + */ + public static final int MAX_BLOCKSIZE = 9; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ protected static final int SETMASK = (1 << 21); + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ protected static final int CLEARMASK = (~SETMASK); + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ protected static final int GREATER_ICOST = 15; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ protected static final int LESSER_ICOST = 0; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ protected static final int SMALL_THRESH = 20; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ protected static final int DEPTH_THRESH = 10; - /* - If you are ever unlucky/improbable enough - to get a stack overflow whilst sorting, - increase the following constant and try - again. In practice I have never seen the - stack go above 27 elems, so the following - limit seems very generous. - */ - protected static final int QSORT_STACK_SIZE = 1000; + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int WORK_FACTOR = 30; - private static void panic() { - System.out.println("panic"); - //throw new CError(); - } + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + *

If you are ever unlucky/improbable enough to get a stack + * overflow whilst sorting, increase the following constant and + * try again. In practice I have never seen the stack go above 27 + * elems, so the following limit seems very generous.

+ */ + protected static final int QSORT_STACK_SIZE = 1000; - private void makeMaps() { - int i; - nInUse = 0; - for (i = 0; i < 256; i++) { - if (inUse[i]) { - seqToUnseq[nInUse] = (char) i; - unseqToSeq[i] = (char) nInUse; - nInUse++; - } - } - } + /** + * Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, + 2391484 }; + /** + * This method is accessible by subclasses for historical + * purposes. If you don't know what it does then you don't need + * it. + */ protected static void hbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen) { /* - Nodes and heap entries run from 1. Entry 0 - for both the heap and nodes is a sentinel. - */ - int nNodes, nHeap, n1, n2, i, j, k; - boolean tooLong; - - int[] heap = new int[MAX_ALPHA_SIZE + 2]; - int[] weight = new int[MAX_ALPHA_SIZE * 2]; - int[] parent = new int[MAX_ALPHA_SIZE * 2]; - - for (i = 0; i < alphaSize; i++) { + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = new int[MAX_ALPHA_SIZE * 2]; + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; + + for (int i = alphaSize; --i >= 0;) { weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; } - while (true) { - nNodes = alphaSize; - nHeap = 0; + for (boolean tooLong = true; tooLong;) { + tooLong = false; + int nNodes = alphaSize; + int nHeap = 0; heap[0] = 0; weight[0] = 0; parent[0] = -2; - for (i = 1; i <= alphaSize; i++) { + for (int i = 1; i <= alphaSize; i++) { parent[i] = -1; nHeap++; heap[nHeap] = i; - { - int zz, tmp; - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; } + heap[zz] = tmp; } - if (!(nHeap < (MAX_ALPHA_SIZE + 2))) { - panic(); - } + + // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; while (nHeap > 1) { - n1 = heap[1]; + int n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; - { - int zz = 0, yy = 0, tmp = 0; - zz = 1; - tmp = heap[zz]; - while (true) { - yy = zz << 1; - if (yy > nHeap) { - break; - } - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) { - break; - } - heap[zz] = heap[yy]; - zz = yy; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; } - heap[zz] = tmp; + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; } - n2 = heap[1]; + + heap[zz] = tmp; + + int n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; - { - int zz = 0, yy = 0, tmp = 0; - zz = 1; - tmp = heap[zz]; - while (true) { - yy = zz << 1; - if (yy > nHeap) { - break; - } - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) { - break; - } - heap[zz] = heap[yy]; - zz = yy; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; } - heap[zz] = tmp; + + heap[zz] = heap[yy]; + zz = yy; } + + heap[zz] = tmp; nNodes++; parent[n1] = parent[n2] = nNodes; - weight[nNodes] = ((weight[n1] & 0xffffff00) - + (weight[n2] & 0xffffff00)) - | (1 + (((weight[n1] & 0x000000ff) - > (weight[n2] & 0x000000ff)) - ? (weight[n1] & 0x000000ff) - : (weight[n2] & 0x000000ff))); + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = (((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | + (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff)) + )); parent[nNodes] = -1; nHeap++; heap[nHeap] = nNodes; - { - int zz = 0, tmp = 0; - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; } - } - if (!(nNodes < (MAX_ALPHA_SIZE * 2))) { - panic(); + heap[zz] = tmp; + } - tooLong = false; - for (i = 1; i <= alphaSize; i++) { - j = 0; - k = i; - while (parent[k] >= 0) { - k = parent[k]; + // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; j++; } + len[i - 1] = (char) j; if (j > maxLen) { tooLong = true; } } - if (!tooLong) { - break; - } - - for (i = 1; i < alphaSize; i++) { - j = weight[i] >> 8; - j = 1 + (j / 2); - weight[i] = j << 8; + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } } } } - /* - index of the last char in the block, so - the block size == last + 1. - */ - int last; + private static void hbMakeCodeLengths(final byte[] len, final int[] freq, + final Data dat, final int alphaSize, + final int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = dat.heap; + final int[] weight = dat.weight; + final int[] parent = dat.parent; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } - /* - index in zptr[] of original string after sorting. - */ - int origPtr; + for (boolean tooLong = true; tooLong;) { + tooLong = false; - /* - always: in the range 0 .. 9. - The current block size is 100000 * this number. - */ - int blockSize100k; + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; - boolean blockRandomised; + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; - int bytesOut; - int bsBuff; - int bsLive; - CRC mCrc = new CRC(); + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } - private boolean[] inUse = new boolean[256]; - private int nInUse; + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; - private char[] seqToUnseq = new char[256]; - private char[] unseqToSeq = new char[256]; + int yy = 0; + int zz = 1; + int tmp = heap[1]; - private char[] selector = new char[MAX_SELECTORS]; - private char[] selectorMtf = new char[MAX_SELECTORS]; + while (true) { + yy = zz << 1; - private char[] block; - private int[] quadrant; - private int[] zptr; - private short[] szptr; - private int[] ftab; + if (yy > nHeap) { + break; + } - private int nMTF; + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = ((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (byte) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Index in fmap[] of original string after sorting. + */ + private int origPtr; + + /** + * Always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private final int blockSize100k; + + private boolean blockRandomised; - private int[] mtfFreq = new int[MAX_ALPHA_SIZE]; + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private int nMTF; /* - * Used when sorting. If too many long comparisons - * happen, we stop sorting, randomise the block - * slightly, and try again. + * Used when sorting. If too many long comparisons happen, we stop sorting, + * randomise the block slightly, and try again. */ - private int workFactor; private int workDone; private int workLimit; private boolean firstAttempt; - private int nBlocksRandomised; private int currentChar = -1; private int runLength = 0; - public CBZip2OutputStream(OutputStream inStream) throws IOException { - this(inStream, 9); - } + private int blockCRC; + private int combinedCRC; + private int allowableBlockSize; - public CBZip2OutputStream(OutputStream inStream, int inBlockSize) - throws IOException { - block = null; - quadrant = null; - zptr = null; - ftab = null; + /** + * All memory intensive stuff. + */ + private CBZip2OutputStream.Data data; - bsSetStream(inStream); + private OutputStream out; - workFactor = 50; - if (inBlockSize > 9) { - inBlockSize = 9; - } - if (inBlockSize < 1) { - inBlockSize = 1; - } - blockSize100k = inBlockSize; - allocateCompressStructures(); - initialize(); - initBlock(); + /** + * Chooses a blocksize based on the given length of the data to compress. + * + * @return The blocksize, between {@link #MIN_BLOCKSIZE} and + * {@link #MAX_BLOCKSIZE} both inclusive. For a negative + * inputLength this method returns MAX_BLOCKSIZE + * always. + * + * @param inputLength + * The length of the data which will be compressed by + * CBZip2OutputStream. + */ + public static int chooseBlockSize(long inputLength) { + return (inputLength > 0) ? (int) Math + .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; } /** + * Constructs a new CBZip2OutputStream with a blocksize of 900k. * - * modified by Oliver Merkel, 010128 + *

+ * Attention: The caller is resonsible to write the two BZip2 magic + * bytes "BZ" to the specified stream prior to calling this + * constructor. + *

* + * @param out * + * the destination stream. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws NullPointerException + * if out == null. */ - public void write(int bv) throws IOException { - int b = (256 + bv) % 256; - if (currentChar != -1) { - if (currentChar == b) { - runLength++; - if (runLength > 254) { - writeRun(); - currentChar = -1; - runLength = 0; - } - } else { - writeRun(); - runLength = 1; - currentChar = b; - } + public CBZip2OutputStream(final OutputStream out) throws IOException { + this(out, MAX_BLOCKSIZE); + } + + /** + * Constructs a new CBZip2OutputStream with specified blocksize. + * + *

+ * Attention: The caller is resonsible to write the two BZip2 magic + * bytes "BZ" to the specified stream prior to calling this + * constructor. + *

+ * + * + * @param out + * the destination stream. + * @param blockSize + * the blockSize as 100k units. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws IllegalArgumentException + * if (blockSize < 1) || (blockSize > 9). + * @throws NullPointerException + * if out == null. + * + * @see #MIN_BLOCKSIZE + * @see #MAX_BLOCKSIZE + */ + public CBZip2OutputStream(final OutputStream out, final int blockSize) + throws IOException { + super(); + + if (blockSize < 1) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") < 1"); + } + if (blockSize > 9) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") > 9"); + } + + this.blockSize100k = blockSize; + this.out = out; + init(); + } + + public void write(final int b) throws IOException { + if (this.out != null) { + write0(b); } else { - currentChar = b; - runLength++; + throw new IOException("closed"); } } private void writeRun() throws IOException { - if (last < allowableBlockSize) { - inUse[currentChar] = true; - for (int i = 0; i < runLength; i++) { - mCrc.updateCRC((char) currentChar); - } - switch (runLength) { + final int lastShadow = this.last; + + if (lastShadow < this.allowableBlockSize) { + final int currentCharShadow = this.currentChar; + final Data dataShadow = this.data; + dataShadow.inUse[currentCharShadow] = true; + final byte ch = (byte) currentCharShadow; + + int runLengthShadow = this.runLength; + this.crc.updateCRC(currentCharShadow, runLengthShadow); + + switch (runLengthShadow) { case 1: - last++; - block[last + 1] = (char) currentChar; + dataShadow.block[lastShadow + 2] = ch; + this.last = lastShadow + 1; break; + case 2: - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; + dataShadow.block[lastShadow + 2] = ch; + dataShadow.block[lastShadow + 3] = ch; + this.last = lastShadow + 2; break; - case 3: - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; + + case 3: { + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + this.last = lastShadow + 3; + } break; - default: - inUse[runLength - 4] = true; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) (runLength - 4); + + default: { + runLengthShadow -= 4; + dataShadow.inUse[runLengthShadow] = true; + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + block[lastShadow + 5] = ch; + block[lastShadow + 6] = (byte) runLengthShadow; + this.last = lastShadow + 5; + } break; + } } else { endBlock(); @@ -370,113 +714,116 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { } } - boolean closed = false; - + /** + * Overriden to close the stream. + */ protected void finalize() throws Throwable { - close(); + finish(); super.finalize(); } - public void close() throws IOException { - if (closed) { - return; + + public void finish() throws IOException { + if (out != null) { + try { + if (this.runLength > 0) { + writeRun(); + } + this.currentChar = -1; + endBlock(); + endCompression(); + } finally { + this.out = null; + this.data = null; + } } - finish(); - super.close(); - bsStream.close(); - closed = true; } - - protected void finish() throws IOException { - if (closed) { - return; - } - if (runLength > 0) { - writeRun(); + public void close() throws IOException { + if (out != null) { + OutputStream outShadow = this.out; + finish(); + outShadow.close(); } - currentChar = -1; - endBlock(); - endCompression(); } public void flush() throws IOException { - super.flush(); - bsStream.flush(); + OutputStream outShadow = this.out; + if (outShadow != null) { + outShadow.flush(); + } } - private int blockCRC, combinedCRC; + private void init() throws IOException { + // write magic: done by caller who created this stream + // this.out.write('B'); + // this.out.write('Z'); - private void initialize() throws IOException { - bytesOut = 0; - nBlocksRandomised = 0; + this.data = new Data(this.blockSize100k); - /* Write `magic' bytes h indicating file-format == huffmanised, - followed by a digit indicating blockSize100k. - */ - bsPutUChar('h'); - bsPutUChar('0' + blockSize100k); + /* + * Write `magic' bytes h indicating file-format == huffmanised, followed + * by a digit indicating blockSize100k. + */ + bsPutUByte('h'); + bsPutUByte('0' + this.blockSize100k); - combinedCRC = 0; + this.combinedCRC = 0; + initBlock(); } - private int allowableBlockSize; - private void initBlock() { - // blockNo++; - mCrc.initialiseCRC(); - last = -1; - // ch = 0; + // blockNo++; + this.crc.initialiseCRC(); + this.last = -1; + // ch = 0; - for (int i = 0; i < 256; i++) { + boolean[] inUse = this.data.inUse; + for (int i = 256; --i >= 0;) { inUse[i] = false; } /* 20 is just a paranoia constant */ - allowableBlockSize = baseBlockSize * blockSize100k - 20; + this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20; } private void endBlock() throws IOException { - blockCRC = mCrc.getFinalCRC(); - combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31); - combinedCRC ^= blockCRC; - - // If the stream was empty we must skip the rest of this method. - // See bug#32200. - if (last == -1) { - return; + this.blockCRC = this.crc.getFinalCRC(); + this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); + this.combinedCRC ^= this.blockCRC; + + // empty block at end of file + if (this.last == -1) { + return; } - + /* sort the block and establish posn of original string */ - doReversibleTransformation(); + blockSort(); /* - A 6-byte block header, the value chosen arbitrarily - as 0x314159265359 :-). A 32 bit value does not really - give a strong enough guarantee that the value will not - appear by chance in the compressed datastream. Worst-case - probability of this event, for a 900k block, is about - 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. - For a compressed file of size 100Gb -- about 100000 blocks -- - only a 48-bit marker will do. NB: normal compression/ - decompression do *not* rely on these statistical properties. - They are only important when trying to recover blocks from - damaged files. - */ - bsPutUChar(0x31); - bsPutUChar(0x41); - bsPutUChar(0x59); - bsPutUChar(0x26); - bsPutUChar(0x53); - bsPutUChar(0x59); + * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 + * :-). A 32 bit value does not really give a strong enough guarantee + * that the value will not appear by chance in the compressed + * datastream. Worst-case probability of this event, for a 900k block, + * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 + * bits. For a compressed file of size 100Gb -- about 100000 blocks -- + * only a 48-bit marker will do. NB: normal compression/ decompression + * donot rely on these statistical properties. They are only important + * when trying to recover blocks from damaged files. + */ + bsPutUByte(0x31); + bsPutUByte(0x41); + bsPutUByte(0x59); + bsPutUByte(0x26); + bsPutUByte(0x53); + bsPutUByte(0x59); /* Now the block's CRC, so it is in a known place. */ - bsPutint(blockCRC); + bsPutInt(this.blockCRC); /* Now a single bit indicating randomisation. */ - if (blockRandomised) { + if (this.blockRandomised) { bsW(1, 1); - nBlocksRandomised++; } else { bsW(1, 0); } @@ -487,32 +834,79 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { private void endCompression() throws IOException { /* - Now another magic 48-bit number, 0x177245385090, to - indicate the end of the last block. (sqrt(pi), if - you want to know. I did want to use e, but it contains - too much repetition -- 27 18 28 18 28 46 -- for me - to feel statistically comfortable. Call me paranoid.) - */ - bsPutUChar(0x17); - bsPutUChar(0x72); - bsPutUChar(0x45); - bsPutUChar(0x38); - bsPutUChar(0x50); - bsPutUChar(0x90); - - bsPutint(combinedCRC); - + * Now another magic 48-bit number, 0x177245385090, to indicate the end + * of the last block. (sqrt(pi), if you want to know. I did want to use + * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me + * to feel statistically comfortable. Call me paranoid.) + */ + bsPutUByte(0x17); + bsPutUByte(0x72); + bsPutUByte(0x45); + bsPutUByte(0x38); + bsPutUByte(0x50); + bsPutUByte(0x90); + + bsPutInt(this.combinedCRC); bsFinishedWithStream(); } - private void hbAssignCodes (int[] code, char[] length, int minLen, - int maxLen, int alphaSize) { - int n, vec, i; + /** + * Returns the blocksize parameter specified at construction time. + */ + public final int getBlockSize() { + return this.blockSize100k; + } + + public void write(final byte[] buf, int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > buf.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > buf.length(" + + buf.length + ")."); + } + if (this.out == null) { + throw new IOException("stream closed"); + } + + for (int hi = offs + len; offs < hi;) { + write0(buf[offs++]); + } + } - vec = 0; - for (n = minLen; n <= maxLen; n++) { - for (i = 0; i < alphaSize; i++) { - if (length[i] == n) { + private void write0(int b) throws IOException { + if (this.currentChar != -1) { + b &= 0xff; + if (this.currentChar == b) { + if (++this.runLength > 254) { + writeRun(); + this.currentChar = -1; + this.runLength = 0; + } + // else nothing to do + } else { + writeRun(); + this.runLength = 1; + this.currentChar = b; + } + } else { + this.currentChar = b & 0xff; + this.runLength++; + } + } + + private static void hbAssignCodes(final int[] code, final byte[] length, + final int minLen, final int maxLen, + final int alphaSize) { + int vec = 0; + for (int n = minLen; n <= maxLen; n++) { + for (int i = 0; i < alphaSize; i++) { + if ((length[i] & 0xff) == n) { code[i] = vec; vec++; } @@ -521,1061 +915,1063 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { } } - private void bsSetStream(OutputStream f) { - bsStream = f; - bsLive = 0; - bsBuff = 0; - bytesOut = 0; - } - private void bsFinishedWithStream() throws IOException { - while (bsLive > 0) { - int ch = (bsBuff >> 24); - try { - bsStream.write(ch); // write 8-bit - } catch (IOException e) { - throw e; - } - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; + while (this.bsLive > 0) { + int ch = this.bsBuff >> 24; + this.out.write(ch); // write 8-bit + this.bsBuff <<= 8; + this.bsLive -= 8; } } - private void bsW(int n, int v) throws IOException { - while (bsLive >= 8) { - int ch = (bsBuff >> 24); - try { - bsStream.write(ch); // write 8-bit - } catch (IOException e) { - throw e; - } - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; + private void bsW(final int n, final int v) throws IOException { + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; } - bsBuff |= (v << (32 - bsLive - n)); - bsLive += n; + + this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); + this.bsLive = bsLiveShadow + n; } - private void bsPutUChar(int c) throws IOException { + private void bsPutUByte(final int c) throws IOException { bsW(8, c); } - private void bsPutint(int u) throws IOException { + private void bsPutInt(final int u) throws IOException { bsW(8, (u >> 24) & 0xff); bsW(8, (u >> 16) & 0xff); - bsW(8, (u >> 8) & 0xff); - bsW(8, u & 0xff); - } - - private void bsPutIntVS(int numBits, int c) throws IOException { - bsW(numBits, c); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); } private void sendMTFValues() throws IOException { - char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; + final byte[][] len = this.data.sendMTFValues_len; + final int alphaSize = this.nInUse + 2; - int v, t, i, j, gs, ge, totc, bt, bc, iter; - int nSelectors = 0, alphaSize, minLen, maxLen, selCtr; - int nGroups; //, nBytes; - - alphaSize = nInUse + 2; - for (t = 0; t < N_GROUPS; t++) { - for (v = 0; v < alphaSize; v++) { - len[t][v] = (char) GREATER_ICOST; + for (int t = N_GROUPS; --t >= 0;) { + byte[] len_t = len[t]; + for (int v = alphaSize; --v >= 0;) { + len_t[v] = GREATER_ICOST; } } /* Decide how many coding tables to use */ - if (nMTF <= 0) { - panic(); - } + // assert (this.nMTF > 0) : this.nMTF; + final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 + : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; - if (nMTF < 200) { - nGroups = 2; - } else if (nMTF < 600) { - nGroups = 3; - } else if (nMTF < 1200) { - nGroups = 4; - } else if (nMTF < 2400) { - nGroups = 5; - } else { - nGroups = 6; - } + /* Generate an initial set of coding tables */ + sendMTFValues0(nGroups, alphaSize); - /* Generate an initial set of coding tables */ { - int nPart, remF, tFreq, aFreq; - - nPart = nGroups; - remF = nMTF; - gs = 0; - while (nPart > 0) { - tFreq = remF / nPart; - ge = gs - 1; - aFreq = 0; - while (aFreq < tFreq && ge < alphaSize - 1) { - ge++; - aFreq += mtfFreq[ge]; - } + /* + * Iterate up to N_ITERS times to improve the tables. + */ + final int nSelectors = sendMTFValues1(nGroups, alphaSize); - if (ge > gs && nPart != nGroups && nPart != 1 - && ((nGroups - nPart) % 2 != 0)) { - aFreq -= mtfFreq[ge]; - ge--; - } + /* Compute MTF values for the selectors. */ + sendMTFValues2(nGroups, nSelectors); - for (v = 0; v < alphaSize; v++) { - if (v >= gs && v <= ge) { - len[nPart - 1][v] = (char) LESSER_ICOST; - } else { - len[nPart - 1][v] = (char) GREATER_ICOST; - } - } + /* Assign actual codes for the tables. */ + sendMTFValues3(nGroups, alphaSize); - nPart--; - gs = ge + 1; - remF -= aFreq; + /* Transmit the mapping table. */ + sendMTFValues4(); + + /* Now the selectors. */ + sendMTFValues5(nGroups, nSelectors); + + /* Now the coding tables. */ + sendMTFValues6(nGroups, alphaSize); + + /* And finally, the block data proper */ + sendMTFValues7(nSelectors); + } + + private void sendMTFValues0(final int nGroups, final int alphaSize) { + final byte[][] len = this.data.sendMTFValues_len; + final int[] mtfFreq = this.data.mtfFreq; + + int remF = this.nMTF; + int gs = 0; + + for (int nPart = nGroups; nPart > 0; nPart--) { + final int tFreq = remF / nPart; + int ge = gs - 1; + int aFreq = 0; + + for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { + aFreq += mtfFreq[++ge]; } - } - int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; - int[] fave = new int[N_GROUPS]; - short[] cost = new short[N_GROUPS]; - /* - Iterate up to N_ITERS times to improve the tables. - */ - for (iter = 0; iter < N_ITERS; iter++) { - for (t = 0; t < nGroups; t++) { - fave[t] = 0; + if ((ge > gs) && (nPart != nGroups) && (nPart != 1) + && (((nGroups - nPart) & 1) != 0)) { + aFreq -= mtfFreq[ge--]; } - for (t = 0; t < nGroups; t++) { - for (v = 0; v < alphaSize; v++) { - rfreq[t][v] = 0; + final byte[] len_np = len[nPart - 1]; + for (int v = alphaSize; --v >= 0;) { + if ((v >= gs) && (v <= ge)) { + len_np[v] = LESSER_ICOST; + } else { + len_np[v] = GREATER_ICOST; + } + } + + gs = ge + 1; + remF -= aFreq; + } + } + + private int sendMTFValues1(final int nGroups, final int alphaSize) { + final Data dataShadow = this.data; + final int[][] rfreq = dataShadow.sendMTFValues_rfreq; + final int[] fave = dataShadow.sendMTFValues_fave; + final short[] cost = dataShadow.sendMTFValues_cost; + final char[] sfmap = dataShadow.sfmap; + final byte[] selector = dataShadow.selector; + final byte[][] len = dataShadow.sendMTFValues_len; + final byte[] len_0 = len[0]; + final byte[] len_1 = len[1]; + final byte[] len_2 = len[2]; + final byte[] len_3 = len[3]; + final byte[] len_4 = len[4]; + final byte[] len_5 = len[5]; + final int nMTFShadow = this.nMTF; + + int nSelectors = 0; + + for (int iter = 0; iter < N_ITERS; iter++) { + for (int t = nGroups; --t >= 0;) { + fave[t] = 0; + int[] rfreqt = rfreq[t]; + for (int i = alphaSize; --i >= 0;) { + rfreqt[i] = 0; } } nSelectors = 0; - totc = 0; - gs = 0; - while (true) { + for (int gs = 0; gs < this.nMTF;) { /* Set group start & end marks. */ - if (gs >= nMTF) { - break; - } - ge = gs + G_SIZE - 1; - if (ge >= nMTF) { - ge = nMTF - 1; - } /* - Calculate the cost of this group as coded - by each of the coding tables. - */ - for (t = 0; t < nGroups; t++) { - cost[t] = 0; - } - - if (nGroups == 6) { - short cost0, cost1, cost2, cost3, cost4, cost5; - cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; - for (i = gs; i <= ge; i++) { - short icv = szptr[i]; - cost0 += len[0][icv]; - cost1 += len[1][icv]; - cost2 += len[2][icv]; - cost3 += len[3][icv]; - cost4 += len[4][icv]; - cost5 += len[5][icv]; + * Calculate the cost of this group as coded by each of the + * coding tables. + */ + + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + + if (nGroups == N_GROUPS) { + // unrolled version of the else-block + + short cost0 = 0; + short cost1 = 0; + short cost2 = 0; + short cost3 = 0; + short cost4 = 0; + short cost5 = 0; + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + cost0 += len_0[icv] & 0xff; + cost1 += len_1[icv] & 0xff; + cost2 += len_2[icv] & 0xff; + cost3 += len_3[icv] & 0xff; + cost4 += len_4[icv] & 0xff; + cost5 += len_5[icv] & 0xff; } + cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; + } else { - for (i = gs; i <= ge; i++) { - short icv = szptr[i]; - for (t = 0; t < nGroups; t++) { - cost[t] += len[t][icv]; + for (int t = nGroups; --t >= 0;) { + cost[t] = 0; + } + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + for (int t = nGroups; --t >= 0;) { + cost[t] += len[t][icv] & 0xff; } } } /* - Find the coding table which is best for this group, - and record its identity in the selector table. - */ - bc = 999999999; - bt = -1; - for (t = 0; t < nGroups; t++) { - if (cost[t] < bc) { - bc = cost[t]; + * Find the coding table which is best for this group, and + * record its identity in the selector table. + */ + int bt = -1; + for (int t = nGroups, bc = 999999999; --t >= 0;) { + final int cost_t = cost[t]; + if (cost_t < bc) { + bc = cost_t; bt = t; } } - totc += bc; + fave[bt]++; - selector[nSelectors] = (char) bt; + selector[nSelectors] = (byte) bt; nSelectors++; /* - Increment the symbol frequencies for the selected table. - */ - for (i = gs; i <= ge; i++) { - rfreq[bt][szptr[i]]++; + * Increment the symbol frequencies for the selected table. + */ + final int[] rfreq_bt = rfreq[bt]; + for (int i = gs; i <= ge; i++) { + rfreq_bt[sfmap[i]]++; } gs = ge + 1; } /* - Recompute the tables based on the accumulated frequencies. - */ - for (t = 0; t < nGroups; t++) { - hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20); + * Recompute the tables based on the accumulated frequencies. + */ + for (int t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); } } - rfreq = null; - fave = null; - cost = null; + return nSelectors; + } + + private void sendMTFValues2(final int nGroups, final int nSelectors) { + // assert (nGroups < 8) : nGroups; - if (!(nGroups < 8)) { - panic(); - } - if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) { - panic(); + final Data dataShadow = this.data; + byte[] pos = dataShadow.sendMTFValues2_pos; + + for (int i = nGroups; --i >= 0;) { + pos[i] = (byte) i; } + for (int i = 0; i < nSelectors; i++) { + final byte ll_i = dataShadow.selector[i]; + byte tmp = pos[0]; + int j = 0; - /* Compute MTF values for the selectors. */ - { - char[] pos = new char[N_GROUPS]; - char ll_i, tmp2, tmp; - for (i = 0; i < nGroups; i++) { - pos[i] = (char) i; - } - for (i = 0; i < nSelectors; i++) { - ll_i = selector[i]; - j = 0; + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; tmp = pos[j]; - while (ll_i != tmp) { - j++; - tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - } - pos[0] = tmp; - selectorMtf[i] = (char) j; + pos[j] = tmp2; } - } - int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE]; + pos[0] = tmp; + dataShadow.selectorMtf[i] = (byte) j; + } + } - /* Assign actual codes for the tables. */ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (len[t][i] > maxLen) { - maxLen = len[t][i]; + private void sendMTFValues3(final int nGroups, final int alphaSize) { + int[][] code = this.data.sendMTFValues_code; + byte[][] len = this.data.sendMTFValues_len; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final byte[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final int l = len_t[i] & 0xff; + if (l > maxLen) { + maxLen = l; } - if (len[t][i] < minLen) { - minLen = len[t][i]; + if (l < minLen) { + minLen = l; } } - if (maxLen > 20) { - panic(); - } - if (minLen < 1) { - panic(); - } + + // assert (maxLen <= 20) : maxLen; + // assert (minLen >= 1) : minLen; + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); } + } - /* Transmit the mapping table. */ - { - boolean[] inUse16 = new boolean[16]; - for (i = 0; i < 16; i++) { - inUse16[i] = false; - for (j = 0; j < 16; j++) { - if (inUse[i * 16 + j]) { - inUse16[i] = true; - } + private void sendMTFValues4() throws IOException { + final boolean[] inUse = this.data.inUse; + final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; + + for (int i = 16; --i >= 0;) { + inUse16[i] = false; + final int i16 = i * 16; + for (int j = 16; --j >= 0;) { + if (inUse[i16 + j]) { + inUse16[i] = true; } } + } - //nBytes = bytesOut; - for (i = 0; i < 16; i++) { - if (inUse16[i]) { - bsW(1, 1); - } else { - bsW(1, 0); + for (int i = 0; i < 16; i++) { + bsW(1, inUse16[i] ? 1 : 0); + } + + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < 16; i++) { + if (inUse16[i]) { + final int i16 = i * 16; + for (int j = 0; j < 16; j++) { + // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + if (inUse[i16 + j]) { + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + } + bsLiveShadow++; } } + } - for (i = 0; i < 16; i++) { - if (inUse16[i]) { - for (j = 0; j < 16; j++) { - if (inUse[i * 16 + j]) { - bsW(1, 1); - } else { - bsW(1, 0); - } - } + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues5(final int nGroups, final int nSelectors) + throws IOException { + bsW(3, nGroups); + bsW(15, nSelectors); + + final OutputStream outShadow = this.out; + final byte[] selectorMtf = this.data.selectorMtf; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < nSelectors; i++) { + for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { + // inlined: bsW(1, 1); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; } + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + bsLiveShadow++; } + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; } - /* Now the selectors. */ - //nBytes = bytesOut; - bsW (3, nGroups); - bsW (15, nSelectors); - for (i = 0; i < nSelectors; i++) { - for (j = 0; j < selectorMtf[i]; j++) { - bsW(1, 1); + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues6(final int nGroups, final int alphaSize) + throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final OutputStream outShadow = this.out; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int t = 0; t < nGroups; t++) { + byte[] len_t = len[t]; + int curr = len_t[0] & 0xff; + + // inlined: bsW(5, curr); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; } - bsW(1, 0); - } + bsBuffShadow |= curr << (32 - bsLiveShadow - 5); + bsLiveShadow += 5; + + for (int i = 0; i < alphaSize; i++) { + int lti = len_t[i] & 0xff; + while (curr < lti) { + // inlined: bsW(2, 2); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; - /* Now the coding tables. */ - //nBytes = bytesOut; - - for (t = 0; t < nGroups; t++) { - int curr = len[t][0]; - bsW(5, curr); - for (i = 0; i < alphaSize; i++) { - while (curr < len[t][i]) { - bsW(2, 2); curr++; /* 10 */ } - while (curr > len[t][i]) { - bsW(2, 3); + + while (curr > lti) { + // inlined: bsW(2, 3); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + curr--; /* 11 */ } - bsW (1, 0); + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; } } - /* And finally, the block data proper */ - //nBytes = bytesOut; - selCtr = 0; - gs = 0; - while (true) { - if (gs >= nMTF) { - break; - } - ge = gs + G_SIZE - 1; - if (ge >= nMTF) { - ge = nMTF - 1; - } - for (i = gs; i <= ge; i++) { - bsW(len[selector[selCtr]][szptr[i]], - code[selector[selCtr]][szptr[i]]); + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues7(final int nSelectors) throws IOException { + final Data dataShadow = this.data; + final byte[][] len = dataShadow.sendMTFValues_len; + final int[][] code = dataShadow.sendMTFValues_code; + final OutputStream outShadow = this.out; + final byte[] selector = dataShadow.selector; + final char[] sfmap = dataShadow.sfmap; + final int nMTFShadow = this.nMTF; + + int selCtr = 0; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int gs = 0; gs < nMTFShadow;) { + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + final int selector_selCtr = selector[selCtr] & 0xff; + final int[] code_selCtr = code[selector_selCtr]; + final byte[] len_selCtr = len[selector_selCtr]; + + while (gs <= ge) { + final int sfmap_i = sfmap[gs]; + + // + // inlined: bsW(len_selCtr[sfmap_i] & 0xff, + // code_selCtr[sfmap_i]); + // + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + final int n = len_selCtr[sfmap_i] & 0xFF; + bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); + bsLiveShadow += n; + + gs++; } gs = ge + 1; selCtr++; } - if (!(selCtr == nSelectors)) { - panic(); - } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; } - private void moveToFrontCodeAndSend () throws IOException { - bsPutIntVS(24, origPtr); + private void moveToFrontCodeAndSend() throws IOException { + bsW(24, this.origPtr); generateMTFValues(); sendMTFValues(); } - private OutputStream bsStream; - - private void simpleSort(int lo, int hi, int d) { - int i, j, h, bigN, hp; - int v; - - bigN = hi - lo + 1; + /** + * This is the most hammered method of this class. + * + *

+ * This is the version using unrolled loops. Normally I never use such ones + * in Java code. The unrolling has shown a noticable performance improvement + * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the + * JIT compiler of the vm. + *

+ */ + private boolean mainSimpleSort(final Data dataShadow, final int lo, + final int hi, final int d) { + final int bigN = hi - lo + 1; if (bigN < 2) { - return; + return this.firstAttempt && (this.workDone > this.workLimit); } - hp = 0; - while (incs[hp] < bigN) { + int hp = 0; + while (INCS[hp] < bigN) { hp++; } - hp--; - for (; hp >= 0; hp--) { - h = incs[hp]; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final byte[] block = dataShadow.block; + final int lastShadow = this.last; + final int lastPlus1 = lastShadow + 1; + final boolean firstAttemptShadow = this.firstAttempt; + final int workLimitShadow = this.workLimit; + int workDoneShadow = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. + + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, lastShadow); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { + break HAMMER; + } + } else { + onceRunned = true; + } - i = lo + h; - while (true) { - /* copy 1 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] == block[(i2 += 6)]) { + int x = lastShadow; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] == block[i2 + 1]) { + if (quadrant[i1] == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { + i2 -= lastPlus1; + } + workDoneShadow++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + else { + if ((block[i1] & 0xff) > (block[i2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } + } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } - /* copy 2 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; + } // HAMMER + // end inline mainGTU - /* copy 3 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } + fmap[j] = v; } - zptr[j] = v; - i++; - if (workDone > workLimit && firstAttempt) { - return; + if (firstAttemptShadow && (i <= hi) + && (workDoneShadow > workLimitShadow)) { + break HP; } } } - } - private void vswap(int p1, int p2, int n) { - int temp = 0; - while (n > 0) { - temp = zptr[p1]; - zptr[p1] = zptr[p2]; - zptr[p2] = temp; - p1++; - p2++; - n--; - } + this.workDone = workDoneShadow; + return firstAttemptShadow && (workDoneShadow > workLimitShadow); } - private char med3(char a, char b, char c) { - char t; - if (a > b) { - t = a; - a = b; - b = t; - } - if (b > c) { - b = c; + private static void vswap(int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; } - if (a > b) { - b = a; - } - return b; } - private static class StackElem { - int ll; - int hh; - int dd; + private static byte med3(byte a, byte b, byte c) { + return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c + : a); } - private void qSort3(int loSt, int hiSt, int dSt, StackElem[] stack) { - int unLo, unHi, ltLo, gtHi, med, n, m; - int sp, lo, hi, d; - - sp = 0; + private void blockSort() { + this.workLimit = WORK_FACTOR * this.last; + this.workDone = 0; + this.blockRandomised = false; + this.firstAttempt = true; + mainSort(); - stack[sp].ll = loSt; - stack[sp].hh = hiSt; - stack[sp].dd = dSt; - sp++; + if (this.firstAttempt && (this.workDone > this.workLimit)) { + randomiseBlock(); + this.workLimit = this.workDone = 0; + this.firstAttempt = false; + mainSort(); + } - while (sp > 0) { - if (sp >= QSORT_STACK_SIZE) { - panic(); + int[] fmap = this.data.fmap; + this.origPtr = -1; + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + if (fmap[i] == 0) { + this.origPtr = i; + break; } + } - sp--; - lo = stack[sp].ll; - hi = stack[sp].hh; - d = stack[sp].dd; + // assert (this.origPtr != -1) : this.origPtr; + } - if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { - simpleSort(lo, hi, d); - if (workDone > workLimit && firstAttempt) { + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final Data dataShadow, final int loSt, + final int hiSt, final int dSt) { + final int[] stack_ll = dataShadow.stack_ll; + final int[] stack_hh = dataShadow.stack_hh; + final int[] stack_dd = dataShadow.stack_dd; + final int[] fmap = dataShadow.fmap; + final byte[] block = dataShadow.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(dataShadow, lo, hi, d)) { return; } - continue; - } - - med = med3(block[zptr[lo] + d + 1], - block[zptr[hi ] + d + 1], - block[zptr[(lo + hi) >>> 1] + d + 1]); + } else { + final int d1 = d + 1; + final int med = med3(block[fmap[lo] + d1], + block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; - unLo = ltLo = lo; - unHi = gtHi = hi; + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; - while (true) { - while (true) { - if (unLo > unHi) { - break; - } - n = ((int) block[zptr[unLo] + d + 1]) - med; - if (n == 0) { - int temp = 0; - temp = zptr[unLo]; - zptr[unLo] = zptr[ltLo]; - zptr[ltLo] = temp; - ltLo++; - unLo++; - continue; - } - if (n > 0) { - break; - } - unLo++; - } while (true) { - if (unLo > unHi) { - break; + while (unLo <= unHi) { + final int n = ((int) block[fmap[unLo] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } } - n = ((int) block[zptr[unHi] + d + 1]) - med; - if (n == 0) { - int temp = 0; - temp = zptr[unHi]; - zptr[unHi] = zptr[gtHi]; - zptr[gtHi] = temp; - gtHi--; - unHi--; - continue; + + while (unLo <= unHi) { + final int n = ((int) block[fmap[unHi] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } } - if (n < 0) { + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { break; } - unHi--; } - if (unLo > unHi) { - break; - } - int temp = 0; - temp = zptr[unLo]; - zptr[unLo] = zptr[unHi]; - zptr[unHi] = temp; - unLo++; - unHi--; - } - if (gtHi < ltLo) { - stack[sp].ll = lo; - stack[sp].hh = hi; - stack[sp].dd = d + 1; - sp++; - continue; + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) + : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) + : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } } - - n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); - vswap(lo, unLo - n, n); - m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi); - vswap(unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack[sp].ll = lo; - stack[sp].hh = n; - stack[sp].dd = d; - sp++; - - stack[sp].ll = n + 1; - stack[sp].hh = m - 1; - stack[sp].dd = d + 1; - sp++; - - stack[sp].ll = m; - stack[sp].hh = hi; - stack[sp].dd = d; - sp++; } } private void mainSort() { - int i, j, ss, sb; - int[] runningOrder = new int[256]; - int[] copy = new int[256]; - boolean[] bigDone = new boolean[256]; - int c1, c2; - int numQSorted; + final Data dataShadow = this.data; + final int[] runningOrder = dataShadow.mainSort_runningOrder; + final int[] copy = dataShadow.mainSort_copy; + final boolean[] bigDone = dataShadow.mainSort_bigDone; + final int[] ftab = dataShadow.ftab; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final int lastShadow = this.last; + final int workLimitShadow = this.workLimit; + final boolean firstAttemptShadow = this.firstAttempt; + + // Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } /* - In the various block-sized structures, live data runs - from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, - set up the overshoot area for block. - */ - - // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); - - for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) { - block[last + i + 2] = block[(i % (last + 1)) + 1]; + * In the various block-sized structures, live data runs from 0 to + * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area + * for block. + */ + for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; } - for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) { + for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) { quadrant[i] = 0; } + block[0] = block[lastShadow + 1]; - block[0] = (char) (block[last + 1]); - - if (last < 4000) { - /* - Use simpleSort(), since the full sorting mechanism - has quite a large constant overhead. - */ - for (i = 0; i <= last; i++) { - zptr[i] = i; - } - firstAttempt = false; - workDone = workLimit = 0; - simpleSort(0, last, 0); - } else { - numQSorted = 0; - for (i = 0; i <= 255; i++) { - bigDone[i] = false; - } - - for (i = 0; i <= 65536; i++) { - ftab[i] = 0; - } - - c1 = block[0]; - for (i = 0; i <= last; i++) { - c2 = block[i + 1]; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } + // Complete the initial radix sort: - for (i = 1; i <= 65536; i++) { - ftab[i] += ftab[i - 1]; - } + int c1 = block[0] & 0xff; + for (int i = 0; i <= lastShadow; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } - c1 = block[1]; - for (i = 0; i < last; i++) { - c2 = block[i + 2]; - j = (c1 << 8) + c2; - c1 = c2; - ftab[j]--; - zptr[ftab[j]] = i; - } + for (int i = 1; i <= 65536; i++) + ftab[i] += ftab[i - 1]; - j = ((block[last + 1]) << 8) + (block[1]); - ftab[j]--; - zptr[ftab[j]] = last; + c1 = block[1] & 0xff; + for (int i = 0; i < lastShadow; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } - /* - Now ftab contains the first loc of every small bucket. - Calculate the running order, from smallest to largest - big bucket. - */ + fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; - for (i = 0; i <= 255; i++) { - runningOrder[i] = i; - } + /* + * Now ftab contains the first loc of every small bucket. Calculate the + * running order, from smallest to largest big bucket. + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } - { - int vv; - int h = 1; - do { - h = 3 * h + 1; - } - while (h <= 256); - do { - h = h / 3; - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while ((ftab[((runningOrder[j - h]) + 1) << 8] - - ftab[(runningOrder[j - h]) << 8]) - > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) { - runningOrder[j] = runningOrder[j - h]; - j = j - h; - if (j <= (h - 1)) { - break; - } - } - runningOrder[j] = vv; + for (int h = 364; h != 1;) { + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j + - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; } - } while (h != 1); - } - - StackElem[] stack = new StackElem[QSORT_STACK_SIZE]; - for (int count = 0; count < QSORT_STACK_SIZE; count++) { - stack[count] = new StackElem(); + } + runningOrder[j] = vv; } + } + /* + * The main sorting loop. + */ + for (int i = 0; i <= 255; i++) { /* - The main sorting loop. - */ - for (i = 0; i <= 255; i++) { - - /* - Process big buckets, starting with the least full. - */ - ss = runningOrder[i]; + * Process big buckets, starting with the least full. + */ + final int ss = runningOrder[i]; - /* - Complete the big bucket [ss] by quicksorting - any unsorted small buckets [ss, j]. Hopefully - previous pointer-scanning phases have already - completed many of the small buckets [ss, j], so - we don't have to sort them at all. - */ - for (j = 0; j <= 255; j++) { - sb = (ss << 8) + j; - if (!((ftab[sb] & SETMASK) == SETMASK)) { - int lo = ftab[sb] & CLEARMASK; - int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - qSort3(lo, hi, 2, stack); - numQSorted += (hi - lo + 1); - if (workDone > workLimit && firstAttempt) { - return; - } + // Step 1: + /* + * Complete the big bucket [ss] by quicksorting any unsorted small + * buckets [ss, j]. Hopefully previous pointer-scanning phases have + * already completed many of the small buckets [ss, j], so we don't + * have to sort them at all. + */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(dataShadow, lo, hi, 2); + if (firstAttemptShadow + && (this.workDone > workLimitShadow)) { + return; } - ftab[sb] |= SETMASK; } + ftab[sb] = ftab_sb | SETMASK; } + } - /* - The ss big bucket is now done. Record this fact, - and update the quadrant descriptors. Remember to - update quadrants in the overshoot area too, if - necessary. The "if (i < 255)" test merely skips - this updating for the last bucket processed, since - updating for the last bucket is pointless. - */ - bigDone[ss] = true; - - if (i < 255) { - int bbStart = ftab[ss << 8] & CLEARMASK; - int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - int shifts = 0; - - while ((bbSize >> shifts) > 65534) { - shifts++; - } + // Step 2: + // Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. - for (j = 0; j < bbSize; j++) { - int a2update = zptr[bbStart + j]; - int qVal = (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) { - quadrant[a2update + last + 1] = qVal; - } - } + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } - if (!(((bbSize - 1) >> shifts) <= 65535)) { - panic(); - } + for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); + copy[c1]++; } + } - /* - Now scan this big bucket so as to synthesise the - sorted order for small buckets [t, ss] for all t != ss. - */ - for (j = 0; j <= 255; j++) { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - } + for (int j = 256; --j >= 0;) + ftab[(j << 8) + ss] |= SETMASK; - for (j = ftab[ss << 8] & CLEARMASK; - j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) { - c1 = block[zptr[j]]; - if (!bigDone[c1]) { - zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; - copy[c1]++; - } + // Step 3: + /* + * The ss big bucket is now done. Record this fact, and update the + * quadrant descriptors. Remember to update quadrants in the + * overshoot area too, if necessary. The "if (i < 255)" test merely + * skips this updating for the last bucket processed, since updating + * for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; } - for (j = 0; j <= 255; j++) { - ftab[(j << 8) + ss] |= SETMASK; + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + lastShadow + 1] = qVal; + } } } + } } private void randomiseBlock() { - int i; - int rNToGo = 0; - int rTPos = 0; - for (i = 0; i < 256; i++) { + final boolean[] inUse = this.data.inUse; + final byte[] block = this.data.block; + final int lastShadow = this.last; + + for (int i = 256; --i >= 0;) inUse[i] = false; - } - for (i = 0; i <= last; i++) { + int rNToGo = 0; + int rTPos = 0; + for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { if (rNToGo == 0) { - rNToGo = (char) rNums[rTPos]; - rTPos++; - if (rTPos == 512) { + rNToGo = (char) BZip2Constants.rNums[rTPos]; + if (++rTPos == 512) { rTPos = 0; } } - rNToGo--; - block[i + 1] ^= ((rNToGo == 1) ? 1 : 0); - // handle 16 bit signed numbers - block[i + 1] &= 0xFF; - - inUse[block[i + 1]] = true; - } - } - - private void doReversibleTransformation() { - int i; - workLimit = workFactor * last; - workDone = 0; - blockRandomised = false; - firstAttempt = true; - - mainSort(); - - if (workDone > workLimit && firstAttempt) { - randomiseBlock(); - workLimit = workDone = 0; - blockRandomised = true; - firstAttempt = false; - mainSort(); - } + rNToGo--; + block[j] ^= ((rNToGo == 1) ? 1 : 0); - origPtr = -1; - for (i = 0; i <= last; i++) { - if (zptr[i] == 0) { - origPtr = i; - break; - } + // handle 16 bit signed numbers + inUse[block[j] & 0xff] = true; } - if (origPtr == -1) { - panic(); - } + this.blockRandomised = true; } - private boolean fullGtU(int i1, int i2) { - int k; - char c1, c2; - int s1, s2; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - k = last + 1; - - do { - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - if (i1 > last) { - i1 -= last; - i1--; - } - if (i2 > last) { - i2 -= last; - i2--; + private void generateMTFValues() { + final int lastShadow = this.last; + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] sfmap = dataShadow.sfmap; + final int[] mtfFreq = dataShadow.mtfFreq; + final byte[] unseqToSeq = dataShadow.unseqToSeq; + final byte[] yy = dataShadow.generateMTFValues_yy; + + // make maps + int nInUseShadow = 0; + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (byte) nInUseShadow; + nInUseShadow++; } - - k -= 4; - workDone++; - } while (k >= 0); - - return false; - } - - /* - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. - */ - private int[] incs = {1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484}; - - private void allocateCompressStructures () { - int n = baseBlockSize * blockSize100k; - block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)]; - quadrant = new int[(n + NUM_OVERSHOOT_BYTES)]; - zptr = new int[n]; - ftab = new int[65537]; - - if (block == null || quadrant == null || zptr == null - || ftab == null) { - //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; - //compressOutOfMemory ( totalDraw, n ); } + this.nInUse = nInUseShadow; - /* - The back end needs a place to store the MTF values - whilst it calculates the coding tables. We could - put them in the zptr array. However, these values - will fit in a short, so we overlay szptr at the - start of zptr, in the hope of reducing the number - of cache misses induced by the multiple traversals - of the MTF values when calculating coding tables. - Seems to improve compression speed by about 1%. - */ - // szptr = zptr; - - - szptr = new short[2 * n]; - } + final int eob = nInUseShadow + 1; - private void generateMTFValues() { - char[] yy = new char[256]; - int i, j; - char tmp; - char tmp2; - int zPend; - int wr; - int EOB; - - makeMaps(); - EOB = nInUse + 1; - - for (i = 0; i <= EOB; i++) { + for (int i = eob; i >= 0; i--) { mtfFreq[i] = 0; } - wr = 0; - zPend = 0; - for (i = 0; i < nInUse; i++) { - yy[i] = (char) i; + for (int i = nInUseShadow; --i >= 0;) { + yy[i] = (byte) i; } + int wr = 0; + int zPend = 0; - for (i = 0; i <= last; i++) { - char ll_i; - - ll_i = unseqToSeq[block[zptr[i]]]; + for (int i = 0; i <= lastShadow; i++) { + final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; + byte tmp = yy[0]; + int j = 0; - j = 0; - tmp = yy[j]; while (ll_i != tmp) { j++; - tmp2 = tmp; + byte tmp2 = tmp; tmp = yy[j]; yy[j] = tmp2; } @@ -1587,26 +1983,25 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { if (zPend > 0) { zPend--; while (true) { - switch (zPend % 2) { - case 0: - szptr[wr] = (short) RUNA; + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; - break; - case 1: - szptr[wr] = (short) RUNB; + } else { + sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; - break; } - if (zPend < 2) { + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { break; } - zPend = (zPend - 2) / 2; } zPend = 0; } - szptr[wr] = (short) (j + 1); + sfmap[wr] = (char) (j + 1); wr++; mtfFreq[j + 1]++; } @@ -1615,29 +2010,90 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { if (zPend > 0) { zPend--; while (true) { - switch (zPend % 2) { - case 0: - szptr[wr] = (short) RUNA; + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; - break; - case 1: - szptr[wr] = (short) RUNB; + } else { + sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; - break; } - if (zPend < 2) { + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { break; } - zPend = (zPend - 2) / 2; } } - szptr[wr] = (short) EOB; - wr++; - mtfFreq[EOB]++; + sfmap[wr] = (char) eob; + mtfFreq[eob]++; + this.nMTF = wr + 1; + } + + private static final class Data extends Object { + + // with blockSize 900k + final boolean[] inUse = new boolean[256]; // 256 byte + final byte[] unseqToSeq = new byte[256]; // 256 byte + final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + final byte[] generateMTFValues_yy = new byte[256]; // 256 byte + final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 + // byte + final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte + final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte + final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte + final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte + + final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + final int[] mainSort_runningOrder = new int[256]; // 1024 byte + final int[] mainSort_copy = new int[256]; // 1024 byte + final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + + final int[] ftab = new int[65537]; // 262148 byte + // ------------ + // 333408 byte + + final byte[] block; // 900021 byte + final int[] fmap; // 3600000 byte + final char[] sfmap; // 3600000 byte + // ------------ + // 8433529 byte + // ============ + + /** + * Array instance identical to sfmap, both are used only + * temporarily and indepently, so we do not need to allocate + * additional memory. + */ + final char[] quadrant; + + Data(int blockSize100k) { + super(); + + final int n = blockSize100k * BZip2Constants.baseBlockSize; + this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; + this.fmap = new int[n]; + this.sfmap = new char[2 * n]; + this.quadrant = this.sfmap; + } - nMTF = wr; } -} \ No newline at end of file + +}