diff --git a/CONTRIBUTORS b/CONTRIBUTORS index cb55828e5..296304e67 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -104,6 +104,7 @@ Jerome Lacoste Jesse Glick Jesse Stockall Jim Allers +Joerg Wassmer Jon Dickinson Jon S. Stevens Jose Alberto Fernandez @@ -160,6 +161,7 @@ Nick Pellow Nicola Ken Barozzi Nico Seessle Nigel Magnay +Oliver Merkel Oliver Rossmueller Patrick C. Beard Patrick Chanezon diff --git a/src/main/org/apache/tools/bzip2/BZip2Constants.java b/src/main/org/apache/tools/bzip2/BZip2Constants.java index 2cc973279..4504baba6 100644 --- a/src/main/org/apache/tools/bzip2/BZip2Constants.java +++ b/src/main/org/apache/tools/bzip2/BZip2Constants.java @@ -1,5 +1,5 @@ /* - * Copyright 2001,2004 The Apache Software Foundation + * Copyright 2001,2004-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,10 @@ package org.apache.tools.bzip2; /** * Base class for both the compress and decompress classes. * Holds common arrays, and static data. - * + *
+ * This interface is public for historical purposes. + * You should have no need to use it. + *
*/ public interface BZip2Constants { @@ -41,6 +44,13 @@ public interface BZip2Constants { int MAX_SELECTORS = (2 + (900000 / G_SIZE)); int NUM_OVERSHOOT_BYTES = 20; + /** + * This array really shouldn't be here. + * Again, for historical purposes it is. + * + *FIXME: This array should be in a private or package private + * location, since it could be modified by malicious code.
+ */ int[] rNums = { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, diff --git a/src/main/org/apache/tools/bzip2/CBZip2InputStream.java b/src/main/org/apache/tools/bzip2/CBZip2InputStream.java index fd935fb5d..aa8733619 100644 --- a/src/main/org/apache/tools/bzip2/CBZip2InputStream.java +++ b/src/main/org/apache/tools/bzip2/CBZip2InputStream.java @@ -1,5 +1,5 @@ /* - * Copyright 2001-2004 The Apache Software Foundation + * Copyright 2001-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,89 +29,72 @@ import java.io.IOException; * An input stream that decompresses from the BZip2 format (without the file * header chars) to be read as any other stream. * + *The decompression requires large amounts of memory. Thus you + * should call the {@link #close() close()} method as soon as + * possible, to force CBZip2InputStream to release the + * allocated memory. See {@link CBZip2OutputStream + * CBZip2OutputStream} for information about memory usage.
+ * + * <p>CBZip2InputStream reads bytes from the compressed + * source stream via the single byte {@link java.io.InputStream#read() + * read()} method exclusively. Thus you should consider using a + * buffered source stream.</p>
+ * + *Instances of this class are not threadsafe.
*/ public class CBZip2InputStream extends InputStream implements BZip2Constants { - private static void cadvise() { - System.out.println("CRC Error"); - //throw new CCoruptionError(); - } - - private static void badBGLengths() { - cadvise(); - } - private static void bitStreamEOF() { - cadvise(); - } + private static void reportCRCError() throws IOException { + // The clean way would be to throw an exception. + //throw new IOException("crc error"); - private static void compressedStreamEOF() { - cadvise(); + // Just print a message, like the previous versions of this class did + System.err.println("BZip2 CRC error"); } private void makeMaps() { - int i; - nInUse = 0; - for (i = 0; i < 256; i++) { - if (inUse[i]) { - seqToUnseq[nInUse] = (char) i; - unseqToSeq[i] = (char) nInUse; - nInUse++; - } + final boolean[] inUse = this.data.inUse; + final byte[] seqToUnseq = this.data.seqToUnseq; + + int nInUse = 0; + + for (int i = 0; i < 256; i++) { + if (inUse[i]) + seqToUnseq[nInUse++] = (byte) i; } + + this.nInUse = nInUse; } - /* - index of the last char in the block, so - the block size == last + 1. - */ + /** + * Index of the last char in the block, so the block size == last + 1. + */ private int last; - /* - index in zptr[] of original string after sorting. - */ + /** + * Index in zptr[] of original string after sorting. + */ private int origPtr; - /* - always: in the range 0 .. 9. - The current block size is 100000 * this number. - */ + /** + * always: in the range 0 .. 9. + * The current block size is 100000 * this number. 
+ */ private int blockSize100k; private boolean blockRandomised; private int bsBuff; private int bsLive; - private CRC mCrc = new CRC(); + private final CRC crc = new CRC(); - private boolean[] inUse = new boolean[256]; private int nInUse; - private char[] seqToUnseq = new char[256]; - private char[] unseqToSeq = new char[256]; - - private char[] selector = new char[MAX_SELECTORS]; - private char[] selectorMtf = new char[MAX_SELECTORS]; - - private int[] tt; - private char[] ll8; - - /* - freq table collected to save a pass over the data - during decompression. - */ - private int[] unzftab = new int[256]; - - private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; - private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; - private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; - private int[] minLens = new int[N_GROUPS]; - - private InputStream bsStream; - - private boolean streamEnd = false; + private InputStream in; private int currentChar = -1; + private static final int EOF = 0; private static final int START_BLOCK_STATE = 1; private static final int RAND_PART_A_STATE = 2; private static final int RAND_PART_B_STATE = 3; @@ -125,330 +108,429 @@ public class CBZip2InputStream extends InputStream implements BZip2Constants { private int storedBlockCRC, storedCombinedCRC; private int computedBlockCRC, computedCombinedCRC; - int i2, count, chPrev, ch2; - int tPos; - int rNToGo = 0; - int rTPos = 0; - int j2; - char z; - - public CBZip2InputStream(InputStream zStream) { - ll8 = null; - tt = null; - bsSetStream(zStream); - initialize(); - initBlock(); - setupBlock(); - } - - public int read() { - if (streamEnd) { - return -1; + // Variables used by setup* methods exclusively + + private int su_count; + private int su_ch2; + private int su_chPrev; + private int su_i2; + private int su_j2; + private int su_rNToGo; + private int su_rTPos; + private int su_tPos; + private char su_z; + + /** + * All memory intensive stuff. 
+ * This field is initialized by initBlock(). + */ + private CBZip2InputStream.Data data; + + /** + * Constructs a new CBZip2InputStream which decompresses bytes read from + * the specified stream. + * + * <p>Although BZip2 headers are marked with the magic + * "BZ" this constructor expects the next byte in the + * stream to be the first one after the magic. Thus callers have + * to skip the first two bytes. Otherwise this constructor will + * throw an exception.</p>
+ * + * @throws IOException + * if the stream content is malformed or an I/O error occurs. + * @throws NullPointerException + * if in == null + */ + public CBZip2InputStream(final InputStream in) throws IOException { + super(); + + this.in = in; + init(); + } + + public int read() throws IOException { + if (this.in != null) { + return read0(); } else { - int retChar = currentChar; - switch(currentState) { - case START_BLOCK_STATE: - break; - case RAND_PART_A_STATE: - break; - case RAND_PART_B_STATE: - setupRandPartB(); - break; - case RAND_PART_C_STATE: - setupRandPartC(); - break; - case NO_RAND_PART_A_STATE: - break; - case NO_RAND_PART_B_STATE: - setupNoRandPartB(); - break; - case NO_RAND_PART_C_STATE: - setupNoRandPartC(); - break; - default: - break; - } - return retChar; + throw new IOException("stream closed"); } } - private void initialize() { - char magic3, magic4; - magic3 = bsGetUChar(); - magic4 = bsGetUChar(); - if (magic3 != 'h' || magic4 < '1' || magic4 > '9') { - bsFinishedWithStream(); - streamEnd = true; - return; + public int read(final byte[] dest, final int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > dest.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > dest.length(" + + dest.length + ")."); + } + if (this.in == null) { + throw new IOException("stream closed"); } - setDecompressStructureSizes(magic4 - '0'); - computedCombinedCRC = 0; + final int hi = offs + len; + int destOffs = offs; + for (int b; (destOffs < hi) && ((b = read0()) >= 0);) { + dest[destOffs++] = (byte) b; + } + + return (destOffs == offs) ? 
-1 : (destOffs - offs); } - private void initBlock() { - char magic1, magic2, magic3, magic4; - char magic5, magic6; - magic1 = bsGetUChar(); - magic2 = bsGetUChar(); - magic3 = bsGetUChar(); - magic4 = bsGetUChar(); - magic5 = bsGetUChar(); - magic6 = bsGetUChar(); - if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 - && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) { - complete(); - return; - } + private int read0() throws IOException { + final int retChar = this.currentChar; - if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 - || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) { - badBlockHeader(); - streamEnd = true; - return; - } + switch (this.currentState) { + case EOF: + return -1; - storedBlockCRC = bsGetInt32(); + case START_BLOCK_STATE: + throw new Error(); - if (bsR(1) == 1) { - blockRandomised = true; - } else { - blockRandomised = false; - } + case RAND_PART_A_STATE: + throw new Error(); - // currBlockNo++; - getAndMoveToFrontDecode(); + case RAND_PART_B_STATE: + setupRandPartB(); + break; - mCrc.initialiseCRC(); - currentState = START_BLOCK_STATE; - } + case RAND_PART_C_STATE: + setupRandPartC(); + break; + + case NO_RAND_PART_A_STATE: + throw new Error(); - private void endBlock() { - computedBlockCRC = mCrc.getFinalCRC(); - /* A bad CRC is considered a fatal error. 
*/ - if (storedBlockCRC != computedBlockCRC) { - crcError(); + break; + + case NO_RAND_PART_B_STATE: + setupNoRandPartB(); + break; + + case NO_RAND_PART_C_STATE: + setupNoRandPartC(); + break; + + default: + throw new Error(); } - computedCombinedCRC = (computedCombinedCRC << 1) - | (computedCombinedCRC >>> 31); - computedCombinedCRC ^= computedBlockCRC; + return retChar; } - private void complete() { - storedCombinedCRC = bsGetInt32(); - if (storedCombinedCRC != computedCombinedCRC) { - crcError(); + private void init() throws IOException { + int magic2 = this.in.read(); + if (magic2 != 'h') { + throw new IOException("Stream is not BZip2 formatted: expected 'h'" + + " as first byte but got '" + (char) magic2 + + "'"); } - bsFinishedWithStream(); - streamEnd = true; + int blockSize = this.in.read(); + if ((blockSize < '1') || (blockSize > '9')) { + throw new IOException("Stream is not BZip2 formatted: illegal " + + "blocksize " + (char) blockSize); + } + + this.blockSize100k = blockSize - '0'; + + initBlock(); + setupBlock(); } - private static void blockOverrun() { - cadvise(); + private void initBlock() throws IOException { + char magic0 = bsGetUByte(); + char magic1 = bsGetUByte(); + char magic2 = bsGetUByte(); + char magic3 = bsGetUByte(); + char magic4 = bsGetUByte(); + char magic5 = bsGetUByte(); + + if (magic0 == 0x17 && + magic1 == 0x72 && + magic2 == 0x45 && + magic3 == 0x38 && + magic4 == 0x50 && + magic5 == 0x90) { + complete(); // end of file + } else if (magic0 != 0x31 || // '1' + magic1 != 0x41 || // 'A' + magic2 != 0x59 || // 'Y' + magic3 != 0x26 || // '&' + magic4 != 0x53 || // 'S' + magic5 != 0x59 // 'Y' + ) { + this.currentState = EOF; + throw new IOException("bad block header"); + } else { + this.storedBlockCRC = bsGetInt(); + this.blockRandomised = bsR(1) == 1; + + /** + * Allocate data here instead of in the constructor, so we do not + * allocate it if the input file is empty.
+ */ + if (this.data == null) { + this.data = new Data(this.blockSize100k); + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + this.crc.initialiseCRC(); + this.currentState = START_BLOCK_STATE; + } } - private static void badBlockHeader() { - cadvise(); + private void endBlock() throws IOException { + this.computedBlockCRC = this.crc.getFinalCRC(); + + // A bad CRC is considered a fatal error. + if (this.storedBlockCRC != this.computedBlockCRC) { + // make next blocks readable without error + // (repair feature, not yet documented, not tested) + this.computedCombinedCRC + = (this.storedCombinedCRC << 1) + | (this.storedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.storedBlockCRC; + + reportCRCError(); + } + + this.computedCombinedCRC + = (this.computedCombinedCRC << 1) + | (this.computedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.computedBlockCRC; } - private static void crcError() { - cadvise(); + private void complete() throws IOException { + this.storedCombinedCRC = bsGetInt(); + this.currentState = EOF; + this.data = null; + + if (this.storedCombinedCRC != this.computedCombinedCRC) { + reportCRCError(); + } } - private void bsFinishedWithStream() { - try { - if (this.bsStream != null) { - if (this.bsStream != System.in) { - this.bsStream.close(); - this.bsStream = null; + public void close() throws IOException { + InputStream in = this.in; + if (in != null) { + try { + if (in != System.in) { + in.close(); } + } finally { + this.data = null; + this.in = null; } - } catch (IOException ioe) { - //ignore } } - private void bsSetStream(InputStream f) { - bsStream = f; - bsLive = 0; - bsBuff = 0; + private int bsR(final int n) throws IOException { + int bsLive = this.bsLive; + int bsBuff = this.bsBuff; + + if (bsLive < n) { + final InputStream in = this.in; + do { + int thech = in.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuff = (bsBuff << 8) | thech; + bsLive += 8; + } while (bsLive < 
n); + + this.bsBuff = bsBuff; + } + + this.bsLive = bsLive - n; + return (bsBuff >> (bsLive - n)) & ((1 << n) - 1); } - private int bsR(int n) { - int v; - while (bsLive < n) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - if (thech == -1) { - compressedStreamEOF(); + private boolean bsGetBit() throws IOException { + int bsLive = this.bsLive; + int bsBuff = this.bsBuff; + + if (bsLive < 1) { + int thech = this.in.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); + + bsBuff = (bsBuff << 8) | thech; bsLive += 8; + this.bsBuff = bsBuff; } - v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1); - bsLive -= n; - return v; + this.bsLive = bsLive - 1; + return ((bsBuff >> (bsLive - 1)) & 1) != 0; } - private char bsGetUChar() { + private char bsGetUByte() throws IOException { return (char) bsR(8); } - private int bsGetint() { - int u = 0; - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - return u; - } - - private int bsGetIntVS(int numBits) { - return (int) bsR(numBits); + private int bsGetInt() throws IOException { + return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); } - private int bsGetInt32() { - return (int) bsGetint(); - } - - private void hbCreateDecodeTables(int[] limit, int[] base, - int[] perm, char[] length, - int minLen, int maxLen, int alphaSize) { - int pp, i, j, vec; - - pp = 0; - for (i = minLen; i <= maxLen; i++) { - for (j = 0; j < alphaSize; j++) { + /** + * Called by createHuffmanDecodingTables() exclusively. 
+ */ + private static void hbCreateDecodeTables(final int[] limit, + final int[] base, + final int[] perm, + final char[] length, + final int minLen, + final int maxLen, + final int alphaSize) { + for (int i = minLen, pp = 0; i <= maxLen; i++) { + for (int j = 0; j < alphaSize; j++) { if (length[j] == i) { - perm[pp] = j; - pp++; + perm[pp++] = j; } } } - for (i = 0; i < MAX_CODE_LEN; i++) { + for (int i = MAX_CODE_LEN; --i > 0;) { base[i] = 0; - } - for (i = 0; i < alphaSize; i++) { - base[length[i] + 1]++; + limit[i] = 0; } - for (i = 1; i < MAX_CODE_LEN; i++) { - base[i] += base[i - 1]; + for (int i = 0; i < alphaSize; i++) { + base[length[i] + 1]++; } - for (i = 0; i < MAX_CODE_LEN; i++) { - limit[i] = 0; + for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { + b += base[i]; + base[i] = b; } - vec = 0; - for (i = minLen; i <= maxLen; i++) { - vec += (base[i + 1] - base[i]); + for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { + final int nb = base[i + 1]; + vec += nb - b; + b = nb; limit[i] = vec - 1; vec <<= 1; } - for (i = minLen + 1; i <= maxLen; i++) { + + for (int i = minLen + 1; i <= maxLen; i++) { base[i] = ((limit[i - 1] + 1) << 1) - base[i]; } } - private void recvDecodingTables() { - char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; - int i, j, t, nGroups, nSelectors, alphaSize; - int minLen, maxLen; - boolean[] inUse16 = new boolean[16]; + private void recvDecodingTables() throws IOException { + final Data data = this.data; + final boolean[] inUse = data.inUse; + final byte[] pos = data.recvDecodingTables_pos; + final byte[] selector = data.selector; + final byte[] selectorMtf = data.selectorMtf; + + int inUse16 = 0; /* Receive the mapping table */ - for (i = 0; i < 16; i++) { - if (bsR(1) == 1) { - inUse16[i] = true; - } else { - inUse16[i] = false; + for (int i = 0; i < 16; i++) { + if (bsGetBit()) { + inUse16 |= 1 << i; } } - for (i = 0; i < 256; i++) { + for (int i = 256; --i >= 0;) { inUse[i] = false; } - for (i = 0; i < 16; i++) { 
- if (inUse16[i]) { - for (j = 0; j < 16; j++) { - if (bsR(1) == 1) { - inUse[i * 16 + j] = true; + for (int i = 0; i < 16; i++) { + if ((inUse16 & (1 << i)) != 0) { + final int i16 = i << 4; + for (int j = 0; j < 16; j++) { + if (bsGetBit()) { + inUse[i16 + j] = true; } } } } makeMaps(); - alphaSize = nInUse + 2; + final int alphaSize = this.nInUse + 2; /* Now the selectors */ - nGroups = bsR(3); - nSelectors = bsR(15); - for (i = 0; i < nSelectors; i++) { - j = 0; - while (bsR(1) == 1) { + final int nGroups = bsR(3); + final int nSelectors = bsR(15); + + for (int i = 0; i < nSelectors; i++) { + int j = 0; + while (bsGetBit()) { j++; } - selectorMtf[i] = (char) j; + selectorMtf[i] = (byte) j; } /* Undo the MTF values for the selectors. */ - { - char[] pos = new char[N_GROUPS]; - char tmp, v; - for (v = 0; v < nGroups; v++) { - pos[v] = v; - } + for (int v = nGroups; --v >= 0;) { + pos[v] = (byte) v; + } - for (i = 0; i < nSelectors; i++) { - v = selectorMtf[i]; - tmp = pos[v]; - while (v > 0) { - pos[v] = pos[v - 1]; - v--; - } - pos[0] = tmp; - selector[i] = tmp; + for (int i = 0; i < nSelectors; i++) { + int v = selectorMtf[i] & 0xff; + final byte tmp = pos[v]; + while (v > 0) { + // nearly all times v is zero, 4 in most other cases + pos[v] = pos[v - 1]; + v--; } + pos[0] = tmp; + selector[i] = tmp; } + final char[][] len = data.temp_charArray2d; + /* Now the coding tables */ - for (t = 0; t < nGroups; t++) { + for (int t = 0; t < nGroups; t++) { int curr = bsR(5); - for (i = 0; i < alphaSize; i++) { - while (bsR(1) == 1) { - if (bsR(1) == 0) { - curr++; - } else { - curr--; - } + final char[] len_t = len[t]; + for (int i = 0; i < alphaSize; i++) { + while (bsGetBit()) { + curr += bsGetBit() ? 
-1 : 1; } - len[t][i] = (char) curr; + len_t[i] = (char) curr; } } - /* Create the Huffman decoding tables */ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (len[t][i] > maxLen) { - maxLen = len[t][i]; + // finally create the Huffman tables + createHuffmanDecodingTables(alphaSize, nGroups); + } + + /** + * Called by recvDecodingTables() exclusively. + */ + private void createHuffmanDecodingTables(final int alphaSize, + final int nGroups) { + final Data data = this.data; + final char[][] len = data.temp_charArray2d; + final int[] minLens = data.minLens; + final int[][] limit = data.limit; + final int[][] base = data.base; + final int[][] perm = data.perm; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final char[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final char lent = len_t[i]; + if (lent > maxLen) { + maxLen = lent; } - if (len[t][i] < minLen) { - minLen = len[t][i]; + if (lent < minLen) { + minLen = lent; } } hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, @@ -457,18 +539,22 @@ public class CBZip2InputStream extends InputStream implements BZip2Constants { } } - private void getAndMoveToFrontDecode() { - char[] yy = new char[256]; - int i, j, nextSym, limitLast; - int EOB, groupNo, groupPos; - - limitLast = baseBlockSize * blockSize100k; - origPtr = bsGetIntVS(24); - + private void getAndMoveToFrontDecode() throws IOException { + this.origPtr = bsR(24); recvDecodingTables(); - EOB = nInUse + 1; - groupNo = -1; - groupPos = 0; + + final InputStream in = this.in; + final Data data = this.data; + final byte[] ll8 = data.ll8; + final int[] unzftab = data.unzftab; + final byte[] selector = data.selector; + final byte[] seqToUnseq = data.seqToUnseq; + final char[] yy = data.getAndMoveToFrontDecode_yy; + final int[] minLens = data.minLens; + final int[][] limit = data.limit; + final int[][] base = data.base; + final int[][] perm = data.perm; + 
final int limitLast = this.blockSize100k * 100000; /* Setting up the unzftab entries here is not strictly @@ -476,246 +562,259 @@ public class CBZip2InputStream extends InputStream implements BZip2Constants { in a separate pass, and so saves a block's worth of cache misses. */ - for (i = 0; i <= 255; i++) { - unzftab[i] = 0; - } - - for (i = 0; i <= 255; i++) { + for (int i = 256; --i >= 0;) { yy[i] = (char) i; + unzftab[i] = 0; } - last = -1; + int groupNo = 0; + int groupPos = G_SIZE - 1; + final int eob = this.nInUse + 1; + int nextSym = getAndMoveToFrontDecode0(0); + int bsBuff = this.bsBuff; + int bsLive = this.bsLive; + int last = -1; + int zt = selector[groupNo] & 0xff; + int[] base_zt = base[zt]; + int[] limit_zt = limit[zt]; + int[] perm_zt = perm[zt]; + int minLens_zt = minLens[zt]; + + while (nextSym != eob) { + if ((nextSym == RUNA) || (nextSym == RUNB)) { + int s = -1; - { - int zt, zn, zvec, zj; - if (groupPos == 0) { - groupNo++; - groupPos = G_SIZE; - } - groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = bsR(zn); - while (zvec > limit[zt][zn]) { - zn++; - { - { - while (bsLive < 1) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - if (thech == -1) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } + for (int n = 1; true; n <<= 1) { + if (nextSym == RUNA) { + s += n; + } else if (nextSym == RUNB) { + s += n << 1; + } else { + break; } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - } - zvec = (zvec << 1) | zj; - } - nextSym = perm[zt][zvec - base[zt][zn]]; - } - while (true) { + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } - if (nextSym == EOB) { - break; - } + int zn = minLens_zt; - if (nextSym == RUNA || nextSym == RUNB) { - char ch; - 
int s = -1; - int N = 1; - do { - if (nextSym == RUNA) { - s = s + (0 + 1) * N; - } else if (nextSym == RUNB) { - s = s + (1 + 1) * N; - } - N = N * 2; - { - int zt, zn, zvec, zj; - if (groupPos == 0) { - groupNo++; - groupPos = G_SIZE; + // Inlined: + // int zvec = bsR(zn); + while (bsLive < zn) { + final int thech = in.read(); + if (thech >= 0) { + bsBuff = (bsBuff << 8) | thech; + bsLive += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); } - groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = bsR(zn); - while (zvec > limit[zt][zn]) { - zn++; - { - { - while (bsLive < 1) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - if (thech == -1) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; + } + int zvec = (bsBuff >> (bsLive - zn)) & ((1 << zn) - 1); + bsLive -= zn; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLive < 1) { + final int thech = in.read(); + if (thech >= 0) { + bsBuff = (bsBuff << 8) | thech; + bsLive += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); } - zvec = (zvec << 1) | zj; } - nextSym = perm[zt][zvec - base[zt][zn]]; + bsLive--; + zvec = (zvec << 1) | ((bsBuff >> bsLive) & 1); } - } while (nextSym == RUNA || nextSym == RUNB); + nextSym = perm_zt[zvec - base_zt[zn]]; + } - s++; - ch = seqToUnseq[yy[0]]; - unzftab[ch] += s; + final byte ch = seqToUnseq[yy[0]]; + unzftab[ch & 0xff] += s + 1; - while (s > 0) { - last++; - ll8[last] = ch; - s--; + while (s-- >= 0) { + ll8[++last] = ch; } if (last >= limitLast) { - blockOverrun(); + throw new IOException("block overrun"); } - continue; } else { - char tmp; - last++; - if (last >= limitLast) { - blockOverrun(); + if (++last >= limitLast) { + throw new IOException("block overrun"); } - tmp = yy[nextSym - 1]; - 
unzftab[seqToUnseq[tmp]]++; + final char tmp = yy[nextSym - 1]; + unzftab[seqToUnseq[tmp] & 0xff]++; ll8[last] = seqToUnseq[tmp]; /* This loop is hammered during decompression, - hence the unrolling. - - for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1]; + hence avoid native method call overhead of + System.arraycopy for very small ranges to copy. */ - - j = nextSym - 1; - for (; j > 3; j -= 4) { - yy[j] = yy[j - 1]; - yy[j - 1] = yy[j - 2]; - yy[j - 2] = yy[j - 3]; - yy[j - 3] = yy[j - 4]; - } - for (; j > 0; j--) { - yy[j] = yy[j - 1]; + if (nextSym <= 16) { + for (int j = nextSym - 1; j > 0;) { + yy[j] = yy[--j]; + } + } else { + System.arraycopy(yy, 0, yy, 1, nextSym - 1); } yy[0] = tmp; - { - int zt, zn, zvec, zj; - if (groupPos == 0) { - groupNo++; - groupPos = G_SIZE; - } + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = bsR(zn); - while (zvec > limit[zt][zn]) { - zn++; - { - { - while (bsLive < 1) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; + } + + int zn = minLens_zt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLive < zn) { + final int thech = in.read(); + if (thech >= 0) { + bsBuff = (bsBuff << 8) | thech; + bsLive += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuff >> (bsLive - zn)) & ((1 << zn) - 1); + bsLive -= zn; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLive < 1) { + final int thech = in.read(); + if (thech >= 0) { + bsBuff = (bsBuff << 8) | thech; + bsLive += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); } - zvec = (zvec << 1) 
| zj; } - nextSym = perm[zt][zvec - base[zt][zn]]; + bsLive--; + zvec = (zvec << 1) | ((bsBuff >> bsLive) & 1); + } + nextSym = perm_zt[zvec - base_zt[zn]]; + } + } + + this.last = last; + this.bsLive = bsLive; + this.bsBuff = bsBuff; + } + + private int getAndMoveToFrontDecode0(final int groupNo) + throws IOException { + final InputStream in = this.in; + final Data data = this.data; + final int zt = data.selector[groupNo] & 0xff; + final int[] limit_zt = data.limit[zt]; + int zn = data.minLens[zt]; + int zvec = bsR(zn); + int bsLive = this.bsLive; + int bsBuff = this.bsBuff; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLive < 1) { + final int thech = in.read(); + + if (thech >= 0) { + bsBuff = (bsBuff << 8) | thech; + bsLive += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); } - continue; } + bsLive--; + zvec = (zvec << 1) | ((bsBuff >> bsLive) & 1); } + + this.bsLive = bsLive; + this.bsBuff = bsBuff; + + return data.perm[zt][zvec - data.base[zt][zn]]; } - private void setupBlock() { - int[] cftab = new int[257]; - char ch; + private void setupBlock() throws IOException { + if (this.data == null) { + return; + } + final int[] cftab = this.data.cftab; + final int[] tt = this.data.initTT(this.last + 1); + final byte[] ll8 = this.data.ll8; cftab[0] = 0; - for (int i = 1; i <= 256; i++) { - cftab[i] = unzftab[i - 1]; - } - for (int i = 1; i <= 256; i++) { - cftab[i] += cftab[i - 1]; + System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); + + for (int i = 1, c = cftab[0]; i <= 256; i++) { + c += cftab[i]; + cftab[i] = c; } - for (int i = 0; i <= last; i++) { - ch = (char) ll8[i]; - tt[cftab[ch]] = i; - cftab[ch]++; + for (int i = 0, last = this.last; i <= last; i++) { + tt[cftab[ll8[i] & 0xff]++] = i; } - cftab = null; - tPos = tt[origPtr]; + if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { + throw new IOException("stream corrupted"); + } - count = 0; - i2 = 0; - ch2 = 256; /* not a char and not EOF */ + this.su_tPos = 
tt[this.origPtr]; + this.su_count = 0; + this.su_i2 = 0; + this.su_ch2 = 256; /* not a char and not EOF */ - if (blockRandomised) { - rNToGo = 0; - rTPos = 0; + if (this.blockRandomised) { + this.su_rNToGo = 0; + this.su_rTPos = 0; setupRandPartA(); } else { setupNoRandPartA(); } } - private void setupRandPartA() { - if (i2 <= last) { - chPrev = ch2; - ch2 = ll8[tPos]; - tPos = tt[tPos]; - if (rNToGo == 0) { - rNToGo = rNums[rTPos]; - rTPos++; - if (rTPos == 512) { - rTPos = 0; + private void setupRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2 = this.data.ll8[this.su_tPos] & 0xff; + this.su_tPos = this.data.tt[this.su_tPos]; + if (this.su_rNToGo == 0) { + this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; } + } else { + this.su_rNToGo--; } - rNToGo--; - ch2 ^= (int) ((rNToGo == 1) ? 1 : 0); - i2++; - - currentChar = ch2; - currentState = RAND_PART_B_STATE; - mCrc.updateCRC(ch2); + this.su_ch2 = su_ch2 ^= (this.su_rNToGo == 1) ? 
1 : 0; + this.su_i2++; + this.currentChar = su_ch2; + this.currentState = RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2); } else { endBlock(); initBlock(); @@ -723,113 +822,154 @@ public class CBZip2InputStream extends InputStream implements BZip2Constants { } } - private void setupNoRandPartA() { - if (i2 <= last) { - chPrev = ch2; - ch2 = ll8[tPos]; - tPos = tt[tPos]; - i2++; - - currentChar = ch2; - currentState = NO_RAND_PART_B_STATE; - mCrc.updateCRC(ch2); + private void setupNoRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2 = this.data.ll8[this.su_tPos] & 0xff; + this.su_ch2 = su_ch2; + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_i2++; + this.currentChar = su_ch2; + this.currentState = NO_RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2); } else { + this.currentState = NO_RAND_PART_A_STATE; endBlock(); initBlock(); setupBlock(); } } - private void setupRandPartB() { - if (ch2 != chPrev) { - currentState = RAND_PART_A_STATE; - count = 1; + private void setupRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.currentState = RAND_PART_A_STATE; + this.su_count = 1; setupRandPartA(); - } else { - count++; - if (count >= 4) { - z = ll8[tPos]; - tPos = tt[tPos]; - if (rNToGo == 0) { - rNToGo = rNums[rTPos]; - rTPos++; - if (rTPos == 512) { - rTPos = 0; - } + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + if (this.su_rNToGo == 0) { + this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; } - rNToGo--; - z ^= ((rNToGo == 1) ? 
1 : 0); - j2 = 0; - currentState = RAND_PART_C_STATE; - setupRandPartC(); } else { - currentState = RAND_PART_A_STATE; - setupRandPartA(); + this.su_rNToGo--; + } + this.su_j2 = 0; + this.currentState = RAND_PART_C_STATE; + if (this.su_rNToGo == 1) { + this.su_z ^= 1; } + setupRandPartC(); + } else { + this.currentState = RAND_PART_A_STATE; + setupRandPartA(); } } - private void setupRandPartC() { - if (j2 < (int) z) { - currentChar = ch2; - mCrc.updateCRC(ch2); - j2++; + private void setupRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + this.currentChar = this.su_ch2; + this.crc.updateCRC(this.su_ch2); + this.su_j2++; } else { - currentState = RAND_PART_A_STATE; - i2++; - count = 0; + this.currentState = RAND_PART_A_STATE; + this.su_i2++; + this.su_count = 0; setupRandPartA(); } } - private void setupNoRandPartB() { - if (ch2 != chPrev) { - currentState = NO_RAND_PART_A_STATE; - count = 1; + private void setupNoRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.su_count = 1; setupNoRandPartA(); + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_j2 = 0; + setupNoRandPartC(); } else { - count++; - if (count >= 4) { - z = ll8[tPos]; - tPos = tt[tPos]; - currentState = NO_RAND_PART_C_STATE; - j2 = 0; - setupNoRandPartC(); - } else { - currentState = NO_RAND_PART_A_STATE; - setupNoRandPartA(); - } + setupNoRandPartA(); } } - private void setupNoRandPartC() { - if (j2 < (int) z) { - currentChar = ch2; - mCrc.updateCRC(ch2); - j2++; + private void setupNoRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + int su_ch2 = this.su_ch2; + this.currentChar = su_ch2; + this.crc.updateCRC(su_ch2); + this.su_j2++; + this.currentState = NO_RAND_PART_C_STATE; } else { - currentState = NO_RAND_PART_A_STATE; - i2++; - count = 0; + this.su_i2++; + this.su_count = 0; setupNoRandPartA(); } } - private void 
setDecompressStructureSizes(int newSize100k) { - if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k - && blockSize100k <= 9)) { - // throw new IOException("Invalid block size"); + private static final class Data extends Object { + + // (with blockSize 900k) + final boolean[] inUse = new boolean[256]; // 256 byte + + final byte[] seqToUnseq = new byte[256]; // 256 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + /** + * Freq table collected to save a pass over the data during + * decompression. + */ + final int[] unzftab = new int[256]; // 1024 byte + + final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[] minLens = new int[N_GROUPS]; // 24 byte + + final int[] cftab = new int[257]; // 1028 byte + final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte + final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 byte + final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte + //--------------- + // 60798 byte + + int[] tt; // 3600000 byte + byte[] ll8; // 900000 byte + //--------------- + // 4560782 byte + //=============== + + Data(int blockSize100k) { + super(); + + this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize]; } - blockSize100k = newSize100k; + /** + * Initializes the {@link #tt} array. + * + * This method is called when the required length of the array + * is known. I don't initialize it at construction time to + * avoid unneccessary memory allocation when compressing small + * files. + */ + final int[] initTT(int length) { + int[] tt = this.tt; - if (newSize100k == 0) { - return; + // tt.length should always be >= length, but theoretically + // it can happen, if the compressor mixed small and large + // blocks. 
Normally only the last block will be smaller + // than others. + if ((tt == null) || (tt.length < length)) { + this.tt = tt = new int[length]; + } + + return tt; } - int n = baseBlockSize * newSize100k; - ll8 = new char[n]; - tt = new int[n]; } } diff --git a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java index e7c3ff55c..877a8d28e 100644 --- a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java +++ b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java @@ -1,5 +1,5 @@ /* - * Copyright 2001-2004 The Apache Software Foundation + * Copyright 2001-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,338 +29,676 @@ import java.io.IOException; /** * An output stream that compresses into the BZip2 format (without the file * header chars) into another stream. + + *The compression requires large amounts of memory. Thus you + * should call the {@link #close() close()} method as soon as + * possible, to force CBZip2OutputStream to release the + * allocated memory.
+ * + *You can shrink the amount of allocated memory and maybe raise + * the compression speed by choosing a lower blocksize, which in turn + * may cause a lower compression ratio. You can avoid unnecessary + * memory allocation by avoiding using a blocksize which is bigger + * than the size of the input.
+ * + *You can compute the memory usage for compressing by the + * following formula:
+ *
+ * 400k + (9 * blocksize)
.
+ *
+ *
+ * To get the memory required for decompression by {@link + * CBZip2InputStream CBZip2InputStream} use
+ *
+ * 65k + (5 * blocksize)
.
+ *
+ *
+ * Memory usage by blocksize | + *||
---|---|---|
Blocksize | + *Compression memory usage |
+ * Decompression memory usage |
+ *
100k | + *1300k | + *565k | + *
200k | + *2200k | + *1065k | + *
300k | + *3100k | + *1565k | + *
400k | + *4000k | + *2065k | + *
500k | + *4900k | + *2565k | + *
600k | + *5800k | + *3065k | + *
700k | + *6700k | + *3565k | + *
800k | + *7600k | + *4065k | + *
900k | + *8500k | + *4565k | + *
For decompression CBZip2InputStream allocates less + * memory if the bzipped input is smaller than one block.
+ * + *Instances of this class are not threadsafe.
* + ** TODO: Update to BZip2 1.0.1 + *
+ * */ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { + + /** + * The minimum supported blocksize == 1. + */ + public static final int MIN_BLOCKSIZE = 1; + + /** + * The maximum supported blocksize == 9. + */ + public static final int MAX_BLOCKSIZE = 9; + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ protected static final int SETMASK = (1 << 21); + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ protected static final int CLEARMASK = (~SETMASK); + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ protected static final int GREATER_ICOST = 15; + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ protected static final int LESSER_ICOST = 0; + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ protected static final int SMALL_THRESH = 20; + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ protected static final int DEPTH_THRESH = 10; - /* + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + */ + protected static final int WORK_FACTOR = 30; + + /** + * This constant is accessible by subclasses for historical purposes. + * If you don't know what it means then you don't need it. + *If you are ever unlucky/improbable enough to get a stack overflow whilst sorting, increase the following constant and try again. 
In practice I have never seen the stack go above 27 elems, so the following limit seems very generous. - */ + *
+ */ protected static final int QSORT_STACK_SIZE = 1000; - private static void panic() { - System.out.println("panic"); - //throw new CError(); - } + private static final int FALLBACK_QSORT_SMALL_THRESH = 10; + private static final int FALLBACK_QSORT_STACK_SIZE = 100; - private void makeMaps() { - int i; - nInUse = 0; - for (i = 0; i < 256; i++) { - if (inUse[i]) { - seqToUnseq[nInUse] = (char) i; - unseqToSeq[i] = (char) nInUse; - nInUse++; - } - } - } + /** + * Knuth's increments seem to work better than Incerpi-Sedgewick + * here. Possibly because the number of elems to sort is usually + * small, typically <= 20. + */ + private static final int[] INCS = { + 1, + 4, + 13, + 40, + 121, + 364, + 1093, + 3280, + 9841, + 29524, + 88573, + 265720, + 797161, + 2391484 + }; + /** + * This method is accessible by subclasses for historical purposes. + * If you don't know what it does then you don't need it. + */ protected static void hbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen) { /* Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. */ - int nNodes, nHeap, n1, n2, i, j, k; - boolean tooLong; - - int[] heap = new int[MAX_ALPHA_SIZE + 2]; - int[] weight = new int[MAX_ALPHA_SIZE * 2]; - int[] parent = new int[MAX_ALPHA_SIZE * 2]; + final int[] heap = new int[MAX_ALPHA_SIZE * 2]; + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; - for (i = 0; i < alphaSize; i++) { + for (int i = alphaSize; --i >= 0;) { weight[i + 1] = (freq[i] == 0 ? 
1 : freq[i]) << 8; } - while (true) { - nNodes = alphaSize; - nHeap = 0; + for (boolean tooLong = true; tooLong;) { + tooLong = false; + int nNodes = alphaSize; + int nHeap = 0; heap[0] = 0; weight[0] = 0; parent[0] = -2; - for (i = 1; i <= alphaSize; i++) { + for (int i = 1; i <= alphaSize; i++) { parent[i] = -1; nHeap++; heap[nHeap] = i; - { - int zz, tmp; - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; } + heap[zz] = tmp; } - if (!(nHeap < (MAX_ALPHA_SIZE + 2))) { - panic(); - } + + // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; while (nHeap > 1) { - n1 = heap[1]; + int n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; - { - int zz = 0, yy = 0, tmp = 0; - zz = 1; - tmp = heap[zz]; - while (true) { - yy = zz << 1; - if (yy > nHeap) { - break; - } - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) { - break; - } - heap[zz] = heap[yy]; - zz = yy; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; } - heap[zz] = tmp; + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; } - n2 = heap[1]; + + heap[zz] = tmp; + + int n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; - { - int zz = 0, yy = 0, tmp = 0; - zz = 1; - tmp = heap[zz]; - while (true) { - yy = zz << 1; - if (yy > nHeap) { - break; - } - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) { - break; - } - heap[zz] = heap[yy]; - zz = yy; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] 
< weight[heap[yy]])) { + yy++; } - heap[zz] = tmp; + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; } + + heap[zz] = tmp; nNodes++; parent[n1] = parent[n2] = nNodes; - weight[nNodes] = ((weight[n1] & 0xffffff00) - + (weight[n2] & 0xffffff00)) - | (1 + (((weight[n1] & 0x000000ff) - > (weight[n2] & 0x000000ff)) - ? (weight[n1] & 0x000000ff) - : (weight[n2] & 0x000000ff))); - + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = (((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff)))); + parent[nNodes] = -1; nHeap++; heap[nHeap] = nNodes; - { - int zz = 0, tmp = 0; - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; } - } - if (!(nNodes < (MAX_ALPHA_SIZE * 2))) { - panic(); + heap[zz] = tmp; + } - tooLong = false; - for (i = 1; i <= alphaSize; i++) { - j = 0; - k = i; - while (parent[k] >= 0) { - k = parent[k]; + // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; j++; } + len[i - 1] = (char) j; if (j > maxLen) { tooLong = true; } } - if (!tooLong) { - break; + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + private static void hbMakeCodeLengths(final byte[] len, final int[] freq, + final Data dat, final int alphaSize, + final int maxLen) { + /* + Nodes and heap entries run from 1. Entry 0 + for both the heap and nodes is a sentinel. 
+ */ + final int[] heap = dat.heap; + final int[] weight = dat.weight; + final int[] parent = dat.parent; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = ((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? 
(weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } - for (i = 1; i < alphaSize; i++) { - j = weight[i] >> 8; - j = 1 + (j / 2); - weight[i] = j << 8; + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (byte) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } } } } - /* - index of the last char in the block, so + /** + Index of the last char in the block, so the block size == last + 1. */ - int last; + private int last; - /* - index in zptr[] of original string after sorting. - */ - int origPtr; + /** + * Index in fmap[] of original string after sorting. + */ + private int origPtr; - /* - always: in the range 0 .. 9. - The current block size is 100000 * this number. - */ - int blockSize100k; + /** + Always: in the range 0 .. 9. + The current block size is 100000 * this number. 
+ */ + private final int blockSize100k; - boolean blockRandomised; + private boolean blockRandomised; - int bytesOut; - int bsBuff; - int bsLive; - CRC mCrc = new CRC(); + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); - private boolean[] inUse = new boolean[256]; private int nInUse; - private char[] seqToUnseq = new char[256]; - private char[] unseqToSeq = new char[256]; - - private char[] selector = new char[MAX_SELECTORS]; - private char[] selectorMtf = new char[MAX_SELECTORS]; - - private char[] block; - private int[] quadrant; - private int[] zptr; - private short[] szptr; - private int[] ftab; - private int nMTF; - private int[] mtfFreq = new int[MAX_ALPHA_SIZE]; - /* * Used when sorting. If too many long comparisons * happen, we stop sorting, randomise the block * slightly, and try again. */ - private int workFactor; private int workDone; private int workLimit; private boolean firstAttempt; - private int nBlocksRandomised; private int currentChar = -1; private int runLength = 0; - public CBZip2OutputStream(OutputStream inStream) throws IOException { - this(inStream, 9); - } + private int blockCRC; + private int combinedCRC; + private int allowableBlockSize; - public CBZip2OutputStream(OutputStream inStream, int inBlockSize) - throws IOException { - block = null; - quadrant = null; - zptr = null; - ftab = null; + /** + * All memory intensive stuff. + */ + private CBZip2OutputStream.Data data; - bsSetStream(inStream); + private OutputStream out; - workFactor = 50; - if (inBlockSize > 9) { - inBlockSize = 9; - } - if (inBlockSize < 1) { - inBlockSize = 1; - } - blockSize100k = inBlockSize; - allocateCompressStructures(); - initialize(); - initBlock(); + /** + * Chooses a blocksize based on the given length of the data to compress. + * + * @return + * The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} + * both inclusive. For a negative inputLength this method returns + * MAX_BLOCKSIZE always. 
+ * + * @param inputLength + * The length of the data which will be compressed by + * CBZip2OutputStream. + */ + public static int chooseBlockSize(long inputLength) { + return (inputLength > 0) + ? (int) Math.min((inputLength / 132000) + 1, 9) + : MAX_BLOCKSIZE; } /** + * Constructs a new CBZip2OutputStream with a blocksize of 900k. + * + *Attention: The caller is resonsible to write the two + * BZip2 magic bytes "BZ" to the specified stream prior + * to calling this constructor.
* - * modified by Oliver Merkel, 010128 + * @param out * the destination stream. * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws NullPointerException + * ifout == null
.
*/
- public void write(int bv) throws IOException {
- int b = (256 + bv) % 256;
- if (currentChar != -1) {
- if (currentChar == b) {
- runLength++;
- if (runLength > 254) {
- writeRun();
- currentChar = -1;
- runLength = 0;
- }
- } else {
- writeRun();
- runLength = 1;
- currentChar = b;
- }
+ public CBZip2OutputStream(final OutputStream out) throws IOException {
+ this(out, MAX_BLOCKSIZE);
+ }
+
+ /**
+ * Constructs a new CBZip2OutputStream with specified blocksize.
+ *
+ * Attention: The caller is resonsible to write the two + * BZip2 magic bytes "BZ" to the specified stream prior + * to calling this constructor.
+ * + * + * @param out + * the destination stream. + * @param blockSize + * the blockSize as 100k units. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws IllegalArgumentException + * if(blockSize < 1) || (blockSize > 9)
.
+ * @throws NullPointerException
+ * if out == null
.
+ *
+ * @see #MIN_BLOCKSIZE
+ * @see #MAX_BLOCKSIZE
+ */
+ public CBZip2OutputStream(final OutputStream out, final int blockSize)
+ throws IOException {
+ super();
+
+ if (blockSize < 1) {
+ throw new IllegalArgumentException("blockSize(" + blockSize
+ + ") < 1");
+ }
+ if (blockSize > 9) {
+ throw new IllegalArgumentException("blockSize(" + blockSize
+ + ") > 9");
+ }
+
+ this.blockSize100k = blockSize;
+ this.out = out;
+ init();
+ }
+
+ public void write(final int b) throws IOException {
+ if (this.out != null) {
+ write0(b);
} else {
- currentChar = b;
- runLength++;
+ throw new IOException("closed");
}
}
private void writeRun() throws IOException {
- if (last < allowableBlockSize) {
- inUse[currentChar] = true;
- for (int i = 0; i < runLength; i++) {
- mCrc.updateCRC((char) currentChar);
- }
+ final int last = this.last;
+
+ if (last < this.allowableBlockSize) {
+ final int currentChar = this.currentChar;
+ final Data data = this.data;
+ data.inUse[currentChar] = true;
+ final byte ch = (byte) currentChar;
+
+ int runLength = this.runLength;
+ this.crc.updateCRC(currentChar, runLength);
+
switch (runLength) {
case 1:
- last++;
- block[last + 1] = (char) currentChar;
+ data.block[last + 2] = ch;
+ this.last = last + 1;
break;
+
case 2:
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) currentChar;
+ data.block[last + 2] = ch;
+ data.block[last + 3] = ch;
+ this.last = last + 2;
break;
+
case 3:
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) currentChar;
+ {
+ final byte[] block = data.block;
+ block[last + 2] = ch;
+ block[last + 3] = ch;
+ block[last + 4] = ch;
+ this.last = last + 3;
+ }
break;
+
default:
- inUse[runLength - 4] = true;
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) currentChar;
- last++;
- block[last + 1] = (char) (runLength - 4);
+ {
+ runLength -= 4;
+ data.inUse[runLength] = true;
+ final byte[] block = data.block;
+ block[last + 2] = ch;
+ block[last + 3] = ch;
+ block[last + 4] = ch;
+ block[last + 5] = ch;
+ block[last + 6] = (byte) runLength;
+ this.last = last + 5;
+ }
break;
+
}
} else {
endBlock();
@@ -369,72 +707,84 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
}
}
- boolean closed = false;
-
+ /**
+     * Overridden to close the stream.
+ */
protected void finalize() throws Throwable {
close();
super.finalize();
}
public void close() throws IOException {
- if (closed) {
- return;
- }
-
- if (runLength > 0) {
- writeRun();
+ OutputStream out = this.out;
+ if (out != null) {
+ try {
+ if (this.runLength > 0) {
+ writeRun();
+ }
+ this.currentChar = -1;
+ endBlock();
+ endCompression();
+ out.close();
+ } finally {
+ this.out = null;
+ this.data = null;
+ }
}
- currentChar = -1;
- endBlock();
- endCompression();
- closed = true;
- super.close();
- bsStream.close();
}
public void flush() throws IOException {
- super.flush();
- bsStream.flush();
+ OutputStream out = this.out;
+ if (out != null) {
+ out.flush();
+ }
}
- private int blockCRC, combinedCRC;
+ private void init() throws IOException {
+ // write magic: done by caller who created this stream
+ //this.out.write('B');
+ //this.out.write('Z');
- private void initialize() throws IOException {
- bytesOut = 0;
- nBlocksRandomised = 0;
+ this.data = new Data(this.blockSize100k);
/* Write `magic' bytes h indicating file-format == huffmanised,
followed by a digit indicating blockSize100k.
*/
- bsPutUChar('h');
- bsPutUChar('0' + blockSize100k);
+ bsPutUByte('h');
+ bsPutUByte('0' + this.blockSize100k);
- combinedCRC = 0;
+ this.combinedCRC = 0;
+ initBlock();
}
- private int allowableBlockSize;
-
private void initBlock() {
// blockNo++;
- mCrc.initialiseCRC();
- last = -1;
+ this.crc.initialiseCRC();
+ this.last = -1;
// ch = 0;
- for (int i = 0; i < 256; i++) {
+ boolean[] inUse = this.data.inUse;
+ for (int i = 256; --i >= 0;) {
inUse[i] = false;
}
/* 20 is just a paranoia constant */
- allowableBlockSize = baseBlockSize * blockSize100k - 20;
+ this.allowableBlockSize
+ = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20;
}
private void endBlock() throws IOException {
- blockCRC = mCrc.getFinalCRC();
- combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31);
- combinedCRC ^= blockCRC;
+ this.blockCRC = this.crc.getFinalCRC();
+ this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
+ this.combinedCRC ^= this.blockCRC;
+
+ // empty block at end of file
+ if (this.last == -1) {
+ return;
+ }
/* sort the block and establish posn of original string */
- doReversibleTransformation();
+ blockSort();
/*
A 6-byte block header, the value chosen arbitrarily
@@ -449,20 +799,19 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
They are only important when trying to recover blocks from
damaged files.
*/
- bsPutUChar(0x31);
- bsPutUChar(0x41);
- bsPutUChar(0x59);
- bsPutUChar(0x26);
- bsPutUChar(0x53);
- bsPutUChar(0x59);
+ bsPutUByte(0x31);
+ bsPutUByte(0x41);
+ bsPutUByte(0x59);
+ bsPutUByte(0x26);
+ bsPutUByte(0x53);
+ bsPutUByte(0x59);
/* Now the block's CRC, so it is in a known place. */
- bsPutint(blockCRC);
+ bsPutInt(this.blockCRC);
/* Now a single bit indicating randomisation. */
- if (blockRandomised) {
+ if (this.blockRandomised) {
bsW(1, 1);
- nBlocksRandomised++;
} else {
bsW(1, 0);
}
@@ -479,214 +828,276 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
too much repetition -- 27 18 28 18 28 46 -- for me
to feel statistically comfortable. Call me paranoid.)
*/
- bsPutUChar(0x17);
- bsPutUChar(0x72);
- bsPutUChar(0x45);
- bsPutUChar(0x38);
- bsPutUChar(0x50);
- bsPutUChar(0x90);
+ bsPutUByte(0x17);
+ bsPutUByte(0x72);
+ bsPutUByte(0x45);
+ bsPutUByte(0x38);
+ bsPutUByte(0x50);
+ bsPutUByte(0x90);
+
+ bsPutInt(this.combinedCRC);
+ bsFinishedWithStream();
+ }
- bsPutint(combinedCRC);
+ /**
+ * Returns the blocksize parameter specified at construction time.
+ */
+ public final int getBlockSize() {
+ return this.blockSize100k;
+ }
- bsFinishedWithStream();
+ public void write(final byte[] buf, int offs, final int len)
+ throws IOException {
+ if (offs < 0) {
+ throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
+ }
+ if (len < 0) {
+ throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
+ }
+ if (offs + len > buf.length) {
+ throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
+ + len + ") > buf.length("
+ + buf.length + ").");
+ }
+ if (this.out == null) {
+ throw new IOException("stream closed");
+ }
+
+ for (int hi = offs + len; offs < hi;) {
+ write0(buf[offs++]);
+ }
}
- private void hbAssignCodes (int[] code, char[] length, int minLen,
- int maxLen, int alphaSize) {
- int n, vec, i;
+ private void write0(int b) throws IOException {
+ if (this.currentChar != -1) {
+ b &= 0xff;
+ if (this.currentChar == b) {
+ if (++this.runLength > 254) {
+ writeRun();
+ this.currentChar = -1;
+ this.runLength = 0;
+ }
+ // else nothing to do
+ } else {
+ writeRun();
+ this.runLength = 1;
+ this.currentChar = b;
+ }
+ } else {
+ this.currentChar = b & 0xff;
+ this.runLength++;
+ }
+ }
- vec = 0;
- for (n = minLen; n <= maxLen; n++) {
- for (i = 0; i < alphaSize; i++) {
- if (length[i] == n) {
+ private static void hbAssignCodes(final int[] code, final byte[] length,
+ final int minLen, final int maxLen,
+ final int alphaSize) {
+ int vec = 0;
+ for (int n = minLen; n <= maxLen; n++) {
+ for (int i = 0; i < alphaSize; i++) {
+ if ((length[i] & 0xff) == n) {
code[i] = vec;
vec++;
}
- };
+ }
vec <<= 1;
}
}
- private void bsSetStream(OutputStream f) {
- bsStream = f;
- bsLive = 0;
- bsBuff = 0;
- bytesOut = 0;
- }
-
private void bsFinishedWithStream() throws IOException {
- while (bsLive > 0) {
- int ch = (bsBuff >> 24);
- try {
- bsStream.write(ch); // write 8-bit
- } catch (IOException e) {
- throw e;
- }
- bsBuff <<= 8;
- bsLive -= 8;
- bytesOut++;
+ while (this.bsLive > 0) {
+ int ch = this.bsBuff >> 24;
+ this.out.write(ch); // write 8-bit
+ this.bsBuff <<= 8;
+ this.bsLive -= 8;
}
}
- private void bsW(int n, int v) throws IOException {
+ private void bsW(final int n, final int v) throws IOException {
+ final OutputStream out = this.out;
+ int bsLive = this.bsLive;
+ int bsBuff = this.bsBuff;
+
while (bsLive >= 8) {
- int ch = (bsBuff >> 24);
- try {
- bsStream.write(ch); // write 8-bit
- } catch (IOException e) {
- throw e;
- }
+ out.write(bsBuff >> 24); // write 8-bit
bsBuff <<= 8;
bsLive -= 8;
- bytesOut++;
}
- bsBuff |= (v << (32 - bsLive - n));
- bsLive += n;
+
+ this.bsBuff = bsBuff | (v << (32 - bsLive - n));
+ this.bsLive = bsLive + n;
}
- private void bsPutUChar(int c) throws IOException {
+ private void bsPutUByte(final int c) throws IOException {
bsW(8, c);
}
- private void bsPutint(int u) throws IOException {
+ private void bsPutInt(final int u) throws IOException {
bsW(8, (u >> 24) & 0xff);
bsW(8, (u >> 16) & 0xff);
bsW(8, (u >> 8) & 0xff);
bsW(8, u & 0xff);
}
- private void bsPutIntVS(int numBits, int c) throws IOException {
- bsW(numBits, c);
- }
-
private void sendMTFValues() throws IOException {
- char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
-
- int v, t, i, j, gs, ge, totc, bt, bc, iter;
- int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
- int nGroups, nBytes;
+ final byte[][] len = this.data.sendMTFValues_len;
+ final int alphaSize = this.nInUse + 2;
- alphaSize = nInUse + 2;
- for (t = 0; t < N_GROUPS; t++) {
- for (v = 0; v < alphaSize; v++) {
- len[t][v] = (char) GREATER_ICOST;
+ for (int t = N_GROUPS; --t >= 0;) {
+ byte[] len_t = len[t];
+ for (int v = alphaSize; --v >= 0;) {
+ len_t[v] = GREATER_ICOST;
}
}
/* Decide how many coding tables to use */
- if (nMTF <= 0) {
- panic();
- }
+ // assert (this.nMTF > 0) : this.nMTF;
+ final int nGroups =
+ (this.nMTF < 200) ? 2
+ : (this.nMTF < 600) ? 3
+ : (this.nMTF < 1200) ? 4
+ : (this.nMTF < 2400) ? 5
+ : 6;
- if (nMTF < 200) {
- nGroups = 2;
- } else if (nMTF < 600) {
- nGroups = 3;
- } else if (nMTF < 1200) {
- nGroups = 4;
- } else if (nMTF < 2400) {
- nGroups = 5;
- } else {
- nGroups = 6;
- }
+ /* Generate an initial set of coding tables */
+ sendMTFValues0(nGroups, alphaSize);
- /* Generate an initial set of coding tables */ {
- int nPart, remF, tFreq, aFreq;
-
- nPart = nGroups;
- remF = nMTF;
- gs = 0;
- while (nPart > 0) {
- tFreq = remF / nPart;
- ge = gs - 1;
- aFreq = 0;
- while (aFreq < tFreq && ge < alphaSize - 1) {
- ge++;
- aFreq += mtfFreq[ge];
- }
+ /*
+ Iterate up to N_ITERS times to improve the tables.
+ */
+ final int nSelectors = sendMTFValues1(nGroups, alphaSize);
- if (ge > gs && nPart != nGroups && nPart != 1
- && ((nGroups - nPart) % 2 == 1)) {
- aFreq -= mtfFreq[ge];
- ge--;
- }
+ /* Compute MTF values for the selectors. */
+ sendMTFValues2(nGroups, nSelectors);
- for (v = 0; v < alphaSize; v++) {
- if (v >= gs && v <= ge) {
- len[nPart - 1][v] = (char) LESSER_ICOST;
- } else {
- len[nPart - 1][v] = (char) GREATER_ICOST;
- }
- }
+ /* Assign actual codes for the tables. */
+ sendMTFValues3(nGroups, alphaSize);
- nPart--;
- gs = ge + 1;
- remF -= aFreq;
+ /* Transmit the mapping table. */
+ sendMTFValues4();
+
+ /* Now the selectors. */
+ sendMTFValues5(nGroups, nSelectors);
+
+ /* Now the coding tables. */
+ sendMTFValues6(nGroups, alphaSize);
+
+ /* And finally, the block data proper */
+ sendMTFValues7(nSelectors);
+ }
+
+ private void sendMTFValues0(final int nGroups, final int alphaSize) {
+ final byte[][] len = this.data.sendMTFValues_len;
+ final int[] mtfFreq = this.data.mtfFreq;
+
+ int remF = this.nMTF;
+ int gs = 0;
+
+ for (int nPart = nGroups; nPart > 0; nPart--) {
+ final int tFreq = remF / nPart;
+ int ge = gs - 1;
+ int aFreq = 0;
+
+ for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
+ aFreq += mtfFreq[++ge];
}
- }
- int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE];
- int[] fave = new int[N_GROUPS];
- short[] cost = new short[N_GROUPS];
- /*
- Iterate up to N_ITERS times to improve the tables.
- */
- for (iter = 0; iter < N_ITERS; iter++) {
- for (t = 0; t < nGroups; t++) {
- fave[t] = 0;
+ if ((ge > gs)
+ && (nPart != nGroups)
+ && (nPart != 1)
+ && (((nGroups - nPart) & 1) != 0)) {
+ aFreq -= mtfFreq[ge--];
}
- for (t = 0; t < nGroups; t++) {
- for (v = 0; v < alphaSize; v++) {
- rfreq[t][v] = 0;
+ final byte[] len_np = len[nPart - 1];
+ for (int v = alphaSize; --v >= 0;) {
+ if ((v >= gs) && (v <= ge)) {
+ len_np[v] = LESSER_ICOST;
+ } else {
+ len_np[v] = GREATER_ICOST;
+ }
+ }
+
+ gs = ge + 1;
+ remF -= aFreq;
+ }
+ }
+
+ private int sendMTFValues1(final int nGroups, final int alphaSize) {
+ final Data data = this.data;
+ final int[][] rfreq = data.sendMTFValues_rfreq;
+ final int[] fave = data.sendMTFValues_fave;
+ final short[] cost = data.sendMTFValues_cost;
+ final char[] sfmap = data.sfmap;
+ final byte[] selector = data.selector;
+ final byte[][] len = data.sendMTFValues_len;
+ final byte[] len_0 = len[0];
+ final byte[] len_1 = len[1];
+ final byte[] len_2 = len[2];
+ final byte[] len_3 = len[3];
+ final byte[] len_4 = len[4];
+ final byte[] len_5 = len[5];
+ final int nMTF = this.nMTF;
+
+ int nSelectors = 0;
+
+ for (int iter = 0; iter < N_ITERS; iter++) {
+ for (int t = nGroups; --t >= 0;) {
+ fave[t] = 0;
+ int[] rfreqt = rfreq[t];
+ for (int i = alphaSize; --i >= 0;) {
+ rfreqt[i] = 0;
}
}
nSelectors = 0;
- totc = 0;
- gs = 0;
- while (true) {
+ for (int gs = 0; gs < this.nMTF;) {
/* Set group start & end marks. */
- if (gs >= nMTF) {
- break;
- }
- ge = gs + G_SIZE - 1;
- if (ge >= nMTF) {
- ge = nMTF - 1;
- }
/*
Calculate the cost of this group as coded
by each of the coding tables.
*/
- for (t = 0; t < nGroups; t++) {
- cost[t] = 0;
- }
- if (nGroups == 6) {
- short cost0, cost1, cost2, cost3, cost4, cost5;
- cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
- for (i = gs; i <= ge; i++) {
- short icv = szptr[i];
- cost0 += len[0][icv];
- cost1 += len[1][icv];
- cost2 += len[2][icv];
- cost3 += len[3][icv];
- cost4 += len[4][icv];
- cost5 += len[5][icv];
+ final int ge = Math.min(gs + G_SIZE - 1, nMTF - 1);
+
+ if (nGroups == N_GROUPS) {
+ // unrolled version of the else-block
+
+ short cost0 = 0;
+ short cost1 = 0;
+ short cost2 = 0;
+ short cost3 = 0;
+ short cost4 = 0;
+ short cost5 = 0;
+
+ for (int i = gs; i <= ge; i++) {
+ final int icv = sfmap[i];
+ cost0 += len_0[icv] & 0xff;
+ cost1 += len_1[icv] & 0xff;
+ cost2 += len_2[icv] & 0xff;
+ cost3 += len_3[icv] & 0xff;
+ cost4 += len_4[icv] & 0xff;
+ cost5 += len_5[icv] & 0xff;
}
+
cost[0] = cost0;
cost[1] = cost1;
cost[2] = cost2;
cost[3] = cost3;
cost[4] = cost4;
cost[5] = cost5;
+
} else {
- for (i = gs; i <= ge; i++) {
- short icv = szptr[i];
- for (t = 0; t < nGroups; t++) {
- cost[t] += len[t][icv];
+ for (int t = nGroups; --t >= 0;) {
+ cost[t] = 0;
+ }
+
+ for (int i = gs; i <= ge; i++) {
+ final int icv = sfmap[i];
+ for (int t = nGroups; --t >= 0;) {
+ cost[t] += len[t][icv] & 0xff;
}
}
}
@@ -695,24 +1106,25 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
Find the coding table which is best for this group,
and record its identity in the selector table.
*/
- bc = 999999999;
- bt = -1;
- for (t = 0; t < nGroups; t++) {
- if (cost[t] < bc) {
- bc = cost[t];
+ int bt = -1;
+ for (int t = nGroups, bc = 999999999; --t >= 0;) {
+ final int cost_t = cost[t];
+ if (cost_t < bc) {
+ bc = cost_t;
bt = t;
}
- };
- totc += bc;
+ }
+
fave[bt]++;
- selector[nSelectors] = (char) bt;
+ selector[nSelectors] = (byte) bt;
nSelectors++;
/*
Increment the symbol frequencies for the selected table.
*/
- for (i = gs; i <= ge; i++) {
- rfreq[bt][szptr[i]]++;
+ final int[] rfreq_bt = rfreq[bt];
+ for (int i = gs; i <= ge; i++) {
+ rfreq_bt[sfmap[i]]++;
}
gs = ge + 1;
@@ -721,850 +1133,861 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
/*
Recompute the tables based on the accumulated frequencies.
*/
- for (t = 0; t < nGroups; t++) {
- hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
+ for (int t = 0; t < nGroups; t++) {
+ hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
}
}
- rfreq = null;
- fave = null;
- cost = null;
+ return nSelectors;
+ }
- if (!(nGroups < 8)) {
- panic();
- }
- if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) {
- panic();
+ private void sendMTFValues2(final int nGroups, final int nSelectors) {
+ // assert (nGroups < 8) : nGroups;
+
+ final Data data = this.data;
+ byte[] pos = data.sendMTFValues2_pos;
+
+ for (int i = nGroups; --i >= 0;) {
+ pos[i] = (byte) i;
}
+ for (int i = 0; i < nSelectors; i++) {
+ final byte ll_i = data.selector[i];
+ byte tmp = pos[0];
+ int j = 0;
- /* Compute MTF values for the selectors. */
- {
- char[] pos = new char[N_GROUPS];
- char ll_i, tmp2, tmp;
- for (i = 0; i < nGroups; i++) {
- pos[i] = (char) i;
- }
- for (i = 0; i < nSelectors; i++) {
- ll_i = selector[i];
- j = 0;
+ while (ll_i != tmp) {
+ j++;
+ byte tmp2 = tmp;
tmp = pos[j];
- while (ll_i != tmp) {
- j++;
- tmp2 = tmp;
- tmp = pos[j];
- pos[j] = tmp2;
- }
- pos[0] = tmp;
- selectorMtf[i] = (char) j;
+ pos[j] = tmp2;
}
- }
- int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE];
+ pos[0] = tmp;
+ data.selectorMtf[i] = (byte) j;
+ }
+ }
- /* Assign actual codes for the tables. */
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (len[t][i] > maxLen) {
- maxLen = len[t][i];
+ private void sendMTFValues3(final int nGroups, final int alphaSize) {
+ int[][] code = this.data.sendMTFValues_code;
+ byte[][] len = this.data.sendMTFValues_len;
+
+ for (int t = 0; t < nGroups; t++) {
+ int minLen = 32;
+ int maxLen = 0;
+ final byte[] len_t = len[t];
+ for (int i = alphaSize; --i >= 0;) {
+ final int l = len_t[i] & 0xff;
+ if (l > maxLen) {
+ maxLen = l;
}
- if (len[t][i] < minLen) {
- minLen = len[t][i];
+ if (l < minLen) {
+ minLen = l;
}
}
- if (maxLen > 20) {
- panic();
- }
- if (minLen < 1) {
- panic();
- }
+
+ // assert (maxLen <= 20) : maxLen;
+ // assert (minLen >= 1) : minLen;
+
hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
}
+ }
- /* Transmit the mapping table. */
- {
- boolean[] inUse16 = new boolean[16];
- for (i = 0; i < 16; i++) {
- inUse16[i] = false;
- for (j = 0; j < 16; j++) {
- if (inUse[i * 16 + j]) {
- inUse16[i] = true;
+ private void sendMTFValues4() throws IOException {
+ final boolean[] inUse = this.data.inUse;
+ final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
+
+ for (int i = 16; --i >= 0;) {
+ inUse16[i] = false;
+ final int i16 = i * 16;
+ for (int j = 16; --j >= 0;) {
+ if (inUse[i16 + j]) {
+ inUse16[i] = true;
+ }
+ }
+ }
+
+ for (int i = 0; i < 16; i++) {
+ bsW(1, inUse16[i] ? 1 : 0);
+ }
+
+ final OutputStream out = this.out;
+ int bsLive = this.bsLive;
+ int bsBuff = this.bsBuff;
+
+ for (int i = 0; i < 16; i++) {
+ if (inUse16[i]) {
+ final int i16 = i * 16;
+ for (int j = 0; j < 16; j++) {
+ // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24); // write 8-bit
+ bsBuff <<= 8;
+ bsLive -= 8;
}
+ if (inUse[i16 + j]) {
+ bsBuff |= 1 << (32 - bsLive - 1);
+ }
+ bsLive++;
}
}
+ }
- nBytes = bytesOut;
- for (i = 0; i < 16; i++) {
- if (inUse16[i]) {
- bsW(1, 1);
- } else {
- bsW(1, 0);
- }
- }
+ this.bsBuff = bsBuff;
+ this.bsLive = bsLive;
+ }
- for (i = 0; i < 16; i++) {
- if (inUse16[i]) {
- for (j = 0; j < 16; j++) {
- if (inUse[i * 16 + j]) {
- bsW(1, 1);
- } else {
- bsW(1, 0);
- }
- }
+ private void sendMTFValues5(final int nGroups, final int nSelectors)
+ throws IOException {
+ bsW(3, nGroups);
+ bsW(15, nSelectors);
+
+ final OutputStream out = this.out;
+ final byte[] selectorMtf = this.data.selectorMtf;
+
+ int bsLive = this.bsLive;
+ int bsBuff = this.bsBuff;
+
+ for (int i = 0; i < nSelectors; i++) {
+ for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
+ // inlined: bsW(1, 1);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24);
+ bsBuff <<= 8;
+ bsLive -= 8;
}
+ bsBuff |= 1 << (32 - bsLive - 1);
+ bsLive++;
}
+ // inlined: bsW(1, 0);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24);
+ bsBuff <<= 8;
+ bsLive -= 8;
+ }
+ //bsBuff |= 0 << (32 - bsLive - 1);
+ bsLive++;
}
- /* Now the selectors. */
- nBytes = bytesOut;
- bsW (3, nGroups);
- bsW (15, nSelectors);
- for (i = 0; i < nSelectors; i++) {
- for (j = 0; j < selectorMtf[i]; j++) {
- bsW(1, 1);
+ this.bsBuff = bsBuff;
+ this.bsLive = bsLive;
+ }
+
+ private void sendMTFValues6(final int nGroups, final int alphaSize)
+ throws IOException {
+ final byte[][] len = this.data.sendMTFValues_len;
+ final OutputStream out = this.out;
+
+ int bsLive = this.bsLive;
+ int bsBuff = this.bsBuff;
+
+ for (int t = 0; t < nGroups; t++) {
+ byte[] len_t = len[t];
+ int curr = len_t[0] & 0xff;
+
+ // inlined: bsW(5, curr);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24); // write 8-bit
+ bsBuff <<= 8;
+ bsLive -= 8;
}
- bsW(1, 0);
- }
+ bsBuff |= curr << (32 - bsLive - 5);
+ bsLive += 5;
+
+ for (int i = 0; i < alphaSize; i++) {
+ int lti = len_t[i] & 0xff;
+ while (curr < lti) {
+ // inlined: bsW(2, 2);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24); // write 8-bit
+ bsBuff <<= 8;
+ bsLive -= 8;
+ }
+ bsBuff |= 2 << (32 - bsLive - 2);
+ bsLive += 2;
- /* Now the coding tables. */
- nBytes = bytesOut;
-
- for (t = 0; t < nGroups; t++) {
- int curr = len[t][0];
- bsW(5, curr);
- for (i = 0; i < alphaSize; i++) {
- while (curr < len[t][i]) {
- bsW(2, 2);
curr++; /* 10 */
}
- while (curr > len[t][i]) {
- bsW(2, 3);
+
+ while (curr > lti) {
+ // inlined: bsW(2, 3);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24); // write 8-bit
+ bsBuff <<= 8;
+ bsLive -= 8;
+ }
+ bsBuff |= 3 << (32 - bsLive - 2);
+ bsLive += 2;
+
curr--; /* 11 */
}
- bsW (1, 0);
+
+ // inlined: bsW(1, 0);
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24); // write 8-bit
+ bsBuff <<= 8;
+ bsLive -= 8;
+ }
+ // bsBuff |= 0 << (32 - bsLive - 1);
+ bsLive++;
}
}
- /* And finally, the block data proper */
- nBytes = bytesOut;
- selCtr = 0;
- gs = 0;
- while (true) {
- if (gs >= nMTF) {
- break;
- }
- ge = gs + G_SIZE - 1;
- if (ge >= nMTF) {
- ge = nMTF - 1;
- }
- for (i = gs; i <= ge; i++) {
- bsW(len[selector[selCtr]][szptr[i]],
- code[selector[selCtr]][szptr[i]]);
+ this.bsBuff = bsBuff;
+ this.bsLive = bsLive;
+ }
+
+ private void sendMTFValues7(final int nSelectors) throws IOException {
+ final Data data = this.data;
+ final byte[][] len = data.sendMTFValues_len;
+ final int[][] code = data.sendMTFValues_code;
+ final OutputStream out = this.out;
+ final byte[] selector = data.selector;
+ final char[] sfmap = data.sfmap;
+ final int nMTF = this.nMTF;
+
+ int selCtr = 0;
+
+ int bsLive = this.bsLive;
+ int bsBuff = this.bsBuff;
+
+ for (int gs = 0; gs < nMTF;) {
+ final int ge = Math.min(gs + G_SIZE - 1, nMTF - 1);
+ final int selector_selCtr = selector[selCtr] & 0xff;
+ final int[] code_selCtr = code[selector_selCtr];
+ final byte[] len_selCtr = len[selector_selCtr];
+
+ while (gs <= ge) {
+ final int sfmap_i = sfmap[gs];
+
+ //
+ // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
+ // code_selCtr[sfmap_i]);
+ //
+ while (bsLive >= 8) {
+ out.write(bsBuff >> 24);
+ bsBuff <<= 8;
+ bsLive -= 8;
+ }
+ final int n = len_selCtr[sfmap_i] & 0xFF;
+ bsBuff |= code_selCtr[sfmap_i] << (32 - bsLive - n);
+ bsLive += n;
+
+ gs++;
}
gs = ge + 1;
selCtr++;
}
- if (!(selCtr == nSelectors)) {
- panic();
- }
+
+ this.bsBuff = bsBuff;
+ this.bsLive = bsLive;
}
- private void moveToFrontCodeAndSend () throws IOException {
- bsPutIntVS(24, origPtr);
+ private void moveToFrontCodeAndSend() throws IOException {
+ bsW(24, this.origPtr);
generateMTFValues();
sendMTFValues();
}
- private OutputStream bsStream;
-
- private void simpleSort(int lo, int hi, int d) {
- int i, j, h, bigN, hp;
- int v;
-
- bigN = hi - lo + 1;
+ /**
+ * This is the most hammered method of this class.
+ *
This is the version using unrolled loops. Normally I never + * use such ones in Java code. The unrolling has shown a + * noticeable performance improvement on JRE 1.4.2 (Linux i586 / + * HotSpot Client). Of course it depends on the JIT compiler of + * the vm.
+ */ + private boolean mainSimpleSort(final Data data, final int lo, final int hi, + final int d) { + final int bigN = hi - lo + 1; if (bigN < 2) { - return; + return this.firstAttempt && (this.workDone > this.workLimit); } - hp = 0; - while (incs[hp] < bigN) { + int hp = 0; + while (INCS[hp] < bigN) { hp++; } - hp--; - for (; hp >= 0; hp--) { - h = incs[hp]; + final int[] fmap = data.fmap; + final char[] quadrant = data.quadrant; + final byte[] block = data.block; + final int last = this.last; + final int lastPlus1 = last + 1; + final boolean firstAttempt = this.firstAttempt; + final int workLimit = this.workLimit; + int workDone = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. + + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, last); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { + break HAMMER; + } + } else { + onceRunned = true; + } - i = lo + h; - while (true) { - /* copy 1 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] + == 
block[(i2 += 6)]) { + int x = last; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] + == block[i2 + 1]) { + if (quadrant[i1] + == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { + i2 -= lastPlus1; + } + workDone++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + else { + if ((block[i1] & 0xff) + > (block[i2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } + } else if ((block[i1 + 5] & 0xff) + > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) + > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) + > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) + > (block[i2 + 2] & 0xff)) { + continue HAMMER; 
+ } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) + > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } - /* copy 2 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; + } // HAMMER + // end inline mainGTU - /* copy 3 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } + fmap[j] = v; } - zptr[j] = v; - i++; - if (workDone > workLimit && firstAttempt) { - return; + if (firstAttempt && (i <= hi) && (workDone > workLimit)) { + break HP; } } } - } - private void vswap(int p1, int p2, int n) { - int temp = 0; - while (n > 0) { - temp = zptr[p1]; - zptr[p1] = zptr[p2]; - zptr[p2] = temp; - p1++; - p2++; - n--; - } + this.workDone = workDone; + return firstAttempt && (workDone > workLimit); } - private char med3(char a, char b, char c) { - char t; - if (a > b) { - t = a; - a = b; - b = t; - } - if (b > c) { - t = b; - b = c; - c = t; + private static void vswap(int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; } - if (a > b) { - b = a; - } - return b; } - private static class StackElem { - int ll; - int hh; - int dd; + private static byte med3(byte a, byte b, byte c) { + return (a < b) + ? (b < c ? b : a < c ? c : a) + : (b > c ? b : a > c ? 
c : a); } - private void qSort3(int loSt, int hiSt, int dSt) { - int unLo, unHi, ltLo, gtHi, med, n, m; - int sp, lo, hi, d; - StackElem[] stack = new StackElem[QSORT_STACK_SIZE]; - for (int count = 0; count < QSORT_STACK_SIZE; count++) { - stack[count] = new StackElem(); - } - - sp = 0; + private void blockSort() { + this.workLimit = WORK_FACTOR * this.last; + this.workDone = 0; + this.blockRandomised = false; + this.firstAttempt = true; + mainSort(); - stack[sp].ll = loSt; - stack[sp].hh = hiSt; - stack[sp].dd = dSt; - sp++; + if (this.firstAttempt && (this.workDone > this.workLimit)) { + randomiseBlock(); + this.workLimit = this.workDone = 0; + this.firstAttempt = false; + mainSort(); + } - while (sp > 0) { - if (sp >= QSORT_STACK_SIZE) { - panic(); + int[] fmap = this.data.fmap; + this.origPtr = -1; + for (int i = 0, last = this.last; i <= last; i++) { + if (fmap[i] == 0) { + this.origPtr = i; + break; } + } - sp--; - lo = stack[sp].ll; - hi = stack[sp].hh; - d = stack[sp].dd; + // assert (this.origPtr != -1) : this.origPtr; + } - if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { - simpleSort(lo, hi, d); - if (workDone > workLimit && firstAttempt) { + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final Data data, final int loSt, final int hiSt, + final int dSt) { + final int[] stack_ll = data.stack_ll; + final int[] stack_hh = data.stack_hh; + final int[] stack_dd = data.stack_dd; + final int[] fmap = data.fmap; + final byte[] block = data.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(data, lo, hi, d)) { return; } - continue; - } - - med = med3(block[zptr[lo] + d + 1], - block[zptr[hi ] + d + 1], - block[zptr[(lo + hi) >> 1] + d + 1]); + } else { + final int d1 = d + 1; + final int med 
= med3(block[fmap[lo] + d1], + block[fmap[hi ] + d1], + block[fmap[(lo + hi) >> 1] + d1]) + & 0xff; - unLo = ltLo = lo; - unHi = gtHi = hi; + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; - while (true) { while (true) { - if (unLo > unHi) { - break; - } - n = ((int) block[zptr[unLo] + d + 1]) - med; - if (n == 0) { - int temp = 0; - temp = zptr[unLo]; - zptr[unLo] = zptr[ltLo]; - zptr[ltLo] = temp; - ltLo++; - unLo++; - continue; - }; - if (n > 0) { - break; + while (unLo <= unHi) { + final int n = + ((int) block[fmap[unLo] + d1] & 0xff) - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } } - unLo++; - } - while (true) { - if (unLo > unHi) { - break; + + while (unLo <= unHi) { + final int n = + ((int) block[fmap[unHi] + d1] & 0xff) - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } } - n = ((int) block[zptr[unHi] + d + 1]) - med; - if (n == 0) { - int temp = 0; - temp = zptr[unHi]; - zptr[unHi] = zptr[gtHi]; - zptr[gtHi] = temp; - gtHi--; - unHi--; - continue; - }; - if (n < 0) { + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { break; } - unHi--; - } - if (unLo > unHi) { - break; } - int temp = 0; - temp = zptr[unLo]; - zptr[unLo] = zptr[unHi]; - zptr[unHi] = temp; - unLo++; - unHi--; - } - if (gtHi < ltLo) { - stack[sp].ll = lo; - stack[sp].hh = hi; - stack[sp].dd = d + 1; - sp++; - continue; + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) + ? (ltLo - lo) : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) + ? 
(hi - gtHi) : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } } - - n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); - vswap(lo, unLo - n, n); - m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi); - vswap(unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack[sp].ll = lo; - stack[sp].hh = n; - stack[sp].dd = d; - sp++; - - stack[sp].ll = n + 1; - stack[sp].hh = m - 1; - stack[sp].dd = d + 1; - sp++; - - stack[sp].ll = m; - stack[sp].hh = hi; - stack[sp].dd = d; - sp++; } } private void mainSort() { - int i, j, ss, sb; - int[] runningOrder = new int[256]; - int[] copy = new int[256]; - boolean[] bigDone = new boolean[256]; - int c1, c2; - int numQSorted; + final Data data = this.data; + final int[] runningOrder = data.mainSort_runningOrder; + final int[] copy = data.mainSort_copy; + final boolean[] bigDone = data.mainSort_bigDone; + final int[] ftab = data.ftab; + final byte[] block = data.block; + final int[] fmap = data.fmap; + final char[] quadrant = data.quadrant; + final int last = this.last; + final int workLimit = this.workLimit; + final boolean firstAttempt = this.firstAttempt; + + // Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } /* In the various block-sized structures, live data runs from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area for block. 
*/ - - // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); - for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { block[last + i + 2] = block[(i % (last + 1)) + 1]; } - for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) { + for (int i = last + NUM_OVERSHOOT_BYTES; --i >= 0;) { quadrant[i] = 0; } + block[0] = block[last + 1]; - block[0] = (char) (block[last + 1]); - - if (last < 4000) { - /* - Use simpleSort(), since the full sorting mechanism - has quite a large constant overhead. - */ - for (i = 0; i <= last; i++) { - zptr[i] = i; - } - firstAttempt = false; - workDone = workLimit = 0; - simpleSort(0, last, 0); - } else { - numQSorted = 0; - for (i = 0; i <= 255; i++) { - bigDone[i] = false; - } - - for (i = 0; i <= 65536; i++) { - ftab[i] = 0; - } + // Complete the initial radix sort: - c1 = block[0]; - for (i = 0; i <= last; i++) { - c2 = block[i + 1]; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } + int c1 = block[0] & 0xff; + for (int i = 0; i <= last; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } - for (i = 1; i <= 65536; i++) { - ftab[i] += ftab[i - 1]; - } + for (int i = 1; i <= 65536; i++) + ftab[i] += ftab[i - 1]; - c1 = block[1]; - for (i = 0; i < last; i++) { - c2 = block[i + 2]; - j = (c1 << 8) + c2; - c1 = c2; - ftab[j]--; - zptr[ftab[j]] = i; - } + c1 = block[1] & 0xff; + for (int i = 0; i < last; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } - j = ((block[last + 1]) << 8) + (block[1]); - ftab[j]--; - zptr[ftab[j]] = last; + fmap[--ftab[((block[last + 1] & 0xff) << 8) + (block[1] & 0xff)]] + = last; - /* + /* Now ftab contains the first loc of every small bucket. Calculate the running order, from smallest to largest big bucket. 
- */ - - for (i = 0; i <= 255; i++) { - runningOrder[i] = i; - } + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } - { - int vv; - int h = 1; - do { - h = 3 * h + 1; - } - while (h <= 256); - do { - h = h / 3; - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while ((ftab[((runningOrder[j - h]) + 1) << 8] - - ftab[(runningOrder[j - h]) << 8]) - > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) { - runningOrder[j] = runningOrder[j - h]; - j = j - h; - if (j <= (h - 1)) { - break; - } - } - runningOrder[j] = vv; + for (int h = 364; h != 1;) { + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; + (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; + ro = runningOrder[j - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; } - } while (h != 1); + } + runningOrder[j] = vv; } + } - /* + /* The main sorting loop. - */ - for (i = 0; i <= 255; i++) { - - /* + */ + for (int i = 0; i <= 255; i++) { + /* Process big buckets, starting with the least full. - */ - ss = runningOrder[i]; + */ + final int ss = runningOrder[i]; - /* + // Step 1: + /* Complete the big bucket [ss] by quicksorting any unsorted small buckets [ss, j]. Hopefully previous pointer-scanning phases have already completed many of the small buckets [ss, j], so we don't have to sort them at all. 
- */ - for (j = 0; j <= 255; j++) { - sb = (ss << 8) + j; - if (!((ftab[sb] & SETMASK) == SETMASK)) { - int lo = ftab[sb] & CLEARMASK; - int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - qSort3(lo, hi, 2); - numQSorted += (hi - lo + 1); - if (workDone > workLimit && firstAttempt) { - return; - } + */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(data, lo, hi, 2); + if (firstAttempt && (this.workDone > workLimit)) { + return; } - ftab[sb] |= SETMASK; } + ftab[sb] = ftab_sb | SETMASK; + } + } + + // Step 2: + // Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. + + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (int j = ftab[ss << 8] & CLEARMASK, + hj = (ftab[(ss + 1) << 8] & CLEARMASK); + j < hj; + j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? last : (fmap_j - 1); + copy[c1]++; } + } - /* + for (int j = 256; --j >= 0;) + ftab[(j << 8) + ss] |= SETMASK; + + // Step 3: + /* The ss big bucket is now done. Record this fact, and update the quadrant descriptors. Remember to update quadrants in the overshoot area too, if necessary. The "if (i < 255)" test merely skips this updating for the last bucket processed, since updating for the last bucket is pointless. 
- */ - bigDone[ss] = true; - - if (i < 255) { - int bbStart = ftab[ss << 8] & CLEARMASK; - int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - int shifts = 0; - - while ((bbSize >> shifts) > 65534) { - shifts++; - } - - for (j = 0; j < bbSize; j++) { - int a2update = zptr[bbStart + j]; - int qVal = (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) { - quadrant[a2update + last + 1] = qVal; - } - } + */ + bigDone[ss] = true; - if (!(((bbSize - 1) >> shifts) <= 65535)) { - panic(); - } - } + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = + (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; - /* - Now scan this big bucket so as to synthesise the - sorted order for small buckets [t, ss] for all t != ss. - */ - for (j = 0; j <= 255; j++) { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + while ((bbSize >> shifts) > 65534) { + shifts++; } - for (j = ftab[ss << 8] & CLEARMASK; - j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) { - c1 = block[zptr[j]]; - if (!bigDone[c1]) { - zptr[copy[c1]] = zptr[j] == 0 ? 
last : zptr[j] - 1; - copy[c1]++; + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + last + 1] = qVal; } } - - for (j = 0; j <= 255; j++) { - ftab[(j << 8) + ss] |= SETMASK; - } } + } } private void randomiseBlock() { - int i; - int rNToGo = 0; - int rTPos = 0; - for (i = 0; i < 256; i++) { + final boolean[] inUse = this.data.inUse; + final byte[] block = this.data.block; + final int last = this.last; + + for (int i = 256; --i >= 0;) inUse[i] = false; - } - for (i = 0; i <= last; i++) { + int rNToGo = 0; + int rTPos = 0; + for (int i = 0, j = 1; i <= last; i = j, j++) { if (rNToGo == 0) { - rNToGo = (char) rNums[rTPos]; - rTPos++; - if (rTPos == 512) { + rNToGo = (char) BZip2Constants.rNums[rTPos]; + if (++rTPos == 512) { rTPos = 0; } } - rNToGo--; - block[i + 1] ^= ((rNToGo == 1) ? 1 : 0); - // handle 16 bit signed numbers - block[i + 1] &= 0xFF; - inUse[block[i + 1]] = true; - } - } - - private void doReversibleTransformation() { - int i; - - workLimit = workFactor * last; - workDone = 0; - blockRandomised = false; - firstAttempt = true; - - mainSort(); + rNToGo--; + block[j] ^= ((rNToGo == 1) ? 
1 : 0); - if (workDone > workLimit && firstAttempt) { - randomiseBlock(); - workLimit = workDone = 0; - blockRandomised = true; - firstAttempt = false; - mainSort(); + // handle 16 bit signed numbers + inUse[block[j] & 0xff] = true; } - origPtr = -1; - for (i = 0; i <= last; i++) { - if (zptr[i] == 0) { - origPtr = i; - break; - } - }; - - if (origPtr == -1) { - panic(); - } + this.blockRandomised = true; } - private boolean fullGtU(int i1, int i2) { - int k; - char c1, c2; - int s1, s2; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - k = last + 1; - - do { - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); + private void generateMTFValues() { + final int last = this.last; + final Data data = this.data; + final boolean[] inUse = 
data.inUse; + final byte[] block = data.block; + final int[] fmap = data.fmap; + final char[] sfmap = data.sfmap; + final int[] mtfFreq = data.mtfFreq; + final byte[] unseqToSeq = data.unseqToSeq; + final byte[] yy = data.generateMTFValues_yy; + + // make maps + int nInUse = 0; + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (byte) nInUse; + nInUse++; } - i1++; - i2++; - - if (i1 > last) { - i1 -= last; - i1--; - }; - if (i2 > last) { - i2 -= last; - i2--; - }; - - k -= 4; - workDone++; - } while (k >= 0); - - return false; - } - - /* - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. - */ - private int[] incs = {1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484}; - - private void allocateCompressStructures () { - int n = baseBlockSize * blockSize100k; - block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)]; - quadrant = new int[(n + NUM_OVERSHOOT_BYTES)]; - zptr = new int[n]; - ftab = new int[65537]; - - if (block == null || quadrant == null || zptr == null - || ftab == null) { - //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; - //compressOutOfMemory ( totalDraw, n ); } + this.nInUse = nInUse; - /* - The back end needs a place to store the MTF values - whilst it calculates the coding tables. We could - put them in the zptr array. However, these values - will fit in a short, so we overlay szptr at the - start of zptr, in the hope of reducing the number - of cache misses induced by the multiple traversals - of the MTF values when calculating coding tables. - Seems to improve compression speed by about 1%. 
- */ - // szptr = zptr; - - - szptr = new short[2 * n]; - } + final int eob = nInUse + 1; - private void generateMTFValues() { - char[] yy = new char[256]; - int i, j; - char tmp; - char tmp2; - int zPend; - int wr; - int EOB; - - makeMaps(); - EOB = nInUse + 1; - - for (i = 0; i <= EOB; i++) { + for (int i = eob; i >= 0; i--) { mtfFreq[i] = 0; } - wr = 0; - zPend = 0; - for (i = 0; i < nInUse; i++) { - yy[i] = (char) i; + for (int i = nInUse; --i >= 0;) { + yy[i] = (byte) i; } + int wr = 0; + int zPend = 0; - for (i = 0; i <= last; i++) { - char ll_i; + for (int i = 0; i <= last; i++) { + final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; + byte tmp = yy[0]; + int j = 0; - ll_i = unseqToSeq[block[zptr[i]]]; - - j = 0; - tmp = yy[j]; while (ll_i != tmp) { j++; - tmp2 = tmp; + byte tmp2 = tmp; tmp = yy[j]; yy[j] = tmp2; - }; + } yy[0] = tmp; if (j == 0) { @@ -1573,26 +1996,25 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { if (zPend > 0) { zPend--; while (true) { - switch (zPend % 2) { - case 0: - szptr[wr] = (short) RUNA; + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; - break; - case 1: - szptr[wr] = (short) RUNB; + } else { + sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; - break; - }; - if (zPend < 2) { + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { break; } - zPend = (zPend - 2) / 2; - }; + } zPend = 0; } - szptr[wr] = (short) (j + 1); + sfmap[wr] = (char) (j + 1); wr++; mtfFreq[j + 1]++; } @@ -1601,31 +2023,86 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { if (zPend > 0) { zPend--; while (true) { - switch (zPend % 2) { - case 0: - szptr[wr] = (short) RUNA; + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; - break; - case 1: - szptr[wr] = (short) RUNB; + } else { + sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; - break; } - if (zPend < 2) { + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { break; } - zPend = (zPend - 2) / 2; } } - 
szptr[wr] = (short) EOB; - wr++; - mtfFreq[EOB]++; - - nMTF = wr; + sfmap[wr] = (char) eob; + mtfFreq[eob]++; + this.nMTF = wr + 1; } -} + private static final class Data extends Object { + + // with blockSize 900k + final boolean[] inUse = new boolean[256]; // 256 byte + final byte[] unseqToSeq = new byte[256]; // 256 byte + final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + final byte[] generateMTFValues_yy = new byte[256]; // 256 byte + final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 byte + final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte + final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte + final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte + final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte + + final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + final int[] mainSort_runningOrder = new int[256]; // 1024 byte + final int[] mainSort_copy = new int[256]; // 1024 byte + final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + + final int[] ftab = new int[65537]; // 262148 byte + // ------------ + // 333408 byte + + final byte[] block; // 900021 byte + final int[] fmap; // 3600000 byte + final char[] sfmap; // 3600000 byte + // ------------ + // 8433529 byte + // ============ + + /** + * Array instance identical to 
sfmap, both are used only temporarily and independently, + * so we do not need to allocate additional memory. + */ + final char[] quadrant; + + Data(int blockSize100k) { + super(); + + final int n = blockSize100k * BZip2Constants.baseBlockSize; + this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; + this.fmap = new int[n]; + this.sfmap = new char[2 * n]; + this.quadrant = this.sfmap; + } + + } +} diff --git a/src/main/org/apache/tools/bzip2/CRC.java b/src/main/org/apache/tools/bzip2/CRC.java index 4fbcfc9ad..2d947f695 100644 --- a/src/main/org/apache/tools/bzip2/CRC.java +++ b/src/main/org/apache/tools/bzip2/CRC.java @@ -1,5 +1,5 @@ /* - * Copyright 2001-2002,2004 The Apache Software Foundation + * Copyright 2001-2002,2004-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,8 @@ package org.apache.tools.bzip2; * of the data. * */ -class CRC { - public static int crc32Table[] = { +final class CRC { + static final int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, @@ -96,7 +96,7 @@ class CRC { 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; - public CRC() { + CRC() { initialiseCRC(); } @@ -124,6 +124,17 @@ class CRC { globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; } + void updateCRC(int inCh, int repeat) { + int globalCrc = this.globalCrc; + while (repeat-- > 0) { + int temp = (globalCrc >> 24) ^ inCh; + globalCrc = (globalCrc << 8) ^ crc32Table[(temp >= 0) + ? 
temp + : (temp + 256)]; + } + this.globalCrc = globalCrc; + } + int globalCrc; } diff --git a/src/testcases/org/apache/tools/ant/taskdefs/BZip2Test.java b/src/testcases/org/apache/tools/ant/taskdefs/BZip2Test.java index 224c3b66c..51bc76efa 100644 --- a/src/testcases/org/apache/tools/ant/taskdefs/BZip2Test.java +++ b/src/testcases/org/apache/tools/ant/taskdefs/BZip2Test.java @@ -19,7 +19,12 @@ package org.apache.tools.ant.taskdefs; import org.apache.tools.ant.BuildFileTest; import org.apache.tools.ant.util.FileUtils; +import org.apache.tools.bzip2.CBZip2InputStream; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; import java.io.IOException; /** @@ -44,9 +49,50 @@ public class BZip2Test extends BuildFileTest { public void testRealTest() throws IOException { executeTarget("realTest"); - assertTrue("File content mismatch", - FILE_UTILS.contentEquals(project.resolveFile("expected/asf-logo-huge.tar.bz2"), - project.resolveFile("asf-logo-huge.tar.bz2"))); + + // doesn't work: Depending on the compression engine used, + // compressed bytes may differ. False errors would be + // reported. 
+ // assertTrue("File content mismatch", + // FILE_UTILS.contentEquals(project.resolveFile("expected/asf-logo-huge.tar.bz2"), + // project.resolveFile("asf-logo-huge.tar.bz2"))); + + // We have to compare the decompressed content instead: + + File originalFile = + project.resolveFile("expected/asf-logo-huge.tar.bz2"); + File actualFile = project.resolveFile("asf-logo-huge.tar.bz2"); + + InputStream originalIn = + new BufferedInputStream(new FileInputStream(originalFile)); + assertEquals((byte) 'B', originalIn.read()); + assertEquals((byte) 'Z', originalIn.read()); + + InputStream actualIn = + new BufferedInputStream(new FileInputStream(actualFile)); + assertEquals((byte) 'B', actualIn.read()); + assertEquals((byte) 'Z', actualIn.read()); + + originalIn = new CBZip2InputStream(originalIn); + actualIn = new CBZip2InputStream(actualIn); + + while (true) { + int expected = originalIn.read(); + int actual = actualIn.read(); + if (expected >= 0) { + if (expected != actual) { + fail("File content mismatch"); + } + } else { + if (actual >= 0) { + fail("File content mismatch"); + } + break; + } + } + + originalIn.close(); + actualIn.close(); } public void testDateCheck(){