diff --git a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java index 573efffa5..e7c3ff55c 100644 --- a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java +++ b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java @@ -1,10 +1,9 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2001-2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -30,673 +29,338 @@ import java.io.IOException; /** * An output stream that compresses into the BZip2 format (without the file * header chars) into another stream. - - *
The compression requires large amounts of memory. Thus you - * should call the {@link #close() close()} method as soon as - * possible, to force CBZip2OutputStream to release the - * allocated memory.
- * - *You can shrink the amount of allocated memory and maybe raise - * the compression speed by choosing a lower blocksize, which in turn - * may cause a lower compression ratio. You can avoid unnecessary - * memory allocation by avoiding using a blocksize which is bigger - * than the size of the input.
- * - *You can compute the memory usage for compressing by the - * following formula:
- *
- * 400k + (9 * blocksize)
.
- *
- *
- * To get the memory required for decompression by {@link - * CBZip2InputStream CBZip2InputStream} use
- *
- * 65k + (5 * blocksize)
.
- *
- *
- * Memory usage by blocksize | - *||
---|---|---|
Blocksize | - *Compression memory usage |
- * Decompression memory usage |
- *
100k | - *1300k | - *565k | - *
200k | - *2200k | - *1065k | - *
300k | - *3100k | - *1565k | - *
400k | - *4000k | - *2065k | - *
500k | - *4900k | - *2565k | - *
600k | - *5800k | - *3065k | - *
700k | - *6700k | - *3565k | - *
800k | - *7600k | - *4065k | - *
900k | - *8500k | - *4565k | - *
For decompression CBZip2InputStream allocates less - * memory if the bzipped input is smaller than one block.
* - *Instances of this class are not threadsafe.
- * - ** TODO: Update to BZip2 1.0.1 - *
- * */ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { - - /** - * The minimum supported blocksize == 1. - */ - public static final int MIN_BLOCKSIZE = 1; - - /** - * The maximum supported blocksize == 9. - */ - public static final int MAX_BLOCKSIZE = 9; - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ protected static final int SETMASK = (1 << 21); - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ protected static final int CLEARMASK = (~SETMASK); - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ protected static final int GREATER_ICOST = 15; - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ protected static final int LESSER_ICOST = 0; - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ protected static final int SMALL_THRESH = 20; - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ protected static final int DEPTH_THRESH = 10; - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - */ - protected static final int WORK_FACTOR = 30; - - /** - * This constant is accessible by subclasses for historical purposes. - * If you don't know what it means then you don't need it. - *+ /* If you are ever unlucky/improbable enough to get a stack overflow whilst sorting, increase the following constant and try again. In practice I have never seen the stack go above 27 elems, so the following limit seems very generous. - *
- */ + */ protected static final int QSORT_STACK_SIZE = 1000; - /** - * Knuth's increments seem to work better than Incerpi-Sedgewick - * here. Possibly because the number of elems to sort is usually - * small, typically <= 20. - */ - private static final int[] INCS = { - 1, - 4, - 13, - 40, - 121, - 364, - 1093, - 3280, - 9841, - 29524, - 88573, - 265720, - 797161, - 2391484 - }; - - /** - * This method is accessible by subclasses for historical purposes. - * If you don't know what it does then you don't need it. - */ - protected static void hbMakeCodeLengths(char[] len, int[] freq, - int alphaSize, int maxLen) { - /* - Nodes and heap entries run from 1. Entry 0 - for both the heap and nodes is a sentinel. - */ - final int[] heap = new int[MAX_ALPHA_SIZE * 2]; - final int[] weight = new int[MAX_ALPHA_SIZE * 2]; - final int[] parent = new int[MAX_ALPHA_SIZE * 2]; - - for (int i = alphaSize; --i >= 0;) { - weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; - } - - for (boolean tooLong = true; tooLong;) { - tooLong = false; - - int nNodes = alphaSize; - int nHeap = 0; - heap[0] = 0; - weight[0] = 0; - parent[0] = -2; - - for (int i = 1; i <= alphaSize; i++) { - parent[i] = -1; - nHeap++; - heap[nHeap] = i; - - int zz = nHeap; - int tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; - } - - // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; - - while (nHeap > 1) { - int n1 = heap[1]; - heap[1] = heap[nHeap]; - nHeap--; - - int yy = 0; - int zz = 1; - int tmp = heap[1]; - - while (true) { - yy = zz << 1; - - if (yy > nHeap) { - break; - } - - if ((yy < nHeap) - && (weight[heap[yy + 1]] < weight[heap[yy]])) { - yy++; - } - - if (weight[tmp] < weight[heap[yy]]) { - break; - } - - heap[zz] = heap[yy]; - zz = yy; - } - - heap[zz] = tmp; - - int n2 = heap[1]; - heap[1] = heap[nHeap]; - nHeap--; - - yy = 0; - zz = 1; - tmp = heap[1]; - - while (true) { - yy = zz << 1; - - if (yy > nHeap) { - break; - } - - if ((yy < nHeap) - && (weight[heap[yy + 1]] < weight[heap[yy]])) { - yy++; - } - - if (weight[tmp] < weight[heap[yy]]) { - break; - } - - heap[zz] = heap[yy]; - zz = yy; - } - - heap[zz] = tmp; - nNodes++; - parent[n1] = parent[n2] = nNodes; - - final int weight_n1 = weight[n1]; - final int weight_n2 = weight[n2]; - weight[nNodes] = (((weight_n1 & 0xffffff00) - + (weight_n2 & 0xffffff00)) - | (1 + (((weight_n1 & 0x000000ff) - > (weight_n2 & 0x000000ff)) - ? (weight_n1 & 0x000000ff) - : (weight_n2 & 0x000000ff)))); - - parent[nNodes] = -1; - nHeap++; - heap[nHeap] = nNodes; - - tmp = 0; - zz = nHeap; - tmp = heap[zz]; - final int weight_tmp = weight[tmp]; - while (weight_tmp < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; - - } - - // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; - - for (int i = 1; i <= alphaSize; i++) { - int j = 0; - int k = i; - - for (int parent_k; (parent_k = parent[k]) >= 0;) { - k = parent_k; - j++; - } - - len[i - 1] = (char) j; - if (j > maxLen) { - tooLong = true; - } - } + private static void panic() { + System.out.println("panic"); + //throw new CError(); + } - if (tooLong) { - for (int i = 1; i < alphaSize; i++) { - int j = weight[i] >> 8; - j = 1 + (j >> 1); - weight[i] = j << 8; - } + private void makeMaps() { + int i; + nInUse = 0; + for (i = 0; i < 256; i++) { + if (inUse[i]) { + seqToUnseq[nInUse] = (char) i; + unseqToSeq[i] = (char) nInUse; + nInUse++; } } } - private static void hbMakeCodeLengths(final byte[] len, final int[] freq, - final Data dat, final int alphaSize, - final int maxLen) { + protected static void hbMakeCodeLengths(char[] len, int[] freq, + int alphaSize, int maxLen) { /* Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. */ - final int[] heap = dat.heap; - final int[] weight = dat.weight; - final int[] parent = dat.parent; + int nNodes, nHeap, n1, n2, i, j, k; + boolean tooLong; + + int[] heap = new int[MAX_ALPHA_SIZE + 2]; + int[] weight = new int[MAX_ALPHA_SIZE * 2]; + int[] parent = new int[MAX_ALPHA_SIZE * 2]; - for (int i = alphaSize; --i >= 0;) { + for (i = 0; i < alphaSize; i++) { weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; } - for (boolean tooLong = true; tooLong;) { - tooLong = false; + while (true) { + nNodes = alphaSize; + nHeap = 0; - int nNodes = alphaSize; - int nHeap = 0; heap[0] = 0; weight[0] = 0; parent[0] = -2; - for (int i = 1; i <= alphaSize; i++) { + for (i = 1; i <= alphaSize; i++) { parent[i] = -1; nHeap++; heap[nHeap] = i; - - int zz = nHeap; - int tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; + { + int zz, tmp; + zz = nHeap; + tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; } - heap[zz] = tmp; + } + if (!(nHeap < (MAX_ALPHA_SIZE + 2))) { + panic(); } while (nHeap > 1) { - int n1 = heap[1]; + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; - - int yy = 0; - int zz = 1; - int tmp = heap[1]; - - while (true) { - yy = zz << 1; - - if (yy > nHeap) { - break; - } - - if ((yy < nHeap) - && (weight[heap[yy + 1]] < weight[heap[yy]])) { - yy++; - } - - if (weight[tmp] < weight[heap[yy]]) { - break; + { + int zz = 0, yy = 0, tmp = 0; + zz = 1; + tmp = heap[zz]; + while (true) { + yy = zz << 1; + if (yy > nHeap) { + break; + } + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) { + yy++; + } + if (weight[tmp] < weight[heap[yy]]) { + break; + } + heap[zz] = heap[yy]; + zz = yy; } - - heap[zz] = heap[yy]; - zz = yy; + heap[zz] = tmp; } - - heap[zz] = tmp; - - int n2 = heap[1]; + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; - - yy = 0; - zz = 1; - tmp = heap[1]; - - while (true) { - yy = zz << 1; - - if (yy > nHeap) { - break; - } - - if ((yy < nHeap) - && (weight[heap[yy + 1]] < weight[heap[yy]])) { - yy++; - } - - if (weight[tmp] < weight[heap[yy]]) { - break; + { + int zz = 0, yy = 0, tmp = 0; + zz = 1; + tmp = heap[zz]; + while (true) { + yy = zz << 1; + if (yy > nHeap) { + break; + } + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) { + yy++; + } + if (weight[tmp] < weight[heap[yy]]) { + break; + } + heap[zz] = heap[yy]; + zz = yy; } - - heap[zz] = heap[yy]; - zz = yy; + heap[zz] = tmp; } - - heap[zz] = tmp; nNodes++; parent[n1] = parent[n2] = nNodes; - final int weight_n1 = weight[n1]; - final int weight_n2 = weight[n2]; - weight[nNodes] = ((weight_n1 & 0xffffff00) - + (weight_n2 & 0xffffff00)) - | (1 + (((weight_n1 & 0x000000ff) - > (weight_n2 & 0x000000ff)) - ? (weight_n1 & 0x000000ff) - : (weight_n2 & 0x000000ff))); + weight[nNodes] = ((weight[n1] & 0xffffff00) + + (weight[n2] & 0xffffff00)) + | (1 + (((weight[n1] & 0x000000ff) + > (weight[n2] & 0x000000ff)) + ? (weight[n1] & 0x000000ff) + : (weight[n2] & 0x000000ff))); parent[nNodes] = -1; nHeap++; heap[nHeap] = nNodes; - - tmp = 0; - zz = nHeap; - tmp = heap[zz]; - final int weight_tmp = weight[tmp]; - while (weight_tmp < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; + { + int zz = 0, tmp = 0; + zz = nHeap; + tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; } - heap[zz] = tmp; - + } + if (!(nNodes < (MAX_ALPHA_SIZE * 2))) { + panic(); } - for (int i = 1; i <= alphaSize; i++) { - int j = 0; - int k = i; - - for (int parent_k; (parent_k = parent[k]) >= 0;) { - k = parent_k; + tooLong = false; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; j++; } - - len[i - 1] = (byte) j; + len[i - 1] = (char) j; if (j > maxLen) { tooLong = true; } } - if (tooLong) { - for (int i = 1; i < alphaSize; i++) { - int j = weight[i] >> 8; - j = 1 + (j >> 1); - weight[i] = j << 8; - } + if (!tooLong) { + break; + } + + for (i = 1; i < alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; } } } - /** - Index of the last char in the block, so + /* + index of the last char in the block, so the block size == last + 1. */ - private int last; + int last; - /** - * Index in fmap[] of original string after sorting. - */ - private int origPtr; + /* + index in zptr[] of original string after sorting. + */ + int origPtr; - /** - Always: in the range 0 .. 9. - The current block size is 100000 * this number. - */ - private final int blockSize100k; + /* + always: in the range 0 .. 9. + The current block size is 100000 * this number. + */ + int blockSize100k; - private boolean blockRandomised; + boolean blockRandomised; - private int bsBuff; - private int bsLive; - private final CRC crc = new CRC(); + int bytesOut; + int bsBuff; + int bsLive; + CRC mCrc = new CRC(); + private boolean[] inUse = new boolean[256]; private int nInUse; + private char[] seqToUnseq = new char[256]; + private char[] unseqToSeq = new char[256]; + + private char[] selector = new char[MAX_SELECTORS]; + private char[] selectorMtf = new char[MAX_SELECTORS]; + + private char[] block; + private int[] quadrant; + private int[] zptr; + private short[] szptr; + private int[] ftab; + private int nMTF; + private int[] mtfFreq = new int[MAX_ALPHA_SIZE]; + /* * Used when sorting. If too many long comparisons * happen, we stop sorting, randomise the block * slightly, and try again. */ + private int workFactor; private int workDone; private int workLimit; private boolean firstAttempt; + private int nBlocksRandomised; private int currentChar = -1; private int runLength = 0; - private int blockCRC; - private int combinedCRC; - private int allowableBlockSize; - - /** - * All memory intensive stuff. - */ - private CBZip2OutputStream.Data data; + public CBZip2OutputStream(OutputStream inStream) throws IOException { + this(inStream, 9); + } - private OutputStream out; + public CBZip2OutputStream(OutputStream inStream, int inBlockSize) + throws IOException { + block = null; + quadrant = null; + zptr = null; + ftab = null; - /** - * Chooses a blocksize based on the given length of the data to compress. - * - * @return - * The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} - * both inclusive. For a negative inputLength this method returns - * MAX_BLOCKSIZE always. - * - * @param inputLength - * The length of the data which will be compressed by - * CBZip2OutputStream. - */ - public static int chooseBlockSize(long inputLength) { - return (inputLength > 0) - ? (int) Math.min((inputLength / 132000) + 1, 9) - : MAX_BLOCKSIZE; - } + bsSetStream(inStream); - /** - * Constructs a new CBZip2OutputStream with a blocksize of 900k. - * - *Attention: The caller is resonsible to write the two - * BZip2 magic bytes "BZ" to the specified stream prior - * to calling this constructor.
- * - * @param out * the destination stream. - * - * @throws IOException - * if an I/O error occurs in the specified stream. - * @throws NullPointerException - * ifout == null
.
- */
- public CBZip2OutputStream(final OutputStream out) throws IOException {
- this(out, MAX_BLOCKSIZE);
+ workFactor = 50;
+ if (inBlockSize > 9) {
+ inBlockSize = 9;
+ }
+ if (inBlockSize < 1) {
+ inBlockSize = 1;
+ }
+ blockSize100k = inBlockSize;
+ allocateCompressStructures();
+ initialize();
+ initBlock();
}
/**
- * Constructs a new CBZip2OutputStream with specified blocksize.
- *
- * Attention: The caller is resonsible to write the two - * BZip2 magic bytes "BZ" to the specified stream prior - * to calling this constructor.
- * - * - * @param out - * the destination stream. - * @param blockSize - * the blockSize as 100k units. * - * @throws IOException - * if an I/O error occurs in the specified stream. - * @throws IllegalArgumentException - * if(blockSize < 1) || (blockSize > 9)
.
- * @throws NullPointerException
- * if out == null
.
+ * modified by Oliver Merkel, 010128
*
- * @see #MIN_BLOCKSIZE
- * @see #MAX_BLOCKSIZE
*/
- public CBZip2OutputStream(final OutputStream out, final int blockSize)
- throws IOException {
- super();
-
- if (blockSize < 1) {
- throw new IllegalArgumentException("blockSize(" + blockSize
- + ") < 1");
- }
- if (blockSize > 9) {
- throw new IllegalArgumentException("blockSize(" + blockSize
- + ") > 9");
- }
-
- this.blockSize100k = blockSize;
- this.out = out;
- init();
- }
-
- public void write(final int b) throws IOException {
- if (this.out != null) {
- write0(b);
+ public void write(int bv) throws IOException {
+ int b = (256 + bv) % 256;
+ if (currentChar != -1) {
+ if (currentChar == b) {
+ runLength++;
+ if (runLength > 254) {
+ writeRun();
+ currentChar = -1;
+ runLength = 0;
+ }
+ } else {
+ writeRun();
+ runLength = 1;
+ currentChar = b;
+ }
} else {
- throw new IOException("closed");
+ currentChar = b;
+ runLength++;
}
}
private void writeRun() throws IOException {
- final int lastShadow = this.last;
-
- if (lastShadow < this.allowableBlockSize) {
- final int currentCharShadow = this.currentChar;
- final Data dataShadow = this.data;
- dataShadow.inUse[currentCharShadow] = true;
- final byte ch = (byte) currentCharShadow;
-
- int runLengthShadow = this.runLength;
- this.crc.updateCRC(currentCharShadow, runLengthShadow);
-
- switch (runLengthShadow) {
+ if (last < allowableBlockSize) {
+ inUse[currentChar] = true;
+ for (int i = 0; i < runLength; i++) {
+ mCrc.updateCRC((char) currentChar);
+ }
+ switch (runLength) {
case 1:
- dataShadow.block[lastShadow + 2] = ch;
- this.last = lastShadow + 1;
+ last++;
+ block[last + 1] = (char) currentChar;
break;
-
case 2:
- dataShadow.block[lastShadow + 2] = ch;
- dataShadow.block[lastShadow + 3] = ch;
- this.last = lastShadow + 2;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
break;
-
case 3:
- {
- final byte[] block = dataShadow.block;
- block[lastShadow + 2] = ch;
- block[lastShadow + 3] = ch;
- block[lastShadow + 4] = ch;
- this.last = lastShadow + 3;
- }
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
break;
-
default:
- {
- runLengthShadow -= 4;
- dataShadow.inUse[runLengthShadow] = true;
- final byte[] block = dataShadow.block;
- block[lastShadow + 2] = ch;
- block[lastShadow + 3] = ch;
- block[lastShadow + 4] = ch;
- block[lastShadow + 5] = ch;
- block[lastShadow + 6] = (byte) runLengthShadow;
- this.last = lastShadow + 5;
- }
+ inUse[runLength - 4] = true;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) (runLength - 4);
break;
-
}
} else {
endBlock();
@@ -705,84 +369,72 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
}
}
- /**
- * Overriden to close the stream.
- */
+ boolean closed = false;
+
protected void finalize() throws Throwable {
close();
super.finalize();
}
public void close() throws IOException {
- OutputStream outShadow = this.out;
- if (outShadow != null) {
- try {
- if (this.runLength > 0) {
- writeRun();
- }
- this.currentChar = -1;
- endBlock();
- endCompression();
- outShadow.close();
- } finally {
- this.out = null;
- this.data = null;
- }
+ if (closed) {
+ return;
}
+
+ if (runLength > 0) {
+ writeRun();
+ }
+ currentChar = -1;
+ endBlock();
+ endCompression();
+ closed = true;
+ super.close();
+ bsStream.close();
}
public void flush() throws IOException {
- OutputStream outShadow = this.out;
- if (outShadow != null) {
- outShadow.flush();
- }
+ super.flush();
+ bsStream.flush();
}
- private void init() throws IOException {
- // write magic: done by caller who created this stream
- //this.out.write('B');
- //this.out.write('Z');
+ private int blockCRC, combinedCRC;
- this.data = new Data(this.blockSize100k);
+ private void initialize() throws IOException {
+ bytesOut = 0;
+ nBlocksRandomised = 0;
/* Write `magic' bytes h indicating file-format == huffmanised,
followed by a digit indicating blockSize100k.
*/
- bsPutUByte('h');
- bsPutUByte('0' + this.blockSize100k);
+ bsPutUChar('h');
+ bsPutUChar('0' + blockSize100k);
- this.combinedCRC = 0;
- initBlock();
+ combinedCRC = 0;
}
+ private int allowableBlockSize;
+
private void initBlock() {
// blockNo++;
- this.crc.initialiseCRC();
- this.last = -1;
+ mCrc.initialiseCRC();
+ last = -1;
// ch = 0;
- boolean[] inUse = this.data.inUse;
- for (int i = 256; --i >= 0;) {
+ for (int i = 0; i < 256; i++) {
inUse[i] = false;
}
/* 20 is just a paranoia constant */
- this.allowableBlockSize
- = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20;
+ allowableBlockSize = baseBlockSize * blockSize100k - 20;
}
private void endBlock() throws IOException {
- this.blockCRC = this.crc.getFinalCRC();
- this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
- this.combinedCRC ^= this.blockCRC;
-
- // empty block at end of file
- if (this.last == -1) {
- return;
- }
+ blockCRC = mCrc.getFinalCRC();
+ combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31);
+ combinedCRC ^= blockCRC;
/* sort the block and establish posn of original string */
- blockSort();
+ doReversibleTransformation();
/*
A 6-byte block header, the value chosen arbitrarily
@@ -797,19 +449,20 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
They are only important when trying to recover blocks from
damaged files.
*/
- bsPutUByte(0x31);
- bsPutUByte(0x41);
- bsPutUByte(0x59);
- bsPutUByte(0x26);
- bsPutUByte(0x53);
- bsPutUByte(0x59);
+ bsPutUChar(0x31);
+ bsPutUChar(0x41);
+ bsPutUChar(0x59);
+ bsPutUChar(0x26);
+ bsPutUChar(0x53);
+ bsPutUChar(0x59);
/* Now the block's CRC, so it is in a known place. */
- bsPutInt(this.blockCRC);
+ bsPutint(blockCRC);
/* Now a single bit indicating randomisation. */
- if (this.blockRandomised) {
+ if (blockRandomised) {
bsW(1, 1);
+ nBlocksRandomised++;
} else {
bsW(1, 0);
}
@@ -826,276 +479,214 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
too much repetition -- 27 18 28 18 28 46 -- for me
to feel statistically comfortable. Call me paranoid.)
*/
- bsPutUByte(0x17);
- bsPutUByte(0x72);
- bsPutUByte(0x45);
- bsPutUByte(0x38);
- bsPutUByte(0x50);
- bsPutUByte(0x90);
-
- bsPutInt(this.combinedCRC);
- bsFinishedWithStream();
- }
+ bsPutUChar(0x17);
+ bsPutUChar(0x72);
+ bsPutUChar(0x45);
+ bsPutUChar(0x38);
+ bsPutUChar(0x50);
+ bsPutUChar(0x90);
- /**
- * Returns the blocksize parameter specified at construction time.
- */
- public final int getBlockSize() {
- return this.blockSize100k;
- }
+ bsPutint(combinedCRC);
- public void write(final byte[] buf, int offs, final int len)
- throws IOException {
- if (offs < 0) {
- throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
- }
- if (len < 0) {
- throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
- }
- if (offs + len > buf.length) {
- throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
- + len + ") > buf.length("
- + buf.length + ").");
- }
- if (this.out == null) {
- throw new IOException("stream closed");
- }
-
- for (int hi = offs + len; offs < hi;) {
- write0(buf[offs++]);
- }
+ bsFinishedWithStream();
}
- private void write0(int b) throws IOException {
- if (this.currentChar != -1) {
- b &= 0xff;
- if (this.currentChar == b) {
- if (++this.runLength > 254) {
- writeRun();
- this.currentChar = -1;
- this.runLength = 0;
- }
- // else nothing to do
- } else {
- writeRun();
- this.runLength = 1;
- this.currentChar = b;
- }
- } else {
- this.currentChar = b & 0xff;
- this.runLength++;
- }
- }
+ private void hbAssignCodes (int[] code, char[] length, int minLen,
+ int maxLen, int alphaSize) {
+ int n, vec, i;
- private static void hbAssignCodes(final int[] code, final byte[] length,
- final int minLen, final int maxLen,
- final int alphaSize) {
- int vec = 0;
- for (int n = minLen; n <= maxLen; n++) {
- for (int i = 0; i < alphaSize; i++) {
- if ((length[i] & 0xff) == n) {
+ vec = 0;
+ for (n = minLen; n <= maxLen; n++) {
+ for (i = 0; i < alphaSize; i++) {
+ if (length[i] == n) {
code[i] = vec;
vec++;
}
- }
+ };
vec <<= 1;
}
}
+ private void bsSetStream(OutputStream f) {
+ bsStream = f;
+ bsLive = 0;
+ bsBuff = 0;
+ bytesOut = 0;
+ }
+
private void bsFinishedWithStream() throws IOException {
- while (this.bsLive > 0) {
- int ch = this.bsBuff >> 24;
- this.out.write(ch); // write 8-bit
- this.bsBuff <<= 8;
- this.bsLive -= 8;
+ while (bsLive > 0) {
+ int ch = (bsBuff >> 24);
+ try {
+ bsStream.write(ch); // write 8-bit
+ } catch (IOException e) {
+ throw e;
+ }
+ bsBuff <<= 8;
+ bsLive -= 8;
+ bytesOut++;
}
}
- private void bsW(final int n, final int v) throws IOException {
- final OutputStream outShadow = this.out;
- int bsLiveShadow = this.bsLive;
- int bsBuffShadow = this.bsBuff;
-
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24); // write 8-bit
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
+ private void bsW(int n, int v) throws IOException {
+ while (bsLive >= 8) {
+ int ch = (bsBuff >> 24);
+ try {
+ bsStream.write(ch); // write 8-bit
+ } catch (IOException e) {
+ throw e;
+ }
+ bsBuff <<= 8;
+ bsLive -= 8;
+ bytesOut++;
}
-
- this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
- this.bsLive = bsLiveShadow + n;
+ bsBuff |= (v << (32 - bsLive - n));
+ bsLive += n;
}
- private void bsPutUByte(final int c) throws IOException {
+ private void bsPutUChar(int c) throws IOException {
bsW(8, c);
}
- private void bsPutInt(final int u) throws IOException {
+ private void bsPutint(int u) throws IOException {
bsW(8, (u >> 24) & 0xff);
bsW(8, (u >> 16) & 0xff);
bsW(8, (u >> 8) & 0xff);
bsW(8, u & 0xff);
}
+ private void bsPutIntVS(int numBits, int c) throws IOException {
+ bsW(numBits, c);
+ }
+
private void sendMTFValues() throws IOException {
- final byte[][] len = this.data.sendMTFValues_len;
- final int alphaSize = this.nInUse + 2;
+ char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
- for (int t = N_GROUPS; --t >= 0;) {
- byte[] len_t = len[t];
- for (int v = alphaSize; --v >= 0;) {
- len_t[v] = GREATER_ICOST;
+ int v, t, i, j, gs, ge, totc, bt, bc, iter;
+ int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
+ int nGroups, nBytes;
+
+ alphaSize = nInUse + 2;
+ for (t = 0; t < N_GROUPS; t++) {
+ for (v = 0; v < alphaSize; v++) {
+ len[t][v] = (char) GREATER_ICOST;
}
}
/* Decide how many coding tables to use */
- // assert (this.nMTF > 0) : this.nMTF;
- final int nGroups =
- (this.nMTF < 200) ? 2
- : (this.nMTF < 600) ? 3
- : (this.nMTF < 1200) ? 4
- : (this.nMTF < 2400) ? 5
- : 6;
-
- /* Generate an initial set of coding tables */
- sendMTFValues0(nGroups, alphaSize);
-
- /*
- Iterate up to N_ITERS times to improve the tables.
- */
- final int nSelectors = sendMTFValues1(nGroups, alphaSize);
-
- /* Compute MTF values for the selectors. */
- sendMTFValues2(nGroups, nSelectors);
-
- /* Assign actual codes for the tables. */
- sendMTFValues3(nGroups, alphaSize);
-
- /* Transmit the mapping table. */
- sendMTFValues4();
-
- /* Now the selectors. */
- sendMTFValues5(nGroups, nSelectors);
-
- /* Now the coding tables. */
- sendMTFValues6(nGroups, alphaSize);
-
- /* And finally, the block data proper */
- sendMTFValues7(nSelectors);
- }
-
- private void sendMTFValues0(final int nGroups, final int alphaSize) {
- final byte[][] len = this.data.sendMTFValues_len;
- final int[] mtfFreq = this.data.mtfFreq;
-
- int remF = this.nMTF;
- int gs = 0;
+ if (nMTF <= 0) {
+ panic();
+ }
- for (int nPart = nGroups; nPart > 0; nPart--) {
- final int tFreq = remF / nPart;
- int ge = gs - 1;
- int aFreq = 0;
+ if (nMTF < 200) {
+ nGroups = 2;
+ } else if (nMTF < 600) {
+ nGroups = 3;
+ } else if (nMTF < 1200) {
+ nGroups = 4;
+ } else if (nMTF < 2400) {
+ nGroups = 5;
+ } else {
+ nGroups = 6;
+ }
- for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
- aFreq += mtfFreq[++ge];
- }
+ /* Generate an initial set of coding tables */ {
+ int nPart, remF, tFreq, aFreq;
+
+ nPart = nGroups;
+ remF = nMTF;
+ gs = 0;
+ while (nPart > 0) {
+ tFreq = remF / nPart;
+ ge = gs - 1;
+ aFreq = 0;
+ while (aFreq < tFreq && ge < alphaSize - 1) {
+ ge++;
+ aFreq += mtfFreq[ge];
+ }
- if ((ge > gs)
- && (nPart != nGroups)
- && (nPart != 1)
- && (((nGroups - nPart) & 1) != 0)) {
- aFreq -= mtfFreq[ge--];
- }
+ if (ge > gs && nPart != nGroups && nPart != 1
+ && ((nGroups - nPart) % 2 == 1)) {
+ aFreq -= mtfFreq[ge];
+ ge--;
+ }
- final byte[] len_np = len[nPart - 1];
- for (int v = alphaSize; --v >= 0;) {
- if ((v >= gs) && (v <= ge)) {
- len_np[v] = LESSER_ICOST;
- } else {
- len_np[v] = GREATER_ICOST;
+ for (v = 0; v < alphaSize; v++) {
+ if (v >= gs && v <= ge) {
+ len[nPart - 1][v] = (char) LESSER_ICOST;
+ } else {
+ len[nPart - 1][v] = (char) GREATER_ICOST;
+ }
}
- }
- gs = ge + 1;
- remF -= aFreq;
+ nPart--;
+ gs = ge + 1;
+ remF -= aFreq;
+ }
}
- }
- private int sendMTFValues1(final int nGroups, final int alphaSize) {
- final Data dataShadow = this.data;
- final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
- final int[] fave = dataShadow.sendMTFValues_fave;
- final short[] cost = dataShadow.sendMTFValues_cost;
- final char[] sfmap = dataShadow.sfmap;
- final byte[] selector = dataShadow.selector;
- final byte[][] len = dataShadow.sendMTFValues_len;
- final byte[] len_0 = len[0];
- final byte[] len_1 = len[1];
- final byte[] len_2 = len[2];
- final byte[] len_3 = len[3];
- final byte[] len_4 = len[4];
- final byte[] len_5 = len[5];
- final int nMTFShadow = this.nMTF;
-
- int nSelectors = 0;
-
- for (int iter = 0; iter < N_ITERS; iter++) {
- for (int t = nGroups; --t >= 0;) {
+ int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE];
+ int[] fave = new int[N_GROUPS];
+ short[] cost = new short[N_GROUPS];
+ /*
+ Iterate up to N_ITERS times to improve the tables.
+ */
+ for (iter = 0; iter < N_ITERS; iter++) {
+ for (t = 0; t < nGroups; t++) {
fave[t] = 0;
- int[] rfreqt = rfreq[t];
- for (int i = alphaSize; --i >= 0;) {
- rfreqt[i] = 0;
+ }
+
+ for (t = 0; t < nGroups; t++) {
+ for (v = 0; v < alphaSize; v++) {
+ rfreq[t][v] = 0;
}
}
nSelectors = 0;
+ totc = 0;
+ gs = 0;
+ while (true) {
- for (int gs = 0; gs < this.nMTF;) {
/* Set group start & end marks. */
+ if (gs >= nMTF) {
+ break;
+ }
+ ge = gs + G_SIZE - 1;
+ if (ge >= nMTF) {
+ ge = nMTF - 1;
+ }
/*
Calculate the cost of this group as coded
by each of the coding tables.
*/
+ for (t = 0; t < nGroups; t++) {
+ cost[t] = 0;
+ }
- final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
-
- if (nGroups == N_GROUPS) {
- // unrolled version of the else-block
-
- short cost0 = 0;
- short cost1 = 0;
- short cost2 = 0;
- short cost3 = 0;
- short cost4 = 0;
- short cost5 = 0;
-
- for (int i = gs; i <= ge; i++) {
- final int icv = sfmap[i];
- cost0 += len_0[icv] & 0xff;
- cost1 += len_1[icv] & 0xff;
- cost2 += len_2[icv] & 0xff;
- cost3 += len_3[icv] & 0xff;
- cost4 += len_4[icv] & 0xff;
- cost5 += len_5[icv] & 0xff;
+ if (nGroups == 6) {
+ short cost0, cost1, cost2, cost3, cost4, cost5;
+ cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
+ for (i = gs; i <= ge; i++) {
+ short icv = szptr[i];
+ cost0 += len[0][icv];
+ cost1 += len[1][icv];
+ cost2 += len[2][icv];
+ cost3 += len[3][icv];
+ cost4 += len[4][icv];
+ cost5 += len[5][icv];
}
-
cost[0] = cost0;
cost[1] = cost1;
cost[2] = cost2;
cost[3] = cost3;
cost[4] = cost4;
cost[5] = cost5;
-
} else {
- for (int t = nGroups; --t >= 0;) {
- cost[t] = 0;
- }
-
- for (int i = gs; i <= ge; i++) {
- final int icv = sfmap[i];
- for (int t = nGroups; --t >= 0;) {
- cost[t] += len[t][icv] & 0xff;
+ for (i = gs; i <= ge; i++) {
+ short icv = szptr[i];
+ for (t = 0; t < nGroups; t++) {
+ cost[t] += len[t][icv];
}
}
}
@@ -1104,25 +695,24 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
Find the coding table which is best for this group,
and record its identity in the selector table.
*/
- int bt = -1;
- for (int t = nGroups, bc = 999999999; --t >= 0;) {
- final int cost_t = cost[t];
- if (cost_t < bc) {
- bc = cost_t;
+ bc = 999999999;
+ bt = -1;
+ for (t = 0; t < nGroups; t++) {
+ if (cost[t] < bc) {
+ bc = cost[t];
bt = t;
}
- }
-
+ };
+ totc += bc;
fave[bt]++;
- selector[nSelectors] = (byte) bt;
+ selector[nSelectors] = (char) bt;
nSelectors++;
/*
Increment the symbol frequencies for the selected table.
*/
- final int[] rfreq_bt = rfreq[bt];
- for (int i = gs; i <= ge; i++) {
- rfreq_bt[sfmap[i]]++;
+ for (i = gs; i <= ge; i++) {
+ rfreq[bt][szptr[i]]++;
}
gs = ge + 1;
@@ -1131,861 +721,850 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
/*
Recompute the tables based on the accumulated frequencies.
*/
- for (int t = 0; t < nGroups; t++) {
- hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
+ for (t = 0; t < nGroups; t++) {
+ hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
}
}
- return nSelectors;
- }
-
- private void sendMTFValues2(final int nGroups, final int nSelectors) {
- // assert (nGroups < 8) : nGroups;
-
- final Data dataShadow = this.data;
- byte[] pos = dataShadow.sendMTFValues2_pos;
+ rfreq = null;
+ fave = null;
+ cost = null;
- for (int i = nGroups; --i >= 0;) {
- pos[i] = (byte) i;
+ if (!(nGroups < 8)) {
+ panic();
+ }
+ if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) {
+ panic();
}
- for (int i = 0; i < nSelectors; i++) {
- final byte ll_i = dataShadow.selector[i];
- byte tmp = pos[0];
- int j = 0;
- while (ll_i != tmp) {
- j++;
- byte tmp2 = tmp;
- tmp = pos[j];
- pos[j] = tmp2;
+ /* Compute MTF values for the selectors. */
+ {
+ char[] pos = new char[N_GROUPS];
+ char ll_i, tmp2, tmp;
+ for (i = 0; i < nGroups; i++) {
+ pos[i] = (char) i;
}
-
- pos[0] = tmp;
- dataShadow.selectorMtf[i] = (byte) j;
- }
- }
-
- private void sendMTFValues3(final int nGroups, final int alphaSize) {
- int[][] code = this.data.sendMTFValues_code;
- byte[][] len = this.data.sendMTFValues_len;
-
- for (int t = 0; t < nGroups; t++) {
- int minLen = 32;
- int maxLen = 0;
- final byte[] len_t = len[t];
- for (int i = alphaSize; --i >= 0;) {
- final int l = len_t[i] & 0xff;
- if (l > maxLen) {
- maxLen = l;
- }
- if (l < minLen) {
- minLen = l;
+ for (i = 0; i < nSelectors; i++) {
+ ll_i = selector[i];
+ j = 0;
+ tmp = pos[j];
+ while (ll_i != tmp) {
+ j++;
+ tmp2 = tmp;
+ tmp = pos[j];
+ pos[j] = tmp2;
}
+ pos[0] = tmp;
+ selectorMtf[i] = (char) j;
}
-
- // assert (maxLen <= 20) : maxLen;
- // assert (minLen >= 1) : minLen;
-
- hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
}
- }
- private void sendMTFValues4() throws IOException {
- final boolean[] inUse = this.data.inUse;
- final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
+ int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE];
- for (int i = 16; --i >= 0;) {
- inUse16[i] = false;
- final int i16 = i * 16;
- for (int j = 16; --j >= 0;) {
- if (inUse[i16 + j]) {
- inUse16[i] = true;
+ /* Assign actual codes for the tables. */
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (len[t][i] > maxLen) {
+ maxLen = len[t][i];
+ }
+ if (len[t][i] < minLen) {
+ minLen = len[t][i];
}
}
+ if (maxLen > 20) {
+ panic();
+ }
+ if (minLen < 1) {
+ panic();
+ }
+ hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
}
- for (int i = 0; i < 16; i++) {
- bsW(1, inUse16[i] ? 1 : 0);
- }
-
- final OutputStream outShadow = this.out;
- int bsLiveShadow = this.bsLive;
- int bsBuffShadow = this.bsBuff;
-
- for (int i = 0; i < 16; i++) {
- if (inUse16[i]) {
- final int i16 = i * 16;
- for (int j = 0; j < 16; j++) {
- // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24); // write 8-bit
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
- }
- if (inUse[i16 + j]) {
- bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
+ /* Transmit the mapping table. */
+ {
+ boolean[] inUse16 = new boolean[16];
+ for (i = 0; i < 16; i++) {
+ inUse16[i] = false;
+ for (j = 0; j < 16; j++) {
+ if (inUse[i * 16 + j]) {
+ inUse16[i] = true;
}
- bsLiveShadow++;
}
}
- }
-
- this.bsBuff = bsBuffShadow;
- this.bsLive = bsLiveShadow;
- }
- private void sendMTFValues5(final int nGroups, final int nSelectors)
- throws IOException {
- bsW(3, nGroups);
- bsW(15, nSelectors);
-
- final OutputStream outShadow = this.out;
- final byte[] selectorMtf = this.data.selectorMtf;
-
- int bsLiveShadow = this.bsLive;
- int bsBuffShadow = this.bsBuff;
-
- for (int i = 0; i < nSelectors; i++) {
- for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
- // inlined: bsW(1, 1);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24);
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
+ nBytes = bytesOut;
+ for (i = 0; i < 16; i++) {
+ if (inUse16[i]) {
+ bsW(1, 1);
+ } else {
+ bsW(1, 0);
}
- bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
- bsLiveShadow++;
}
- // inlined: bsW(1, 0);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24);
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
+ for (i = 0; i < 16; i++) {
+ if (inUse16[i]) {
+ for (j = 0; j < 16; j++) {
+ if (inUse[i * 16 + j]) {
+ bsW(1, 1);
+ } else {
+ bsW(1, 0);
+ }
+ }
+ }
}
- //bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
- bsLiveShadow++;
- }
-
- this.bsBuff = bsBuffShadow;
- this.bsLive = bsLiveShadow;
- }
-
- private void sendMTFValues6(final int nGroups, final int alphaSize)
- throws IOException {
- final byte[][] len = this.data.sendMTFValues_len;
- final OutputStream outShadow = this.out;
- int bsLiveShadow = this.bsLive;
- int bsBuffShadow = this.bsBuff;
-
- for (int t = 0; t < nGroups; t++) {
- byte[] len_t = len[t];
- int curr = len_t[0] & 0xff;
+ }
- // inlined: bsW(5, curr);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24); // write 8-bit
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
+ /* Now the selectors. */
+ nBytes = bytesOut;
+ bsW (3, nGroups);
+ bsW (15, nSelectors);
+ for (i = 0; i < nSelectors; i++) {
+ for (j = 0; j < selectorMtf[i]; j++) {
+ bsW(1, 1);
}
- bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
- bsLiveShadow += 5;
-
- for (int i = 0; i < alphaSize; i++) {
- int lti = len_t[i] & 0xff;
- while (curr < lti) {
- // inlined: bsW(2, 2);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24); // write 8-bit
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
- }
- bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
- bsLiveShadow += 2;
+ bsW(1, 0);
+ }
+ /* Now the coding tables. */
+ nBytes = bytesOut;
+
+ for (t = 0; t < nGroups; t++) {
+ int curr = len[t][0];
+ bsW(5, curr);
+ for (i = 0; i < alphaSize; i++) {
+ while (curr < len[t][i]) {
+ bsW(2, 2);
curr++; /* 10 */
}
-
- while (curr > lti) {
- // inlined: bsW(2, 3);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24); // write 8-bit
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
- }
- bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
- bsLiveShadow += 2;
-
+ while (curr > len[t][i]) {
+ bsW(2, 3);
curr--; /* 11 */
}
-
- // inlined: bsW(1, 0);
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24); // write 8-bit
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
- }
- // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
- bsLiveShadow++;
+ bsW (1, 0);
}
}
- this.bsBuff = bsBuffShadow;
- this.bsLive = bsLiveShadow;
- }
-
- private void sendMTFValues7(final int nSelectors) throws IOException {
- final Data dataShadow = this.data;
- final byte[][] len = dataShadow.sendMTFValues_len;
- final int[][] code = dataShadow.sendMTFValues_code;
- final OutputStream outShadow = this.out;
- final byte[] selector = dataShadow.selector;
- final char[] sfmap = dataShadow.sfmap;
- final int nMTFShadow = this.nMTF;
-
- int selCtr = 0;
-
- int bsLiveShadow = this.bsLive;
- int bsBuffShadow = this.bsBuff;
-
- for (int gs = 0; gs < nMTFShadow;) {
- final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
- final int selector_selCtr = selector[selCtr] & 0xff;
- final int[] code_selCtr = code[selector_selCtr];
- final byte[] len_selCtr = len[selector_selCtr];
-
- while (gs <= ge) {
- final int sfmap_i = sfmap[gs];
-
- //
- // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
- // code_selCtr[sfmap_i]);
- //
- while (bsLiveShadow >= 8) {
- outShadow.write(bsBuffShadow >> 24);
- bsBuffShadow <<= 8;
- bsLiveShadow -= 8;
- }
- final int n = len_selCtr[sfmap_i] & 0xFF;
- bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
- bsLiveShadow += n;
-
- gs++;
+ /* And finally, the block data proper */
+ nBytes = bytesOut;
+ selCtr = 0;
+ gs = 0;
+ while (true) {
+ if (gs >= nMTF) {
+ break;
+ }
+ ge = gs + G_SIZE - 1;
+ if (ge >= nMTF) {
+ ge = nMTF - 1;
+ }
+ for (i = gs; i <= ge; i++) {
+ bsW(len[selector[selCtr]][szptr[i]],
+ code[selector[selCtr]][szptr[i]]);
}
gs = ge + 1;
selCtr++;
}
-
- this.bsBuff = bsBuffShadow;
- this.bsLive = bsLiveShadow;
+ if (!(selCtr == nSelectors)) {
+ panic();
+ }
}
- private void moveToFrontCodeAndSend() throws IOException {
- bsW(24, this.origPtr);
+ private void moveToFrontCodeAndSend () throws IOException {
+ bsPutIntVS(24, origPtr);
generateMTFValues();
sendMTFValues();
}
- /**
- * This is the most hammered method of this class.
- *
- * This is the version using unrolled loops. Normally I never - * use such ones in Java code. The unrolling has shown a - * noticable performance improvement on JRE 1.4.2 (Linux i586 / - * HotSpot Client). Of course it depends on the JIT compiler of - * the vm.
- */ - private boolean mainSimpleSort(final Data dataShadow, final int lo, final int hi, - final int d) { - final int bigN = hi - lo + 1; + private OutputStream bsStream; + + private void simpleSort(int lo, int hi, int d) { + int i, j, h, bigN, hp; + int v; + + bigN = hi - lo + 1; if (bigN < 2) { - return this.firstAttempt && (this.workDone > this.workLimit); + return; } - int hp = 0; - while (INCS[hp] < bigN) { + hp = 0; + while (incs[hp] < bigN) { hp++; } + hp--; - final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; - final byte[] block = dataShadow.block; - final int lastShadow = this.last; - final int lastPlus1 = lastShadow + 1; - final boolean firstAttemptShadow = this.firstAttempt; - final int workLimitShadow = this.workLimit; - int workDoneShadow = this.workDone; - - // Following block contains unrolled code which could be shortened by - // coding it in additional loops. - - HP: while (--hp >= 0) { - final int h = INCS[hp]; - final int mj = lo + h - 1; - - for (int i = lo + h; i <= hi;) { - // copy - for (int k = 3; (i <= hi) && (--k >= 0); i++) { - final int v = fmap[i]; - final int vd = v + d; - int j = i; - - // for (int a; - // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, - // block, quadrant, lastShadow); - // j -= h) { - // fmap[j] = a; - // } - // - // unrolled version: - - // start inline mainGTU - boolean onceRunned = false; - int a = 0; - - HAMMER: while (true) { - if (onceRunned) { - fmap[j] = a; - if ((j -= h) <= mj) { - break HAMMER; - } - } else { - onceRunned = true; - } + for (; hp >= 0; hp--) { + h = incs[hp]; - a = fmap[j - h]; - int i1 = a + d; - int i2 = vd; - - // following could be done in a loop, but - // unrolled it for performance: - if (block[i1 + 1] == block[i2 + 1]) { - if (block[i1 + 2] == block[i2 + 2]) { - if (block[i1 + 3] == block[i2 + 3]) { - if (block[i1 + 4] == block[i2 + 4]) { - if (block[i1 + 5] == block[i2 + 5]) { - if (block[(i1 += 6)] - == block[(i2 += 6)]) { - int x = lastShadow; - X: while (x > 0) { - x -= 4; - - if (block[i1 + 1] - == block[i2 + 1]) { - if (quadrant[i1] - == quadrant[i2]) { - if (block[i1 + 2] == block[i2 + 2]) { - if (quadrant[i1 + 1] == quadrant[i2 + 1]) { - if (block[i1 + 3] == block[i2 + 3]) { - if (quadrant[i1 + 2] == quadrant[i2 + 2]) { - if (block[i1 + 4] == block[i2 + 4]) { - if (quadrant[i1 + 3] == quadrant[i2 + 3]) { - if ((i1 += 4) >= lastPlus1) { - i1 -= lastPlus1; - } - if ((i2 += 4) >= lastPlus1) { - i2 -= lastPlus1; - } - workDoneShadow++; - continue X; - } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1] > quadrant[i2])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - - } - break HAMMER; - } // while x > 0 - else { - if ((block[i1] & 0xff) - > (block[i2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } - } else if ((block[i1 + 5] & 0xff) - > (block[i2 + 5] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 4] & 0xff) - > (block[i2 + 4] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 3] & 0xff) - > (block[i2 + 3] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 2] & 0xff) - > (block[i2 + 2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 1] & 0xff) - > (block[i2 + 1] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } + i = lo + h; + while (true) { + /* copy 1 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; - } // HAMMER - // end inline mainGTU + /* copy 2 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; - fmap[j] = v; + /* copy 3 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } } + zptr[j] = v; + i++; - if (firstAttemptShadow && (i <= hi) && (workDoneShadow > workLimitShadow)) { - break HP; + if (workDone > workLimit && firstAttempt) { + return; } } } - - this.workDone = workDoneShadow; - return firstAttemptShadow && (workDoneShadow > workLimitShadow); } - private static void vswap(int[] fmap, int p1, int p2, int n) { - n += p1; - while (p1 < n) { - int t = fmap[p1]; - fmap[p1++] = fmap[p2]; - fmap[p2++] = t; + private void vswap(int p1, int p2, int n) { + int temp = 0; + while (n > 0) { + temp = zptr[p1]; + zptr[p1] = zptr[p2]; + zptr[p2] = temp; + p1++; + p2++; + n--; } } - private static byte med3(byte a, byte b, byte c) { - return (a < b) - ? (b < c ? b : a < c ? c : a) - : (b > c ? b : a > c ? c : a); + private char med3(char a, char b, char c) { + char t; + if (a > b) { + t = a; + a = b; + b = t; + } + if (b > c) { + t = b; + b = c; + c = t; + } + if (a > b) { + b = a; + } + return b; } - private void blockSort() { - this.workLimit = WORK_FACTOR * this.last; - this.workDone = 0; - this.blockRandomised = false; - this.firstAttempt = true; - mainSort(); + private static class StackElem { + int ll; + int hh; + int dd; + } - if (this.firstAttempt && (this.workDone > this.workLimit)) { - randomiseBlock(); - this.workLimit = this.workDone = 0; - this.firstAttempt = false; - mainSort(); + private void qSort3(int loSt, int hiSt, int dSt) { + int unLo, unHi, ltLo, gtHi, med, n, m; + int sp, lo, hi, d; + StackElem[] stack = new StackElem[QSORT_STACK_SIZE]; + for (int count = 0; count < QSORT_STACK_SIZE; count++) { + stack[count] = new StackElem(); } - int[] fmap = this.data.fmap; - this.origPtr = -1; - for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { - if (fmap[i] == 0) { - this.origPtr = i; - break; + sp = 0; + + stack[sp].ll = loSt; + stack[sp].hh = hiSt; + stack[sp].dd = dSt; + sp++; + + while (sp > 0) { + if (sp >= QSORT_STACK_SIZE) { + panic(); } - } - // assert (this.origPtr != -1) : this.origPtr; - } + sp--; + lo = stack[sp].ll; + hi = stack[sp].hh; + d = stack[sp].dd; - /** - * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 - */ - private void mainQSort3(final Data dataShadow, final int loSt, final int hiSt, - final int dSt) { - final int[] stack_ll = dataShadow.stack_ll; - final int[] stack_hh = dataShadow.stack_hh; - final int[] stack_dd = dataShadow.stack_dd; - final int[] fmap = dataShadow.fmap; - final byte[] block = dataShadow.block; - - stack_ll[0] = loSt; - stack_hh[0] = hiSt; - stack_dd[0] = dSt; - - for (int sp = 1; --sp >= 0;) { - final int lo = stack_ll[sp]; - final int hi = stack_hh[sp]; - final int d = stack_dd[sp]; - - if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { - if (mainSimpleSort(dataShadow, lo, hi, d)) { + if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { + simpleSort(lo, hi, d); + if (workDone > workLimit && firstAttempt) { return; } - } else { - final int d1 = d + 1; - final int med = med3(block[fmap[lo] + d1], - block[fmap[hi ] + d1], - block[fmap[(lo + hi) >> 1] + d1]) - & 0xff; + continue; + } + + med = med3(block[zptr[lo] + d + 1], + block[zptr[hi ] + d + 1], + block[zptr[(lo + hi) >> 1] + d + 1]); - int unLo = lo; - int unHi = hi; - int ltLo = lo; - int gtHi = hi; + unLo = ltLo = lo; + unHi = gtHi = hi; + while (true) { while (true) { - while (unLo <= unHi) { - final int n = - ((int) block[fmap[unLo] + d1] & 0xff) - med; - if (n == 0) { - final int temp = fmap[unLo]; - fmap[unLo++] = fmap[ltLo]; - fmap[ltLo++] = temp; - } else if (n < 0) { - unLo++; - } else { - break; - } + if (unLo > unHi) { + break; } - - while (unLo <= unHi) { - final int n = - ((int) block[fmap[unHi] + d1] & 0xff) - med; - if (n == 0) { - final int temp = fmap[unHi]; - fmap[unHi--] = fmap[gtHi]; - fmap[gtHi--] = temp; - } else if (n > 0) { - unHi--; - } else { - break; - } + n = ((int) block[zptr[unLo] + d + 1]) - med; + if (n == 0) { + int temp = 0; + temp = zptr[unLo]; + zptr[unLo] = zptr[ltLo]; + zptr[ltLo] = temp; + ltLo++; + unLo++; + continue; + }; + if (n > 0) { + break; } - - if (unLo <= unHi) { - final int temp = fmap[unLo]; - fmap[unLo++] = fmap[unHi]; - fmap[unHi--] = temp; - } else { + unLo++; + } + while (true) { + if (unLo > unHi) { + break; + } + n = ((int) block[zptr[unHi] + d + 1]) - med; + if (n == 0) { + int temp = 0; + temp = zptr[unHi]; + zptr[unHi] = zptr[gtHi]; + zptr[gtHi] = temp; + gtHi--; + unHi--; + continue; + }; + if (n < 0) { break; } + unHi--; } - - if (gtHi < ltLo) { - stack_ll[sp] = lo; - stack_hh[sp] = hi; - stack_dd[sp] = d1; - sp++; - } else { - int n = ((ltLo - lo) < (unLo - ltLo)) - ? (ltLo - lo) : (unLo - ltLo); - vswap(fmap, lo, unLo - n, n); - int m = ((hi - gtHi) < (gtHi - unHi)) - ? (hi - gtHi) : (gtHi - unHi); - vswap(fmap, unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack_ll[sp] = lo; - stack_hh[sp] = n; - stack_dd[sp] = d; - sp++; - - stack_ll[sp] = n + 1; - stack_hh[sp] = m - 1; - stack_dd[sp] = d1; - sp++; - - stack_ll[sp] = m; - stack_hh[sp] = hi; - stack_dd[sp] = d; - sp++; + if (unLo > unHi) { + break; } + int temp = 0; + temp = zptr[unLo]; + zptr[unLo] = zptr[unHi]; + zptr[unHi] = temp; + unLo++; + unHi--; + } + + if (gtHi < ltLo) { + stack[sp].ll = lo; + stack[sp].hh = hi; + stack[sp].dd = d + 1; + sp++; + continue; } + + n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); + vswap(lo, unLo - n, n); + m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi); + vswap(unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack[sp].ll = lo; + stack[sp].hh = n; + stack[sp].dd = d; + sp++; + + stack[sp].ll = n + 1; + stack[sp].hh = m - 1; + stack[sp].dd = d + 1; + sp++; + + stack[sp].ll = m; + stack[sp].hh = hi; + stack[sp].dd = d; + sp++; } } private void mainSort() { - final Data dataShadow = this.data; - final int[] runningOrder = dataShadow.mainSort_runningOrder; - final int[] copy = dataShadow.mainSort_copy; - final boolean[] bigDone = dataShadow.mainSort_bigDone; - final int[] ftab = dataShadow.ftab; - final byte[] block = dataShadow.block; - final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; - final int lastShadow = this.last; - final int workLimitShadow = this.workLimit; - final boolean firstAttemptShadow = this.firstAttempt; - - // Set up the 2-byte frequency table - for (int i = 65537; --i >= 0;) { - ftab[i] = 0; - } + int i, j, ss, sb; + int[] runningOrder = new int[256]; + int[] copy = new int[256]; + boolean[] bigDone = new boolean[256]; + int c1, c2; + int numQSorted; /* In the various block-sized structures, live data runs from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area for block. */ - for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { - block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; + + // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); + for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + block[last + i + 2] = block[(i % (last + 1)) + 1]; } - for (int i = lastShadow + NUM_OVERSHOOT_BYTES; --i >= 0;) { + for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) { quadrant[i] = 0; } - block[0] = block[lastShadow + 1]; - // Complete the initial radix sort: + block[0] = (char) (block[last + 1]); - int c1 = block[0] & 0xff; - for (int i = 0; i <= lastShadow; i++) { - final int c2 = block[i + 1] & 0xff; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } + if (last < 4000) { + /* + Use simpleSort(), since the full sorting mechanism + has quite a large constant overhead. + */ + for (i = 0; i <= last; i++) { + zptr[i] = i; + } + firstAttempt = false; + workDone = workLimit = 0; + simpleSort(0, last, 0); + } else { + numQSorted = 0; + for (i = 0; i <= 255; i++) { + bigDone[i] = false; + } - for (int i = 1; i <= 65536; i++) - ftab[i] += ftab[i - 1]; + for (i = 0; i <= 65536; i++) { + ftab[i] = 0; + } - c1 = block[1] & 0xff; - for (int i = 0; i < lastShadow; i++) { - final int c2 = block[i + 2] & 0xff; - fmap[--ftab[(c1 << 8) + c2]] = i; - c1 = c2; - } + c1 = block[0]; + for (i = 0; i <= last; i++) { + c2 = block[i + 1]; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } - fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] - = lastShadow; + for (i = 1; i <= 65536; i++) { + ftab[i] += ftab[i - 1]; + } - /* + c1 = block[1]; + for (i = 0; i < last; i++) { + c2 = block[i + 2]; + j = (c1 << 8) + c2; + c1 = c2; + ftab[j]--; + zptr[ftab[j]] = i; + } + + j = ((block[last + 1]) << 8) + (block[1]); + ftab[j]--; + zptr[ftab[j]] = last; + + /* Now ftab contains the first loc of every small bucket. Calculate the running order, from smallest to largest big bucket. - */ - for (int i = 256; --i >= 0;) { - bigDone[i] = false; - runningOrder[i] = i; - } + */ - for (int h = 364; h != 1;) { - h /= 3; - for (int i = h; i <= 255; i++) { - final int vv = runningOrder[i]; - final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; - final int b = h - 1; - int j = i; - for (int ro = runningOrder[j - h]; - (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; - ro = runningOrder[j - h]) { - runningOrder[j] = ro; - j -= h; - if (j <= b) { - break; - } + for (i = 0; i <= 255; i++) { + runningOrder[i] = i; + } + + { + int vv; + int h = 1; + do { + h = 3 * h + 1; } - runningOrder[j] = vv; + while (h <= 256); + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while ((ftab[((runningOrder[j - h]) + 1) << 8] + - ftab[(runningOrder[j - h]) << 8]) + > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) { + runningOrder[j] = runningOrder[j - h]; + j = j - h; + if (j <= (h - 1)) { + break; + } + } + runningOrder[j] = vv; + } + } while (h != 1); } - } - /* - The main sorting loop. - */ - for (int i = 0; i <= 255; i++) { /* - Process big buckets, starting with the least full. + The main sorting loop. */ - final int ss = runningOrder[i]; + for (i = 0; i <= 255; i++) { - // Step 1: - /* + /* + Process big buckets, starting with the least full. + */ + ss = runningOrder[i]; + + /* Complete the big bucket [ss] by quicksorting any unsorted small buckets [ss, j]. Hopefully previous pointer-scanning phases have already completed many of the small buckets [ss, j], so we don't have to sort them at all. - */ - for (int j = 0; j <= 255; j++) { - final int sb = (ss << 8) + j; - final int ftab_sb = ftab[sb]; - if ((ftab_sb & SETMASK) != SETMASK) { - final int lo = ftab_sb & CLEARMASK; - final int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - mainQSort3(dataShadow, lo, hi, 2); - if (firstAttemptShadow && (this.workDone > workLimitShadow)) { - return; + */ + for (j = 0; j <= 255; j++) { + sb = (ss << 8) + j; + if (!((ftab[sb] & SETMASK) == SETMASK)) { + int lo = ftab[sb] & CLEARMASK; + int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + qSort3(lo, hi, 2); + numQSorted += (hi - lo + 1); + if (workDone > workLimit && firstAttempt) { + return; + } } + ftab[sb] |= SETMASK; } - ftab[sb] = ftab_sb | SETMASK; } - } - - // Step 2: - // Now scan this big bucket so as to synthesise the - // sorted order for small buckets [t, ss] for all t != ss. - - for (int j = 0; j <= 255; j++) { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - } - - for (int j = ftab[ss << 8] & CLEARMASK, - hj = (ftab[(ss + 1) << 8] & CLEARMASK); - j < hj; - j++) { - final int fmap_j = fmap[j]; - c1 = block[fmap_j] & 0xff; - if (!bigDone[c1]) { - fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); - copy[c1]++; - } - } - - for (int j = 256; --j >= 0;) - ftab[(j << 8) + ss] |= SETMASK; - // Step 3: - /* + /* The ss big bucket is now done. Record this fact, and update the quadrant descriptors. Remember to update quadrants in the overshoot area too, if necessary. The "if (i < 255)" test merely skips this updating for the last bucket processed, since updating for the last bucket is pointless. - */ - bigDone[ss] = true; + */ + bigDone[ss] = true; + + if (i < 255) { + int bbStart = ftab[ss << 8] & CLEARMASK; + int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (j = 0; j < bbSize; j++) { + int a2update = zptr[bbStart + j]; + int qVal = (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + last + 1] = qVal; + } + } - if (i < 255) { - final int bbStart = ftab[ss << 8] & CLEARMASK; - final int bbSize = - (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - int shifts = 0; + if (!(((bbSize - 1) >> shifts) <= 65535)) { + panic(); + } + } - while ((bbSize >> shifts) > 65534) { - shifts++; + /* + Now scan this big bucket so as to synthesise the + sorted order for small buckets [t, ss] for all t != ss. + */ + for (j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; } - for (int j = 0; j < bbSize; j++) { - final int a2update = fmap[bbStart + j]; - final char qVal = (char) (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) { - quadrant[a2update + lastShadow + 1] = qVal; + for (j = ftab[ss << 8] & CLEARMASK; + j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) { + c1 = block[zptr[j]]; + if (!bigDone[c1]) { + zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; + copy[c1]++; } } - } + for (j = 0; j <= 255; j++) { + ftab[(j << 8) + ss] |= SETMASK; + } + } } } private void randomiseBlock() { - final boolean[] inUse = this.data.inUse; - final byte[] block = this.data.block; - final int lastShadow = this.last; - - for (int i = 256; --i >= 0;) - inUse[i] = false; - + int i; int rNToGo = 0; int rTPos = 0; - for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { + for (i = 0; i < 256; i++) { + inUse[i] = false; + } + + for (i = 0; i <= last; i++) { if (rNToGo == 0) { - rNToGo = (char) BZip2Constants.rNums[rTPos]; - if (++rTPos == 512) { + rNToGo = (char) rNums[rTPos]; + rTPos++; + if (rTPos == 512) { rTPos = 0; } } - rNToGo--; - block[j] ^= ((rNToGo == 1) ? 1 : 0); - + block[i + 1] ^= ((rNToGo == 1) ? 1 : 0); // handle 16 bit signed numbers - inUse[block[j] & 0xff] = true; + block[i + 1] &= 0xFF; + + inUse[block[i + 1]] = true; + } + } + + private void doReversibleTransformation() { + int i; + + workLimit = workFactor * last; + workDone = 0; + blockRandomised = false; + firstAttempt = true; + + mainSort(); + + if (workDone > workLimit && firstAttempt) { + randomiseBlock(); + workLimit = workDone = 0; + blockRandomised = true; + firstAttempt = false; + mainSort(); } - this.blockRandomised = true; + origPtr = -1; + for (i = 0; i <= last; i++) { + if (zptr[i] == 0) { + origPtr = i; + break; + } + }; + + if (origPtr == -1) { + panic(); + } } - private void generateMTFValues() { - final int lastShadow = this.last; - final Data dataShadow = this.data; - final boolean[] inUse = dataShadow.inUse; - final byte[] block = dataShadow.block; - final int[] fmap = dataShadow.fmap; - final char[] sfmap = dataShadow.sfmap; - final int[] mtfFreq = dataShadow.mtfFreq; - final byte[] unseqToSeq = dataShadow.unseqToSeq; - final byte[] yy = dataShadow.generateMTFValues_yy; - - // make maps - int nInUseShadow = 0; - for (int i = 0; i < 256; i++) { - if (inUse[i]) { - unseqToSeq[i] = (byte) nInUseShadow; - nInUseShadow++; + private boolean fullGtU(int i1, int i2) { + int k; + char c1, c2; + int s1, s2; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + k = last + 1; + + do { + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); } + i1++; + i2++; + + if (i1 > last) { + i1 -= last; + i1--; + }; + if (i2 > last) { + i2 -= last; + i2--; + }; + + k -= 4; + workDone++; + } while (k >= 0); + + return false; + } + + /* + Knuth's increments seem to work better + than Incerpi-Sedgewick here. Possibly + because the number of elems to sort is + usually small, typically <= 20. + */ + private int[] incs = {1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484}; + + private void allocateCompressStructures () { + int n = baseBlockSize * blockSize100k; + block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)]; + quadrant = new int[(n + NUM_OVERSHOOT_BYTES)]; + zptr = new int[n]; + ftab = new int[65537]; + + if (block == null || quadrant == null || zptr == null + || ftab == null) { + //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; + //compressOutOfMemory ( totalDraw, n ); } - this.nInUse = nInUseShadow; - final int eob = nInUseShadow + 1; + /* + The back end needs a place to store the MTF values + whilst it calculates the coding tables. We could + put them in the zptr array. However, these values + will fit in a short, so we overlay szptr at the + start of zptr, in the hope of reducing the number + of cache misses induced by the multiple traversals + of the MTF values when calculating coding tables. + Seems to improve compression speed by about 1%. + */ + // szptr = zptr; + + + szptr = new short[2 * n]; + } - for (int i = eob; i >= 0; i--) { + private void generateMTFValues() { + char[] yy = new char[256]; + int i, j; + char tmp; + char tmp2; + int zPend; + int wr; + int EOB; + + makeMaps(); + EOB = nInUse + 1; + + for (i = 0; i <= EOB; i++) { mtfFreq[i] = 0; } - for (int i = nInUseShadow; --i >= 0;) { - yy[i] = (byte) i; + wr = 0; + zPend = 0; + for (i = 0; i < nInUse; i++) { + yy[i] = (char) i; } - int wr = 0; - int zPend = 0; - for (int i = 0; i <= lastShadow; i++) { - final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; - byte tmp = yy[0]; - int j = 0; + for (i = 0; i <= last; i++) { + char ll_i; + ll_i = unseqToSeq[block[zptr[i]]]; + + j = 0; + tmp = yy[j]; while (ll_i != tmp) { j++; - byte tmp2 = tmp; + tmp2 = tmp; tmp = yy[j]; yy[j] = tmp2; - } + }; yy[0] = tmp; if (j == 0) { @@ -1994,25 +1573,26 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { if (zPend > 0) { zPend--; while (true) { - if ((zPend & 1) == 0) { - sfmap[wr] = RUNA; + switch (zPend % 2) { + case 0: + szptr[wr] = (short) RUNA; wr++; mtfFreq[RUNA]++; - } else { - sfmap[wr] = RUNB; + break; + case 1: + szptr[wr] = (short) RUNB; wr++; mtfFreq[RUNB]++; - } - - if (zPend >= 2) { - zPend = (zPend - 2) >> 1; - } else { + break; + }; + if (zPend < 2) { break; } - } + zPend = (zPend - 2) / 2; + }; zPend = 0; } - sfmap[wr] = (char) (j + 1); + szptr[wr] = (short) (j + 1); wr++; mtfFreq[j + 1]++; } @@ -2021,86 +1601,31 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { if (zPend > 0) { zPend--; while (true) { - if ((zPend & 1) == 0) { - sfmap[wr] = RUNA; + switch (zPend % 2) { + case 0: + szptr[wr] = (short) RUNA; wr++; mtfFreq[RUNA]++; - } else { - sfmap[wr] = RUNB; + break; + case 1: + szptr[wr] = (short) RUNB; wr++; mtfFreq[RUNB]++; + break; } - - if (zPend >= 2) { - zPend = (zPend - 2) >> 1; - } else { + if (zPend < 2) { break; } + zPend = (zPend - 2) / 2; } } - sfmap[wr] = (char) eob; - mtfFreq[eob]++; - this.nMTF = wr + 1; - } - - private static final class Data extends Object { - - // with blockSize 900k - final boolean[] inUse = new boolean[256]; // 256 byte - final byte[] unseqToSeq = new byte[256]; // 256 byte - final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte - final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte - final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte - - final byte[] generateMTFValues_yy = new byte[256]; // 256 byte - final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 byte - final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte - final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte - final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte - final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte - final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte - final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte - - final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte - - final int[] mainSort_runningOrder = new int[256]; // 1024 byte - final int[] mainSort_copy = new int[256]; // 1024 byte - final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte - - final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte - final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte - final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte - - final int[] ftab = new int[65537]; // 262148 byte - // ------------ - // 333408 byte - - final byte[] block; // 900021 byte - final int[] fmap; // 3600000 byte - final char[] sfmap; // 3600000 byte - // ------------ - // 8433529 byte - // ============ - - /** - * Array instance identical to sfmap, both are used only temporarily and indepently, - * so we do not need to allocate additional memory. - */ - final char[] quadrant; - - Data(int blockSize100k) { - super(); - - final int n = blockSize100k * BZip2Constants.baseBlockSize; - this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; - this.fmap = new int[n]; - this.sfmap = new char[2 * n]; - this.quadrant = this.sfmap; - } + szptr[wr] = (short) EOB; + wr++; + mtfFreq[EOB]++; + nMTF = wr; } - } + +