From 08284bc7aa7d066544b30974231240c9b73597eb Mon Sep 17 00:00:00 2001 From: Stefan Bodewig Date: Mon, 21 May 2012 04:33:42 +0000 Subject: [PATCH] [CVE-2012-2098] merge bzip2 edge case improvement from Commons Compress git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@1340895 13f79535-47bb-0310-9956-ffa450edef68 --- WHATSNEW | 4 + .../org/apache/tools/bzip2/BlockSort.java | 1081 +++++++++++++++++ .../tools/bzip2/CBZip2OutputStream.java | 627 +--------- .../org/apache/tools/bzip2/BlockSortTest.java | 171 +++ 4 files changed, 1310 insertions(+), 573 deletions(-) create mode 100644 src/main/org/apache/tools/bzip2/BlockSort.java create mode 100644 src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java diff --git a/WHATSNEW b/WHATSNEW index f46391251..9e0a30cc8 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -14,6 +14,10 @@ Fixed bugs: * Fixed some potential stream leaks. Bugzilla Reports 52738, 52740, 52742, 52743. + * Ported libbzip2's fallback sort algorithm to CBZip2OutputStream to + speed up compression in certain edge cases. Merge from Commons + Compress. + Other changes: -------------- diff --git a/src/main/org/apache/tools/bzip2/BlockSort.java b/src/main/org/apache/tools/bzip2/BlockSort.java new file mode 100644 index 000000000..f997bab39 --- /dev/null +++ b/src/main/org/apache/tools/bzip2/BlockSort.java @@ -0,0 +1,1081 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tools.bzip2; + +import java.util.BitSet; + +/** + * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link + * CBZip2OutputStream}. + * + *

This class is based on a Java port of Julian Seward's + * blocksort.c in his libbzip2

+ * + *

The Burrows-Wheeler transform is a reversible transform of the + * original data that is supposed to group similiar bytes close to + * each other. The idea is to sort all permutations of the input and + * only keep the last byte of each permutation. E.g. for "Commons + * Compress" you'd get:

+ * + *
+ *  CompressCommons
+ * Commons Compress
+ * CompressCommons 
+ * essCommons Compr
+ * mmons CompressCo
+ * mons CompressCom
+ * mpressCommons Co
+ * ns CompressCommo
+ * ommons CompressC
+ * ompressCommons C
+ * ons CompressComm
+ * pressCommons Com
+ * ressCommons Comp
+ * s CompressCommon
+ * sCommons Compres
+ * ssCommons Compre
+ * 
+ * + *

Which results in a new text "ss romooCCmmpnse", in adition the + * index of the first line that contained the original text is kept - + * in this case it is 1. The idea is that in a long English text all + * permutations that start with "he" are likely suffixes of a "the" and + * thus they end in "t" leading to a larger block of "t"s that can + * better be compressed by the subsequent Move-to-Front, run-length + * und Huffman encoding steps.

+ * + *

For more information see for example:

+ * + * + * @NotThreadSafe + */ +class BlockSort { + + /* + * Some of the constructs used in the C code cannot be ported + * literally to Java - for example macros, unsigned types. Some + * code has been hand-tuned to improve performance. In order to + * avoid memory pressure some structures are reused for several + * blocks and some memory is even shared between sorting and the + * MTF stage even though either algorithm uses it for its own + * purpose. + * + * Comments preserved from the actual C code are prefixed with + * "LBZ2:". + */ + + /* + * 2012-05-20 Stefan Bodewig: + * + * This class seems to mix several revisions of libbzip2's code. + * The mainSort function and those used by it look closer to the + * 0.9.5 version but show some variations introduced later. At + * the same time the logic of Compress 1.4 to randomize the block + * on bad input has been dropped after libbzip2 0.9.0 and replaced + * by a fallback sorting algorithm. + * + * I've added the fallbackSort function of 1.0.6 and tried to + * integrate it with the existing code without touching too much. + * I've also removed the now unused randomization code. + */ + + /* + * LBZ2: If you are ever unlucky/improbable enough to get a stack + * overflow whilst sorting, increase the following constant and + * try again. In practice I have never seen the stack go above 27 + * elems, so the following limit seems very generous. + */ + private static final int QSORT_STACK_SIZE = 1000; + + private static final int FALLBACK_QSORT_STACK_SIZE = 100; + + private static final int STACK_SIZE = + QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE + ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE; + + /* + * Used when sorting. If too many long comparisons happen, we stop sorting, + * and use fallbackSort instead. + */ + private int workDone; + private int workLimit; + private boolean firstAttempt; + + private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte + private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte + private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + private final int[] mainSort_runningOrder = new int[256]; // 1024 byte + private final int[] mainSort_copy = new int[256]; // 1024 byte + private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + private final int[] ftab = new int[65537]; // 262148 byte + + /** + * Array instance identical to Data's sfmap, both are used only + * temporarily and indepently, so we do not need to allocate + * additional memory. + */ + private final char[] quadrant; + + BlockSort(final CBZip2OutputStream.Data data) { + this.quadrant = data.sfmap; + } + + void blockSort(final CBZip2OutputStream.Data data, final int last) { + this.workLimit = WORK_FACTOR * last; + this.workDone = 0; + this.firstAttempt = true; + + if (last + 1 < 10000) { + fallbackSort(data, last); + } else { + mainSort(data, last); + + if (this.firstAttempt && (this.workDone > this.workLimit)) { + fallbackSort(data, last); + } + } + + final int[] fmap = data.fmap; + data.origPtr = -1; + for (int i = 0; i <= last; i++) { + if (fmap[i] == 0) { + data.origPtr = i; + break; + } + } + + // assert (data.origPtr != -1) : data.origPtr; + } + + /** + * Adapt fallbackSort to the expected interface of the rest of the + * code, in particular deal with the fact that block starts at + * offset 1 (in libbzip2 1.0.6 it starts at 0). + */ + final void fallbackSort(final CBZip2OutputStream.Data data, + final int last) { + data.block[0] = data.block[last + 1]; + fallbackSort(data.fmap, data.block, last + 1); + for (int i = 0; i < last + 1; i++) { + --data.fmap[i]; + } + for (int i = 0; i < last + 1; i++) { + if (data.fmap[i] == -1) { + data.fmap[i] = last; + break; + } + } + } + +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +/*--- LBZ2: Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + + /* + * This is the fallback sorting algorithm libbzip2 1.0.6 uses for + * repetitive or very short inputs. + * + * The idea is inspired by Manber-Myers string suffix sorting + * algorithm. First a bucket sort places each permutation of the + * block into a bucket based on its first byte. Permutations are + * represented by pointers to their first character kept in + * (partially) sorted order inside the array ftab. + * + * The next step visits all buckets in order and performs a + * quicksort on all permutations of the bucket based on the index + * of the bucket the second byte of the permutation belongs to, + * thereby forming new buckets. When arrived here the + * permutations are sorted up to the second character and we have + * buckets of permutations that are identical up to two + * characters. + * + * Repeat the step of quicksorting each bucket, now based on the + * bucket holding the sequence of the third and forth character + * leading to four byte buckets. Repeat this doubling of bucket + * sizes until all buckets only contain single permutations or the + * bucket size exceeds the block size. + * + * I.e. + * + * "abraba" form three buckets for the chars "a", "b", and "r" in + * the first step with + * + * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 } + * + * when looking at the bucket of "a"s the second characters are in + * the buckets that start with fmap-index 0 (rolled over), 3 and 3 + * respectively, forming two new buckets "aa" and "ab", so we get + * + * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 } + * + * since the last bucket only contained a single item it didn't + * have to be sorted at all. + * + * There now is just one bucket with more than one permutation + * that remains to be sorted. For the permutation that starts + * with index 3 the third and forth char are in bucket 'aa' at + * index 0 and for the one starting at block index 0 they are in + * bucket 'ra' with sort index 5. The fully sorted order then becomes. + * + * fmap = { 5, 3, 0, 4, 1, 2 } + * + */ + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param eclass points from the index of a character inside the + * block to the first index in fmap that contains the + * bucket of its suffix that is sorted in this step. + * @param lo lower boundary of the fmap-interval to be sorted + * @param hi upper boundary of the fmap-interval to be sorted + */ + private void fallbackSimpleSort(int[] fmap, + int[] eclass, + int lo, + int hi) { + if (lo == hi) { + return; + } + + int j; + if (hi - lo > 3) { + for (int i = hi - 4; i >= lo; i--) { + int tmp = fmap[i]; + int ec_tmp = eclass[tmp]; + for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; + j += 4) { + fmap[j - 4] = fmap[j]; + } + fmap[j - 4] = tmp; + } + } + + for (int i = hi - 1; i >= lo; i--) { + int tmp = fmap[i]; + int ec_tmp = eclass[tmp]; + for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) { + fmap[j - 1] = fmap[j]; + } + fmap[j-1] = tmp; + } + } + + private static final int FALLBACK_QSORT_SMALL_THRESH = 10; + + /** + * swaps two values in fmap + */ + private void fswap(int[] fmap, int zz1, int zz2) { + int zztmp = fmap[zz1]; + fmap[zz1] = fmap[zz2]; + fmap[zz2] = zztmp; + } + + /** + * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap. + */ + private void fvswap(int[] fmap, int yyp1, int yyp2, int yyn) { + while (yyn > 0) { + fswap(fmap, yyp1, yyp2); + yyp1++; yyp2++; yyn--; + } + } + + private int fmin(int a, int b) { + return a < b ? a : b; + } + + private void fpush(int sp, int lz, int hz) { + stack_ll[sp] = lz; + stack_hh[sp] = hz; + } + + private int[] fpop(int sp) { + return new int[] { stack_ll[sp], stack_hh[sp] }; + } + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param eclass points from the index of a character inside the + * block to the first index in fmap that contains the + * bucket of its suffix that is sorted in this step. + * @param loSt lower boundary of the fmap-interval to be sorted + * @param hiSt upper boundary of the fmap-interval to be sorted + */ + private void fallbackQSort3(int[] fmap, + int[] eclass, + int loSt, + int hiSt) { + int lo, unLo, ltLo, hi, unHi, gtHi, n; + + long r = 0; + int sp = 0; + fpush(sp++, loSt, hiSt); + + while (sp > 0) { + int[] s = fpop(--sp); + lo = s[0]; hi = s[1]; + + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* LBZ2: Random partitioning. Median of 3 sometimes fails to + avoid bad cases. Median of 9 seems to help but + looks rather expensive. This too seems to work but + is cheaper. Guidance for the magic constants + 7621 and 32768 is taken from Sedgewick's algorithms + book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + long r3 = r % 3, med; + if (r3 == 0) { + med = eclass[fmap[lo]]; + } else if (r3 == 1) { + med = eclass[fmap[(lo + hi) >>> 1]]; + } else { + med = eclass[fmap[hi]]; + } + + unLo = ltLo = lo; + unHi = gtHi = hi; + + // looks like the ternary partition attributed to Wegner + // in the cited Sedgewick paper + while (true) { + while (true) { + if (unLo > unHi) { + break; + } + n = eclass[fmap[unLo]] - (int) med; + if (n == 0) { + fswap(fmap, unLo, ltLo); + ltLo++; unLo++; + continue; + } + if (n > 0) { + break; + } + unLo++; + } + while (true) { + if (unLo > unHi) { + break; + } + n = eclass[fmap[unHi]] - (int) med; + if (n == 0) { + fswap(fmap, unHi, gtHi); + gtHi--; unHi--; + continue; + } + if (n < 0) { + break; + } + unHi--; + } + if (unLo > unHi) { + break; + } + fswap(fmap, unLo, unHi); unLo++; unHi--; + } + + if (gtHi < ltLo) { + continue; + } + + n = fmin(ltLo - lo, unLo - ltLo); + fvswap(fmap, lo, unLo - n, n); + int m = fmin(hi - gtHi, gtHi - unHi); + fvswap(fmap, unHi + 1, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(sp++, lo, n); + fpush(sp++, m, hi); + } else { + fpush(sp++, m, hi); + fpush(sp++, lo, n); + } + } + } + + +/*---------------------------------------------*/ + + private int[] eclass; + + private int[] getEclass() { + return eclass == null + ? (eclass = new int[quadrant.length / 2]) : eclass; + } + + /* + * The C code uses an array of ints (each int holding 32 flags) to + * represents the bucket-start flags (bhtab). It also contains + * optimizations to skip over 32 consecutively set or + * consecutively unset bits on word boundaries at once. For now + * I've chosen to use the simpler but potentially slower code + * using BitSet - also in the hope that using the BitSet#nextXXX + * methods may be fast enough. + */ + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param block the original data + * @param nblock size of the block + * @param off offset of first byte to sort in block + */ + final void fallbackSort(int[] fmap, byte[] block, int nblock) { + final int[] ftab = new int[257]; + int H, i, j, k, l, r, cc, cc1; + int nNotDone; + int nBhtab; + final int[] eclass = getEclass(); + + for (i = 0; i < nblock; i++) { + eclass[i] = 0; + } + /*-- + LBZ2: Initial 1-char radix sort to generate + initial fmap and initial BH bits. + --*/ + for (i = 0; i < nblock; i++) { + ftab[block[i] & 0xff]++; + } + for (i = 1; i < 257; i++) { + ftab[i] += ftab[i - 1]; + } + + for (i = 0; i < nblock; i++) { + j = block[i] & 0xff; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 64 + nblock; + BitSet bhtab = new BitSet(nBhtab); + for (i = 0; i < 256; i++) { + bhtab.set(ftab[i]); + } + + /*-- + LBZ2: Inductively refine the buckets. Kind-of an + "exponential radix sort" (!), inspired by the + Manber-Myers suffix array construction algorithm. + --*/ + + /*-- LBZ2: set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + bhtab.set(nblock + 2 * i); + bhtab.clear(nblock + 2 * i + 1); + } + + /*-- LBZ2: the log(N) loop --*/ + H = 1; + while (true) { + + j = 0; + for (i = 0; i < nblock; i++) { + if (bhtab.get(i)) { + j = i; + } + k = fmap[i] - H; + if (k < 0) { + k += nblock; + } + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (true) { + + /*-- LBZ2: find the next non-singleton bucket --*/ + k = r + 1; + k = bhtab.nextClearBit(k); + l = k - 1; + if (l >= nblock) { + break; + } + k = bhtab.nextSetBit(k + 1); + r = k - 1; + if (r >= nblock) { + break; + } + + /*-- LBZ2: now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- LBZ2: scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + bhtab.set(i); + cc = cc1; + } + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) { + break; + } + } + } + +/*---------------------------------------------*/ + + /* + * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, + 2391484 }; + + /** + * This is the most hammered method of this class. + * + *

+ * This is the version using unrolled loops. Normally I never use such ones + * in Java code. The unrolling has shown a noticable performance improvement + * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the + * JIT compiler of the vm. + *

+ */ + private boolean mainSimpleSort(final CBZip2OutputStream.Data dataShadow, + final int lo, final int hi, final int d, + final int lastShadow) { + final int bigN = hi - lo + 1; + if (bigN < 2) { + return this.firstAttempt && (this.workDone > this.workLimit); + } + + int hp = 0; + while (INCS[hp] < bigN) { + hp++; + } + + final int[] fmap = dataShadow.fmap; + final char[] quadrant = this.quadrant; + final byte[] block = dataShadow.block; + final int lastPlus1 = lastShadow + 1; + final boolean firstAttemptShadow = this.firstAttempt; + final int workLimitShadow = this.workLimit; + int workDoneShadow = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. + + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, lastShadow); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { + break HAMMER; + } + } else { + onceRunned = true; + } + + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] == block[(i2 += 6)]) { + int x = lastShadow; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] == block[i2 + 1]) { + if (quadrant[i1] == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { + i2 -= lastPlus1; + } + workDoneShadow++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + else { + if ((block[i1] & 0xff) > (block[i2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } + } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } // HAMMER + // end inline mainGTU + + fmap[j] = v; + } + + if (firstAttemptShadow && (i <= hi) + && (workDoneShadow > workLimitShadow)) { + break HP; + } + } + } + + this.workDone = workDoneShadow; + return firstAttemptShadow && (workDoneShadow > workLimitShadow); + } + +/*-- + LBZ2: The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. +--*/ + + private static void vswap(int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; + } + } + + private static byte med3(byte a, byte b, byte c) { + return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c + : a); + } + + private static final int SMALL_THRESH = 20; + private static final int DEPTH_THRESH = 10; + private static final int WORK_FACTOR = 30; + + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final CBZip2OutputStream.Data dataShadow, + final int loSt, final int hiSt, final int dSt, + final int last) { + final int[] stack_ll = this.stack_ll; + final int[] stack_hh = this.stack_hh; + final int[] stack_dd = this.stack_dd; + final int[] fmap = dataShadow.fmap; + final byte[] block = dataShadow.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(dataShadow, lo, hi, d, last)) { + return; + } + } else { + final int d1 = d + 1; + final int med = med3(block[fmap[lo] + d1], + block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; + + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; + + while (true) { + while (unLo <= unHi) { + final int n = (block[fmap[unLo] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } + } + + while (unLo <= unHi) { + final int n = (block[fmap[unHi] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } + } + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { + break; + } + } + + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) + : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) + : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } + } + } + } + + private static final int SETMASK = (1 << 21); + private static final int CLEARMASK = (~SETMASK); + + final void mainSort(final CBZip2OutputStream.Data dataShadow, + final int lastShadow) { + final int[] runningOrder = this.mainSort_runningOrder; + final int[] copy = this.mainSort_copy; + final boolean[] bigDone = this.mainSort_bigDone; + final int[] ftab = this.ftab; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = this.quadrant; + final int workLimitShadow = this.workLimit; + final boolean firstAttemptShadow = this.firstAttempt; + + // LBZ2: Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } + + /* + * In the various block-sized structures, live data runs from 0 to + * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area + * for block. + */ + for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) { + block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; + } + for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) { + quadrant[i] = 0; + } + block[0] = block[lastShadow + 1]; + + // LBZ2: Complete the initial radix sort: + + int c1 = block[0] & 0xff; + for (int i = 0; i <= lastShadow; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (int i = 1; i <= 65536; i++) { + ftab[i] += ftab[i - 1]; + } + + c1 = block[1] & 0xff; + for (int i = 0; i < lastShadow; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } + + fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; + + /* + * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the + * running order, from smallest to largest big bucket. + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } + + for (int h = 364; h != 1;) { + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j + - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; + } + } + runningOrder[j] = vv; + } + } + + /* + * LBZ2: The main sorting loop. + */ + for (int i = 0; i <= 255; i++) { + /* + * LBZ2: Process big buckets, starting with the least full. + */ + final int ss = runningOrder[i]; + + // Step 1: + /* + * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small + * buckets [ss, j]. Hopefully previous pointer-scanning phases have + * already completed many of the small buckets [ss, j], so we don't + * have to sort them at all. + */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(dataShadow, lo, hi, 2, lastShadow); + if (firstAttemptShadow + && (this.workDone > workLimitShadow)) { + return; + } + } + ftab[sb] = ftab_sb | SETMASK; + } + } + + // Step 2: + // LBZ2: Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. + + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); + copy[c1]++; + } + } + + for (int j = 256; --j >= 0;) { + ftab[(j << 8) + ss] |= SETMASK; + } + + // Step 3: + /* + * LBZ2: The ss big bucket is now done. Record this fact, and update the + * quadrant descriptors. Remember to update quadrants in the + * overshoot area too, if necessary. The "if (i < 255)" test merely + * skips this updating for the last bucket processed, since updating + * for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) { + quadrant[a2update + lastShadow + 1] = qVal; + } + } + } + + } + } + +} diff --git a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java index bc84844ad..cbdd87a01 100644 --- a/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java +++ b/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java @@ -24,8 +24,8 @@ package org.apache.tools.bzip2; -import java.io.OutputStream; import java.io.IOException; +import java.io.OutputStream; /** * An output stream that compresses into the BZip2 format (without the file @@ -528,19 +528,12 @@ public class CBZip2OutputStream extends OutputStream */ private int last; - /** - * Index in fmap[] of original string after sorting. - */ - private int origPtr; - /** * Always: in the range 0 .. 9. The current block size is 100000 * this * number. */ private final int blockSize100k; - private boolean blockRandomised; - private int bsBuff; private int bsLive; private final CRC crc = new CRC(); @@ -549,14 +542,6 @@ public class CBZip2OutputStream extends OutputStream private int nMTF; - /* - * Used when sorting. If too many long comparisons happen, we stop sorting, - * randomise the block slightly, and try again. - */ - private int workDone; - private int workLimit; - private boolean firstAttempt; - private int currentChar = -1; private int runLength = 0; @@ -567,7 +552,8 @@ public class CBZip2OutputStream extends OutputStream /** * All memory intensive stuff. */ - private CBZip2OutputStream.Data data; + private Data data; + private BlockSort blockSorter; private OutputStream out; @@ -592,7 +578,7 @@ public class CBZip2OutputStream extends OutputStream * Constructs a new CBZip2OutputStream with a blocksize of 900k. * *

- * Attention: The caller is resonsible to write the two BZip2 magic + * Attention: The caller is responsible to write the two BZip2 magic * bytes "BZ" to the specified stream prior to calling this * constructor. *

@@ -613,7 +599,7 @@ public class CBZip2OutputStream extends OutputStream * Constructs a new CBZip2OutputStream with specified blocksize. * *

- * Attention: The caller is resonsible to write the two BZip2 magic + * Attention: The caller is responsible to write the two BZip2 magic * bytes "BZ" to the specified stream prior to calling this * constructor. *

@@ -652,6 +638,8 @@ public class CBZip2OutputStream extends OutputStream init(); } + /** {@inheritDoc} */ + @Override public void write(final int b) throws IOException { if (this.out != null) { write0(b); @@ -660,6 +648,19 @@ public class CBZip2OutputStream extends OutputStream } } + /** + * Writes the current byte to the buffer, run-length encoding it + * if it has been repeated at least four times (the first step + * RLEs sequences of four identical bytes). + * + *

Flushes the current block before writing data if it is + * full.

+ * + *

"write to the buffer" means adding to data.buffer starting + * two steps "after" this.last - initially starting at index 1 + * (not 0) - and updating this.last to point to the last index + * written minus 1.

+ */ private void writeRun() throws IOException { final int lastShadow = this.last; @@ -717,6 +718,7 @@ public class CBZip2OutputStream extends OutputStream /** * Overriden to close the stream. */ + @Override protected void finalize() throws Throwable { finish(); super.finalize(); @@ -735,10 +737,12 @@ public class CBZip2OutputStream extends OutputStream } finally { this.out = null; this.data = null; + this.blockSorter = null; } } } + @Override public void close() throws IOException { if (out != null) { OutputStream outShadow = this.out; @@ -747,6 +751,7 @@ public class CBZip2OutputStream extends OutputStream } } + @Override public void flush() throws IOException { OutputStream outShadow = this.out; if (outShadow != null) { @@ -760,6 +765,7 @@ public class CBZip2OutputStream extends OutputStream // this.out.write('Z'); this.data = new Data(this.blockSize100k); + this.blockSorter = new BlockSort(this.data); /* * Write `magic' bytes h indicating file-format == huffmanised, followed @@ -821,12 +827,8 @@ public class CBZip2OutputStream extends OutputStream /* Now the block's CRC, so it is in a known place. */ bsPutInt(this.blockCRC); - /* Now a single bit indicating randomisation. */ - if (this.blockRandomised) { - bsW(1, 1); - } else { - bsW(1, 0); - } + /* Now a single bit indicating no randomisation. */ + bsW(1, 0); /* Finally, block's contents proper. */ moveToFrontCodeAndSend(); @@ -857,6 +859,7 @@ public class CBZip2OutputStream extends OutputStream return this.blockSize100k; } + @Override public void write(final byte[] buf, int offs, final int len) throws IOException { if (offs < 0) { @@ -879,6 +882,10 @@ public class CBZip2OutputStream extends OutputStream } } + /** + * Keeps track of the last bytes written and implicitly performs + * run-length encoding as the first step of the bzip2 algorithm. + */ private void write0(int b) throws IOException { if (this.currentChar != -1) { b &= 0xff; @@ -990,7 +997,7 @@ public class CBZip2OutputStream extends OutputStream sendMTFValues6(nGroups, alphaSize); /* And finally, the block data proper */ - sendMTFValues7(nSelectors); + sendMTFValues7(); } private void sendMTFValues0(final int nGroups, final int alphaSize) { @@ -1343,7 +1350,7 @@ public class CBZip2OutputStream extends OutputStream this.bsLive = bsLiveShadow; } - private void sendMTFValues7(final int nSelectors) throws IOException { + private void sendMTFValues7() throws IOException { final Data dataShadow = this.data; final byte[][] len = dataShadow.sendMTFValues_len; final int[][] code = dataShadow.sendMTFValues_code; @@ -1391,545 +1398,22 @@ public class CBZip2OutputStream extends OutputStream } private void moveToFrontCodeAndSend() throws IOException { - bsW(24, this.origPtr); + bsW(24, this.data.origPtr); generateMTFValues(); sendMTFValues(); } - /** - * This is the most hammered method of this class. - * - *

- * This is the version using unrolled loops. Normally I never use such ones - * in Java code. The unrolling has shown a noticable performance improvement - * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the - * JIT compiler of the vm. - *

- */ - private boolean mainSimpleSort(final Data dataShadow, final int lo, - final int hi, final int d) { - final int bigN = hi - lo + 1; - if (bigN < 2) { - return this.firstAttempt && (this.workDone > this.workLimit); - } - - int hp = 0; - while (INCS[hp] < bigN) { - hp++; - } - - final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; - final byte[] block = dataShadow.block; - final int lastShadow = this.last; - final int lastPlus1 = lastShadow + 1; - final boolean firstAttemptShadow = this.firstAttempt; - final int workLimitShadow = this.workLimit; - int workDoneShadow = this.workDone; - - // Following block contains unrolled code which could be shortened by - // coding it in additional loops. - - HP: while (--hp >= 0) { - final int h = INCS[hp]; - final int mj = lo + h - 1; - - for (int i = lo + h; i <= hi;) { - // copy - for (int k = 3; (i <= hi) && (--k >= 0); i++) { - final int v = fmap[i]; - final int vd = v + d; - int j = i; - - // for (int a; - // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, - // block, quadrant, lastShadow); - // j -= h) { - // fmap[j] = a; - // } - // - // unrolled version: - - // start inline mainGTU - boolean onceRunned = false; - int a = 0; - - HAMMER: while (true) { - if (onceRunned) { - fmap[j] = a; - if ((j -= h) <= mj) { - break HAMMER; - } - } else { - onceRunned = true; - } - - a = fmap[j - h]; - int i1 = a + d; - int i2 = vd; - - // following could be done in a loop, but - // unrolled it for performance: - if (block[i1 + 1] == block[i2 + 1]) { - if (block[i1 + 2] == block[i2 + 2]) { - if (block[i1 + 3] == block[i2 + 3]) { - if (block[i1 + 4] == block[i2 + 4]) { - if (block[i1 + 5] == block[i2 + 5]) { - if (block[(i1 += 6)] == block[(i2 += 6)]) { - int x = lastShadow; - X: while (x > 0) { - x -= 4; - - if (block[i1 + 1] == block[i2 + 1]) { - if (quadrant[i1] == quadrant[i2]) { - if (block[i1 + 2] == block[i2 + 2]) { - if (quadrant[i1 + 1] == quadrant[i2 + 1]) { - if (block[i1 + 3] == block[i2 + 3]) { - if (quadrant[i1 + 2] == quadrant[i2 + 2]) { - if (block[i1 + 4] == block[i2 + 4]) { - if (quadrant[i1 + 3] == quadrant[i2 + 3]) { - if ((i1 += 4) >= lastPlus1) { - i1 -= lastPlus1; - } - if ((i2 += 4) >= lastPlus1) { - i2 -= lastPlus1; - } - workDoneShadow++; - continue X; - } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1] > quadrant[i2])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - - } - break HAMMER; - } // while x > 0 - else { - if ((block[i1] & 0xff) > (block[i2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } - } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - - } // HAMMER - // end inline mainGTU - - fmap[j] = v; - } - - if (firstAttemptShadow && (i <= hi) - && (workDoneShadow > workLimitShadow)) { - break HP; - } - } - } - - this.workDone = workDoneShadow; - return firstAttemptShadow && (workDoneShadow > workLimitShadow); - } - - private static void vswap(int[] fmap, int p1, int p2, int n) { - n += p1; - while (p1 < n) { - int t = fmap[p1]; - fmap[p1++] = fmap[p2]; - fmap[p2++] = t; - } - } - - private static byte med3(byte a, byte b, byte c) { - return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c - : a); - } - private void blockSort() { - this.workLimit = WORK_FACTOR * this.last; - this.workDone = 0; - this.blockRandomised = false; - this.firstAttempt = true; - mainSort(); - - if (this.firstAttempt && (this.workDone > this.workLimit)) { - randomiseBlock(); - this.workLimit = this.workDone = 0; - this.firstAttempt = false; - mainSort(); - } - - int[] fmap = this.data.fmap; - this.origPtr = -1; - for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { - if (fmap[i] == 0) { - this.origPtr = i; - break; - } - } - - // assert (this.origPtr != -1) : this.origPtr; + blockSorter.blockSort(data, last); } - /** - * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + /* + * Performs Move-To-Front on the Burrows-Wheeler transformed + * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB + * run-length-encoded form. + * + *

Keeps track of byte frequencies in data.mtfFreq at the same time.

*/ - private void mainQSort3(final Data dataShadow, final int loSt, - final int hiSt, final int dSt) { - final int[] stack_ll = dataShadow.stack_ll; - final int[] stack_hh = dataShadow.stack_hh; - final int[] stack_dd = dataShadow.stack_dd; - final int[] fmap = dataShadow.fmap; - final byte[] block = dataShadow.block; - - stack_ll[0] = loSt; - stack_hh[0] = hiSt; - stack_dd[0] = dSt; - - for (int sp = 1; --sp >= 0;) { - final int lo = stack_ll[sp]; - final int hi = stack_hh[sp]; - final int d = stack_dd[sp]; - - if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { - if (mainSimpleSort(dataShadow, lo, hi, d)) { - return; - } - } else { - final int d1 = d + 1; - final int med = med3(block[fmap[lo] + d1], - block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; - - int unLo = lo; - int unHi = hi; - int ltLo = lo; - int gtHi = hi; - - while (true) { - while (unLo <= unHi) { - final int n = ((int) block[fmap[unLo] + d1] & 0xff) - - med; - if (n == 0) { - final int temp = fmap[unLo]; - fmap[unLo++] = fmap[ltLo]; - fmap[ltLo++] = temp; - } else if (n < 0) { - unLo++; - } else { - break; - } - } - - while (unLo <= unHi) { - final int n = ((int) block[fmap[unHi] + d1] & 0xff) - - med; - if (n == 0) { - final int temp = fmap[unHi]; - fmap[unHi--] = fmap[gtHi]; - fmap[gtHi--] = temp; - } else if (n > 0) { - unHi--; - } else { - break; - } - } - - if (unLo <= unHi) { - final int temp = fmap[unLo]; - fmap[unLo++] = fmap[unHi]; - fmap[unHi--] = temp; - } else { - break; - } - } - - if (gtHi < ltLo) { - stack_ll[sp] = lo; - stack_hh[sp] = hi; - stack_dd[sp] = d1; - sp++; - } else { - int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) - : (unLo - ltLo); - vswap(fmap, lo, unLo - n, n); - int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) - : (gtHi - unHi); - vswap(fmap, unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack_ll[sp] = lo; - stack_hh[sp] = n; - stack_dd[sp] = d; - sp++; - - stack_ll[sp] = n + 1; - stack_hh[sp] = m - 1; - stack_dd[sp] = d1; - sp++; - - stack_ll[sp] = m; - stack_hh[sp] = hi; - stack_dd[sp] = d; - sp++; - } - } - } - } - - private void mainSort() { - final Data dataShadow = this.data; - final int[] runningOrder = dataShadow.mainSort_runningOrder; - final int[] copy = dataShadow.mainSort_copy; - final boolean[] bigDone = dataShadow.mainSort_bigDone; - final int[] ftab = dataShadow.ftab; - final byte[] block = dataShadow.block; - final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; - final int lastShadow = this.last; - final int workLimitShadow = this.workLimit; - final boolean firstAttemptShadow = this.firstAttempt; - - // Set up the 2-byte frequency table - for (int i = 65537; --i >= 0;) { - ftab[i] = 0; - } - - /* - * In the various block-sized structures, live data runs from 0 to - * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area - * for block. - */ - for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { - block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; - } - for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) { - quadrant[i] = 0; - } - block[0] = block[lastShadow + 1]; - - // Complete the initial radix sort: - - int c1 = block[0] & 0xff; - for (int i = 0; i <= lastShadow; i++) { - final int c2 = block[i + 1] & 0xff; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } - - for (int i = 1; i <= 65536; i++) - ftab[i] += ftab[i - 1]; - - c1 = block[1] & 0xff; - for (int i = 0; i < lastShadow; i++) { - final int c2 = block[i + 2] & 0xff; - fmap[--ftab[(c1 << 8) + c2]] = i; - c1 = c2; - } - - fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; - - /* - * Now ftab contains the first loc of every small bucket. Calculate the - * running order, from smallest to largest big bucket. - */ - for (int i = 256; --i >= 0;) { - bigDone[i] = false; - runningOrder[i] = i; - } - - for (int h = 364; h != 1;) { - h /= 3; - for (int i = h; i <= 255; i++) { - final int vv = runningOrder[i]; - final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; - final int b = h - 1; - int j = i; - for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j - - h]) { - runningOrder[j] = ro; - j -= h; - if (j <= b) { - break; - } - } - runningOrder[j] = vv; - } - } - - /* - * The main sorting loop. - */ - for (int i = 0; i <= 255; i++) { - /* - * Process big buckets, starting with the least full. - */ - final int ss = runningOrder[i]; - - // Step 1: - /* - * Complete the big bucket [ss] by quicksorting any unsorted small - * buckets [ss, j]. Hopefully previous pointer-scanning phases have - * already completed many of the small buckets [ss, j], so we don't - * have to sort them at all. - */ - for (int j = 0; j <= 255; j++) { - final int sb = (ss << 8) + j; - final int ftab_sb = ftab[sb]; - if ((ftab_sb & SETMASK) != SETMASK) { - final int lo = ftab_sb & CLEARMASK; - final int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - mainQSort3(dataShadow, lo, hi, 2); - if (firstAttemptShadow - && (this.workDone > workLimitShadow)) { - return; - } - } - ftab[sb] = ftab_sb | SETMASK; - } - } - - // Step 2: - // Now scan this big bucket so as to synthesise the - // sorted order for small buckets [t, ss] for all t != ss. - - for (int j = 0; j <= 255; j++) { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - } - - for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { - final int fmap_j = fmap[j]; - c1 = block[fmap_j] & 0xff; - if (!bigDone[c1]) { - fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); - copy[c1]++; - } - } - - for (int j = 256; --j >= 0;) - ftab[(j << 8) + ss] |= SETMASK; - - // Step 3: - /* - * The ss big bucket is now done. Record this fact, and update the - * quadrant descriptors. Remember to update quadrants in the - * overshoot area too, if necessary. The "if (i < 255)" test merely - * skips this updating for the last bucket processed, since updating - * for the last bucket is pointless. - */ - bigDone[ss] = true; - - if (i < 255) { - final int bbStart = ftab[ss << 8] & CLEARMASK; - final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - int shifts = 0; - - while ((bbSize >> shifts) > 65534) { - shifts++; - } - - for (int j = 0; j < bbSize; j++) { - final int a2update = fmap[bbStart + j]; - final char qVal = (char) (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) { - quadrant[a2update + lastShadow + 1] = qVal; - } - } - } - - } - } - - private void randomiseBlock() { - final boolean[] inUse = this.data.inUse; - final byte[] block = this.data.block; - final int lastShadow = this.last; - - for (int i = 256; --i >= 0;) - inUse[i] = false; - - int rNToGo = 0; - int rTPos = 0; - for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { - if (rNToGo == 0) { - rNToGo = (char) BZip2Constants.rNums[rTPos]; - if (++rTPos == 512) { - rTPos = 0; - } - } - - rNToGo--; - block[j] ^= ((rNToGo == 1) ? 1 : 0); - - // handle 16 bit signed numbers - inUse[block[j] & 0xff] = true; - } - - this.blockRandomised = true; - } - private void generateMTFValues() { final int lastShadow = this.last; final Data dataShadow = this.data; @@ -2033,9 +1517,10 @@ public class CBZip2OutputStream extends OutputStream this.nMTF = wr + 1; } - private static final class Data extends Object { + static final class Data extends Object { // with blockSize 900k + /* maps unsigned byte => "does it occur in block" */ final boolean[] inUse = new boolean[256]; // 256 byte final byte[] unseqToSeq = new byte[256]; // 256 byte final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte @@ -2054,23 +1539,19 @@ public class CBZip2OutputStream extends OutputStream final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte - final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte - - final int[] mainSort_runningOrder = new int[256]; // 1024 byte - final int[] mainSort_copy = new int[256]; // 1024 byte - final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte - final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte - final int[] ftab = new int[65537]; // 262148 byte // ------------ // 333408 byte + /* holds the RLEd block of original data starting at index 1. + * After sorting the last byte added to the buffer is at index + * 0. */ final byte[] block; // 900021 byte + /* maps index in Burrows-Wheeler transformed block => index of + * byte in original block */ final int[] fmap; // 3600000 byte final char[] sfmap; // 3600000 byte // ------------ @@ -2078,11 +1559,12 @@ public class CBZip2OutputStream extends OutputStream // ============ /** - * Array instance identical to sfmap, both are used only - * temporarily and indepently, so we do not need to allocate - * additional memory. + * Index of original line in Burrows-Wheeler table. + * + *

This is the index in fmap that points to the last byte + * of the original data.

*/ - final char[] quadrant; + int origPtr; Data(int blockSize100k) { super(); @@ -2091,7 +1573,6 @@ public class CBZip2OutputStream extends OutputStream this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; this.fmap = new int[n]; this.sfmap = new char[2 * n]; - this.quadrant = this.sfmap; } } diff --git a/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java b/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java new file mode 100644 index 000000000..caa43e4c3 --- /dev/null +++ b/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tools.bzip2; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class BlockSortTest { + + private static final byte[] FIXTURE = { 0, 1, (byte) 252, (byte) 253, (byte) 255, + (byte) 254, 3, 2, (byte) 128 }; + + /* + Burrows-Wheeler transform of fixture the manual way: + + * build the matrix + + 0, 1, 252, 253, 255, 254, 3, 2, 128 + 1, 252, 253, 255, 254, 3, 2, 128, 0 + 252, 253, 255, 254, 3, 2, 128, 0, 1 + 253, 255, 254, 3, 2, 128, 0, 1, 252 + 255, 254, 3, 2, 128, 0, 1, 252, 253 + 254, 3, 2, 128, 0, 1, 252, 253, 255 + 3, 2, 128, 0, 1, 252, 253, 255, 254 + 2, 128, 0, 1, 252, 253, 255, 254, 3 + 128, 0, 1, 252, 253, 255, 254, 3, 2 + + * sort it + + 0, 1, 252, 253, 255, 254, 3, 2, 128 + 1, 252, 253, 255, 254, 3, 2, 128, 0 + 2, 128, 0, 1, 252, 253, 255, 254, 3 + 3, 2, 128, 0, 1, 252, 253, 255, 254 + 128, 0, 1, 252, 253, 255, 254, 3, 2 + 252, 253, 255, 254, 3, 2, 128, 0, 1 + 253, 255, 254, 3, 2, 128, 0, 1, 252 + 254, 3, 2, 128, 0, 1, 252, 253, 255 + 255, 254, 3, 2, 128, 0, 1, 252, 253 + + * grab last column + + 128, 0, 3, 254, 2, 1, 252, 255, 253 + + and the original line has been 0 + */ + + private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, + (byte) 252, (byte) 255, (byte) 253 }; + + private static final int[] FIXTURE_SORTED = { + 0, 1, 7, 6, 8, 2, 3, 5, 4 + }; + + private static final byte[] FIXTURE2 = { + 'C', 'o', 'm', 'm', 'o', 'n', 's', ' ', 'C', 'o', 'm', 'p', 'r', 'e', 's', 's', + }; + + private static final byte[] FIXTURE2_BWT = { + 's', 's', ' ', 'r', 'o', 'm', 'o', 'o', 'C', 'C', 'm', 'm', 'p', 'n', 's', 'e', + }; + + @Test + public void testSortFixture() { + DS ds = setUpFixture(); + ds.s.blockSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + assertEquals(0, ds.data.origPtr); + } + + @Test + public void testSortFixtureMainSort() { + DS ds = setUpFixture(); + ds.s.mainSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + } + + @Test + public void testSortFixtureFallbackSort() { + DS ds = setUpFixture(); + ds.s.fallbackSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + } + + @Test + public void testSortFixture2() { + DS ds = setUpFixture2(); + ds.s.blockSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + assertEquals(1, ds.data.origPtr); + } + + @Test + public void testSortFixture2MainSort() { + DS ds = setUpFixture2(); + ds.s.mainSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + } + + @Test + public void testSortFixture2FallbackSort() { + DS ds = setUpFixture2(); + ds.s.fallbackSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + } + + @Test + public void testFallbackSort() { + CBZip2OutputStream.Data data = new CBZip2OutputStream.Data(1); + BlockSort s = new BlockSort(data); + int[] fmap = new int[FIXTURE.length]; + s.fallbackSort(fmap, FIXTURE, FIXTURE.length); + assertArrayEquals(FIXTURE_SORTED, fmap); + } + + private DS setUpFixture() { + return setUpFixture(FIXTURE); + } + + private void assertFixtureSorted(CBZip2OutputStream.Data data) { + assertFixtureSorted(data, FIXTURE, FIXTURE_BWT); + } + + private DS setUpFixture2() { + return setUpFixture(FIXTURE2); + } + + private void assertFixture2Sorted(CBZip2OutputStream.Data data) { + assertFixtureSorted(data, FIXTURE2, FIXTURE2_BWT); + } + + private DS setUpFixture(byte[] fixture) { + CBZip2OutputStream.Data data = new CBZip2OutputStream.Data(1); + System.arraycopy(fixture, 0, data.block, 1, fixture.length); + return new DS(data, new BlockSort(data)); + } + + private void assertFixtureSorted(CBZip2OutputStream.Data data, + byte[] fixture, byte[] fixtureBwt) { + assertEquals(fixture[fixture.length - 1], data.block[0]); + for (int i = 0; i < fixture.length; i++) { + assertEquals(fixtureBwt[i], data.block[data.fmap[i]]); + } + } + + private static class DS { + private final CBZip2OutputStream.Data data; + private final BlockSort s; + DS(CBZip2OutputStream.Data data, BlockSort s) { + this.data = data; + this.s = s; + } + } +} \ No newline at end of file