Browse Source

optionally read/write InfoZIP unicode fields, merge from commons-compress

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748147 13f79535-47bb-0310-9956-ffa450edef68
master
Stefan Bodewig 16 years ago
parent
commit
8a183a492d
7 changed files with 222 additions and 32 deletions
  1. +17
    -8
      src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
  2. +15
    -4
      src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
  3. +12
    -1
      src/main/org/apache/tools/zip/UnicodePathExtraField.java
  4. +14
    -4
      src/main/org/apache/tools/zip/ZipEncodingHelper.java
  5. +12
    -0
      src/main/org/apache/tools/zip/ZipEntry.java
  6. +124
    -12
      src/main/org/apache/tools/zip/ZipFile.java
  7. +28
    -3
      src/main/org/apache/tools/zip/ZipOutputStream.java

+ 17
- 8
src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java View File

@@ -34,23 +34,32 @@ public abstract class AbstractUnicodeExtraField implements ZipExtraField {
} }


/** /**
* Assemble as unicode path extension form the name and encoding
* of the orginal zip entry.
* Assemble as unicode extension from the name/comment and
* encoding of the original zip entry.
* *
* @param name The file name or comment.
* @param text The file name or comment.
* @param zipEncoding The encoding of the filenames in the zip * @param zipEncoding The encoding of the filenames in the zip
* file, usually <code>"CP437"</code>. * file, usually <code>"CP437"</code>.
*/ */
protected AbstractUnicodeExtraField(String name, String zipEncoding) {
byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding);
protected AbstractUnicodeExtraField(String text, String zipEncoding) {
this(text, ZipEncodingHelper.encodeName(text, zipEncoding));
}


/**
* Assemble as unicode extension from the name/comment and
* the bytes actually written to the archive.
*
* @param text The file name or comment.
* @param bytes The bytes actually written to the archive for the
* file name or comment; the CRC32 is computed over these bytes.
*/
protected AbstractUnicodeExtraField(String text, byte[] bytes) {
CRC32 crc32 = new CRC32(); CRC32 crc32 = new CRC32();
crc32.update(filename);
crc32.update(bytes);
nameCRC32 = crc32.getValue(); nameCRC32 = crc32.getValue();


try { try {
unicodeName = name.getBytes("UTF-8");
unicodeName = text.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
throw new RuntimeException("FATAL: UTF-8 encoding not supported.", throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
e); e);


+ 15
- 4
src/main/org/apache/tools/zip/UnicodeCommentExtraField.java View File

@@ -42,15 +42,26 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField {
} }


/** /**
* Assemble as unicode comment extension form the comment and
* Assemble as unicode comment extension from the comment and
* encoding of the orginal zip entry. * encoding of the orginal zip entry.
* *
* @param name The file name
* @param comment The file comment
* @param zipEncoding The encoding of the comment in the zip file, * @param zipEncoding The encoding of the comment in the zip file,
* usually <code>"CP437"</code>. * usually <code>"CP437"</code>.
*/ */
public UnicodeCommentExtraField(String name, String zipEncoding) {
super(name, zipEncoding);
public UnicodeCommentExtraField(String comment, String zipEncoding) {
super(comment, zipEncoding);
}

/**
* Assemble as unicode comment extension from the comment given as
* text as well as the bytes actually written to the archive.
*
* @param comment The file comment
* @param bytes the bytes actually written to the archive
*/
public UnicodeCommentExtraField(String comment, byte[] bytes) {
super(comment, bytes);
} }


public ZipShort getHeaderId() { public ZipShort getHeaderId() {


+ 12
- 1
src/main/org/apache/tools/zip/UnicodePathExtraField.java View File

@@ -42,7 +42,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField {
} }


/** /**
* Assemble as unicode path extension form the name and encoding
* Assemble as unicode path extension from the name and encoding
* of the orginal zip entry. * of the orginal zip entry.
* *
* @param name The file name * @param name The file name
@@ -53,6 +53,17 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField {
super(name, zipEncoding); super(name, zipEncoding);
} }


/**
* Assemble as unicode path extension from the name given as
* text as well as the bytes actually written to the archive.
*
* <p>Both arguments are handed unchanged to the
* {@link AbstractUnicodeExtraField} constructor.</p>
*
* @param name The file name
* @param bytes the bytes actually written to the archive for
* the file name
*/
public UnicodePathExtraField(String name, byte[] bytes) {
super(name, bytes);
}

public ZipShort getHeaderId() { public ZipShort getHeaderId() {
return UPATH_ID; return UPATH_ID;
} }


+ 14
- 4
src/main/org/apache/tools/zip/ZipEncodingHelper.java View File

@@ -75,17 +75,23 @@ abstract class ZipEncodingHelper {
* </pre> * </pre>
* *
* @param name The filename or comment with possible non-ASCII * @param name The filename or comment with possible non-ASCII
* unicode characters.
* unicode characters. Must not be null.
* @param encoding A valid encoding name. The standard zip * @param encoding A valid encoding name. The standard zip
* encoding is <code>"CP437"</code>, * encoding is <code>"CP437"</code>,
* <code>"UTF-8"</code> is supported in ZIP file * <code>"UTF-8"</code> is supported in ZIP file
* version <code>6.3</code> or later.
* version <code>6.3</code> or later. If null,
* will use the platform's {@link
* java.lang.String#getBytes default encoding}.
* @return A byte array containing the mapped file * @return A byte array containing the mapped file
* name. Unmappable characters or malformed character * name. Unmappable characters or malformed character
* sequences are mapped to a sequence of utf-16 words * sequences are mapped to a sequence of utf-16 words
* encoded in the format <code>%Uxxxx</code>. * encoded in the format <code>%Uxxxx</code>.
*/ */
static final byte[] encodeName(String name, String encoding) { static final byte[] encodeName(String name, String encoding) {
if (encoding == null) {
return name.getBytes();
}

Charset cs = Charset.forName(encoding); Charset cs = Charset.forName(encoding);
CharsetEncoder enc = cs.newEncoder(); CharsetEncoder enc = cs.newEncoder();


@@ -178,8 +184,12 @@ abstract class ZipEncodingHelper {
* <code>"UTF-8"</code> is supported in ZIP file * <code>"UTF-8"</code> is supported in ZIP file
* version <code>6.3</code> or later. * version <code>6.3</code> or later.
*/ */
static final String decodeName(byte[] name, String encoding) {
static final String decodeName(byte[] name, String encoding)
throws java.nio.charset.CharacterCodingException {
Charset cs = Charset.forName(encoding); Charset cs = Charset.forName(encoding);
return cs.decode(ByteBuffer.wrap(name)).toString();
return cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT)
.decode(ByteBuffer.wrap(name)).toString();
} }
} }

+ 12
- 0
src/main/org/apache/tools/zip/ZipEntry.java View File

@@ -263,6 +263,18 @@ public class ZipEntry extends java.util.zip.ZipEntry implements Cloneable {
setExtra(); setExtra();
} }


/**
 * Looks up an extra field by its header id.
 *
 * @param type the header id of the extra field to look up
 * @return the extra field stored under the given header id, or
 * null if no such field exists (including the case where this
 * entry holds no extra fields at all).
 */
public ZipExtraField getExtraField(ZipShort type) {
    if (extraFields == null) {
        // nothing has been registered for this entry yet
        return null;
    }
    return (ZipExtraField) extraFields.get(type);
}

/** /**
* Throws an Exception if extra data cannot be parsed into extra fields. * Throws an Exception if extra data cannot be parsed into extra fields.
* @param extra an array of bytes to be parsed into extra fields * @param extra an array of bytes to be parsed into extra fields


+ 124
- 12
src/main/org/apache/tools/zip/ZipFile.java View File

@@ -23,12 +23,14 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.RandomAccessFile; import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.nio.charset.CharacterCodingException;
import java.util.Calendar; import java.util.Calendar;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.zip.CRC32;
import java.util.zip.Inflater; import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream; import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException; import java.util.zip.ZipException;
@@ -101,6 +103,11 @@ public class ZipFile {
*/ */
private RandomAccessFile archive; private RandomAccessFile archive;


/**
* Whether to look for and use Unicode extra fields.
*/
private final boolean useUnicodeExtraFields;

/** /**
* Opens the given file for reading, assuming the platform's * Opens the given file for reading, assuming the platform's
* native encoding for file names. * native encoding for file names.
@@ -127,7 +134,7 @@ public class ZipFile {


/** /**
* Opens the given file for reading, assuming the specified * Opens the given file for reading, assuming the specified
* encoding for file names.
* encoding for file names and ignoring unicode extra fields.
* *
* @param name name of the archive. * @param name name of the archive.
* @param encoding the encoding to use for file names * @param encoding the encoding to use for file names
@@ -135,7 +142,21 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file. * @throws IOException if an error occurs while reading the file.
*/ */
public ZipFile(String name, String encoding) throws IOException { public ZipFile(String name, String encoding) throws IOException {
this(new File(name), encoding);
this(new File(name), encoding, false);
}

/**
* Opens the given file for reading, assuming the specified
* encoding for file names and ignoring unicode extra fields.
*
* @param f the archive.
* @param encoding the encoding to use for file names, use null
* for the platform's default encoding
*
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f, String encoding) throws IOException {
this(f, encoding, false);
} }


/** /**
@@ -144,16 +165,20 @@ public class ZipFile {
* *
* @param f the archive. * @param f the archive.
* @param encoding the encoding to use for file names * @param encoding the encoding to use for file names
* @param useUnicodeExtraFields whether to use InfoZIP Unicode
* Extra Fields (if present) to set the file names.
* *
* @throws IOException if an error occurs while reading the file. * @throws IOException if an error occurs while reading the file.
*/ */
public ZipFile(File f, String encoding) throws IOException {
public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
throws IOException {
this.encoding = encoding; this.encoding = encoding;
this.useUnicodeExtraFields = useUnicodeExtraFields;
archive = new RandomAccessFile(f, "r"); archive = new RandomAccessFile(f, "r");
boolean success = false; boolean success = false;
try { try {
populateFromCentralDirectory();
resolveLocalFileHeaderData();
Map entriesWithoutEFS = populateFromCentralDirectory();
resolveLocalFileHeaderData(entriesWithoutEFS);
success = true; success = true;
} finally { } finally {
if (!success) { if (!success) {
@@ -270,9 +295,15 @@ public class ZipFile {
* <p>The ZipEntrys will know all data that can be obtained from * <p>The ZipEntrys will know all data that can be obtained from
* the central directory alone, but not the data that requires the * the central directory alone, but not the data that requires the
* local file header or additional data to be read.</p> * local file header or additional data to be read.</p>
*
* @return a Map&lt;ZipEntry, NameAndComment&gt; of
* zipentries that didn't have the language encoding flag set when
* read.
*/ */
private void populateFromCentralDirectory()
private Map populateFromCentralDirectory()
throws IOException { throws IOException {
HashMap noEFS = new HashMap();

positionAtCentralDirectory(); positionAtCentralDirectory();


byte[] cfh = new byte[CFH_LEN]; byte[] cfh = new byte[CFH_LEN];
@@ -297,10 +328,10 @@ public class ZipFile {
off += SHORT; // skip version info off += SHORT; // skip version info


final int generalPurposeFlag = ZipShort.getValue(cfh, off); final int generalPurposeFlag = ZipShort.getValue(cfh, off);
final String entryEncoding =
(generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0
? ZipOutputStream.UTF8
: encoding;
final boolean hasEFS =
(generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0;
final String entryEncoding =
hasEFS ? ZipOutputStream.UTF8 : encoding;


off += SHORT; off += SHORT;


@@ -368,7 +399,12 @@ public class ZipFile {


archive.readFully(signatureBytes); archive.readFully(signatureBytes);
sig = ZipLong.getValue(signatureBytes); sig = ZipLong.getValue(signatureBytes);

if (!hasEFS && useUnicodeExtraFields) {
noEFS.put(ze, new NameAndComment(fileName, comment));
}
} }
return noEFS;
} }


private static final int MIN_EOCD_SIZE = private static final int MIN_EOCD_SIZE =
@@ -463,7 +499,7 @@ public class ZipFile {
* <p>Also records the offsets for the data to read from the * <p>Also records the offsets for the data to read from the
* entries.</p> * entries.</p>
*/ */
private void resolveLocalFileHeaderData()
private void resolveLocalFileHeaderData(Map entriesWithoutEFS)
throws IOException { throws IOException {
Enumeration e = getEntries(); Enumeration e = getEntries();
while (e.hasMoreElements()) { while (e.hasMoreElements()) {
@@ -494,6 +530,12 @@ public class ZipFile {
*/ */
offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
+ SHORT + SHORT + fileNameLen + extraFieldLen; + SHORT + SHORT + fileNameLen + extraFieldLen;

if (entriesWithoutEFS.containsKey(ze)) {
setNameAndCommentFromExtraFields(ze,
(NameAndComment)
entriesWithoutEFS.get(ze));
}
} }
} }


@@ -551,7 +593,11 @@ public class ZipFile {
return new String(bytes); return new String(bytes);
} else { } else {
try { try {
return ZipEncodingHelper.decodeName(bytes, enc);
try {
return ZipEncodingHelper.decodeName(bytes, enc);
} catch (CharacterCodingException ex) {
throw new ZipException(ex.getMessage());
}
} catch (java.nio.charset.UnsupportedCharsetException ex) { } catch (java.nio.charset.UnsupportedCharsetException ex) {
// Java 1.4's NIO doesn't recognize a few names that // Java 1.4's NIO doesn't recognize a few names that
// String.getBytes does // String.getBytes does
@@ -580,6 +626,64 @@ public class ZipFile {
return true; return true;
} }


/**
 * Replaces the entry's name and comment with the values carried
 * by its Unicode extra fields, provided the CRCs stored in those
 * fields match the CRCs of the name/comment bytes given in
 * <code>nc</code>.
 *
 * <p>When the name changes, the entry is re-registered in
 * <code>nameMap</code> under its new name.</p>
 *
 * @param ze the entry to update
 * @param nc the name and comment bytes recorded for the entry
 */
private void setNameAndCommentFromExtraFields(ZipEntry ze,
                                              NameAndComment nc) {
    UnicodePathExtraField pathField = (UnicodePathExtraField)
        ze.getExtraField(UnicodePathExtraField.UPATH_ID);
    String storedName = ze.getName();
    String unicodeName =
        getUnicodeStringIfOriginalMatches(pathField, nc.name);
    boolean renamed =
        unicodeName != null && !storedName.equals(unicodeName);
    if (renamed) {
        ze.setName(unicodeName);
        // keep the by-name lookup in sync with the renamed entry
        nameMap.remove(storedName);
        nameMap.put(unicodeName, ze);
    }

    byte[] rawComment = nc.comment;
    if (rawComment == null || rawComment.length == 0) {
        // no comment recorded, nothing to compare against
        return;
    }

    UnicodeCommentExtraField commentField = (UnicodeCommentExtraField)
        ze.getExtraField(UnicodeCommentExtraField.UCOM_ID);
    String unicodeComment =
        getUnicodeStringIfOriginalMatches(commentField, rawComment);
    if (unicodeComment != null) {
        ze.setComment(unicodeComment);
    }
}

/**
 * Returns the Unicode text of the given extra field if the CRC
 * stored in the field equals the CRC32 of the given bytes.
 *
 * <p>Returns null if the field is null, if the CRCs differ or if
 * the field's content cannot be decoded as UTF-8.</p>
 *
 * @param f the Unicode extra field, may be null
 * @param orig the name or comment bytes the field's CRC is
 * checked against
 * @return the decoded Unicode text or null
 */
private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f,
                                                 byte[] orig) {
    if (f == null) {
        return null;
    }

    CRC32 checksum = new CRC32();
    checksum.update(orig);
    if (checksum.getValue() != f.getNameCRC32()) {
        // the extra field describes different bytes, ignore it
        return null;
    }

    try {
        return ZipEncodingHelper.decodeName(f.getUnicodeName(),
                                            ZipOutputStream.UTF8);
    } catch (CharacterCodingException ex) {
        // UTF-8 itself being unsupported should be impossible, so
        // the Unicode*ExtraField must contain some bad bytes

        // TODO log this anywhere?
        return null;
    }
}

/** /**
* InputStream that delegates requests to the underlying * InputStream that delegates requests to the underlying
* RandomAccessFile, making sure that only bytes from a certain * RandomAccessFile, making sure that only bytes from a certain
@@ -647,4 +751,12 @@ public class ZipFile {
} }
} }


/**
* Pairs the name and comment bytes of an entry so that their CRC32
* can later be compared with the CRCs stored in the entry's
* Unicode extra fields.
*
* <p>The arrays are stored as given, no copies are made.</p>
*/
private static final class NameAndComment {
// bytes of the entry's name
private final byte[] name;
// bytes of the entry's comment
private final byte[] comment;
private NameAndComment(byte[] name, byte[] comment) {
this.name = name;
this.comment = comment;
}
}
} }

+ 28
- 3
src/main/org/apache/tools/zip/ZipOutputStream.java View File

@@ -260,6 +260,11 @@ public class ZipOutputStream extends FilterOutputStream {
*/ */
private boolean useEFS = true; private boolean useEFS = true;


/**
* whether to create Unicode path and comment extra fields for each entry.
*/
private boolean createUnicodeExtraFields = false;

/** /**
* Creates a new ZIP OutputStream filtering the underlying stream. * Creates a new ZIP OutputStream filtering the underlying stream.
* @param out the outputstream to zip * @param out the outputstream to zip
@@ -335,14 +340,24 @@ public class ZipOutputStream extends FilterOutputStream {
} }


/** /**
* Whether to set the EFS flag if the file name encoding is UTF-8.
* Whether to set the language encoding flag if the file name
* encoding is UTF-8.
* *
* <p>Defaults to true.</p> * <p>Defaults to true.</p>
*/ */
public void setUseEFS(boolean b) {
public void setUseLanguageEncodingFlag(boolean b) {
useEFS = b && isUTF8(encoding); useEFS = b && isUTF8(encoding);
} }


/**
* Whether to create Unicode Extra Fields for all entries.
*
* <p>Defaults to false.</p>
*
* @param b true if a Unicode path extra field (and, for entries
* with a non-empty comment, a Unicode comment extra field) should
* be added when an entry's local file header is written
*/
public void setCreateUnicodeExtraFields(boolean b) {
createUnicodeExtraFields = b;
}

/** /**
* Finishs writing the contents and closes this as well as the * Finishs writing the contents and closes this as well as the
* underlying stream. * underlying stream.
@@ -638,6 +653,17 @@ public class ZipOutputStream extends FilterOutputStream {
* @since 1.1 * @since 1.1
*/ */
protected void writeLocalFileHeader(ZipEntry ze) throws IOException { protected void writeLocalFileHeader(ZipEntry ze) throws IOException {

byte[] name = getBytes(ze.getName());
if (createUnicodeExtraFields) {
ze.addExtraField(new UnicodePathExtraField(ze.getName(), name));
String comm = ze.getComment();
if (comm != null && !"".equals(comm)) {
byte[] commentB = getBytes(comm);
ze.addExtraField(new UnicodeCommentExtraField(comm, commentB));
}
}

offsets.put(ze, ZipLong.getBytes(written)); offsets.put(ze, ZipLong.getBytes(written));


writeOut(LFH_SIG); writeOut(LFH_SIG);
@@ -675,7 +701,6 @@ public class ZipOutputStream extends FilterOutputStream {
// CheckStyle:MagicNumber ON // CheckStyle:MagicNumber ON


// file name length // file name length
byte[] name = getBytes(ze.getName());
writeOut(ZipShort.getBytes(name.length)); writeOut(ZipShort.getBytes(name.length));
written += SHORT; written += SHORT;




Loading…
Cancel
Save