Browse Source

Add support for InfoZIP invented extra fields for Unicode file names and comments. Submitted by Wolfgang Glas. Merge from commons-compress revision 745528

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@745531 13f79535-47bb-0310-9956-ffa450edef68
master
Stefan Bodewig 16 years ago
parent
commit
25f451bd36
6 changed files with 442 additions and 0 deletions
  1. +3
    -0
      WHATSNEW
  2. +147
    -0
      src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
  3. +2
    -0
      src/main/org/apache/tools/zip/ExtraFieldUtils.java
  4. +60
    -0
      src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
  5. +59
    -0
      src/main/org/apache/tools/zip/UnicodePathExtraField.java
  6. +171
    -0
      src/main/org/apache/tools/zip/ZipEncodingHelper.java

+ 3
- 0
WHATSNEW View File

@@ -348,6 +348,9 @@ Fixed bugs:
VM was running) for files with an unexpected internal structure. VM was running) for files with an unexpected internal structure.
Bugzilla Report 46559. Bugzilla Report 46559.


* The zip package now supports the extra fields invented by InfoZIP
in order to store Unicode file names and comments.

Other changes: Other changes:
-------------- --------------
* A HostInfo task was added performing information on hosts, including info on * A HostInfo task was added performing information on hosts, including info on


+ 147
- 0
src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java View File

@@ -0,0 +1,147 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.tools.zip;

import java.io.UnsupportedEncodingException;
import java.util.zip.CRC32;
import java.util.zip.ZipException;

/**
 * A common base class for Unicode extra information extra fields.
 *
 * <p>The serialized payload built by this class consists of one version
 * byte (always <code>1</code>), four bytes holding the CRC32 of the name
 * as produced by {@link ZipLong#getBytes(long)}, followed by the UTF-8
 * encoded name.  The payload is assembled lazily and cached; every
 * mutator drops the cache so that it is rebuilt on next access.</p>
 */
public abstract class AbstractUnicodeExtraField implements ZipExtraField {

    /** Number of bytes in front of the UTF-8 name: version + CRC32. */
    private static final int PREFIX_LENGTH = 5;

    private long nameCRC32;
    private byte[] unicodeName;
    /** Cached serialized form, <code>null</code> if it must be rebuilt. */
    private byte[] data;

    /**
     * Creates an empty field; its content is expected to be supplied
     * via the setters or {@link #parseFromLocalFileData}.
     */
    protected AbstractUnicodeExtraField() {
    }

    /**
     * Assemble a Unicode extension from the name and encoding of the
     * original zip entry.
     *
     * @param name The file name or comment.
     * @param zipEncoding The encoding of the filenames in the zip
     * file, usually <code>"CP437"</code>.
     */
    protected AbstractUnicodeExtraField(String name, String zipEncoding) {
        // The checksum is taken over the bytes of the name as encoded
        // with the archive's encoding, not over the UTF-8 bytes.
        byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding);

        CRC32 crc32 = new CRC32();
        crc32.update(filename);
        nameCRC32 = crc32.getValue();

        try {
            unicodeName = name.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
                                       e);
        }
    }

    /**
     * Builds the cached serialized form from the current checksum and
     * name.  Leaves the cache untouched (i.e. <code>null</code>) when no
     * Unicode name has been set.
     */
    private void assembleData() {
        if (unicodeName == null) {
            return;
        }

        data = new byte[PREFIX_LENGTH + unicodeName.length];
        // version 1
        data[0] = 0x01;
        System.arraycopy(ZipLong.getBytes(nameCRC32), 0, data, 1, 4);
        System.arraycopy(unicodeName, 0, data, PREFIX_LENGTH,
                         unicodeName.length);
    }

    /**
     * @return The CRC32 checksum of the filename or comment as
     * encoded in the central directory of the zip file.
     */
    public long getNameCRC32() {
        return nameCRC32;
    }

    /**
     * @param nameCRC32 The CRC32 checksum of the filename as encoded
     * in the central directory of the zip file to set.
     */
    public void setNameCRC32(long nameCRC32) {
        // "this." is required: the parameter shadows the field.
        this.nameCRC32 = nameCRC32;
        data = null;
    }

    /**
     * @return The utf-8 encoded name, or <code>null</code> if none has
     * been set.
     */
    public byte[] getUnicodeName() {
        return unicodeName;
    }

    /**
     * @param unicodeName The utf-8 encoded name to set.
     */
    public void setUnicodeName(byte[] unicodeName) {
        // "this." is required: the parameter shadows the field.
        this.unicodeName = unicodeName;
        data = null;
    }

    /**
     * @return The serialized field content, or <code>null</code> if no
     * Unicode name has been set.
     */
    public byte[] getCentralDirectoryData() {
        if (data == null) {
            assembleData();
        }
        return data;
    }

    /**
     * @return The length of the serialized field content; zero if no
     * Unicode name has been set.
     */
    public ZipShort getCentralDirectoryLength() {
        if (data == null) {
            assembleData();
        }
        // assembleData() cannot build anything without a name, so data
        // may legitimately still be null here.
        return new ZipShort(data != null ? data.length : 0);
    }

    /**
     * @return The same bytes as {@link #getCentralDirectoryData()}.
     */
    public byte[] getLocalFileDataData() {
        return getCentralDirectoryData();
    }

    /**
     * @return The same length as {@link #getCentralDirectoryLength()}.
     */
    public ZipShort getLocalFileDataLength() {
        return getCentralDirectoryLength();
    }

    /**
     * Populates this field from its serialized form.
     *
     * @param buffer The array holding the extra field data.
     * @param offset Index of the version byte inside <code>buffer</code>.
     * @param length Number of bytes that belong to this field.
     * @throws ZipException if fewer than five bytes are available or
     * the version byte is not <code>1</code>.
     */
    public void parseFromLocalFileData(byte[] buffer, int offset, int length)
        throws ZipException {

        if (length < PREFIX_LENGTH) {
            throw new ZipException("UniCode path extra data must have at least"
                                   + " 5 bytes.");
        }

        int version = buffer[offset];

        if (version != 0x01) {
            throw new ZipException("Unsupported version [" + version
                                   + "] for UniCode path extra data.");
        }

        nameCRC32 = ZipLong.getValue(buffer, offset + 1);
        unicodeName = new byte[length - PREFIX_LENGTH];
        System.arraycopy(buffer, offset + PREFIX_LENGTH, unicodeName, 0,
                         length - PREFIX_LENGTH);
        data = null;
    }

}

+ 2
- 0
src/main/org/apache/tools/zip/ExtraFieldUtils.java View File

@@ -44,6 +44,8 @@ public class ExtraFieldUtils {
implementations = new HashMap(); implementations = new HashMap();
register(AsiExtraField.class); register(AsiExtraField.class);
register(JarMarker.class); register(JarMarker.class);
register(UnicodePathExtraField.class);
register(UnicodeCommentExtraField.class);
} }


/** /**


+ 60
- 0
src/main/org/apache/tools/zip/UnicodeCommentExtraField.java View File

@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.tools.zip;

/**
 * Info-ZIP Unicode Comment Extra Field (0x6375):
 *
 * <p>Carries the UTF-8 form of the file comment that is stored in the
 * central directory header.</p>
 *
 * <pre>
 *         Value         Size        Description
 *         -----         ----        -----------
 *  (UCom) 0x6375        Short       tag for this extra block type ("uc")
 *         TSize         Short       total data size for this block
 *         Version       1 byte      version of this extra field, currently 1
 *         ComCRC32      4 bytes     Comment Field CRC32 Checksum
 *         UnicodeCom    Variable    UTF-8 version of the entry comment
 * </pre>
 */
public class UnicodeCommentExtraField extends AbstractUnicodeExtraField {

    /** Header id identifying this extra field type. */
    public static final ZipShort UCOM_ID = new ZipShort(0x6375);

    /**
     * Creates an empty field with no content set.
     */
    public UnicodeCommentExtraField() {
        super();
    }

    /**
     * Builds a Unicode comment extension from a text and the encoding
     * used by the original zip entry; both arguments are handed to the
     * base class unchanged.
     *
     * @param name The text to store, i.e. the entry comment for this
     * field type.
     * @param zipEncoding The encoding of the comment in the zip file,
     * usually <code>"CP437"</code>.
     */
    public UnicodeCommentExtraField(String name, String zipEncoding) {
        super(name, zipEncoding);
    }

    /**
     * @return {@link #UCOM_ID}.
     */
    public ZipShort getHeaderId() {
        return UCOM_ID;
    }

}

+ 59
- 0
src/main/org/apache/tools/zip/UnicodePathExtraField.java View File

@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.tools.zip;

/**
 * Info-ZIP Unicode Path Extra Field (0x7075):
 *
 * <p>Carries the UTF-8 form of the file name field that is stored in
 * the local header and the central directory header.</p>
 *
 * <pre>
 *         Value         Size        Description
 *         -----         ----        -----------
 * (UPath) 0x7075        Short       tag for this extra block type ("up")
 *         TSize         Short       total data size for this block
 *         Version       1 byte      version of this extra field, currently 1
 *         NameCRC32     4 bytes     File Name Field CRC32 Checksum
 *         UnicodeName   Variable    UTF-8 version of the entry File Name
 * </pre>
 */
public class UnicodePathExtraField extends AbstractUnicodeExtraField {

    /** Header id identifying this extra field type. */
    public static final ZipShort UPATH_ID = new ZipShort(0x7075);

    /**
     * Creates an empty field with no content set.
     */
    public UnicodePathExtraField() {
        super();
    }

    /**
     * Builds a Unicode path extension from a name and the encoding used
     * by the original zip entry; both arguments are handed to the base
     * class unchanged.
     *
     * @param name The file name.
     * @param zipEncoding The encoding of the filename in the zip
     * file, usually <code>"CP437"</code>.
     */
    public UnicodePathExtraField(String name, String zipEncoding) {
        super(name, zipEncoding);
    }

    /**
     * @return {@link #UPATH_ID}.
     */
    public ZipShort getHeaderId() {
        return UPATH_ID;
    }
}

+ 171
- 0
src/main/org/apache/tools/zip/ZipEncodingHelper.java View File

@@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.tools.zip;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/**
 * Static helper functions for robustly encoding filenames in zip files.
 */
abstract class ZipEncodingHelper {

    /**
     * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
     * ASCII bytes.
     */
    private static final byte[] HEX_DIGITS =
        new byte [] {
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
        0x42, 0x43, 0x44, 0x45, 0x46
    };

    /** Number of bytes one <code>%Uxxxx</code> escape occupies. */
    private static final int ESCAPE_LENGTH = 6;

    /**
     * Grow a byte buffer, so it has a minimal capacity or at least
     * the double capacity of the original buffer.
     *
     * <p>Note that the position and limit of <code>b</code> are
     * modified while its content is copied.</p>
     *
     * @param b The original buffer.
     * @param newCapacity The minimal requested new capacity.
     * @return A byte buffer <code>r</code> with
     * <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
     * all the data contained in <code>b</code> copied to the beginning
     * of <code>r</code>; its position is just behind the copied data.
     */
    static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
        // Restrict the source to the bytes written so far.
        b.limit(b.position());
        b.rewind();

        int c2 = b.capacity() * 2;
        ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);

        on.put(b);
        return on;
    }

    /**
     * Appends the <code>%Uxxxx</code> escape for a single UTF-16 code
     * unit; the caller must have made sure six bytes are available.
     */
    private static void appendEscape(ByteBuffer out, char c) {
        out.put((byte) '%');
        out.put((byte) 'U');
        out.put(HEX_DIGITS[(c >> 12) & 0x0f]);
        out.put(HEX_DIGITS[(c >> 8) & 0x0f]);
        out.put(HEX_DIGITS[(c >> 4) & 0x0f]);
        out.put(HEX_DIGITS[c & 0x0f]);
    }

    /**
     * Encode a filename or a comment to a byte array suitable for
     * storing it to a serialized zip entry.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  encodeName("\u20AC_for_Dollar.txt","CP437") = "%U20AC_for_Dollar.txt"
     *  encodeName("\u00D6lf\u00E4sser.txt","CP437") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return A byte array containing the mapped file
     *         name. Unmappable characters or malformed character
     *         sequences are mapped to a sequence of utf-16 words
     *         encoded in the format <code>%Uxxxx</code>.
     */
    static final byte[] encodeName(String name, String encoding) {
        Charset cs = Charset.forName(encoding);
        CharsetEncoder enc = cs.newEncoder();

        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);

        CharBuffer cb = CharBuffer.wrap(name);
        ByteBuffer out = ByteBuffer.allocate(name.length()
                                             + (name.length() + 1) / 2);

        // flush() is only legal once encode() has been invoked.
        boolean encoderUsed = false;

        while (cb.remaining() > 0) {
            CoderResult res = enc.encode(cb, out, true);
            encoderUsed = true;

            if (res.isUnmappable() || res.isMalformed()) {

                // write the unmappable characters in utf-16
                // pseudo-URL encoding style to ByteBuffer.
                int needed = res.length() * ESCAPE_LENGTH;
                if (needed > out.remaining()) {
                    out = growBuffer(out, out.position() + needed);
                }

                // The encoder left the offending characters unread, so
                // consume them here.
                for (int i = 0; i < res.length(); ++i) {
                    appendEscape(out, cb.get());
                }

            } else if (res.isOverflow()) {

                out = growBuffer(out, 0);

            } else if (res.isUnderflow()) {

                break;

            }
        }

        // Flush after the loop rather than only on underflow: the loop
        // may also end right after escaping a trailing unmappable
        // character, and a stateful encoder still has to emit its
        // closing bytes then.  Retry with a bigger buffer on overflow
        // instead of silently dropping them.
        if (encoderUsed) {
            while (enc.flush(out).isOverflow()) {
                out = growBuffer(out, 0);
            }
        }

        byte [] ret = new byte[out.position()];
        out.rewind();
        out.get(ret);

        return ret;
    }

    /**
     * Return, whether a filename or a comment may be encoded to a
     * byte array suitable for storing it to a serialized zip entry
     * without any losses.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  canEncodeName("\u20AC_for_Dollar.txt","CP437") = false
     *  canEncodeName("\u20AC_for_Dollar.txt","UTF-8") = true
     *  canEncodeName("\u00D6lf\u00E4sser.txt","CP437") = true
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return Whether the given encoding may encode the given name.
     */
    static final boolean canEncodeName(String name, String encoding) {

        Charset cs = Charset.forName(encoding);

        CharsetEncoder enc = cs.newEncoder();
        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);

        return enc.canEncode(name);
    }
}

Loading…
Cancel
Save