From f21a51dfd3ecb9fa8813f5612a5f87d437de2450 Mon Sep 17 00:00:00 2001 From: Stefan Bodewig Date: Wed, 4 Mar 2009 16:54:39 +0000 Subject: [PATCH] even more encoding options for zips git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@750072 13f79535-47bb-0310-9956-ffa450edef68 --- docs/manual/CoreTasks/ear.html | 14 ++- docs/manual/CoreTasks/jar.html | 14 ++- docs/manual/CoreTasks/war.html | 14 ++- docs/manual/CoreTasks/zip.html | 34 ++++-- .../org/apache/tools/ant/taskdefs/Zip.java | 86 +++++++++++++- .../apache/tools/zip/FallbackZipEncoding.java | 2 +- .../tools/zip/Simple8BitZipEncoding.java | 2 +- .../tools/zip/UnicodeCommentExtraField.java | 2 +- .../tools/zip/UnicodePathExtraField.java | 2 +- .../apache/tools/zip/ZipEncodingHelper.java | 2 +- src/main/org/apache/tools/zip/ZipLong.java | 2 +- .../org/apache/tools/zip/ZipOutputStream.java | 105 +++++++++++++++--- src/main/org/apache/tools/zip/ZipShort.java | 2 +- .../apache/tools/zip/UTF8ZipFilesTest.java | 6 +- 14 files changed, 247 insertions(+), 40 deletions(-) diff --git a/docs/manual/CoreTasks/ear.html b/docs/manual/CoreTasks/ear.html index d89de7cca..34c746e07 100644 --- a/docs/manual/CoreTasks/ear.html +++ b/docs/manual/CoreTasks/ear.html @@ -213,10 +213,22 @@ to a value other than its default, "add".

createUnicodeExtraFields Whether to create unicode extra fields to store the file names a second time inside the entry's metadata. +
Possible values are "never", "always" and "not-encodeable" + which will only add Unicode extra fields if the file name cannot + be encoded using the specified encoding. Since Ant 1.8.0.
See also the discussion in the zip task page - No, default is false + + + fallbacktoUTF8 + Whether to use UTF-8 and the language encoding + flag instead of the specified encoding if a file name cannot be + encoded using the specified encoding. + Since Ant 1.8.0. +
See also the discussion in the + zip task page + No, default is false diff --git a/docs/manual/CoreTasks/jar.html b/docs/manual/CoreTasks/jar.html index 02908b389..700ac78c1 100644 --- a/docs/manual/CoreTasks/jar.html +++ b/docs/manual/CoreTasks/jar.html @@ -267,10 +267,22 @@ to a value other than its default, "add".

createUnicodeExtraFields Whether to create unicode extra fields to store the file names a second time inside the entry's metadata. +
Possible values are "never", "always" and "not-encodeable" + which will only add Unicode extra fields if the file name cannot + be encoded using the specified encoding. Since Ant 1.8.0.
See also the discussion in the zip task page - No, default is false + + + fallbacktoUTF8 + Whether to use UTF-8 and the language encoding + flag instead of the specified encoding if a file name cannot be + encoded using the specified encoding. + Since Ant 1.8.0. +
See also the discussion in the + zip task page + No, default is false diff --git a/docs/manual/CoreTasks/war.html b/docs/manual/CoreTasks/war.html index d4a42cf7e..9258f4619 100644 --- a/docs/manual/CoreTasks/war.html +++ b/docs/manual/CoreTasks/war.html @@ -230,10 +230,22 @@ to a value other than its default, "add".

createUnicodeExtraFields Whether to create unicode extra fields to store the file names a second time inside the entry's metadata. +
Possible values are "never", "always" and "not-encodeable" + which will only add Unicode extra fields if the file name cannot + be encoded using the specified encoding. Since Ant 1.8.0.
See also the discussion in the zip task page - No, default is false + + + fallbacktoUTF8 + Whether to use UTF-8 and the language encoding + flag instead of the specified encoding if a file name cannot be + encoded using the specified encoding. + Since Ant 1.8.0. +
See also the discussion in the + zip task page + No, default is false diff --git a/docs/manual/CoreTasks/zip.html b/docs/manual/CoreTasks/zip.html index fa0233afb..16fa794b9 100644 --- a/docs/manual/CoreTasks/zip.html +++ b/docs/manual/CoreTasks/zip.html @@ -256,7 +256,19 @@ archive.

createUnicodeExtraFields Whether to create unicode extra fields to store the file names a second time inside the entry's metadata. - Defaults to false. Since Ant 1.8.0. +
Possible values are "never", "always" and "not-encodeable" + which will only add Unicode extra fields if the file name cannot + be encoded using the specified encoding. + Since Ant 1.8.0. +
See also the discussion below + No, default is "never" + + + fallbacktoUTF8 + Whether to use UTF-8 and the language encoding + flag instead of the specified encoding if a file name cannot be + encoded using the specified encoding. + Since Ant 1.8.0.
See also the discussion below No, default is false @@ -298,8 +310,15 @@ archive.

entry's metadata. Most archivers ignore these extra fields. The zip family of tasks support an option createUnicodeExtraFields since Ant 1.8.0 which - makes Ant write these extra fields, it defaults to false since it - creates a bigger archive.

+ makes Ant write these extra fields either for all entries ("always") + or only those whose name cannot be encoded using the specified + encoding ("not-encodeable"); it defaults to "never" since the extra + fields create bigger archives.

+ +

The fallbackToUTF8 attribute of zip can be used to create archives + that use the specified encoding in the majority of cases but UTF-8 and + the language encoding flag for filenames that cannot be encoded + using the specified encoding.

The unzip-task will recognize the unicode extra fields by default and read the file name information from them, unless you set the @@ -320,12 +339,13 @@ archive.

  • 7Zip writes CodePage 437 by default but uses UTF-8 and the language encoding flag when writing entries that cannot be encoded - as CodePage 437. It recognizes the language encoding flag when - reading and ignores the unicode extra fields.
  • + as CodePage 437 (similar to the zip task with fallbacktoUTF8 set + to true). It recognizes the language encoding flag when reading + and ignores the unicode extra fields.
  • WinZIP writes CodePage 437 and uses unicode extra fields by - default. It recognizes the unicode extra field when reading and - ignores the language encoding flag.
  • + default. It recognizes the unicode extra field and the language + encoding flag when reading.
Windows' "compressed folder" feature doesn't recognize any flag or extra field and creates archives using the platforms default diff --git a/src/main/org/apache/tools/ant/taskdefs/Zip.java b/src/main/org/apache/tools/ant/taskdefs/Zip.java index 83e5da83c..8366facc7 100644 --- a/src/main/org/apache/tools/ant/taskdefs/Zip.java +++ b/src/main/org/apache/tools/ant/taskdefs/Zip.java @@ -27,8 +27,10 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Enumeration; +import java.util.HashMap; import java.util.Hashtable; import java.util.Iterator; +import java.util.Map; import java.util.Stack; import java.util.Vector; import java.util.zip.CRC32; @@ -182,11 +184,20 @@ public class Zip extends MatchingTask { private boolean useLanguageEncodingFlag = true; /** - * Whether to set the language encoding flag when creating the archive. + * Whether to add unicode extra fields. * * @since Ant 1.8.0 */ - private boolean createUnicodeExtraFields = false; + private UnicodeExtraField createUnicodeExtraFields = + UnicodeExtraField.NEVER; + + /** + * Whether to fall back to UTF-8 if a name cannot be encoded using + * the specified encoding. + * + * @since Ant 1.8.0 + */ + private boolean fallBackToUTF8 = false; /** * This is the name/location of where to @@ -486,7 +497,7 @@ public class Zip extends MatchingTask { * Whether Unicode extra fields will be created. * @since Ant 1.8.0 */ - public void setCreateUnicodeExtraFields(boolean b) { + public void setCreateUnicodeExtraFields(UnicodeExtraField b) { createUnicodeExtraFields = b; } @@ -494,10 +505,32 @@ public class Zip extends MatchingTask { * Whether Unicode extra fields will be created. * @since Ant 1.8.0 */ - public boolean getCreateUnicodeExtraFields() { + public UnicodeExtraField getCreateUnicodeExtraFields() { return createUnicodeExtraFields; } + /** + * Whether to fall back to UTF-8 if a name cannot be encoded using + * the specified encoding. + * + *

    Defaults to false.

    + * + * @since Ant 1.8.0 + */ + public void setFallBackToUTF8(boolean b) { + fallBackToUTF8 = b; + } + + /** + * Whether to fall back to UTF-8 if a name cannot be encoded using + * the specified encoding. + * + * @since Ant 1.8.0 + */ + public boolean getFallBackToUTF8() { + return fallBackToUTF8; + } + /** * validate and build * @throws BuildException on error @@ -587,7 +620,9 @@ public class Zip extends MatchingTask { zOut.setEncoding(encoding); zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag); - zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields); + zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields. + getPolicy()); + zOut.setFallbackToUTF8(fallBackToUTF8); zOut.setMethod(doCompress ? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); zOut.setLevel(level); @@ -1881,4 +1916,45 @@ public class Zip extends MatchingTask { return true; } } + + /** + * Policy for creation of Unicode extra fields: never, always or + * not-encodeable. + * + * @since Ant 1.8.0 + */ + public static final class UnicodeExtraField extends EnumeratedAttribute { + private static final Map POLICIES = new HashMap(); + private static final String NEVER_KEY = "never"; + private static final String ALWAYS_KEY = "always"; + private static final String N_E_KEY = "not-encodeable"; + static { + POLICIES.put(NEVER_KEY, + ZipOutputStream.UnicodeExtraFieldPolicy.NEVER); + POLICIES.put(ALWAYS_KEY, + ZipOutputStream.UnicodeExtraFieldPolicy.ALWAYS); + POLICIES.put(N_E_KEY, + ZipOutputStream.UnicodeExtraFieldPolicy + .NOT_ENCODEABLE); + } + + public String[] getValues() { + return new String[] {NEVER_KEY, ALWAYS_KEY, N_E_KEY}; + } + + public static final UnicodeExtraField NEVER = + new UnicodeExtraField(NEVER_KEY); + + private UnicodeExtraField(String name) { + setValue(name); + } + + public UnicodeExtraField() { + } + + public ZipOutputStream.UnicodeExtraFieldPolicy getPolicy() { + return (ZipOutputStream.UnicodeExtraFieldPolicy) + POLICIES.get(getValue()); + } + } } diff --git
a/src/main/org/apache/tools/zip/FallbackZipEncoding.java b/src/main/org/apache/tools/zip/FallbackZipEncoding.java index 4c2fe8500..5b1dc8902 100644 --- a/src/main/org/apache/tools/zip/FallbackZipEncoding.java +++ b/src/main/org/apache/tools/zip/FallbackZipEncoding.java @@ -34,7 +34,7 @@ import java.nio.ByteBuffer; * given name can be safely encoded or not.

    * *

    This implementation acts as a last resort implementation, when - * neither {@see Simple8BitZipEnoding} nor {@see NioZipEncoding} is + * neither {@link Simple8BitZipEncoding} nor {@link NioZipEncoding} is * available.

    * *

    The methods of this class are reentrant.

    diff --git a/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java b/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java index b23e4c233..20ee231ba 100644 --- a/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java +++ b/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java @@ -77,7 +77,7 @@ class Simple8BitZipEncoding implements ZipEncoding { private final char[] highChars; /** - * A list of {@see Simple8BitChar} objects sorted by the unicode + * A list of {@link Simple8BitChar} objects sorted by the unicode * field. This list is used to binary search reverse mapping of * unicode characters with a character code greater than 127. */ diff --git a/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java b/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java index 5535d5d7c..90cb4349e 100644 --- a/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java +++ b/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java @@ -45,7 +45,7 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { * Assemble as unicode comment extension from the name given as * text as well as the encoded bytes actually written to the archive. * - * @param name The file name + * @param text The file name * @param bytes the bytes actually written to the archive * @param off The offset of the encoded comment in bytes. * @param len The length of the encoded comment or comment in diff --git a/src/main/org/apache/tools/zip/UnicodePathExtraField.java b/src/main/org/apache/tools/zip/UnicodePathExtraField.java index 8c26e1557..c40b79a25 100644 --- a/src/main/org/apache/tools/zip/UnicodePathExtraField.java +++ b/src/main/org/apache/tools/zip/UnicodePathExtraField.java @@ -45,7 +45,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField { * Assemble as unicode path extension from the name given as * text as well as the encoded bytes actually written to the archive. 
* - * @param name The file name + * @param text The file name * @param bytes the bytes actually written to the archive * @param off The offset of the encoded filename in bytes. * @param len The length of the encoded filename or comment in diff --git a/src/main/org/apache/tools/zip/ZipEncodingHelper.java b/src/main/org/apache/tools/zip/ZipEncodingHelper.java index e09327341..bb07d9caa 100644 --- a/src/main/org/apache/tools/zip/ZipEncodingHelper.java +++ b/src/main/org/apache/tools/zip/ZipEncodingHelper.java @@ -51,7 +51,7 @@ abstract class ZipEncodingHelper { } /** - * @return The associated {@see Simple8BitZipEncoding}, which + * @return The associated {@link Simple8BitZipEncoding}, which * is instantiated if not done so far. */ public synchronized Simple8BitZipEncoding getEncoding() { diff --git a/src/main/org/apache/tools/zip/ZipLong.java b/src/main/org/apache/tools/zip/ZipLong.java index a6b5dda39..4b48656bb 100644 --- a/src/main/org/apache/tools/zip/ZipLong.java +++ b/src/main/org/apache/tools/zip/ZipLong.java @@ -150,7 +150,7 @@ public final class ZipLong implements Cloneable { public Object clone() { try { - return (ZipLong) super.clone(); + return super.clone(); } catch (CloneNotSupportedException cnfe) { // impossible throw new RuntimeException(cnfe); diff --git a/src/main/org/apache/tools/zip/ZipOutputStream.java b/src/main/org/apache/tools/zip/ZipOutputStream.java index cad9f9a95..bdc2f825c 100644 --- a/src/main/org/apache/tools/zip/ZipOutputStream.java +++ b/src/main/org/apache/tools/zip/ZipOutputStream.java @@ -269,10 +269,16 @@ public class ZipOutputStream extends FilterOutputStream { */ private boolean useEFS = true; + /** + * Whether to encode non-encodable file names as UTF-8. + */ + private boolean fallbackToUTF8 = false; + /** * whether to create UnicodePathExtraField-s for each entry. 
*/ - private boolean createUnicodeExtraFields = false; + private UnicodeExtraFieldPolicy createUnicodeExtraFields = + UnicodeExtraFieldPolicy.NEVER; /** * Creates a new ZIP OutputStream filtering the underlying stream. @@ -360,14 +366,24 @@ public class ZipOutputStream extends FilterOutputStream { } /** - * Whether to create Unicode Extra Fields for all entries. + * Whether to create Unicode Extra Fields. * - *

    Defaults to false.

    + *

    Defaults to NEVER.

    */ - public void setCreateUnicodeExtraFields(boolean b) { + public void setCreateUnicodeExtraFields(UnicodeExtraFieldPolicy b) { createUnicodeExtraFields = b; } + /** + * Whether to fall back to UTF and the language encoding flag if + * the file name cannot be encoded using the specified encoding. + * + *

    Defaults to false.

    + */ + public void setFallbackToUTF8(boolean b) { + fallbackToUTF8 = b; + } + /** * Finishs writing the contents and closes this as well as the * underlying stream. @@ -665,31 +681,38 @@ public class ZipOutputStream extends FilterOutputStream { */ protected void writeLocalFileHeader(ZipEntry ze) throws IOException { - boolean encodable = this.zipEncoding.canEncode(ze.getName()); - ByteBuffer name = this.zipEncoding.encode(ze.getName()); + boolean encodable = zipEncoding.canEncode(ze.getName()); + ByteBuffer name; + if (!encodable && fallbackToUTF8) { + name = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(ze.getName()); + } else { + name = zipEncoding.encode(ze.getName()); + } - if (createUnicodeExtraFields) { + if (createUnicodeExtraFields != UnicodeExtraFieldPolicy.NEVER) { - /* if (!encodable) { -- FIXME decide what to*/ + if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS + || !encodable) { ze.addExtraField(new UnicodePathExtraField(ze.getName(), name.array(), name.arrayOffset(), name.limit())); - /* } */ + } String comm = ze.getComment(); if (comm != null && !"".equals(comm)) { boolean commentEncodable = this.zipEncoding.canEncode(comm); - /* if (!commentEncodable) { -- FIXME decide what to*/ + if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS + || !commentEncodable) { ByteBuffer commentB = this.zipEncoding.encode(comm); ze.addExtraField(new UnicodeCommentExtraField(comm, commentB.array(), commentB.arrayOffset(), commentB.limit()) ); - /* } */ + } } } @@ -701,7 +724,9 @@ public class ZipOutputStream extends FilterOutputStream { //store method in local variable to prevent multiple method calls final int zipMethod = ze.getMethod(); - writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod); + writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod, + !encodable + && fallbackToUTF8); written += WORD; // compression method @@ -786,7 +811,10 @@ public class ZipOutputStream extends FilterOutputStream { written += SHORT; final int 
zipMethod = ze.getMethod(); - writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod); + final boolean encodable = zipEncoding.canEncode(ze.getName()); + writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod, + !encodable + && fallbackToUTF8); written += WORD; // compression method @@ -808,7 +836,12 @@ public class ZipOutputStream extends FilterOutputStream { // CheckStyle:MagicNumber ON // file name length - ByteBuffer name = this.zipEncoding.encode(ze.getName()); + ByteBuffer name; + if (!encodable && fallbackToUTF8) { + name = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(ze.getName()); + } else { + name = zipEncoding.encode(ze.getName()); + } writeOut(ZipShort.getBytes(name.limit())); written += SHORT; @@ -822,7 +855,12 @@ public class ZipOutputStream extends FilterOutputStream { if (comm == null) { comm = ""; } - ByteBuffer commentB = this.zipEncoding.encode(comm); + ByteBuffer commentB; + if (!encodable && fallbackToUTF8) { + commentB = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(comm); + } else { + commentB = zipEncoding.encode(comm); + } writeOut(ZipShort.getBytes(commentB.limit())); written += SHORT; @@ -1000,12 +1038,14 @@ public class ZipOutputStream extends FilterOutputStream { } private void writeVersionNeededToExtractAndGeneralPurposeBits(final int - zipMethod) + zipMethod, + final boolean + utfFallback) throws IOException { // CheckStyle:MagicNumber OFF int versionNeededToExtract = 10; - int generalPurposeFlag = useEFS ? EFS_FLAG : 0; + int generalPurposeFlag = (useEFS || utfFallback) ? EFS_FLAG : 0; if (zipMethod == DEFLATED && raf == null) { // requires version 2 as we are going to store length info // in the data descriptor @@ -1020,4 +1060,35 @@ public class ZipOutputStream extends FilterOutputStream { // general purpose bit flag writeOut(ZipShort.getBytes(generalPurposeFlag)); } + + /** + * enum that represents the possible policies for creating Unicode + * extra fields. 
+ */ + public static final class UnicodeExtraFieldPolicy { + /** + * Always create Unicode extra fields. + */ + public static final UnicodeExtraFieldPolicy ALWAYS = + new UnicodeExtraFieldPolicy("always"); + /** + * Never create Unicode extra fields. + */ + public static final UnicodeExtraFieldPolicy NEVER = + new UnicodeExtraFieldPolicy("never"); + /** + * Create Unicode extra fields for filenames that cannot be + * encoded using the specified encoding. + */ + public static final UnicodeExtraFieldPolicy NOT_ENCODEABLE = + new UnicodeExtraFieldPolicy("not encodeable"); + + private final String name; + private UnicodeExtraFieldPolicy(String n) { + name = n; + } + public String toString() { + return name; + } + } } diff --git a/src/main/org/apache/tools/zip/ZipShort.java b/src/main/org/apache/tools/zip/ZipShort.java index da6ca490a..b50b2fba2 100644 --- a/src/main/org/apache/tools/zip/ZipShort.java +++ b/src/main/org/apache/tools/zip/ZipShort.java @@ -136,7 +136,7 @@ public final class ZipShort implements Cloneable { public Object clone() { try { - return (ZipShort) super.clone(); + return super.clone(); } catch (CloneNotSupportedException cnfe) { // impossible throw new RuntimeException(cnfe); diff --git a/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java b/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java index 3e7edb1dd..ddd4f1e9e 100644 --- a/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java +++ b/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java @@ -122,7 +122,11 @@ public class UTF8ZipFilesTest extends TestCase { zos = new ZipOutputStream(file); zos.setEncoding(encoding); zos.setUseLanguageEncodingFlag(withEFS); - zos.setCreateUnicodeExtraFields(!withExplicitUnicodeExtra); + zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ? + ZipOutputStream + .UnicodeExtraFieldPolicy.NEVER + : ZipOutputStream + .UnicodeExtraFieldPolicy.ALWAYS); ZipEntry ze = new ZipEntry(OIL_BARREL_TXT); if (withExplicitUnicodeExtra