From a8c2ab18688af2c73127f69ab1a1855241bd282f Mon Sep 17 00:00:00 2001
From: Stefan Bodewig
Date: Fri, 27 Feb 2009 17:00:59 +0000
Subject: [PATCH] provide options for enhanced encoding support in ZIP,
document it. Many thanks to Wolfgang Glas who provided most of the test
input as well as patches the new support is based on.
git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748593 13f79535-47bb-0310-9956-ffa450edef68
---
WHATSNEW | 4 +
docs/manual/CoreTasks/ear.html | 23 +++-
docs/manual/CoreTasks/jar.html | 25 +++-
docs/manual/CoreTasks/unzip.html | 14 +-
docs/manual/CoreTasks/war.html | 23 +++-
docs/manual/CoreTasks/zip.html | 126 +++++++++++++++++-
.../org/apache/tools/ant/taskdefs/Expand.java | 11 +-
.../org/apache/tools/ant/taskdefs/Zip.java | 48 +++++++
src/main/org/apache/tools/zip/ZipFile.java | 8 +-
9 files changed, 270 insertions(+), 12 deletions(-)
diff --git a/WHATSNEW b/WHATSNEW
index ba1753145..c76a4c4f4 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -702,6 +702,10 @@ Other changes:
* CBZip2OutputStream now has a finish method separate from close.
Bugzilla Report 42713.
+ * the <zip> and <unzip> family of tasks has new options to deal with
+ file name and comment encoding. Please see the zip tasks'
+ documentation for details.
+
Changes from Ant 1.7.0 TO Ant 1.7.1
=============================================
diff --git a/docs/manual/CoreTasks/ear.html b/docs/manual/CoreTasks/ear.html
index 5f8d116c1..d89de7cca 100644
--- a/docs/manual/CoreTasks/ear.html
+++ b/docs/manual/CoreTasks/ear.html
@@ -83,7 +83,9 @@ to a value other than its default, "add"
.
The character encoding to use for filenames
inside the archive. Defaults to UTF8. It is not
recommended to change this value as the created archive will most
- likely be unreadable for Java otherwise. |
+ likely be unreadable for Java otherwise.
+
See also the discussion in the
+ zip task page
No |
@@ -197,6 +199,25 @@ to a value other than its default, "add"
.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is false |
+
Nested elements
diff --git a/docs/manual/CoreTasks/jar.html b/docs/manual/CoreTasks/jar.html
index 68cb0c1ec..02908b389 100644
--- a/docs/manual/CoreTasks/jar.html
+++ b/docs/manual/CoreTasks/jar.html
@@ -125,8 +125,10 @@ to a value other than its default, "add"
.
encoding |
The character encoding to use for filenames
inside the archive. Defaults to UTF8. It is not
- recommended to change this value as the created archive will most
- likely be unreadable for Java otherwise. |
+ recommended to change this value as the created archive will
+ most likely be unreadable for Java otherwise.
+
See also the discussion in the
+ zip task page
No |
@@ -251,6 +253,25 @@ to a value other than its default, "add"
.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is false |
+
Nested elements
diff --git a/docs/manual/CoreTasks/unzip.html b/docs/manual/CoreTasks/unzip.html
index d9243a6ee..b9883fcc1 100644
--- a/docs/manual/CoreTasks/unzip.html
+++ b/docs/manual/CoreTasks/unzip.html
@@ -107,7 +107,9 @@ archive.
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
Defaults to "UTF8", use the magic value
native-encoding
for the platform's default character
- encoding.
+ encoding.
+
See also the discussion in the
+ zip task page
No |
@@ -125,6 +127,16 @@ archive.
any). since Ant 1.8.0
No, defaults to false |
+
+ scanForUnicodeExtraFields |
+ Note: This attribute is not available for
+ the untar task.
+ If the archive contains unicode extra fields then use them to set
+ the file names, ignoring the specified encoding.
+ See also the discussion in the
+ zip task page |
+ No, defaults to true |
+
Examples
diff --git a/docs/manual/CoreTasks/war.html b/docs/manual/CoreTasks/war.html
index 1b5aeef31..d4a42cf7e 100644
--- a/docs/manual/CoreTasks/war.html
+++ b/docs/manual/CoreTasks/war.html
@@ -116,7 +116,9 @@ to a value other than its default, "add"
.
The character encoding to use for filenames
inside the archive. Defaults to UTF8. It is not
recommended to change this value as the created archive will most
- likely be unreadable for Java otherwise. |
+ likely be unreadable for Java otherwise.
+
See also the discussion in the
+ zip task page
No |
@@ -214,6 +216,25 @@ to a value other than its default, "add"
.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is false |
+
Nested elements
diff --git a/docs/manual/CoreTasks/zip.html b/docs/manual/CoreTasks/zip.html
index 0e4d05d8b..fa0233afb 100644
--- a/docs/manual/CoreTasks/zip.html
+++ b/docs/manual/CoreTasks/zip.html
@@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools,
but causes problems if you try to open them from within Java and your
filenames contain non US-ASCII characters. Use the encoding attribute
and set it to UTF8 to create zip files that can safely be read by
-Java.
+Java. For a more complete discussion,
+see below
Starting with Ant 1.5.2, <zip>
can store Unix permissions
inside the archive (see description of the filemode and dirmode
@@ -149,7 +150,8 @@ archive.
The character encoding to use for filenames
inside the zip file. For a list of possible values see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
- Defaults to the platform's default character encoding. |
+ Defaults to the platform's default character encoding.
+
See also the discussion below
No |
@@ -241,7 +243,127 @@ archive.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion below |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Defaults to false. Since Ant 1.8.0.
+ See also the discussion below |
+ No, default is false |
+
+
+
+
+Traditionally the ZIP archive format uses CodePage 437 as encoding
+ for file names, which is not sufficient for many international
+ character sets.
+
+Over time different archivers have chosen different ways to work
+ around the limitation - the java.util.zip
package
+ simply uses UTF-8 as its encoding for example.
+
+Ant has been offering the encoding attribute of the zip and unzip
+ task as a way to explicitly specify the encoding to use (or expect)
+ since Ant 1.4. It defaults to the platform's default encoding for
+ zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as
+ well as the unzip family of tasks.
+
+More recent versions of the ZIP specification introduce something
+ called the "language encoding flag" which can be used to
+ signal that a file name has been encoded using UTF-8. Starting with
+ Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set
+ this flag, if the encoding has been set to UTF-8. Our
+ interoperability tests with existing archivers didn't show any ill
+ effects (in fact, most archivers ignore the flag to date), but you
+ can turn off the "language encoding flag" by setting the attribute
+ useLanguageEncodingFlag
to false
on the
+ zip-task if you should encounter problems.
+
+The unzip task (and similar tasks) will recognize the language
+ encoding flag and ignore the encoding set on the task if it has been
+ found.
+
+The InfoZIP developers have introduced new ZIP extra fields that
+ can be used to add an additional UTF-8 encoded file name to the
+ entry's metadata. Most archivers ignore these extra fields. The
+ zip family of tasks support an
+ option createUnicodeExtraFields
since Ant 1.8.0 which
+ makes Ant write these extra fields; it defaults to false since it
+ creates a bigger archive.
+
+The unzip-task will recognize the unicode extra fields by default
+ and read the file name information from them, unless you set the
+ optional attribute scanForUnicodeExtraFields
to
+ false.
+
+Recommendations for Interoperability
+
+The optimal setting of flags depends on the archivers you expect as
+ consumers/producers of the ZIP archives. Below are some test
+ results which may be superseded by later versions of each
+ tool.
+
+
+ - The java.util.zip package used by the jar executable or to read
+ jars from your CLASSPATH reads and writes UTF-8 names, it doesn't
+ set or recognize any flags or unicode extra fields.
+
+ - 7Zip writes CodePage 437 by default but uses UTF-8 and the
+ language encoding flag when writing entries that cannot be encoded
+ as CodePage 437. It recognizes the language encoding flag when
+ reading and ignores the unicode extra fields.
+
+ - WinZIP writes CodePage 437 and uses unicode extra fields by
+ default. It recognizes the unicode extra field when reading and
+ ignores the language encoding flag.
+
+ - Windows' "compressed folder" feature doesn't recognize any flag
+ or extra field and creates archives using the platform's default
+ encoding - and expects archives to be in that encoding when reading
+ them.
+
+ - InfoZIP based tools can recognize and write both, it is a
+ compile time option and depends on the platform so your mileage
+ may vary.
+
+ - PKWARE zip tools recognize both and prefer the language encoding
+ flag. They create archives using CodePage 437 if possible and UTF-8
+ plus the language encoding flag for file names that cannot be
+ encoded as CodePage 437.
+
+
+So, what to do?
+
+If you are creating jars, then java.util.zip is your main
+ consumer. We recommend you set the encoding to UTF-8 and keep the
+ language encoding flag enabled. The flag won't help or hurt
+ java.util.zip but archivers that support it will show the correct
+ file names.
+
+For maximum interop it is probably best to set the encoding to
+ UTF-8, enable the language encoding flag and create unicode extra
+ fields when writing ZIPs. Such archives should be extracted
+ correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most
+ likely InfoZIP tools. They will be unusable with Windows'
+ "compressed folders" feature and bigger than archives without the
+ unicode extra fields, though.
+
+If Windows' "compressed folders" is your primary consumer, then
+ your best option is to explicitly set the encoding to the target
+ platform. You may want to enable creation of unicode extra fields
+ so the tools that support them will extract the file names
+ correctly.
+
Parameters specified as nested elements
any resource collection
diff --git a/src/main/org/apache/tools/ant/taskdefs/Expand.java b/src/main/org/apache/tools/ant/taskdefs/Expand.java
index 0b6243fb3..aa57176ae 100644
--- a/src/main/org/apache/tools/ant/taskdefs/Expand.java
+++ b/src/main/org/apache/tools/ant/taskdefs/Expand.java
@@ -68,6 +68,7 @@ public class Expand extends Task {
private boolean resourcesSpecified = false;
private boolean failOnEmptyArchive = false;
private boolean stripAbsolutePathSpec = false;
+ private boolean scanForUnicodeExtraFields = true;
private static final String NATIVE_ENCODING = "native-encoding";
@@ -166,7 +167,7 @@ public class Expand extends Task {
getLocation());
}
try {
- zf = new ZipFile(srcF, encoding);
+ zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields);
boolean empty = true;
Enumeration e = zf.getEntries();
while (e.hasMoreElements()) {
@@ -453,4 +454,12 @@ public class Expand extends Task {
stripAbsolutePathSpec = b;
}
+ /**
+ * Whether unicode extra fields will be used if present.
+ *
+ * @since Ant 1.8.0
+ */
+ public void setScanForUnicodeExtraFields(boolean b) {
+ scanForUnicodeExtraFields = b;
+ }
}
diff --git a/src/main/org/apache/tools/ant/taskdefs/Zip.java b/src/main/org/apache/tools/ant/taskdefs/Zip.java
index 8f1d0c382..83e5da83c 100644
--- a/src/main/org/apache/tools/ant/taskdefs/Zip.java
+++ b/src/main/org/apache/tools/ant/taskdefs/Zip.java
@@ -174,6 +174,20 @@ public class Zip extends MatchingTask {
*/
private boolean preserve0Permissions = false;
+ /**
+ * Whether to set the language encoding flag when creating the archive.
+ *
+ * @since Ant 1.8.0
+ */
+ private boolean useLanguageEncodingFlag = true;
+
+ /**
+ * Whether to create unicode extra fields when creating the archive.
+ *
+ * @since Ant 1.8.0
+ */
+ private boolean createUnicodeExtraFields = false;
+
/**
* This is the name/location of where to
* create the .zip file.
@@ -452,6 +466,38 @@ public class Zip extends MatchingTask {
return preserve0Permissions;
}
+ /**
+ * Whether to set the language encoding flag.
+ * @since Ant 1.8.0
+ */
+ public void setUseLanguageEncodingFlag(boolean b) {
+ useLanguageEncodingFlag = b;
+ }
+
+ /**
+ * Whether the language encoding flag will be used.
+ * @since Ant 1.8.0
+ */
+ public boolean getUseLanguageEnodingFlag() {
+ return useLanguageEncodingFlag;
+ }
+
+ /**
+ * Whether Unicode extra fields will be created.
+ * @since Ant 1.8.0
+ */
+ public void setCreateUnicodeExtraFields(boolean b) {
+ createUnicodeExtraFields = b;
+ }
+
+ /**
+ * Whether Unicode extra fields will be created.
+ * @since Ant 1.8.0
+ */
+ public boolean getCreateUnicodeExtraFields() {
+ return createUnicodeExtraFields;
+ }
+
/**
* validate and build
* @throws BuildException on error
@@ -540,6 +586,8 @@ public class Zip extends MatchingTask {
zOut = new ZipOutputStream(zipFile);
zOut.setEncoding(encoding);
+ zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag);
+ zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields);
zOut.setMethod(doCompress
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED);
zOut.setLevel(level);
diff --git a/src/main/org/apache/tools/zip/ZipFile.java b/src/main/org/apache/tools/zip/ZipFile.java
index 661500a7c..bd83f6c6f 100644
--- a/src/main/org/apache/tools/zip/ZipFile.java
+++ b/src/main/org/apache/tools/zip/ZipFile.java
@@ -134,7 +134,7 @@ public class ZipFile {
/**
* Opens the given file for reading, assuming the specified
- * encoding for file names and ignoring unicode extra fields.
+ * encoding for file names, scanning unicode extra fields.
*
* @param name name of the archive.
* @param encoding the encoding to use for file names
@@ -142,12 +142,12 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(String name, String encoding) throws IOException {
- this(new File(name), encoding, false);
+ this(new File(name), encoding, true);
}
/**
* Opens the given file for reading, assuming the specified
- * encoding for file names and ignoring unicode extra fields.
+ * encoding for file names and scanning for unicode extra fields.
*
* @param f the archive.
* @param encoding the encoding to use for file names, use null
@@ -156,7 +156,7 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f, String encoding) throws IOException {
- this(f, encoding, false);
+ this(f, encoding, true);
}
/**