diff --git a/WHATSNEW b/WHATSNEW
index ba1753145..c76a4c4f4 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -702,6 +702,10 @@ Other changes:
* CBZip2OutputStream now has a finish method separate from close.
Bugzilla Report 42713.
+ * the <zip> and <unzip> family of tasks has new options to deal with
+ file name and comment encoding. Please see the zip tasks'
+ documentation for details.
+
Changes from Ant 1.7.0 TO Ant 1.7.1
=============================================
diff --git a/docs/manual/CoreTasks/ear.html b/docs/manual/CoreTasks/ear.html
index 5f8d116c1..d89de7cca 100644
--- a/docs/manual/CoreTasks/ear.html
+++ b/docs/manual/CoreTasks/ear.html
@@ -83,7 +83,9 @@ to a value other than its default, "add"
.
The character encoding to use for filenames
inside the archive. Defaults to UTF8. It is not
recommended to change this value as the created archive will most
- likely be unreadable for Java otherwise. |
+ likely be unreadable for Java otherwise.
+
See also the discussion in the
+ zip task page
No |
@@ -197,6 +199,25 @@ to a value other than its default, "add"
.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is false |
+
Nested elements
diff --git a/docs/manual/CoreTasks/jar.html b/docs/manual/CoreTasks/jar.html
index 68cb0c1ec..02908b389 100644
--- a/docs/manual/CoreTasks/jar.html
+++ b/docs/manual/CoreTasks/jar.html
@@ -125,8 +125,10 @@ to a value other than its default, "add"
.
encoding |
The character encoding to use for filenames
inside the archive. Defaults to UTF8. It is not
- recommended to change this value as the created archive will most
- likely be unreadable for Java otherwise. |
+ recommended to change this value as the created archive will
+ most likely be unreadable for Java otherwise.
+
See also the discussion in the
+ zip task page
No |
@@ -251,6 +253,25 @@ to a value other than its default, "add"
.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is false |
+
Nested elements
diff --git a/docs/manual/CoreTasks/unzip.html b/docs/manual/CoreTasks/unzip.html
index d9243a6ee..b9883fcc1 100644
--- a/docs/manual/CoreTasks/unzip.html
+++ b/docs/manual/CoreTasks/unzip.html
@@ -107,7 +107,9 @@ archive.
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
Defaults to "UTF8", use the magic value
native-encoding
for the platform's default character
- encoding.
+ encoding.
+
See also the discussion in the
+ zip task page
No |
@@ -125,6 +127,16 @@ archive.
any). since Ant 1.8.0
No, defaults to false |
+
+ scanForUnicodeExtraFields |
+ Note: This attribute is not available for
+ the untar task.
+ If the archive contains unicode extra fields then use them to set
+ the file names, ignoring the specified encoding.
+ See also the discussion in the
+ zip task page |
+ No, defaults to true |
+
Examples
diff --git a/docs/manual/CoreTasks/war.html b/docs/manual/CoreTasks/war.html
index 1b5aeef31..d4a42cf7e 100644
--- a/docs/manual/CoreTasks/war.html
+++ b/docs/manual/CoreTasks/war.html
@@ -116,7 +116,9 @@ to a value other than its default, "add"
.
The character encoding to use for filenames
inside the archive. Defaults to UTF8. It is not
recommended to change this value as the created archive will most
- likely be unreadable for Java otherwise. |
+ likely be unreadable for Java otherwise.
+
See also the discussion in the
+ zip task page
No |
@@ -214,6 +216,25 @@ to a value other than its default, "add"
.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Since Ant 1.8.0.
+ See also the discussion in the
+ zip task page |
+ No, default is false |
+
Nested elements
diff --git a/docs/manual/CoreTasks/zip.html b/docs/manual/CoreTasks/zip.html
index 0e4d05d8b..fa0233afb 100644
--- a/docs/manual/CoreTasks/zip.html
+++ b/docs/manual/CoreTasks/zip.html
@@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools,
but causes problems if you try to open them from within Java and your
filenames contain non US-ASCII characters. Use the encoding attribute
and set it to UTF8 to create zip files that can safely be read by
-Java.
+Java. For a more complete discussion,
+see below
Starting with Ant 1.5.2, <zip>
can store Unix permissions
inside the archive (see description of the filemode and dirmode
@@ -149,7 +150,8 @@ archive.
The character encoding to use for filenames
inside the zip file. For a list of possible values see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
- Defaults to the platform's default character encoding. |
+ Defaults to the platform's default character encoding.
+
See also the discussion below
No |
@@ -241,7 +243,127 @@ archive.
No, default is false |
+
+ useLanguageEncodingFlag |
+ Whether to set the language encoding flag if the
+ encoding is UTF-8. This setting doesn't have any effect if the
+ encoding is not UTF-8.
+ Since Ant 1.8.0.
+ See also the discussion below |
+ No, default is true |
+
+
+ createUnicodeExtraFields |
+ Whether to create unicode extra fields to store
+ the file names a second time inside the entry's metadata.
+ Defaults to false. Since Ant 1.8.0.
+ See also the discussion below |
+ No, default is false |
+
+
+
+
+Traditionally the ZIP archive format uses CodePage 437 as encoding
+ for file names, which is not sufficient for many international
+ character sets.
+
+Over time different archivers have chosen different ways to work
+ around the limitation - the java.util.zip
package
+ simply uses UTF-8 as its encoding for example.
+
+Ant has been offering the encoding attribute of the zip and unzip
+ task as a way to explicitly specify the encoding to use (or expect)
+ since Ant 1.4. It defaults to the platform's default encoding for
+ zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as
+ well as the unzip family of tasks.
+
+More recent versions of the ZIP specification introduce something
+ called the "language encoding flag" which can be used to
+ signal that a file name has been encoded using UTF-8. Starting with
+ Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set
+ this flag, if the encoding has been set to UTF-8. Our
+ interoperability tests with existing archivers didn't show any ill
+ effects (in fact, most archivers ignore the flag to date), but you
+ can turn off the "language encoding flag" by setting the attribute
+ useLanguageEncodingFlag
to false
on the
+ zip-task if you should encounter problems.
+
+The unzip task (and similar tasks) will recognize the language
+ encoding flag and ignore the encoding set on the task if it has been
+ found.
+
+The InfoZIP developers have introduced new ZIP extra fields that
+ can be used to add an additional UTF-8 encoded file name to the
+ entry's metadata. Most archivers ignore these extra fields. The
+ zip family of tasks support an
+ option createUnicodeExtraFields
since Ant 1.8.0 which
+ makes Ant write these extra fields, it defaults to false since it
+ creates a bigger archive.
+
+The unzip-task will recognize the unicode extra fields by default
+ and read the file name information from them, unless you set the
+ optional attribute scanForUnicodeExtraFields
to
+ false.
+
+Recommendations for Interoperability
+
+The optimal setting of flags depends on the archivers you expect as
+ consumers/producers of the ZIP archives. Below are some test
+ results which may be superseded with later versions of each
+ tool.
+
+
+ - The java.util.zip package used by the jar executable or to read
+ jars from your CLASSPATH reads and writes UTF-8 names, it doesn't
+ set or recognize any flags or unicode extra fields.
+
+ - 7Zip writes CodePage 437 by default but uses UTF-8 and the
+ language encoding flag when writing entries that cannot be encoded
+ as CodePage 437. It recognizes the language encoding flag when
+ reading and ignores the unicode extra fields.
+
+ - WinZIP writes CodePage 437 and uses unicode extra fields by
+ default. It recognizes the unicode extra field when reading and
+ ignores the language encoding flag.
+
+ - Windows' "compressed folder" feature doesn't recognize any flag
+ or extra field and creates archives using the platform's default
+ encoding - and expects archives to be in that encoding when reading
+ them.
+
+ - InfoZIP based tools can recognize and write both, it is a
+ compile time option and depends on the platform so your mileage
+ may vary.
+
+ - PKWARE zip tools recognize both and prefer the language encoding
+ flag. They create archives using CodePage 437 if possible and UTF-8
+ plus the language encoding flag for file names that cannot be
+ encoded as CodePage 437.
+
+
+So, what to do?
+
+If you are creating jars, then java.util.zip is your main
+ consumer. We recommend you set the encoding to UTF-8 and keep the
+ language encoding flag enabled. The flag won't help or hurt
+ java.util.zip but archivers that support it will show the correct
+ file names.
+
+For maximum interop it is probably best to set the encoding to
+ UTF-8, enable the language encoding flag and create unicode extra
+ fields when writing ZIPs. Such archives should be extracted
+ correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most
+ likely InfoZIP tools. They will be unusable with Windows'
+ "compressed folders" feature and bigger than archives without the
+ unicode extra fields, though.
+
+If Windows' "compressed folders" is your primary consumer, then
+ your best option is to explicitly set the encoding to the target
+ platform. You may want to enable creation of unicode extra fields
+ so the tools that support them will extract the file names
+ correctly.
+
Parameters specified as nested elements
any resource collection
diff --git a/src/main/org/apache/tools/ant/taskdefs/Expand.java b/src/main/org/apache/tools/ant/taskdefs/Expand.java
index 0b6243fb3..aa57176ae 100644
--- a/src/main/org/apache/tools/ant/taskdefs/Expand.java
+++ b/src/main/org/apache/tools/ant/taskdefs/Expand.java
@@ -68,6 +68,7 @@ public class Expand extends Task {
private boolean resourcesSpecified = false;
private boolean failOnEmptyArchive = false;
private boolean stripAbsolutePathSpec = false;
+ private boolean scanForUnicodeExtraFields = true;
private static final String NATIVE_ENCODING = "native-encoding";
@@ -166,7 +167,7 @@ public class Expand extends Task {
getLocation());
}
try {
- zf = new ZipFile(srcF, encoding);
+ zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields);
boolean empty = true;
Enumeration e = zf.getEntries();
while (e.hasMoreElements()) {
@@ -453,4 +454,12 @@ public class Expand extends Task {
stripAbsolutePathSpec = b;
}
+ /**
+ * Whether unicode extra fields will be used if present.
+ *
+ * @since Ant 1.8.0
+ */
+ public void setScanForUnicodeExtraFields(boolean b) {
+ scanForUnicodeExtraFields = b;
+ }
}
diff --git a/src/main/org/apache/tools/ant/taskdefs/Zip.java b/src/main/org/apache/tools/ant/taskdefs/Zip.java
index 8f1d0c382..83e5da83c 100644
--- a/src/main/org/apache/tools/ant/taskdefs/Zip.java
+++ b/src/main/org/apache/tools/ant/taskdefs/Zip.java
@@ -174,6 +174,20 @@ public class Zip extends MatchingTask {
*/
private boolean preserve0Permissions = false;
+ /**
+ * Whether to set the language encoding flag when creating the archive.
+ *
+ * @since Ant 1.8.0
+ */
+ private boolean useLanguageEncodingFlag = true;
+
+ /**
+ * Whether to create unicode extra fields when creating the archive.
+ *
+ * @since Ant 1.8.0
+ */
+ private boolean createUnicodeExtraFields = false;
+
/**
* This is the name/location of where to
* create the .zip file.
@@ -452,6 +466,38 @@ public class Zip extends MatchingTask {
return preserve0Permissions;
}
+ /**
+ * Whether to set the language encoding flag.
+ * @since Ant 1.8.0
+ */
+ public void setUseLanguageEncodingFlag(boolean b) {
+ useLanguageEncodingFlag = b;
+ }
+
+ /**
+ * Whether the language encoding flag will be used.
+ * @since Ant 1.8.0
+ */
+ public boolean getUseLanguageEnodingFlag() {
+ return useLanguageEncodingFlag;
+ }
+
+ /**
+ * Whether Unicode extra fields will be created.
+ * @since Ant 1.8.0
+ */
+ public void setCreateUnicodeExtraFields(boolean b) {
+ createUnicodeExtraFields = b;
+ }
+
+ /**
+ * Whether Unicode extra fields will be created.
+ * @since Ant 1.8.0
+ */
+ public boolean getCreateUnicodeExtraFields() {
+ return createUnicodeExtraFields;
+ }
+
/**
* validate and build
* @throws BuildException on error
@@ -540,6 +586,8 @@ public class Zip extends MatchingTask {
zOut = new ZipOutputStream(zipFile);
zOut.setEncoding(encoding);
+ zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag);
+ zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields);
zOut.setMethod(doCompress
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED);
zOut.setLevel(level);
diff --git a/src/main/org/apache/tools/zip/ZipFile.java b/src/main/org/apache/tools/zip/ZipFile.java
index 661500a7c..bd83f6c6f 100644
--- a/src/main/org/apache/tools/zip/ZipFile.java
+++ b/src/main/org/apache/tools/zip/ZipFile.java
@@ -134,7 +134,7 @@ public class ZipFile {
/**
* Opens the given file for reading, assuming the specified
- * encoding for file names and ignoring unicode extra fields.
+ * encoding for file names and scanning for unicode extra fields.
*
* @param name name of the archive.
* @param encoding the encoding to use for file names
@@ -142,12 +142,12 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(String name, String encoding) throws IOException {
- this(new File(name), encoding, false);
+ this(new File(name), encoding, true);
}
/**
* Opens the given file for reading, assuming the specified
- * encoding for file names and ignoring unicode extra fields.
+ * encoding for file names and scanning for unicode extra fields.
*
* @param f the archive.
* @param encoding the encoding to use for file names, use null
@@ -156,7 +156,7 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f, String encoding) throws IOException {
- this(f, encoding, false);
+ this(f, encoding, true);
}
/**