Browse Source

provide options for enhanced encoding support in ZIP, document it. Many thanks to Wolfgang Glas who provided most of the test input as well as patches the new support is based on.

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748593 13f79535-47bb-0310-9956-ffa450edef68
master
Stefan Bodewig 16 years ago
parent
commit
a8c2ab1868
9 changed files with 270 additions and 12 deletions
  1. +4
    -0
      WHATSNEW
  2. +22
    -1
      docs/manual/CoreTasks/ear.html
  3. +23
    -2
      docs/manual/CoreTasks/jar.html
  4. +13
    -1
      docs/manual/CoreTasks/unzip.html
  5. +22
    -1
      docs/manual/CoreTasks/war.html
  6. +124
    -2
      docs/manual/CoreTasks/zip.html
  7. +10
    -1
      src/main/org/apache/tools/ant/taskdefs/Expand.java
  8. +48
    -0
      src/main/org/apache/tools/ant/taskdefs/Zip.java
  9. +4
    -4
      src/main/org/apache/tools/zip/ZipFile.java

+ 4
- 0
WHATSNEW View File

@@ -702,6 +702,10 @@ Other changes:
* CBZip2OutputStream now has a finish method separate from close.
Bugzilla Report 42713.

* the <zip> and <unzip> family of tasks has new options to deal with
file name and comment encoding. Please see the zip tasks'
documentation for details.

Changes from Ant 1.7.0 TO Ant 1.7.1
=============================================



+ 22
- 1
docs/manual/CoreTasks/ear.html View File

@@ -83,7 +83,9 @@ to a value other than its default, <code>&quot;add&quot;</code>.</b></p>
<td valign="top">The character encoding to use for filenames
inside the archive. Defaults to UTF8. <strong>It is not
recommended to change this value as the created archive will most
likely be unreadable for Java otherwise.</strong></td>
likely be unreadable for Java otherwise.</strong>
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No</td>
</tr>
<tr>
@@ -197,6 +199,25 @@ to a value other than its default, <code>&quot;add&quot;</code>.</b></p>
</td>
<td valign="top" align="center">No, default is false</td>
</tr>
<tr>
<td valign="top">useLanguageEncodingFlag</td>
<td valign="top">Whether to set the language encoding flag if the
encoding is UTF-8. This setting doesn't have any effect if the
encoding is not UTF-8.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is true</td>
</tr>
<tr>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is false</td>
</tr>
</table>

<h3>Nested elements</h3>


+ 23
- 2
docs/manual/CoreTasks/jar.html View File

@@ -125,8 +125,10 @@ to a value other than its default, <code>"add"</code>.</b></p>
<td valign="top">encoding</td>
<td valign="top">The character encoding to use for filenames
inside the archive. Defaults to UTF8. <strong>It is not
recommended to change this value as the created archive will most
likely be unreadable for Java otherwise.</strong></td>
recommended to change this value as the created archive will
most likely be unreadable for Java otherwise.</strong>
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No</td>
</tr>
<tr>
@@ -251,6 +253,25 @@ to a value other than its default, <code>"add"</code>.</b></p>
</td>
<td valign="top" align="center">No, default is false</td>
</tr>
<tr>
<td valign="top">useLanguageEncodingFlag</td>
<td valign="top">Whether to set the language encoding flag if the
encoding is UTF-8. This setting doesn't have any effect if the
encoding is not UTF-8.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is true</td>
</tr>
<tr>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is false</td>
</tr>
</table>

<h3>Nested elements</h3>


+ 13
- 1
docs/manual/CoreTasks/unzip.html View File

@@ -107,7 +107,9 @@ archive.</p>
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br>
Defaults to &quot;UTF8&quot;, use the magic value
<code>native-encoding</code> for the platform's default character
encoding.</td>
encoding.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No</td>
</tr>
<tr>
@@ -125,6 +127,16 @@ archive.</p>
any). <em>since Ant 1.8.0</em></td>
<td valign="top" align="center">No, defaults to false</td>
</tr>
<tr>
<td valign="top">scanForUnicodeExtraFields</td>
<td valign="top"><b>Note:</b> This attribute is not available for
the <code>untar</code> task.<br>
If the archive contains unicode extra fields then use them to set
the file names, ignoring the specified encoding.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No, defaults to true</td>
</tr>
</table>
<h3>Examples</h3>
<pre>


+ 22
- 1
docs/manual/CoreTasks/war.html View File

@@ -116,7 +116,9 @@ to a value other than its default, <code>&quot;add&quot;</code>.</b></p>
<td valign="top">The character encoding to use for filenames
inside the archive. Defaults to UTF8. <strong>It is not
recommended to change this value as the created archive will most
likely be unreadable for Java otherwise.</strong></td>
likely be unreadable for Java otherwise.</strong>
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No</td>
</tr>
<tr>
@@ -214,6 +216,25 @@ to a value other than its default, <code>&quot;add&quot;</code>.</b></p>
</td>
<td valign="top" align="center">No, default is false</td>
</tr>
<tr>
<td valign="top">useLanguageEncodingFlag</td>
<td valign="top">Whether to set the language encoding flag if the
encoding is UTF-8. This setting doesn't have any effect if the
encoding is not UTF-8.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is true</td>
</tr>
<tr>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is false</td>
</tr>
</table>

<h3>Nested elements</h3>


+ 124
- 2
docs/manual/CoreTasks/zip.html View File

@@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools,
but causes problems if you try to open them from within Java and your
filenames contain non US-ASCII characters. Use the encoding attribute
and set it to UTF8 to create zip files that can safely be read by
Java.</p>
Java. For a more complete discussion,
see <a href="#encoding">below</a></p>

<p>Starting with Ant 1.5.2, <code>&lt;zip&gt;</code> can store Unix permissions
inside the archive (see description of the filemode and dirmode
@@ -149,7 +150,8 @@ archive.</p>
<td valign="top">The character encoding to use for filenames
inside the zip file. For a list of possible values see <a
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
Defaults to the platform's default character encoding.</td>
Defaults to the platform's default character encoding.
<br/>See also the <a href="#encoding">discussion below</a></td>
<td align="center" valign="top">No</td>
</tr>
<tr>
@@ -241,7 +243,127 @@ archive.</p>
</td>
<td valign="top" align="center">No, default is false</td>
</tr>
<tr>
<td valign="top">useLanguageEncodingFlag</td>
<td valign="top">Whether to set the language encoding flag if the
encoding is UTF-8. This setting doesn't have any effect if the
encoding is not UTF-8.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="#encoding">discussion below</a></td>
<td align="center" valign="top">No, default is true</td>
</tr>
<tr>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
Defaults to false. <em>Since Ant 1.8.0</em>.
<br/>See also the <a href="#encoding">discussion below</a></td>
<td align="center" valign="top">No, default is false</td>
</tr>
</table>

<h3><a name="encoding">Encoding of File Names</a></h3>

<p>Traditionally the ZIP archive format uses CodePage 437 as encoding
for file names, which is not sufficient for many international
character sets.</p>

<p>Over time different archivers have chosen different ways to work
around the limitation - the <code>java.util.zip</code> package
simply uses UTF-8 as its encoding for example.</p>

<p>Ant has been offering the encoding attribute of the zip and unzip
task as a way to explicitly specify the encoding to use (or expect)
since Ant 1.4. It defaults to the platform's default encoding for
zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as
well as the unzip family of tasks.</p>

<p>More recent versions of the ZIP specification introduce something
called the &quot;language encoding flag&quot; which can be used to
signal that a file name has been encoded using UTF-8. Starting with
Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set
this flag, if the encoding has been set to UTF-8. Our
interoperability tests with existing archivers didn't show any ill
effects (in fact, most archivers ignore the flag to date), but you
can turn off the "language encoding flag" by setting the attribute
<code>useLanguageEncodingFlag</code> to <code>false</code> on the
zip-task if you should encounter problems.</p>

<p>The unzip task (and similar tasks) will recognize the language
encoding flag and ignore the encoding set on the task if the flag has
been found.</p>

<p>The InfoZIP developers have introduced new ZIP extra fields that
can be used to add an additional UTF-8 encoded file name to the
entry's metadata. Most archivers ignore these extra fields. The
zip family of tasks support an
option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which
makes Ant write these extra fields, it defaults to false since it
creates a bigger archive.</p>

<p>The unzip-task will recognize the unicode extra fields by default
and read the file name information from them, unless you set the
optional attribute <code>scanForUnicodeExtraFields</code> to
false.</p>

<h4>Recommendations for Interoperability</h4>

<p>The optimal setting of flags depends on the archivers you expect as
consumers/producers of the ZIP archives. Below are some test
results which may be superseded with later versions of each
tool.</p>

<ul>
<li>The java.util.zip package used by the jar executable or to read
jars from your CLASSPATH reads and writes UTF-8 names, it doesn't
set or recognize any flags or unicode extra fields.</li>

<li>7Zip writes CodePage 437 by default but uses UTF-8 and the
language encoding flag when writing entries that cannot be encoded
as CodePage 437. It recognizes the language encoding flag when
reading and ignores the unicode extra fields.</li>

<li>WinZIP writes CodePage 437 and uses unicode extra fields by
default. It recognizes the unicode extra field when reading and
ignores the language encoding flag.</li>

<li>Windows' "compressed folder" feature doesn't recognize any flag
or extra field and creates archives using the platform's default
encoding - and expects archives to be in that encoding when reading
them.</li>

<li>InfoZIP based tools can recognize and write both, it is a
compile time option and depends on the platform so your mileage
may vary.</li>

<li>PKWARE zip tools recognize both and prefer the language encoding
flag. They create archives using CodePage 437 if possible and UTF-8
plus the language encoding flag for file names that cannot be
encoded as CodePage 437.</li>
</ul>

<p>So, what to do?</p>

<p>If you are creating jars, then java.util.zip is your main
consumer. We recommend you set the encoding to UTF-8 and keep the
language encoding flag enabled. The flag won't help or hurt
java.util.zip but archivers that support it will show the correct
file names.</p>

<p>For maximum interop it is probably best to set the encoding to
UTF-8, enable the language encoding flag and create unicode extra
fields when writing ZIPs. Such archives should be extracted
correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most
likely InfoZIP tools. They will be unusable with Windows'
"compressed folders" feature and bigger than archives without the
unicode extra fields, though.</p>

<p>If Windows' "compressed folders" is your primary consumer, then
your best option is to explicitly set the encoding to the target
platform. You may want to enable creation of unicode extra fields
so the tools that support them will extract the file names
correctly.</p>

<h3>Parameters specified as nested elements</h3>

<h4>any resource collection</h4>


+ 10
- 1
src/main/org/apache/tools/ant/taskdefs/Expand.java View File

@@ -68,6 +68,7 @@ public class Expand extends Task {
private boolean resourcesSpecified = false;
private boolean failOnEmptyArchive = false;
private boolean stripAbsolutePathSpec = false;
private boolean scanForUnicodeExtraFields = true;

private static final String NATIVE_ENCODING = "native-encoding";

@@ -166,7 +167,7 @@ public class Expand extends Task {
getLocation());
}
try {
zf = new ZipFile(srcF, encoding);
zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields);
boolean empty = true;
Enumeration e = zf.getEntries();
while (e.hasMoreElements()) {
@@ -453,4 +454,12 @@ public class Expand extends Task {
stripAbsolutePathSpec = b;
}

/**
 * Controls whether unicode extra fields found in the archive will be
 * used, if present.
 *
 * @param b the new value of the scanForUnicodeExtraFields setting
 * @since Ant 1.8.0
 */
public void setScanForUnicodeExtraFields(boolean b) {
    this.scanForUnicodeExtraFields = b;
}
}

+ 48
- 0
src/main/org/apache/tools/ant/taskdefs/Zip.java View File

@@ -174,6 +174,20 @@ public class Zip extends MatchingTask {
*/
private boolean preserve0Permissions = false;

/**
* Whether to set the language encoding flag when creating the archive.
*
* @since Ant 1.8.0
*/
private boolean useLanguageEncodingFlag = true;

/**
* Whether to create unicode extra fields when creating the archive.
*
* @since Ant 1.8.0
*/
private boolean createUnicodeExtraFields = false;

/**
* This is the name/location of where to
* create the .zip file.
@@ -452,6 +466,38 @@ public class Zip extends MatchingTask {
return preserve0Permissions;
}

/**
 * Controls whether the language encoding flag is set.
 *
 * @param b the new value of the useLanguageEncodingFlag setting
 * @since Ant 1.8.0
 */
public void setUseLanguageEncodingFlag(boolean b) {
    this.useLanguageEncodingFlag = b;
}

/**
 * Reports whether the language encoding flag will be used.
 *
 * <p>Note that the method name is spelled "Enoding" (sic); the
 * spelling is part of the public interface and is kept as is.</p>
 *
 * @return the current value of the useLanguageEncodingFlag setting
 * @since Ant 1.8.0
 */
public boolean getUseLanguageEnodingFlag() {
    return this.useLanguageEncodingFlag;
}

/**
 * Controls whether Unicode extra fields will be created.
 *
 * @param b the new value of the createUnicodeExtraFields setting
 * @since Ant 1.8.0
 */
public void setCreateUnicodeExtraFields(boolean b) {
    this.createUnicodeExtraFields = b;
}

/**
 * Reports whether Unicode extra fields will be created.
 *
 * @return the current value of the createUnicodeExtraFields setting
 * @since Ant 1.8.0
 */
public boolean getCreateUnicodeExtraFields() {
    return this.createUnicodeExtraFields;
}

/**
* validate and build
* @throws BuildException on error
@@ -540,6 +586,8 @@ public class Zip extends MatchingTask {
zOut = new ZipOutputStream(zipFile);

zOut.setEncoding(encoding);
zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag);
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields);
zOut.setMethod(doCompress
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED);
zOut.setLevel(level);


+ 4
- 4
src/main/org/apache/tools/zip/ZipFile.java View File

@@ -134,7 +134,7 @@ public class ZipFile {

/**
* Opens the given file for reading, assuming the specified
* encoding for file names and ignoring unicode extra fields.
* encoding for file names, scanning unicode extra fields.
*
* @param name name of the archive.
* @param encoding the encoding to use for file names
@@ -142,12 +142,12 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(String name, String encoding) throws IOException {
this(new File(name), encoding, false);
this(new File(name), encoding, true);
}

/**
* Opens the given file for reading, assuming the specified
* encoding for file names and ignoring unicode extra fields.
* encoding for file names and scanning for unicode extra fields.
*
* @param f the archive.
* @param encoding the encoding to use for file names, use null
@@ -156,7 +156,7 @@ public class ZipFile {
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f, String encoding) throws IOException {
this(f, encoding, false);
this(f, encoding, true);
}

/**


Loading…
Cancel
Save