git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748593 13f79535-47bb-0310-9956-ffa450edef68master
| @@ -702,6 +702,10 @@ Other changes: | |||
| * CBZip2OutputStream now has a finish method separate from close. | |||
| Bugzilla Report 42713. | |||
| * the <zip> and <unzip> family of tasks has new options to deal with | |||
| file name and comment encoding. Please see the zip tasks' | |||
| documentation for details. | |||
| Changes from Ant 1.7.0 TO Ant 1.7.1 | |||
| ============================================= | |||
| @@ -83,7 +83,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
| <td valign="top">The character encoding to use for filenames | |||
| inside the archive. Defaults to UTF8. <strong>It is not | |||
| recommended to change this value as the created archive will most | |||
| likely be unreadable for Java otherwise.</strong></td> | |||
| likely be unreadable for Java otherwise.</strong> | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td align="center" valign="top">No</td> | |||
| </tr> | |||
| <tr> | |||
| @@ -197,6 +199,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
| </td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">useLanguageEncodingFlag</td> | |||
| <td valign="top">Whether to set the language encoding flag if the | |||
| encoding is UTF-8. This setting doesn't have any effect if the | |||
| encoding is not UTF-8. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td valign="top" align="center">No, default is true</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">createUnicodeExtraFields</td> | |||
| <td valign="top">Whether to create unicode extra fields to store | |||
| the file names a second time inside the entry's metadata. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| </table> | |||
| <h3>Nested elements</h3> | |||
| @@ -125,8 +125,10 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
| <td valign="top">encoding</td> | |||
| <td valign="top">The character encoding to use for filenames | |||
| inside the archive. Defaults to UTF8. <strong>It is not | |||
| recommended to change this value as the created archive will most | |||
| likely be unreadable for Java otherwise.</strong></td> | |||
| recommended to change this value as the created archive will | |||
| most likely be unreadable for Java otherwise.</strong> | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td align="center" valign="top">No</td> | |||
| </tr> | |||
| <tr> | |||
| @@ -251,6 +253,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
| </td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">useLanguageEncodingFlag</td> | |||
| <td valign="top">Whether to set the language encoding flag if the | |||
| encoding is UTF-8. This setting doesn't have any effect if the | |||
| encoding is not UTF-8. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td valign="top" align="center">No, default is true</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">createUnicodeExtraFields</td> | |||
| <td valign="top">Whether to create unicode extra fields to store | |||
| the file names a second time inside the entry's metadata. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| </table> | |||
| <h3>Nested elements</h3> | |||
| @@ -107,7 +107,9 @@ archive.</p> | |||
| href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br> | |||
| Defaults to "UTF8", use the magic value | |||
| <code>native-encoding</code> for the platform's default character | |||
| encoding.</td> | |||
| encoding. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td align="center" valign="top">No</td> | |||
| </tr> | |||
| <tr> | |||
| @@ -125,6 +127,16 @@ archive.</p> | |||
| any). <em>since Ant 1.8.0</em></td> | |||
| <td valign="top" align="center">No, defaults to false</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">scanForUnicodeExtraFields</td> | |||
| <td valign="top"><b>Note:</b> This attribute is not available for | |||
| the <code>untar</code> task.<br> | |||
| If the archive contains unicode extra fields then use them to set | |||
| the file names, ignoring the specified encoding. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td align="center" valign="top">No, defaults to true</td> | |||
| </tr> | |||
| </table> | |||
| <h3>Examples</h3> | |||
| <pre> | |||
| @@ -116,7 +116,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
| <td valign="top">The character encoding to use for filenames | |||
| inside the archive. Defaults to UTF8. <strong>It is not | |||
| recommended to change this value as the created archive will most | |||
| likely be unreadable for Java otherwise.</strong></td> | |||
| likely be unreadable for Java otherwise.</strong> | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td align="center" valign="top">No</td> | |||
| </tr> | |||
| <tr> | |||
| @@ -214,6 +216,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
| </td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">useLanguageEncodingFlag</td> | |||
| <td valign="top">Whether to set the language encoding flag if the | |||
| encoding is UTF-8. This setting doesn't have any effect if the | |||
| encoding is not UTF-8. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td valign="top" align="center">No, default is true</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">createUnicodeExtraFields</td> | |||
| <td valign="top">Whether to create unicode extra fields to store | |||
| the file names a second time inside the entry's metadata. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||
| zip task page</a></td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| </table> | |||
| <h3>Nested elements</h3> | |||
| @@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools, | |||
| but causes problems if you try to open them from within Java and your | |||
| filenames contain non US-ASCII characters. Use the encoding attribute | |||
| and set it to UTF8 to create zip files that can safely be read by | |||
| Java.</p> | |||
| Java. For a more complete discussion, | |||
| see <a href="#encoding">below</a>.</p> | |||
| <p>Starting with Ant 1.5.2, <code>&lt;zip&gt;</code> can store Unix permissions | |||
| inside the archive (see description of the filemode and dirmode | |||
| @@ -149,7 +150,8 @@ archive.</p> | |||
| <td valign="top">The character encoding to use for filenames | |||
| inside the zip file. For a list of possible values see <a | |||
| href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. | |||
| Defaults to the platform's default character encoding.</td> | |||
| Defaults to the platform's default character encoding. | |||
| <br/>See also the <a href="#encoding">discussion below</a></td> | |||
| <td align="center" valign="top">No</td> | |||
| </tr> | |||
| <tr> | |||
| @@ -241,7 +243,127 @@ archive.</p> | |||
| </td> | |||
| <td valign="top" align="center">No, default is false</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">useLanguageEncodingFlag</td> | |||
| <td valign="top">Whether to set the language encoding flag if the | |||
| encoding is UTF-8. This setting doesn't have any effect if the | |||
| encoding is not UTF-8. | |||
| <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="#encoding">discussion below</a></td> | |||
| <td align="center" valign="top">No, default is true</td> | |||
| </tr> | |||
| <tr> | |||
| <td valign="top">createUnicodeExtraFields</td> | |||
| <td valign="top">Whether to create unicode extra fields to store | |||
| the file names a second time inside the entry's metadata. | |||
| Defaults to false. <em>Since Ant 1.8.0</em>. | |||
| <br/>See also the <a href="#encoding">discussion below</a></td> | |||
| <td align="center" valign="top">No, default is false</td> | |||
| </tr> | |||
| </table> | |||
| <h3><a name="encoding">Encoding of File Names</a></h3> | |||
| <p>Traditionally the ZIP archive format uses CodePage 437 as encoding | |||
| for file names, which is not sufficient for many international | |||
| character sets.</p> | |||
| <p>Over time different archivers have chosen different ways to work | |||
| around the limitation - the <code>java.util.zip</code> package | |||
| simply uses UTF-8 as its encoding for example.</p> | |||
| <p>Ant has been offering the encoding attribute of the zip and unzip | |||
| task as a way to explicitly specify the encoding to use (or expect) | |||
| since Ant 1.4. It defaults to the platform's default encoding for | |||
| zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as | |||
| well as the unzip family of tasks.</p> | |||
| <p>More recent versions of the ZIP specification introduce something | |||
| called the "language encoding flag" which can be used to | |||
| signal that a file name has been encoded using UTF-8. Starting with | |||
| Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set | |||
| this flag, if the encoding has been set to UTF-8. Our | |||
| interoperability tests with existing archivers didn't show any ill | |||
| effects (in fact, most archivers ignore the flag to date), but you | |||
| can turn off the "language encoding flag" by setting the attribute | |||
| <code>useLanguageEncodingFlag</code> to <code>false</code> on the | |||
| zip-task if you should encounter problems.</p> | |||
| <p>The unzip task (and similar tasks) will recognize the language | |||
| encoding flag and ignore the encoding set on the task if it has been | |||
| found.</p> | |||
| <p>The InfoZIP developers have introduced new ZIP extra fields that | |||
| can be used to add an additional UTF-8 encoded file name to the | |||
| entry's metadata. Most archivers ignore these extra fields. The | |||
| zip family of tasks support an | |||
| option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which | |||
| makes Ant write these extra fields; it defaults to false since it | |||
| creates a bigger archive.</p> | |||
| <p>The unzip-task will recognize the unicode extra fields by default | |||
| and read the file name information from them, unless you set the | |||
| optional attribute <code>scanForUnicodeExtraFields</code> to | |||
| false.</p> | |||
| <h4>Recommendations for Interoperability</h4> | |||
| <p>The optimal setting of flags depends on the archivers you expect as | |||
| consumers/producers of the ZIP archives. Below are some test | |||
| results which may be superseded by later versions of each | |||
| tool.</p> | |||
| <ul> | |||
| <li>The java.util.zip package used by the jar executable or to read | |||
| jars from your CLASSPATH reads and writes UTF-8 names, it doesn't | |||
| set or recognize any flags or unicode extra fields.</li> | |||
| <li>7Zip writes CodePage 437 by default but uses UTF-8 and the | |||
| language encoding flag when writing entries that cannot be encoded | |||
| as CodePage 437. It recognizes the language encoding flag when | |||
| reading and ignores the unicode extra fields.</li> | |||
| <li>WinZIP writes CodePage 437 and uses unicode extra fields by | |||
| default. It recognizes the unicode extra field when reading and | |||
| ignores the language encoding flag.</li> | |||
| <li>Windows' "compressed folder" feature doesn't recognize any flag | |||
| or extra field and creates archives using the platform's default | |||
| encoding - and expects archives to be in that encoding when reading | |||
| them.</li> | |||
| <li>InfoZIP based tools can recognize and write both; it is a | |||
| compile time option and depends on the platform so your mileage | |||
| may vary.</li> | |||
| <li>PKWARE zip tools recognize both and prefer the language encoding | |||
| flag. They create archives using CodePage 437 if possible and UTF-8 | |||
| plus the language encoding flag for file names that cannot be | |||
| encoded as CodePage 437.</li> | |||
| </ul> | |||
| <p>So, what to do?</p> | |||
| <p>If you are creating jars, then java.util.zip is your main | |||
| consumer. We recommend you set the encoding to UTF-8 and keep the | |||
| language encoding flag enabled. The flag won't help or hurt | |||
| java.util.zip but archivers that support it will show the correct | |||
| file names.</p> | |||
| <p>For maximum interop it is probably best to set the encoding to | |||
| UTF-8, enable the language encoding flag and create unicode extra | |||
| fields when writing ZIPs. Such archives should be extracted | |||
| correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most | |||
| likely InfoZIP tools. They will be unusable with Windows' | |||
| "compressed folders" feature and bigger than archives without the | |||
| unicode extra fields, though.</p> | |||
| <p>If Windows' "compressed folders" is your primary consumer, then | |||
| your best option is to explicitly set the encoding to the target | |||
| platform. You may want to enable creation of unicode extra fields | |||
| so the tools that support them will extract the file names | |||
| correctly.</p> | |||
| <h3>Parameters specified as nested elements</h3> | |||
| <h4>any resource collection</h4> | |||
| @@ -68,6 +68,7 @@ public class Expand extends Task { | |||
| private boolean resourcesSpecified = false; | |||
| private boolean failOnEmptyArchive = false; | |||
| private boolean stripAbsolutePathSpec = false; | |||
| private boolean scanForUnicodeExtraFields = true; | |||
| private static final String NATIVE_ENCODING = "native-encoding"; | |||
| @@ -166,7 +167,7 @@ public class Expand extends Task { | |||
| getLocation()); | |||
| } | |||
| try { | |||
| zf = new ZipFile(srcF, encoding); | |||
| zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields); | |||
| boolean empty = true; | |||
| Enumeration e = zf.getEntries(); | |||
| while (e.hasMoreElements()) { | |||
| @@ -453,4 +454,12 @@ public class Expand extends Task { | |||
| stripAbsolutePathSpec = b; | |||
| } | |||
| /** | |||
| * Whether unicode extra fields will be used if present. | |||
| * | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| public void setScanForUnicodeExtraFields(boolean b) { | |||
| scanForUnicodeExtraFields = b; | |||
| } | |||
| } | |||
| @@ -174,6 +174,20 @@ public class Zip extends MatchingTask { | |||
| */ | |||
| private boolean preserve0Permissions = false; | |||
| /** | |||
| * Whether to set the language encoding flag when creating the archive. | |||
| * | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| private boolean useLanguageEncodingFlag = true; | |||
| /** | |||
| * Whether to create Unicode extra fields when creating the archive. | |||
| * | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| private boolean createUnicodeExtraFields = false; | |||
| /** | |||
| * This is the name/location of where to | |||
| * create the .zip file. | |||
| @@ -452,6 +466,38 @@ public class Zip extends MatchingTask { | |||
| return preserve0Permissions; | |||
| } | |||
| /** | |||
| * Whether to set the language encoding flag. | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| public void setUseLanguageEncodingFlag(boolean b) { | |||
| useLanguageEncodingFlag = b; | |||
| } | |||
| /** | |||
| * Whether the language encoding flag will be used. | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| public boolean getUseLanguageEnodingFlag() { | |||
| return useLanguageEncodingFlag; | |||
| } | |||
| /** | |||
| * Whether Unicode extra fields will be created. | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| public void setCreateUnicodeExtraFields(boolean b) { | |||
| createUnicodeExtraFields = b; | |||
| } | |||
| /** | |||
| * Whether Unicode extra fields will be created. | |||
| * @since Ant 1.8.0 | |||
| */ | |||
| public boolean getCreateUnicodeExtraFields() { | |||
| return createUnicodeExtraFields; | |||
| } | |||
| /** | |||
| * validate and build | |||
| * @throws BuildException on error | |||
| @@ -540,6 +586,8 @@ public class Zip extends MatchingTask { | |||
| zOut = new ZipOutputStream(zipFile); | |||
| zOut.setEncoding(encoding); | |||
| zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag); | |||
| zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields); | |||
| zOut.setMethod(doCompress | |||
| ? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | |||
| zOut.setLevel(level); | |||
| @@ -134,7 +134,7 @@ public class ZipFile { | |||
| /** | |||
| * Opens the given file for reading, assuming the specified | |||
| * encoding for file names and ignoring unicode extra fields. | |||
| * encoding for file names, scanning unicode extra fields. | |||
| * | |||
| * @param name name of the archive. | |||
| * @param encoding the encoding to use for file names | |||
| @@ -142,12 +142,12 @@ public class ZipFile { | |||
| * @throws IOException if an error occurs while reading the file. | |||
| */ | |||
| public ZipFile(String name, String encoding) throws IOException { | |||
| this(new File(name), encoding, false); | |||
| this(new File(name), encoding, true); | |||
| } | |||
| /** | |||
| * Opens the given file for reading, assuming the specified | |||
| * encoding for file names and ignoring unicode extra fields. | |||
| * encoding for file names and scanning for unicode extra fields. | |||
| * | |||
| * @param f the archive. | |||
| * @param encoding the encoding to use for file names, use null | |||
| @@ -156,7 +156,7 @@ public class ZipFile { | |||
| * @throws IOException if an error occurs while reading the file. | |||
| */ | |||
| public ZipFile(File f, String encoding) throws IOException { | |||
| this(f, encoding, false); | |||
| this(f, encoding, true); | |||
| } | |||
| /** | |||