git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748593 13f79535-47bb-0310-9956-ffa450edef68master
| @@ -702,6 +702,10 @@ Other changes: | |||||
| * CBZip2OutputStream now has a finish method separate from close. | * CBZip2OutputStream now has a finish method separate from close. | ||||
| Bugzilla Report 42713. | Bugzilla Report 42713. | ||||
| * the <zip> and <unzip> family of tasks has new options to deal with | |||||
| file name and comment encoding. Please see the zip tasks' | |||||
| documentation for details. | |||||
| Changes from Ant 1.7.0 TO Ant 1.7.1 | Changes from Ant 1.7.0 TO Ant 1.7.1 | ||||
| ============================================= | ============================================= | ||||
| @@ -83,7 +83,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
| <td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
| inside the archive. Defaults to UTF8. <strong>It is not | inside the archive. Defaults to UTF8. <strong>It is not | ||||
| recommended to change this value as the created archive will most | recommended to change this value as the created archive will most | ||||
| likely be unreadable for Java otherwise.</strong></td> | |||||
| likely be unreadable for Java otherwise.</strong> | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
| </tr> | </tr> | ||||
| <tr> | <tr> | ||||
| @@ -197,6 +199,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
| </td> | </td> | ||||
| <td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
| </tr> | </tr> | ||||
| <tr> | |||||
| <td valign="top">useLanguageEncodingFlag</td> | |||||
| <td valign="top">Whether to set the language encoding flag if the | |||||
| encoding is UTF-8. This setting doesn't have any effect if the | |||||
| encoding is not UTF-8. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td valign="top" align="center">No, default is true</td> | |||||
| </tr> | |||||
| <tr> | |||||
| <td valign="top">createUnicodeExtraFields</td> | |||||
| <td valign="top">Whether to create unicode extra fields to store | |||||
| the file names a second time inside the entry's metadata. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td valign="top" align="center">No, default is false</td> | |||||
| </tr> | |||||
| </table> | </table> | ||||
| <h3>Nested elements</h3> | <h3>Nested elements</h3> | ||||
| @@ -125,8 +125,10 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
| <td valign="top">encoding</td> | <td valign="top">encoding</td> | ||||
| <td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
| inside the archive. Defaults to UTF8. <strong>It is not | inside the archive. Defaults to UTF8. <strong>It is not | ||||
| recommended to change this value as the created archive will most | |||||
| likely be unreadable for Java otherwise.</strong></td> | |||||
| recommended to change this value as the created archive will | |||||
| most likely be unreadable for Java otherwise.</strong> | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
| </tr> | </tr> | ||||
| <tr> | <tr> | ||||
| @@ -251,6 +253,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
| </td> | </td> | ||||
| <td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
| </tr> | </tr> | ||||
| <tr> | |||||
| <td valign="top">useLanguageEncodingFlag</td> | |||||
| <td valign="top">Whether to set the language encoding flag if the | |||||
| encoding is UTF-8. This setting doesn't have any effect if the | |||||
| encoding is not UTF-8. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td valign="top" align="center">No, default is true</td> | |||||
| </tr> | |||||
| <tr> | |||||
| <td valign="top">createUnicodeExtraFields</td> | |||||
| <td valign="top">Whether to create unicode extra fields to store | |||||
| the file names a second time inside the entry's metadata. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td valign="top" align="center">No, default is false</td> | |||||
| </tr> | |||||
| </table> | </table> | ||||
| <h3>Nested elements</h3> | <h3>Nested elements</h3> | ||||
| @@ -107,7 +107,9 @@ archive.</p> | |||||
| href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br> | href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br> | ||||
| Defaults to "UTF8", use the magic value | Defaults to "UTF8", use the magic value | ||||
| <code>native-encoding</code> for the platform's default character | <code>native-encoding</code> for the platform's default character | ||||
| encoding.</td> | |||||
| encoding. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
| </tr> | </tr> | ||||
| <tr> | <tr> | ||||
| @@ -125,6 +127,16 @@ archive.</p> | |||||
| any). <em>since Ant 1.8.0</em></td> | any). <em>since Ant 1.8.0</em></td> | ||||
| <td valign="top" align="center">No, defaults to false</td> | <td valign="top" align="center">No, defaults to false</td> | ||||
| </tr> | </tr> | ||||
| <tr> | |||||
| <td valign="top">scanForUnicodeExtraFields</td> | |||||
| <td valign="top"><b>Note:</b> This attribute is not available for | |||||
| the <code>untar</code> task.<br> | |||||
| If the archive contains unicode extra fields then use them to set | |||||
| the file names, ignoring the specified encoding. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td align="center" valign="top">No, defaults to true</td> | |||||
| </tr> | |||||
| </table> | </table> | ||||
| <h3>Examples</h3> | <h3>Examples</h3> | ||||
| <pre> | <pre> | ||||
| @@ -116,7 +116,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
| <td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
| inside the archive. Defaults to UTF8. <strong>It is not | inside the archive. Defaults to UTF8. <strong>It is not | ||||
| recommended to change this value as the created archive will most | recommended to change this value as the created archive will most | ||||
| likely be unreadable for Java otherwise.</strong></td> | |||||
| likely be unreadable for Java otherwise.</strong> | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
| </tr> | </tr> | ||||
| <tr> | <tr> | ||||
| @@ -214,6 +216,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
| </td> | </td> | ||||
| <td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
| </tr> | </tr> | ||||
| <tr> | |||||
| <td valign="top">useLanguageEncodingFlag</td> | |||||
| <td valign="top">Whether to set the language encoding flag if the | |||||
| encoding is UTF-8. This setting doesn't have any effect if the | |||||
| encoding is not UTF-8. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td valign="top" align="center">No, default is true</td> | |||||
| </tr> | |||||
| <tr> | |||||
| <td valign="top">createUnicodeExtraFields</td> | |||||
| <td valign="top">Whether to create unicode extra fields to store | |||||
| the file names a second time inside the entry's metadata. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="zip.html#encoding">discussion in the | |||||
| zip task page</a></td> | |||||
| <td valign="top" align="center">No, default is false</td> | |||||
| </tr> | |||||
| </table> | </table> | ||||
| <h3>Nested elements</h3> | <h3>Nested elements</h3> | ||||
| @@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools, | |||||
| but causes problems if you try to open them from within Java and your | but causes problems if you try to open them from within Java and your | ||||
| filenames contain non US-ASCII characters. Use the encoding attribute | filenames contain non US-ASCII characters. Use the encoding attribute | ||||
| and set it to UTF8 to create zip files that can safely be read by | and set it to UTF8 to create zip files that can safely be read by | ||||
| Java.</p> | |||||
| Java. For a more complete discussion, | |||||
| see <a href="#encoding">below</a>.</p> | |||||
| <p>Starting with Ant 1.5.2, <code><zip></code> can store Unix permissions | <p>Starting with Ant 1.5.2, <code><zip></code> can store Unix permissions | ||||
| inside the archive (see description of the filemode and dirmode | inside the archive (see description of the filemode and dirmode | ||||
| @@ -149,7 +150,8 @@ archive.</p> | |||||
| <td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
| inside the zip file. For a list of possible values see <a | inside the zip file. For a list of possible values see <a | ||||
| href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. | href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. | ||||
| Defaults to the platform's default character encoding.</td> | |||||
| Defaults to the platform's default character encoding. | |||||
| <br/>See also the <a href="#encoding">discussion below</a></td> | |||||
| <td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
| </tr> | </tr> | ||||
| <tr> | <tr> | ||||
| @@ -241,7 +243,127 @@ archive.</p> | |||||
| </td> | </td> | ||||
| <td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
| </tr> | </tr> | ||||
| <tr> | |||||
| <td valign="top">useLanguageEncodingFlag</td> | |||||
| <td valign="top">Whether to set the language encoding flag if the | |||||
| encoding is UTF-8. This setting doesn't have any effect if the | |||||
| encoding is not UTF-8. | |||||
| <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="#encoding">discussion below</a></td> | |||||
| <td align="center" valign="top">No, default is true</td> | |||||
| </tr> | |||||
| <tr> | |||||
| <td valign="top">createUnicodeExtraFields</td> | |||||
| <td valign="top">Whether to create unicode extra fields to store | |||||
| the file names a second time inside the entry's metadata. | |||||
| Defaults to false. <em>Since Ant 1.8.0</em>. | |||||
| <br/>See also the <a href="#encoding">discussion below</a></td> | |||||
| <td align="center" valign="top">No, default is false</td> | |||||
| </tr> | |||||
| </table> | </table> | ||||
| <h3><a name="encoding">Encoding of File Names</a></h3> | |||||
| <p>Traditionally the ZIP archive format uses CodePage 437 as encoding | |||||
| for file names, which is not sufficient for many international | |||||
| character sets.</p> | |||||
| <p>Over time different archivers have chosen different ways to work | |||||
| around the limitation - the <code>java.util.zip</code> package | |||||
| simply uses UTF-8 as its encoding for example.</p> | |||||
| <p>Ant has been offering the encoding attribute of the zip and unzip | |||||
| task as a way to explicitly specify the encoding to use (or expect) | |||||
| since Ant 1.4. It defaults to the platform's default encoding for | |||||
| zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as | |||||
| well as the unzip family of tasks.</p> | |||||
| <p>More recent versions of the ZIP specification introduce something | |||||
| called the "language encoding flag" which can be used to | |||||
| signal that a file name has been encoded using UTF-8. Starting with | |||||
| Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set | |||||
| this flag, if the encoding has been set to UTF-8. Our | |||||
| interoperability tests with existing archivers didn't show any ill | |||||
| effects (in fact, most archivers ignore the flag to date), but you | |||||
| can turn off the "language encoding flag" by setting the attribute | |||||
| <code>useLanguageEncodingFlag</code> to <code>false</code> on the | |||||
| zip-task if you should encounter problems.</p> | |||||
| <p>The unzip task (and similar tasks) will recognize the language | |||||
| encoding flag and ignore the encoding set on the task if it has been | |||||
| found.</p> | |||||
| <p>The InfoZIP developers have introduced new ZIP extra fields that | |||||
| can be used to add an additional UTF-8 encoded file name to the | |||||
| entry's metadata. Most archivers ignore these extra fields. The | |||||
| zip family of tasks support an | |||||
| option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which | |||||
| makes Ant write these extra fields, it defaults to false since it | |||||
| creates a bigger archive.</p> | |||||
| <p>The unzip-task will recognize the unicode extra fields by default | |||||
| and read the file name information from them, unless you set the | |||||
| optional attribute <code>scanForUnicodeExtraFields</code> to | |||||
| false.</p> | |||||
| <h4>Recommendations for Interoperability</h4> | |||||
| <p>The optimal setting of flags depends on the archivers you expect as | |||||
| consumers/producers of the ZIP archives. Below are some test | |||||
| results which may be superseded with later versions of each | |||||
| tool.</p> | |||||
| <ul> | |||||
| <li>The java.util.zip package used by the jar executable or to read | |||||
| jars from your CLASSPATH reads and writes UTF-8 names, it doesn't | |||||
| set or recognize any flags or unicode extra fields.</li> | |||||
| <li>7Zip writes CodePage 437 by default but uses UTF-8 and the | |||||
| language encoding flag when writing entries that cannot be encoded | |||||
| as CodePage 437. It recognizes the language encoding flag when | |||||
| reading and ignores the unicode extra fields.</li> | |||||
| <li>WinZIP writes CodePage 437 and uses unicode extra fields by | |||||
| default. It recognizes the unicode extra field when reading and | |||||
| ignores the language encoding flag.</li> | |||||
| <li>Windows' "compressed folder" feature doesn't recognize any flag | |||||
| or extra field and creates archives using the platform's default | |||||
| encoding - and expects archives to be in that encoding when reading | |||||
| them.</li> | |||||
| <li>InfoZIP based tools can recognize and write both, it is a | |||||
| compile time option and depends on the platform so your mileage | |||||
| may vary.</li> | |||||
| <li>PKWARE zip tools recognize both and prefer the language encoding | |||||
| flag. They create archives using CodePage 437 if possible and UTF-8 | |||||
| plus the language encoding flag for file names that cannot be | |||||
| encoded as CodePage 437.</li> | |||||
| </ul> | |||||
| <p>So, what to do?</p> | |||||
| <p>If you are creating jars, then java.util.zip is your main | |||||
| consumer. We recommend you set the encoding to UTF-8 and keep the | |||||
| language encoding flag enabled. The flag won't help or hurt | |||||
| java.util.zip but archivers that support it will show the correct | |||||
| file names.</p> | |||||
| <p>For maximum interop it is probably best to set the encoding to | |||||
| UTF-8, enable the language encoding flag and create unicode extra | |||||
| fields when writing ZIPs. Such archives should be extracted | |||||
| correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most | |||||
| likely InfoZIP tools. They will be unusable with Windows' | |||||
| "compressed folders" feature and bigger than archives without the | |||||
| unicode extra fields, though.</p> | |||||
| <p>If Windows' "compressed folders" is your primary consumer, then | |||||
| your best option is to explicitly set the encoding to the target | |||||
| platform. You may want to enable creation of unicode extra fields | |||||
| so the tools that support them will extract the file names | |||||
| correctly.</p> | |||||
| <h3>Parameters specified as nested elements</h3> | <h3>Parameters specified as nested elements</h3> | ||||
| <h4>any resource collection</h4> | <h4>any resource collection</h4> | ||||
| @@ -68,6 +68,7 @@ public class Expand extends Task { | |||||
| private boolean resourcesSpecified = false; | private boolean resourcesSpecified = false; | ||||
| private boolean failOnEmptyArchive = false; | private boolean failOnEmptyArchive = false; | ||||
| private boolean stripAbsolutePathSpec = false; | private boolean stripAbsolutePathSpec = false; | ||||
| private boolean scanForUnicodeExtraFields = true; | |||||
| private static final String NATIVE_ENCODING = "native-encoding"; | private static final String NATIVE_ENCODING = "native-encoding"; | ||||
| @@ -166,7 +167,7 @@ public class Expand extends Task { | |||||
| getLocation()); | getLocation()); | ||||
| } | } | ||||
| try { | try { | ||||
| zf = new ZipFile(srcF, encoding); | |||||
| zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields); | |||||
| boolean empty = true; | boolean empty = true; | ||||
| Enumeration e = zf.getEntries(); | Enumeration e = zf.getEntries(); | ||||
| while (e.hasMoreElements()) { | while (e.hasMoreElements()) { | ||||
| @@ -453,4 +454,12 @@ public class Expand extends Task { | |||||
| stripAbsolutePathSpec = b; | stripAbsolutePathSpec = b; | ||||
| } | } | ||||
| /** | |||||
| * Whether unicode extra fields will be used if present. | |||||
| * | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| public void setScanForUnicodeExtraFields(boolean b) { | |||||
| scanForUnicodeExtraFields = b; | |||||
| } | |||||
| } | } | ||||
| @@ -174,6 +174,20 @@ public class Zip extends MatchingTask { | |||||
| */ | */ | ||||
| private boolean preserve0Permissions = false; | private boolean preserve0Permissions = false; | ||||
| /** | |||||
| * Whether to set the language encoding flag when creating the archive. | |||||
| * | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| private boolean useLanguageEncodingFlag = true; | |||||
| /** | |||||
| * Whether to create unicode extra fields when creating the archive. | |||||
| * | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| private boolean createUnicodeExtraFields = false; | |||||
| /** | /** | ||||
| * This is the name/location of where to | * This is the name/location of where to | ||||
| * create the .zip file. | * create the .zip file. | ||||
| @@ -452,6 +466,38 @@ public class Zip extends MatchingTask { | |||||
| return preserve0Permissions; | return preserve0Permissions; | ||||
| } | } | ||||
| /** | |||||
| * Whether to set the language encoding flag. | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| public void setUseLanguageEncodingFlag(boolean b) { | |||||
| useLanguageEncodingFlag = b; | |||||
| } | |||||
| /** | |||||
| * Whether the language encoding flag will be used. | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| public boolean getUseLanguageEnodingFlag() { | |||||
| return useLanguageEncodingFlag; | |||||
| } | |||||
| /** | |||||
| * Whether Unicode extra fields will be created. | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| public void setCreateUnicodeExtraFields(boolean b) { | |||||
| createUnicodeExtraFields = b; | |||||
| } | |||||
| /** | |||||
| * Whether Unicode extra fields will be created. | |||||
| * @since Ant 1.8.0 | |||||
| */ | |||||
| public boolean getCreateUnicodeExtraFields() { | |||||
| return createUnicodeExtraFields; | |||||
| } | |||||
| /** | /** | ||||
| * validate and build | * validate and build | ||||
| * @throws BuildException on error | * @throws BuildException on error | ||||
| @@ -540,6 +586,8 @@ public class Zip extends MatchingTask { | |||||
| zOut = new ZipOutputStream(zipFile); | zOut = new ZipOutputStream(zipFile); | ||||
| zOut.setEncoding(encoding); | zOut.setEncoding(encoding); | ||||
| zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag); | |||||
| zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields); | |||||
| zOut.setMethod(doCompress | zOut.setMethod(doCompress | ||||
| ? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | ? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | ||||
| zOut.setLevel(level); | zOut.setLevel(level); | ||||
| @@ -134,7 +134,7 @@ public class ZipFile { | |||||
| /** | /** | ||||
| * Opens the given file for reading, assuming the specified | * Opens the given file for reading, assuming the specified | ||||
| * encoding for file names and ignoring unicode extra fields. | |||||
| * encoding for file names, scanning unicode extra fields. | |||||
| * | * | ||||
| * @param name name of the archive. | * @param name name of the archive. | ||||
| * @param encoding the encoding to use for file names | * @param encoding the encoding to use for file names | ||||
| @@ -142,12 +142,12 @@ public class ZipFile { | |||||
| * @throws IOException if an error occurs while reading the file. | * @throws IOException if an error occurs while reading the file. | ||||
| */ | */ | ||||
| public ZipFile(String name, String encoding) throws IOException { | public ZipFile(String name, String encoding) throws IOException { | ||||
| this(new File(name), encoding, false); | |||||
| this(new File(name), encoding, true); | |||||
| } | } | ||||
| /** | /** | ||||
| * Opens the given file for reading, assuming the specified | * Opens the given file for reading, assuming the specified | ||||
| * encoding for file names and ignoring unicode extra fields. | |||||
| * encoding for file names and scanning for unicode extra fields. | |||||
| * | * | ||||
| * @param f the archive. | * @param f the archive. | ||||
| * @param encoding the encoding to use for file names, use null | * @param encoding the encoding to use for file names, use null | ||||
| @@ -156,7 +156,7 @@ public class ZipFile { | |||||
| * @throws IOException if an error occurs while reading the file. | * @throws IOException if an error occurs while reading the file. | ||||
| */ | */ | ||||
| public ZipFile(File f, String encoding) throws IOException { | public ZipFile(File f, String encoding) throws IOException { | ||||
| this(f, encoding, false); | |||||
| this(f, encoding, true); | |||||
| } | } | ||||
| /** | /** | ||||