Browse Source

even more encoding options for zips

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@750072 13f79535-47bb-0310-9956-ffa450edef68
master
Stefan Bodewig 16 years ago
parent
commit
f21a51dfd3
14 changed files with 247 additions and 40 deletions
  1. +13
    -1
      docs/manual/CoreTasks/ear.html
  2. +13
    -1
      docs/manual/CoreTasks/jar.html
  3. +13
    -1
      docs/manual/CoreTasks/war.html
  4. +27
    -7
      docs/manual/CoreTasks/zip.html
  5. +81
    -5
      src/main/org/apache/tools/ant/taskdefs/Zip.java
  6. +1
    -1
      src/main/org/apache/tools/zip/FallbackZipEncoding.java
  7. +1
    -1
      src/main/org/apache/tools/zip/Simple8BitZipEncoding.java
  8. +1
    -1
      src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
  9. +1
    -1
      src/main/org/apache/tools/zip/UnicodePathExtraField.java
  10. +1
    -1
      src/main/org/apache/tools/zip/ZipEncodingHelper.java
  11. +1
    -1
      src/main/org/apache/tools/zip/ZipLong.java
  12. +88
    -17
      src/main/org/apache/tools/zip/ZipOutputStream.java
  13. +1
    -1
      src/main/org/apache/tools/zip/ZipShort.java
  14. +5
    -1
      src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java

+ 13
- 1
docs/manual/CoreTasks/ear.html View File

@@ -213,10 +213,22 @@ to a value other than its default, <code>&quot;add&quot;</code>.</b></p>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
<br>Possible values are "never", "always" and "not-encodeable"
which will only add Unicode extra fields if the file name cannot
be encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is "never"</td>
</tr>
<tr>
<td valign="top">fallbacktoUTF8</td>
<td valign="top">Whether to use UTF-8 and the language encoding
flag instead of the specified encoding if a file name cannot be
encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No, default is false</td>
</tr>
</table>



+ 13
- 1
docs/manual/CoreTasks/jar.html View File

@@ -267,10 +267,22 @@ to a value other than its default, <code>"add"</code>.</b></p>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
<br>Possible values are "never", "always" and "not-encodeable"
which will only add Unicode extra fields if the file name cannot
be encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is "never"</td>
</tr>
<tr>
<td valign="top">fallbacktoUTF8</td>
<td valign="top">Whether to use UTF-8 and the language encoding
flag instead of the specified encoding if a file name cannot be
encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No, default is false</td>
</tr>
</table>



+ 13
- 1
docs/manual/CoreTasks/war.html View File

@@ -230,10 +230,22 @@ to a value other than its default, <code>&quot;add&quot;</code>.</b></p>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
<br>Possible values are "never", "always" and "not-encodeable"
which will only add Unicode extra fields if the file name cannot
be encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td valign="top" align="center">No, default is "never"</td>
</tr>
<tr>
<td valign="top">fallbacktoUTF8</td>
<td valign="top">Whether to use UTF-8 and the language encoding
flag instead of the specified encoding if a file name cannot be
encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="zip.html#encoding">discussion in the
zip task page</a></td>
<td align="center" valign="top">No, default is false</td>
</tr>
</table>



+ 27
- 7
docs/manual/CoreTasks/zip.html View File

@@ -256,7 +256,19 @@ archive.</p>
<td valign="top">createUnicodeExtraFields</td>
<td valign="top">Whether to create unicode extra fields to store
the file names a second time inside the entry's metadata.
Defaults to false. <em>Since Ant 1.8.0</em>.
<br>Possible values are "never", "always" and "not-encodeable"
which will only add Unicode extra fields if the file name cannot
be encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="#encoding">discussion below</a></td>
<td align="center" valign="top">No, default is "never"</td>
</tr>
<tr>
<td valign="top">fallbacktoUTF8</td>
<td valign="top">Whether to use UTF-8 and the language encoding
flag instead of the specified encoding if a file name cannot be
encoded using the specified encoding.
<em>Since Ant 1.8.0</em>.
<br/>See also the <a href="#encoding">discussion below</a></td>
<td align="center" valign="top">No, default is false</td>
</tr>
@@ -298,8 +310,15 @@ archive.</p>
entry's metadata. Most archivers ignore these extra fields. The
zip family of tasks support an
option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which
makes Ant write these extra fields, it defaults to false since it
creates a bigger archive.</p>
makes Ant write these extra fields either for all entries ("always")
or only for those whose name cannot be encoded using the specified
encoding ("not-encodeable"). It defaults to "never" since the extra
fields create bigger archives.</p>

<p>The fallbackToUTF8 attribute of zip can be used to create archives
that use the specified encoding in the majority of cases but UTF-8 and
the language encoding flag for filenames that cannot be encoded
using the specified encoding.</p>

<p>The unzip-task will recognize the unicode extra fields by default
and read the file name information from them, unless you set the
@@ -320,12 +339,13 @@ archive.</p>

<li>7Zip writes CodePage 437 by default but uses UTF-8 and the
language encoding flag when writing entries that cannot be encoded
as CodePage 437. It recognizes the language encoding flag when
reading and ignores the unicode extra fields.</li>
as CodePage 437 (similar to the zip task with fallbacktoUTF8 set
to true). It recognizes the language encoding flag when reading
and ignores the unicode extra fields.</li>

<li>WinZIP writes CodePage 437 and uses unicode extra fields by
default. It recognizes the unicode extra field when reading and
ignores the language encoding flag.</li>
default. It recognizes the unicode extra field and the language
encoding flag when reading.</li>

<li>Windows' "compressed folder" feature doesn't recognize any flag
or extra field and creates archives using the platform's default


+ 81
- 5
src/main/org/apache/tools/ant/taskdefs/Zip.java View File

@@ -27,8 +27,10 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.Stack;
import java.util.Vector;
import java.util.zip.CRC32;
@@ -182,11 +184,20 @@ public class Zip extends MatchingTask {
private boolean useLanguageEncodingFlag = true;

/**
* Whether to set the language encoding flag when creating the archive.
* Whether to add unicode extra fields.
*
* @since Ant 1.8.0
*/
private boolean createUnicodeExtraFields = false;
private UnicodeExtraField createUnicodeExtraFields =
UnicodeExtraField.NEVER;

/**
* Whether to fall back to UTF-8 if a name cannot be encoded using
* the specified encoding.
*
* @since Ant 1.8.0
*/
private boolean fallBackToUTF8 = false;

/**
* This is the name/location of where to
@@ -486,7 +497,7 @@ public class Zip extends MatchingTask {
* Whether Unicode extra fields will be created.
* @since Ant 1.8.0
*/
public void setCreateUnicodeExtraFields(boolean b) {
public void setCreateUnicodeExtraFields(UnicodeExtraField b) {
createUnicodeExtraFields = b;
}

@@ -494,10 +505,32 @@ public class Zip extends MatchingTask {
* Whether Unicode extra fields will be created.
* @since Ant 1.8.0
*/
public boolean getCreateUnicodeExtraFields() {
public UnicodeExtraField getCreateUnicodeExtraFields() {
return createUnicodeExtraFields;
}

/**
* Whether to fall back to UTF-8 if a name cannot be encoded using
* the specified encoding.
*
* <p>Defaults to false.</p>
*
* @param b <code>true</code> to fall back to UTF-8 for names the
* specified encoding cannot represent
* @since Ant 1.8.0
*/
public void setFallBackToUTF8(boolean b) {
fallBackToUTF8 = b;
}

/**
* Whether to fall back to UTF-8 if a name cannot be encoded using
* the specified encoding.
*
* @return the current value of the fallback flag
* @since Ant 1.8.0
*/
public boolean getFallBackToUTF8() {
return fallBackToUTF8;
}

/**
* validate and build
* @throws BuildException on error
@@ -587,7 +620,9 @@ public class Zip extends MatchingTask {

zOut.setEncoding(encoding);
zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag);
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields);
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields.
getPolicy());
zOut.setFallbackToUTF8(fallBackToUTF8);
zOut.setMethod(doCompress
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED);
zOut.setLevel(level);
@@ -1881,4 +1916,45 @@ public class Zip extends MatchingTask {
return true;
}
}

/**
* Policy for creation of Unicode extra fields: never, always or
* not-encodeable.
*
* <p>Each accepted attribute value is mapped to the corresponding
* <code>ZipOutputStream.UnicodeExtraFieldPolicy</code> constant,
* which {@link #getPolicy()} returns.</p>
*
* @since Ant 1.8.0
*/
public static final class UnicodeExtraField extends EnumeratedAttribute {
// attribute value (String) -> ZipOutputStream.UnicodeExtraFieldPolicy
private static final Map POLICIES = new HashMap();
private static final String NEVER_KEY = "never";
private static final String ALWAYS_KEY = "always";
private static final String N_E_KEY = "not-encodeable";
static {
// populated before the NEVER instance below is created
POLICIES.put(NEVER_KEY,
ZipOutputStream.UnicodeExtraFieldPolicy.NEVER);
POLICIES.put(ALWAYS_KEY,
ZipOutputStream.UnicodeExtraFieldPolicy.ALWAYS);
POLICIES.put(N_E_KEY,
ZipOutputStream.UnicodeExtraFieldPolicy
.NOT_ENCODEABLE);
}

/**
* The accepted attribute values.
*
* @return a new array holding "never", "always" and
* "not-encodeable"
*/
public String[] getValues() {
return new String[] {NEVER_KEY, ALWAYS_KEY, N_E_KEY};
}

/**
* Instance whose value is preset to "never".
*/
public static final UnicodeExtraField NEVER =
new UnicodeExtraField(NEVER_KEY);

/**
* Creates an instance with the given value already set.
*
* @param name the value to pass to <code>setValue</code>
*/
private UnicodeExtraField(String name) {
setValue(name);
}

/**
* Creates an instance without setting a value; presumably the
* value is supplied later when the attribute is configured
* (TODO confirm against EnumeratedAttribute).
*/
public UnicodeExtraField() {
}

/**
* Translates the current value into the policy understood by
* ZipOutputStream.
*
* @return the policy registered for the current value, or
* <code>null</code> if the current value has no mapping
*/
public ZipOutputStream.UnicodeExtraFieldPolicy getPolicy() {
return (ZipOutputStream.UnicodeExtraFieldPolicy)
POLICIES.get(getValue());
}
}
}

+ 1
- 1
src/main/org/apache/tools/zip/FallbackZipEncoding.java View File

@@ -34,7 +34,7 @@ import java.nio.ByteBuffer;
* given name can be safely encoded or not.</p>
*
* <p>This implementation acts as a last resort implementation, when
* neither {@see Simple8BitZipEnoding} nor {@see NioZipEncoding} is
* neither {@link Simple8BitZipEncoding} nor {@link NioZipEncoding} is
* available.</p>
*
* <p>The methods of this class are reentrant.</p>


+ 1
- 1
src/main/org/apache/tools/zip/Simple8BitZipEncoding.java View File

@@ -77,7 +77,7 @@ class Simple8BitZipEncoding implements ZipEncoding {
private final char[] highChars;

/**
* A list of {@see Simple8BitChar} objects sorted by the unicode
* A list of {@link Simple8BitChar} objects sorted by the unicode
* field. This list is used to binary search reverse mapping of
* unicode characters with a character code greater than 127.
*/


+ 1
- 1
src/main/org/apache/tools/zip/UnicodeCommentExtraField.java View File

@@ -45,7 +45,7 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField {
* Assemble as unicode comment extension from the name given as
* text as well as the encoded bytes actually written to the archive.
*
* @param name The file name
* @param text The file name
* @param bytes the bytes actually written to the archive
* @param off The offset of the encoded comment in <code>bytes</code>.
* @param len The length of the encoded comment or comment in


+ 1
- 1
src/main/org/apache/tools/zip/UnicodePathExtraField.java View File

@@ -45,7 +45,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField {
* Assemble as unicode path extension from the name given as
* text as well as the encoded bytes actually written to the archive.
*
* @param name The file name
* @param text The file name
* @param bytes the bytes actually written to the archive
* @param off The offset of the encoded filename in <code>bytes</code>.
* @param len The length of the encoded filename or comment in


+ 1
- 1
src/main/org/apache/tools/zip/ZipEncodingHelper.java View File

@@ -51,7 +51,7 @@ abstract class ZipEncodingHelper {
}

/**
* @return The associated {@see Simple8BitZipEncoding}, which
* @return The associated {@link Simple8BitZipEncoding}, which
* is instantiated if not done so far.
*/
public synchronized Simple8BitZipEncoding getEncoding() {


+ 1
- 1
src/main/org/apache/tools/zip/ZipLong.java View File

@@ -150,7 +150,7 @@ public final class ZipLong implements Cloneable {

public Object clone() {
try {
return (ZipLong) super.clone();
return super.clone();
} catch (CloneNotSupportedException cnfe) {
// impossible
throw new RuntimeException(cnfe);


+ 88
- 17
src/main/org/apache/tools/zip/ZipOutputStream.java View File

@@ -269,10 +269,16 @@ public class ZipOutputStream extends FilterOutputStream {
*/
private boolean useEFS = true;

/**
* Whether to encode non-encodable file names as UTF-8.
*/
private boolean fallbackToUTF8 = false;

/**
* whether to create UnicodePathExtraField-s for each entry.
*/
private boolean createUnicodeExtraFields = false;
private UnicodeExtraFieldPolicy createUnicodeExtraFields =
UnicodeExtraFieldPolicy.NEVER;

/**
* Creates a new ZIP OutputStream filtering the underlying stream.
@@ -360,14 +366,24 @@ public class ZipOutputStream extends FilterOutputStream {
}

/**
* Whether to create Unicode Extra Fields for all entries.
* Whether to create Unicode Extra Fields.
*
* <p>Defaults to false.</p>
* <p>Defaults to NEVER.</p>
*/
public void setCreateUnicodeExtraFields(boolean b) {
public void setCreateUnicodeExtraFields(UnicodeExtraFieldPolicy b) {
createUnicodeExtraFields = b;
}

/**
* Whether to fall back to UTF-8 and the language encoding flag if
* the file name cannot be encoded using the specified encoding.
*
* <p>Defaults to false.</p>
*
* @param b <code>true</code> to write non-encodable file names as
* UTF-8
*/
public void setFallbackToUTF8(boolean b) {
fallbackToUTF8 = b;
}

/**
* Finishes writing the contents and closes this as well as the
* underlying stream.
@@ -665,31 +681,38 @@ public class ZipOutputStream extends FilterOutputStream {
*/
protected void writeLocalFileHeader(ZipEntry ze) throws IOException {

boolean encodable = this.zipEncoding.canEncode(ze.getName());
ByteBuffer name = this.zipEncoding.encode(ze.getName());
boolean encodable = zipEncoding.canEncode(ze.getName());
ByteBuffer name;
if (!encodable && fallbackToUTF8) {
name = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(ze.getName());
} else {
name = zipEncoding.encode(ze.getName());
}

if (createUnicodeExtraFields) {
if (createUnicodeExtraFields != UnicodeExtraFieldPolicy.NEVER) {

/* if (!encodable) { -- FIXME decide what to*/
if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS
|| !encodable) {
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
name.array(),
name.arrayOffset(),
name.limit()));
/* } */
}

String comm = ze.getComment();
if (comm != null && !"".equals(comm)) {

boolean commentEncodable = this.zipEncoding.canEncode(comm);

/* if (!commentEncodable) { -- FIXME decide what to*/
if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS
|| !commentEncodable) {
ByteBuffer commentB = this.zipEncoding.encode(comm);
ze.addExtraField(new UnicodeCommentExtraField(comm,
commentB.array(),
commentB.arrayOffset(),
commentB.limit())
);
/* } */
}
}
}

@@ -701,7 +724,9 @@ public class ZipOutputStream extends FilterOutputStream {
//store method in local variable to prevent multiple method calls
final int zipMethod = ze.getMethod();

writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod);
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod,
!encodable
&& fallbackToUTF8);
written += WORD;

// compression method
@@ -786,7 +811,10 @@ public class ZipOutputStream extends FilterOutputStream {
written += SHORT;

final int zipMethod = ze.getMethod();
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod);
final boolean encodable = zipEncoding.canEncode(ze.getName());
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod,
!encodable
&& fallbackToUTF8);
written += WORD;

// compression method
@@ -808,7 +836,12 @@ public class ZipOutputStream extends FilterOutputStream {
// CheckStyle:MagicNumber ON

// file name length
ByteBuffer name = this.zipEncoding.encode(ze.getName());
ByteBuffer name;
if (!encodable && fallbackToUTF8) {
name = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(ze.getName());
} else {
name = zipEncoding.encode(ze.getName());
}
writeOut(ZipShort.getBytes(name.limit()));
written += SHORT;

@@ -822,7 +855,12 @@ public class ZipOutputStream extends FilterOutputStream {
if (comm == null) {
comm = "";
}
ByteBuffer commentB = this.zipEncoding.encode(comm);
ByteBuffer commentB;
if (!encodable && fallbackToUTF8) {
commentB = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(comm);
} else {
commentB = zipEncoding.encode(comm);
}
writeOut(ZipShort.getBytes(commentB.limit()));
written += SHORT;

@@ -1000,12 +1038,14 @@ public class ZipOutputStream extends FilterOutputStream {
}

private void writeVersionNeededToExtractAndGeneralPurposeBits(final int
zipMethod)
zipMethod,
final boolean
utfFallback)
throws IOException {

// CheckStyle:MagicNumber OFF
int versionNeededToExtract = 10;
int generalPurposeFlag = useEFS ? EFS_FLAG : 0;
int generalPurposeFlag = (useEFS || utfFallback) ? EFS_FLAG : 0;
if (zipMethod == DEFLATED && raf == null) {
// requires version 2 as we are going to store length info
// in the data descriptor
@@ -1020,4 +1060,35 @@ public class ZipOutputStream extends FilterOutputStream {
// general purpose bit flag
writeOut(ZipShort.getBytes(generalPurposeFlag));
}

/**
* Type-safe enumeration that represents the possible policies for
* creating Unicode extra fields.
*
* <p>The constructor is private, so the three constants declared
* here are the only instances.</p>
*/
public static final class UnicodeExtraFieldPolicy {
/**
* Always create Unicode extra fields.
*/
public static final UnicodeExtraFieldPolicy ALWAYS =
new UnicodeExtraFieldPolicy("always");
/**
* Never create Unicode extra fields.
*/
public static final UnicodeExtraFieldPolicy NEVER =
new UnicodeExtraFieldPolicy("never");
/**
* Create Unicode extra fields for filenames that cannot be
* encoded using the specified encoding.
*/
public static final UnicodeExtraFieldPolicy NOT_ENCODEABLE =
new UnicodeExtraFieldPolicy("not encodeable");

// human-readable name, returned by toString()
private final String name;
/**
* Creates a policy constant.
*
* @param n the name returned by {@link #toString()}
*/
private UnicodeExtraFieldPolicy(String n) {
name = n;
}
/**
* Returns the name this constant was created with.
*
* @return "always", "never" or "not encodeable"
*/
public String toString() {
return name;
}
}
}

+ 1
- 1
src/main/org/apache/tools/zip/ZipShort.java View File

@@ -136,7 +136,7 @@ public final class ZipShort implements Cloneable {

public Object clone() {
try {
return (ZipShort) super.clone();
return super.clone();
} catch (CloneNotSupportedException cnfe) {
// impossible
throw new RuntimeException(cnfe);


+ 5
- 1
src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java View File

@@ -122,7 +122,11 @@ public class UTF8ZipFilesTest extends TestCase {
zos = new ZipOutputStream(file);
zos.setEncoding(encoding);
zos.setUseLanguageEncodingFlag(withEFS);
zos.setCreateUnicodeExtraFields(!withExplicitUnicodeExtra);
zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ?
ZipOutputStream
.UnicodeExtraFieldPolicy.NEVER
: ZipOutputStream
.UnicodeExtraFieldPolicy.ALWAYS);

ZipEntry ze = new ZipEntry(OIL_BARREL_TXT);
if (withExplicitUnicodeExtra


Loading…
Cancel
Save