diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index f33ce0e3b..4326f26bf 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -282,6 +282,7 @@ Steve Morin
Steve Wadsworth
Steven E. Newton
Takashi Okamoto
+TAMURA Kent
Taoufik Romdhane
Tariq Master
Thomas Aglassinger
diff --git a/WHATSNEW b/WHATSNEW
index d814cd177..3cc065876 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -351,6 +351,11 @@ Fixed bugs:
* The zip package now supports the extra fields invented by InfoZIP
in order to store Unicode file names and comments.
+ * The zip package detects the encoding bit set by more modern
+ archivers when they write UTF-8 filenames and optionally sets it
+ when writing zips or jars.
+ Bugzilla Report 45548
+
Other changes:
--------------
* A HostInfo task was added performing information on hosts, including info on
diff --git a/contributors.xml b/contributors.xml
index 18292d5ec..299d1db5d 100644
--- a/contributors.xml
+++ b/contributors.xml
@@ -1146,6 +1146,10 @@
Takashi
Okamoto
+
+ TAMURA
+ Kent
+
Taoufik
Romdhane
diff --git a/src/main/org/apache/tools/zip/ZipFile.java b/src/main/org/apache/tools/zip/ZipFile.java
index d59e8fa93..44a160de0 100644
--- a/src/main/org/apache/tools/zip/ZipFile.java
+++ b/src/main/org/apache/tools/zip/ZipFile.java
@@ -294,7 +294,15 @@ public class ZipFile {
off += SHORT;
ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
- off += WORD; // skip version info and general purpose byte
+ off += SHORT; // skip version info
+
+ final int generalPurposeFlag = ZipShort.getValue(cfh, off);
+ final String entryEncoding =
+ (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0
+ ? ZipOutputStream.UTF8
+ : encoding;
+
+ off += SHORT;
ze.setMethod(ZipShort.getValue(cfh, off));
off += SHORT;
@@ -334,8 +342,7 @@ public class ZipFile {
byte[] fileName = new byte[fileNameLen];
archive.readFully(fileName);
- ze.setName(getString(fileName));
-
+ ze.setName(getString(fileName, entryEncoding));
// LFH offset,
OffsetEntry offset = new OffsetEntry();
@@ -357,7 +364,7 @@ public class ZipFile {
byte[] comment = new byte[commentLen];
archive.readFully(comment);
- ze.setComment(getString(comment));
+ ze.setComment(getString(comment, entryEncoding));
archive.readFully(signatureBytes);
sig = ZipLong.getValue(signatureBytes);
@@ -527,11 +534,24 @@ public class ZipFile {
* @throws ZipException if the encoding cannot be recognized.
*/
protected String getString(byte[] bytes) throws ZipException {
- if (encoding == null) {
+ return getString(bytes, encoding);
+ }
+
+ /**
+ * Retrieve a String from the given bytes using the given encoding.
+ *
+ * @param bytes the byte array to transform
+ * @param enc the encoding to use, platform's default if null
+ * @return String obtained by using the given encoding
+ * @throws ZipException if the encoding cannot be recognized.
+ */
+ protected String getString(byte[] bytes, String enc)
+ throws ZipException {
+ if (enc == null) {
return new String(bytes);
} else {
try {
- return new String(bytes, encoding);
+ return new String(bytes, enc);
} catch (UnsupportedEncodingException uee) {
throw new ZipException(uee.getMessage());
}
diff --git a/src/main/org/apache/tools/zip/ZipOutputStream.java b/src/main/org/apache/tools/zip/ZipOutputStream.java
index aa69efd23..4c479c17c 100644
--- a/src/main/org/apache/tools/zip/ZipOutputStream.java
+++ b/src/main/org/apache/tools/zip/ZipOutputStream.java
@@ -91,6 +91,17 @@ public class ZipOutputStream extends FilterOutputStream {
*/
public static final int STORED = java.util.zip.ZipEntry.STORED;
+ /**
+ * name of the encoding UTF-8
+ */
+ static final String UTF8 = "UTF8";
+
+ /**
+ * General purpose flag, which indicates that filenames are
+ * written in utf-8.
+ */
+ public static final int EFS_FLAG = 1 << 11;
+
/**
* Current entry.
*
@@ -244,6 +255,11 @@ public class ZipOutputStream extends FilterOutputStream {
*/
private RandomAccessFile raf = null;
+ /**
+ * whether to use the EFS flag when writing UTF-8 filenames or not.
+ */
+ private boolean useEFS = true;
+
/**
* Creates a new ZIP OutputStream filtering the underlying stream.
* @param out the outputstream to zip
@@ -302,8 +318,9 @@ public class ZipOutputStream extends FilterOutputStream {
* @param encoding the encoding value
* @since 1.3
*/
- public void setEncoding(String encoding) {
+ public void setEncoding(final String encoding) {
this.encoding = encoding;
+ useEFS &= isUTF8(encoding);
}
/**
@@ -317,6 +334,15 @@ public class ZipOutputStream extends FilterOutputStream {
return encoding;
}
+ /**
+ * Whether to set the EFS flag if the file name encoding is UTF-8.
+ * Defaults to true.
+ * @param b whether to set the flag, ignored if the encoding isn't UTF-8
+ */
+ public void setUseEFS(boolean b) {
+ useEFS = b && isUTF8(encoding);
+ }
+
/**
* Finishs writing the contents and closes this as well as the
* underlying stream.
@@ -620,21 +646,7 @@ public class ZipOutputStream extends FilterOutputStream {
//store method in local variable to prevent multiple method calls
final int zipMethod = ze.getMethod();
- // version needed to extract
- // general purpose bit flag
- // CheckStyle:MagicNumber OFF
- if (zipMethod == DEFLATED && raf == null) {
- // requires version 2 as we are going to store length info
- // in the data descriptor
- writeOut(ZipShort.getBytes(20));
-
- // bit3 set to signal, we use a data descriptor
- writeOut(ZipShort.getBytes(8));
- } else {
- writeOut(ZipShort.getBytes(10));
- writeOut(ZERO);
- }
- // CheckStyle:MagicNumber ON
+ writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod);
written += WORD;
// compression method
@@ -719,24 +731,12 @@ public class ZipOutputStream extends FilterOutputStream {
writeOut(ZipShort.getBytes((ze.getPlatform() << 8) | 20));
written += SHORT;
- // version needed to extract
- // general purpose bit flag
- if (ze.getMethod() == DEFLATED && raf == null) {
- // requires version 2 as we are going to store length info
- // in the data descriptor
- writeOut(ZipShort.getBytes(20));
-
- // bit3 set to signal, we use a data descriptor
- writeOut(ZipShort.getBytes(8));
- } else {
- writeOut(ZipShort.getBytes(10));
- writeOut(ZERO);
- }
- // CheckStyle:MagicNumber ON
+ final int zipMethod = ze.getMethod();
+ writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod);
written += WORD;
// compression method
- writeOut(ZipShort.getBytes(ze.getMethod()));
+ writeOut(ZipShort.getBytes(zipMethod));
written += SHORT;
// last mod. time and date
@@ -886,11 +886,17 @@ public class ZipOutputStream extends FilterOutputStream {
if (encoding == null) {
return name.getBytes();
} else {
+ try {
+ return ZipEncodingHelper.encodeName(name, encoding);
+ } catch (java.nio.charset.UnsupportedCharsetException ex) {
+ // Java 1.4's NIO doesn't recognize a few names that
+ // String.getBytes does
try {
return name.getBytes(encoding);
} catch (UnsupportedEncodingException uee) {
throw new ZipException(uee.getMessage());
}
+ }
}
}
@@ -944,4 +950,38 @@ public class ZipOutputStream extends FilterOutputStream {
}
}
+    /**
+     * Tells whether an encoding name denotes UTF-8; a null name stands
+     * for the platform's default encoding (file.encoding property).
+     */
+    static boolean isUTF8(String encoding) {
+        // fall back to the platform's default encoding for null
+        final String name = encoding != null
+            ? encoding
+            : System.getProperty("file.encoding");
+        return "utf-8".equalsIgnoreCase(name)
+            || UTF8.equalsIgnoreCase(name);
+    }
+
+    /** Writes version needed to extract and the general purpose bit flag. */
+    private void writeVersionNeededToExtractAndGeneralPurposeBits(final int
+                                                                  zipMethod)
+        throws IOException {
+        // CheckStyle:MagicNumber OFF
+        int versionNeededToExtract = 10;
+        // useEFS starts out true, so re-check that names really are UTF-8
+        int generalPurposeFlag = (useEFS && isUTF8(encoding)) ? EFS_FLAG : 0;
+        if (zipMethod == DEFLATED && raf == null) {
+            // requires version 2 as we are going to store length info
+            // in the data descriptor
+            versionNeededToExtract = 20;
+            // bit3 set to signal, we use a data descriptor
+            generalPurposeFlag |= 8;
+        }
+        // CheckStyle:MagicNumber ON
+        // version needed to extract
+        writeOut(ZipShort.getBytes(versionNeededToExtract));
+        // general purpose bit flag
+        writeOut(ZipShort.getBytes(generalPurposeFlag));
+    }
}