From 3cb22aa1c607dcc42ccdf3249be2c26e3585c6f5 Mon Sep 17 00:00:00 2001 From: Stefan Bodewig Date: Thu, 19 Feb 2009 17:19:23 +0000 Subject: [PATCH] Support more modern encoding flag where archives signal filenames as UTF-8. Based on submissions by Wolfgang Glas to commons-compress and TAMURA Kent to Ant. PR 45548. git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@745930 13f79535-47bb-0310-9956-ffa450edef68 --- CONTRIBUTORS | 1 + WHATSNEW | 5 + contributors.xml | 4 + src/main/org/apache/tools/zip/ZipFile.java | 32 ++++-- .../org/apache/tools/zip/ZipOutputStream.java | 102 ++++++++++++------ 5 files changed, 107 insertions(+), 37 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index f33ce0e3b..4326f26bf 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -282,6 +282,7 @@ Steve Morin Steve Wadsworth Steven E. Newton Takashi Okamoto +TAMURA Kent Taoufik Romdhane Tariq Master Thomas Aglassinger diff --git a/WHATSNEW b/WHATSNEW index d814cd177..3cc065876 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -351,6 +351,11 @@ Fixed bugs: * The zip package now supports the extra fields invented by InfoZIP in order to store Unicode file names and comments. + * The zip package detects the encoding bit set by more modern + archivers when they write UTF-8 filenames and optionally sets it + when writing zips or jars. 
+ Bugzilla Report 45548 + Other changes: -------------- * A HostInfo task was added performing information on hosts, including info on diff --git a/contributors.xml b/contributors.xml index 18292d5ec..299d1db5d 100644 --- a/contributors.xml +++ b/contributors.xml @@ -1146,6 +1146,10 @@ Takashi Okamoto + + TAMURA + Kent + Taoufik Romdhane diff --git a/src/main/org/apache/tools/zip/ZipFile.java b/src/main/org/apache/tools/zip/ZipFile.java index d59e8fa93..44a160de0 100644 --- a/src/main/org/apache/tools/zip/ZipFile.java +++ b/src/main/org/apache/tools/zip/ZipFile.java @@ -294,7 +294,15 @@ public class ZipFile { off += SHORT; ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); - off += WORD; // skip version info and general purpose byte + off += SHORT; // skip version info + + final int generalPurposeFlag = ZipShort.getValue(cfh, off); + final String entryEncoding = + (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0 + ? ZipOutputStream.UTF8 + : encoding; + + off += SHORT; ze.setMethod(ZipShort.getValue(cfh, off)); off += SHORT; @@ -334,8 +342,7 @@ public class ZipFile { byte[] fileName = new byte[fileNameLen]; archive.readFully(fileName); - ze.setName(getString(fileName)); - + ze.setName(getString(fileName, entryEncoding)); // LFH offset, OffsetEntry offset = new OffsetEntry(); @@ -357,7 +364,7 @@ public class ZipFile { byte[] comment = new byte[commentLen]; archive.readFully(comment); - ze.setComment(getString(comment)); + ze.setComment(getString(comment, entryEncoding)); archive.readFully(signatureBytes); sig = ZipLong.getValue(signatureBytes); @@ -527,11 +534,24 @@ public class ZipFile { * @throws ZipException if the encoding cannot be recognized. */ protected String getString(byte[] bytes) throws ZipException { - if (encoding == null) { + return getString(bytes, encoding); + } + + /** + * Retrieve a String from the given bytes using the encoding set + * for this ZipFile. 
+ * + * @param bytes the byte array to transform + * @return String obtained by using the given encoding + * @throws ZipException if the encoding cannot be recognized. + */ + protected String getString(byte[] bytes, String enc) + throws ZipException { + if (enc == null) { return new String(bytes); } else { try { - return new String(bytes, encoding); + return new String(bytes, enc); } catch (UnsupportedEncodingException uee) { throw new ZipException(uee.getMessage()); } diff --git a/src/main/org/apache/tools/zip/ZipOutputStream.java b/src/main/org/apache/tools/zip/ZipOutputStream.java index aa69efd23..4c479c17c 100644 --- a/src/main/org/apache/tools/zip/ZipOutputStream.java +++ b/src/main/org/apache/tools/zip/ZipOutputStream.java @@ -91,6 +91,17 @@ public class ZipOutputStream extends FilterOutputStream { */ public static final int STORED = java.util.zip.ZipEntry.STORED; + /** + * name of the encoding UTF-8 + */ + static final String UTF8 = "UTF8"; + + /** + * General purpose flag, which indicates that filenames are + * written in utf-8. + */ + public static final int EFS_FLAG = 1 << 11; + /** * Current entry. * @@ -244,6 +255,11 @@ public class ZipOutputStream extends FilterOutputStream { */ private RandomAccessFile raf = null; + /** + * whether to use the EFS flag when writing UTF-8 filenames or not. + */ + private boolean useEFS = true; + /** * Creates a new ZIP OutputStream filtering the underlying stream. * @param out the outputstream to zip @@ -302,8 +318,9 @@ public class ZipOutputStream extends FilterOutputStream { * @param encoding the encoding value * @since 1.3 */ - public void setEncoding(String encoding) { + public void setEncoding(final String encoding) { this.encoding = encoding; + useEFS &= isUTF8(encoding); } /** @@ -317,6 +334,15 @@ public class ZipOutputStream extends FilterOutputStream { return encoding; } + /** + * Whether to set the EFS flag if the file name encoding is UTF-8. + * + *

<p>Defaults to true.</p>

+ */ + public void setUseEFS(boolean b) { + useEFS = b && isUTF8(encoding); + } + /** * Finishs writing the contents and closes this as well as the * underlying stream. @@ -620,21 +646,7 @@ public class ZipOutputStream extends FilterOutputStream { //store method in local variable to prevent multiple method calls final int zipMethod = ze.getMethod(); - // version needed to extract - // general purpose bit flag - // CheckStyle:MagicNumber OFF - if (zipMethod == DEFLATED && raf == null) { - // requires version 2 as we are going to store length info - // in the data descriptor - writeOut(ZipShort.getBytes(20)); - - // bit3 set to signal, we use a data descriptor - writeOut(ZipShort.getBytes(8)); - } else { - writeOut(ZipShort.getBytes(10)); - writeOut(ZERO); - } - // CheckStyle:MagicNumber ON + writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod); written += WORD; // compression method @@ -719,24 +731,12 @@ public class ZipOutputStream extends FilterOutputStream { writeOut(ZipShort.getBytes((ze.getPlatform() << 8) | 20)); written += SHORT; - // version needed to extract - // general purpose bit flag - if (ze.getMethod() == DEFLATED && raf == null) { - // requires version 2 as we are going to store length info - // in the data descriptor - writeOut(ZipShort.getBytes(20)); - - // bit3 set to signal, we use a data descriptor - writeOut(ZipShort.getBytes(8)); - } else { - writeOut(ZipShort.getBytes(10)); - writeOut(ZERO); - } - // CheckStyle:MagicNumber ON + final int zipMethod = ze.getMethod(); + writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod); written += WORD; // compression method - writeOut(ZipShort.getBytes(ze.getMethod())); + writeOut(ZipShort.getBytes(zipMethod)); written += SHORT; // last mod. 
time and date @@ -886,11 +886,17 @@ public class ZipOutputStream extends FilterOutputStream { if (encoding == null) { return name.getBytes(); } else { + try { + return ZipEncodingHelper.encodeName(name, encoding); + } catch (java.nio.charset.UnsupportedCharsetException ex) { + // Java 1.4's NIO doesn't recognize a few names that + // String.getBytes does try { return name.getBytes(encoding); } catch (UnsupportedEncodingException uee) { throw new ZipException(uee.getMessage()); } + } } } @@ -944,4 +950,38 @@ public class ZipOutputStream extends FilterOutputStream { } } + /** + * Whether a given encoding - or the platform's default encoding + * if the parameter is null - is UTF-8. + */ + static boolean isUTF8(String encoding) { + if (encoding == null) { + // check platform's default encoding + encoding = System.getProperty("file.encoding"); + } + return UTF8.equalsIgnoreCase(encoding) + || "utf-8".equalsIgnoreCase(encoding); + } + + private void writeVersionNeededToExtractAndGeneralPurposeBits(final int + zipMethod) + throws IOException { + + // CheckStyle:MagicNumber OFF + int versionNeededToExtract = 10; + int generalPurposeFlag = useEFS ? EFS_FLAG : 0; + if (zipMethod == DEFLATED && raf == null) { + // requires version 2 as we are going to store length info + // in the data descriptor + versionNeededToExtract = 20; + // bit3 set to signal, we use a data descriptor + generalPurposeFlag |= 8; + } + // CheckStyle:MagicNumber ON + + // version needed to extract + writeOut(ZipShort.getBytes(versionNeededToExtract)); + // general purpose bit flag + writeOut(ZipShort.getBytes(generalPurposeFlag)); + } }