From 752d64fa43db2172327771ba4d36bee3f2d3b49a Mon Sep 17 00:00:00 2001
From: Stefan Bodewig <bodewig@apache.org>
Date: Mon, 2 Mar 2009 17:17:09 +0000
Subject: [PATCH] improved zip-encoding support for JDK < 1.5, submitted by
 Wolfgang Glas, merge from commons-compress

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@749368 13f79535-47bb-0310-9956-ffa450edef68
---
 .../tools/zip/AbstractUnicodeExtraField.java  |  36 ++-
 .../apache/tools/zip/FallbackZipEncoding.java |  94 ++++++
 .../org/apache/tools/zip/NioZipEncoding.java  | 122 ++++++++
 .../tools/zip/Simple8BitZipEncoding.java      | 261 +++++++++++++++++
 .../tools/zip/UnicodeCommentExtraField.java   |  17 +-
 .../tools/zip/UnicodePathExtraField.java      |  16 +-
 .../org/apache/tools/zip/ZipEncoding.java     |  85 ++++++
 .../apache/tools/zip/ZipEncodingHelper.java   | 274 +++++++++++-------
 src/main/org/apache/tools/zip/ZipFile.java    |  67 ++---
 .../org/apache/tools/zip/ZipOutputStream.java | 123 ++++----
 .../apache/tools/zip/UTF8ZipFilesTest.java    | 144 ++++++---
 .../org/apache/tools/zip/ZipEncodingTest.java | 147 ++++++++++
 12 files changed, 1097 insertions(+), 289 deletions(-)
 create mode 100644 src/main/org/apache/tools/zip/FallbackZipEncoding.java
 create mode 100644 src/main/org/apache/tools/zip/NioZipEncoding.java
 create mode 100644 src/main/org/apache/tools/zip/Simple8BitZipEncoding.java
 create mode 100644 src/main/org/apache/tools/zip/ZipEncoding.java
 create mode 100644 src/tests/junit/org/apache/tools/zip/ZipEncodingTest.java
diff --git a/src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java b/src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
index 49d998bcf..3ba2e0ea5 100644
--- a/src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
+++ b/src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
@@ -38,11 +38,25 @@ public abstract class AbstractUnicodeExtraField implements ZipExtraField {
      * encoding of the orginal zip entry.
      * 
      * @param text The file name or comment.
-     * @param zipEncoding The encoding of the filenames in the zip
-     * file, usually <code>"CP437"</code>.
+     * @param bytes The encoded bytes of the filename or comment in the zip
+     * file.
+     * @param off The offset of the encoded filename or comment in
+     * <code>bytes</code>.
+     * @param len The length of the encoded filename or comment in
+     * <code>bytes</code>.
      */
-    protected AbstractUnicodeExtraField(String text, String zipEncoding) {
-        this(text, ZipEncodingHelper.encodeName(text, zipEncoding));
+    protected AbstractUnicodeExtraField(String text, byte[] bytes, int off,
+                                        int len) {
+        CRC32 crc32 = new CRC32();
+        crc32.update(bytes, off, len);
+        nameCRC32 = crc32.getValue();
+
+        try {
+            unicodeName = text.getBytes("UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
+                                       e);
+        }
     }
 
     /**
@@ -50,20 +64,12 @@ public abstract class AbstractUnicodeExtraField implements ZipExtraField {
      * encoding of the orginal zip entry.
      * 
      * @param text The file name or comment.
-     * @param zipEncoding The encoding of the filenames in the zip
-     * file, usually <code>"CP437"</code>.
+     * @param bytes The encoded bytes of the filename or comment in the zip
+     * file.
      */
     protected AbstractUnicodeExtraField(String text, byte[] bytes) {
-        CRC32 crc32 = new CRC32();
-        crc32.update(bytes);
-        nameCRC32 = crc32.getValue();
 
-        try {
-            unicodeName = text.getBytes("UTF-8");
-        } catch (UnsupportedEncodingException e) {
-            throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
-                                       e);
-        }
+        this(text, bytes, 0, bytes.length);
     }
 
     private void assembleData() {
diff --git a/src/main/org/apache/tools/zip/FallbackZipEncoding.java b/src/main/org/apache/tools/zip/FallbackZipEncoding.java
new file mode 100644
index 000000000..4c2fe8500
--- /dev/null
+++ b/src/main/org/apache/tools/zip/FallbackZipEncoding.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tools.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * A fallback ZipEncoding, which uses a java.io means to encode names.
+ *
+ * <p>This implementation is not favorable for encodings other than
+ * utf-8, because java.io encodes unmappable characters as question
+ * marks leading to unreadable ZIP entries on some operating
+ * systems.</p>
+ * 
+ * <p>Furthermore this implementation is unable to tell, whether a
+ * given name can be safely encoded or not.</p>
+ * 
+ * <p>This implementation acts as a last resort implementation, when
+ * neither {@link Simple8BitZipEncoding} nor {@link NioZipEncoding} is
+ * available.</p>
+ * 
+ * <p>The methods of this class are reentrant.</p>
+ */
+class FallbackZipEncoding implements ZipEncoding {
+    private final String charset;
+
+    /**
+     * Construct a fallback zip encoding, which uses the platform's
+     * default charset.
+     */
+    public FallbackZipEncoding() {
+        this.charset = null;
+    }
+
+    /**
+     * Construct a fallback zip encoding, which uses the given charset.
+     * 
+     * @param charset The name of the charset or <code>null</code> for
+     *                the platform's default character set.
+     */
+    public FallbackZipEncoding(String charset) {
+        this.charset = charset;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#canEncode(java.lang.String)
+     */
+    public boolean canEncode(String name) {
+        return true;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#encode(java.lang.String)
+     */
+    public ByteBuffer encode(String name) throws IOException {
+        if (this.charset == null) {
+            return ByteBuffer.wrap(name.getBytes());
+        } else {
+            return ByteBuffer.wrap(name.getBytes(this.charset));
+        }
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#decode(byte[])
+     */
+    public String decode(byte[] data) throws IOException {
+        if (this.charset == null) {
+            return new String(data);
+        } else {
+            return new String(data,this.charset);
+        }
+    }
+}
diff --git a/src/main/org/apache/tools/zip/NioZipEncoding.java b/src/main/org/apache/tools/zip/NioZipEncoding.java
new file mode 100644
index 000000000..a6870ca8f
--- /dev/null
+++ b/src/main/org/apache/tools/zip/NioZipEncoding.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tools.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * A ZipEncoding, which uses a java.nio {@link
+ * java.nio.charset.Charset Charset} to encode names.
+ *
+ * <p>This implementation works for all cases under java-1.5 or
+ * later. However, in java-1.4, some charsets don't have a java-nio
+ * implementation, most notably the default ZIP encoding Cp437.</p>
+ * 
+ * <p>The methods of this class are reentrant.</p>
+ */
+class NioZipEncoding implements ZipEncoding {
+    private final Charset charset;
+
+    /**
+     * Construct an NIO based zip encoding, which wraps the given
+     * charset.
+     * 
+     * @param charset The NIO charset to wrap.
+     */
+    public NioZipEncoding(Charset charset) {
+        this.charset = charset;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#canEncode(java.lang.String)
+     */
+    public boolean canEncode(String name) {
+        CharsetEncoder enc = this.charset.newEncoder();
+        enc.onMalformedInput(CodingErrorAction.REPORT);
+        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+        return enc.canEncode(name);
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#encode(java.lang.String)
+     */
+    public ByteBuffer encode(String name) {
+        CharsetEncoder enc = this.charset.newEncoder();
+
+        enc.onMalformedInput(CodingErrorAction.REPORT);
+        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+        CharBuffer cb = CharBuffer.wrap(name);
+        ByteBuffer out = ByteBuffer.allocate(name.length()
+                                             + (name.length() + 1) / 2);
+
+        while (cb.remaining() > 0) {
+            CoderResult res = enc.encode(cb, out,true);
+
+            if (res.isUnmappable() || res.isMalformed()) {
+
+                // write the unmappable characters in utf-16
+                // pseudo-URL encoding style to ByteBuffer.
+                if (res.length() * 6 > out.remaining()) {
+                    out = ZipEncodingHelper.growBuffer(out, out.position()
+                                                       + res.length() * 6);
+                }
+
+                for (int i=0; i<res.length(); ++i) {
+                    ZipEncodingHelper.appendSurrogate(out,cb.get());
+                }
+
+            } else if (res.isOverflow()) {
+
+                out = ZipEncodingHelper.growBuffer(out, 0);
+
+            } else if (res.isUnderflow()) {
+
+                enc.flush(out);
+                break;
+
+            }
+        }
+
+        out.limit(out.position());
+        out.rewind();
+        return out;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#decode(byte[])
+     */
+    public String decode(byte[] data) throws IOException {
+        return this.charset.newDecoder()
+            .onMalformedInput(CodingErrorAction.REPORT)
+            .onUnmappableCharacter(CodingErrorAction.REPORT)
+            .decode(ByteBuffer.wrap(data)).toString();
+    }
+}
diff --git a/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java b/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java
new file mode 100644
index 000000000..a845e763a
--- /dev/null
+++ b/src/main/org/apache/tools/zip/Simple8BitZipEncoding.java
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tools.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This ZipEncoding implementation implements a simple 8bit character
+ * set, which meets the following restrictions:
+ * 
+ * <ul>
+ * <li>Characters 0x0000 to 0x007f are encoded as the corresponding
+ *        byte values 0x00 to 0x7f.</li>
+ * <li>All byte codes from 0x80 to 0xff are mapped to a unique unicode
+ *       character in the range 0x0080 to 0x7fff. (No support for
+ *       UTF-16 surrogates)</li>
+ * </ul>
+ * 
+ * <p>These restrictions most notably apply to the most prominent
+ * omissions of java-1.4's {@link java.nio.charset.Charset Charset}
+ * implementation, Cp437 and Cp850.</p>
+ * 
+ * <p>The methods of this class are reentrant.</p>
+ */
+class Simple8BitZipEncoding implements ZipEncoding {
+
+    /**
+     * A character entity, which is put to the reverse mapping table
+     * of a simple encoding.
+     */
+    private static final class Simple8BitChar implements Comparable {
+        public final char unicode;
+        public final byte code;
+
+        Simple8BitChar(byte code, char unicode) {
+            this.code = code;
+            this.unicode = unicode;
+        }
+
+        public int compareTo(Object o) {
+            Simple8BitChar a = (Simple8BitChar) o;
+
+            return this.unicode - a.unicode;
+        }
+
+        public String toString() {
+            return "0x" + Integer.toHexString(0xffff & (int) unicode)
+                + "->0x" + Integer.toHexString(0xff & (int) code);
+        }
+    }
+
+    /**
+     * The characters for byte values of 128 to 255 stored as an array of
+     * 128 chars.
+     */
+    private final char[] highChars;
+
+    /**
+     * A list of {@link Simple8BitChar} objects sorted by the unicode
+     * field.  This list is used to binary search reverse mapping of
+     * unicode characters with a character code greater than 127.
+     */
+    private final List reverseMapping;
+
+    /**
+     * @param highChars The characters for byte values of 128 to 255
+     * stored as an array of 128 chars.
+     */
+    public Simple8BitZipEncoding(char[] highChars) {
+        this.highChars = highChars;
+        this.reverseMapping = new ArrayList(this.highChars.length);
+
+        byte code = 127;
+
+        for (int i = 0; i < this.highChars.length; ++i) {
+            this.reverseMapping.add(new Simple8BitChar(++code,
+                                                       this.highChars[i]));
+        }
+
+        Collections.sort(this.reverseMapping);
+    }
+
+    /**
+     * Return the character code for a given encoded byte.
+     * 
+     * @param b The byte to decode.
+     * @return The associated character value.
+     */
+    public char decodeByte(byte b) {
+        // code 0-127
+        if (b >= 0) {
+            return (char) b;
+        }
+
+        // byte is signed, so 128 == -128 and 255 == -1
+        return this.highChars[128 + (int) b];
+    }
+
+    /**
+     * @param c The character to encode.
+     * @return Whether the given unicode character is covered by this encoding.
+     */
+    public boolean canEncodeChar(char c) {
+
+        if (c >= 0 && c < 128) {
+            return true;
+        }
+
+        Simple8BitChar r = this.encodeHighChar(c);
+        return r != null;
+    }
+
+    /**
+     * Pushes the encoded form of the given character to the given byte buffer.
+     * 
+     * @param bb The byte buffer to write to.
+     * @param c The character to encode.
+     * @return Whether the given unicode character is covered by this encoding.
+     *         If <code>false</code> is returned, nothing is pushed to the
+     *         byte buffer. 
+     */
+    public boolean pushEncodedChar(ByteBuffer bb, char c) {
+
+        if (c >= 0 && c < 128) {
+            bb.put((byte) c);
+            return true;
+        }
+
+        Simple8BitChar r = this.encodeHighChar(c);
+        if (r == null) {
+            return false;
+        }
+        bb.put(r.code);
+        return true;
+    }
+
+    /**
+     * @param c A unicode character in the range from 0x0080 to 0x7fff
+     * @return A Simple8BitChar, if this character is covered by this encoding.
+     *         A <code>null</code> value is returned, if this character is not
+     *         covered by this encoding.
+     */
+    private Simple8BitChar encodeHighChar(char c) {
+        // for performance and simplicity, yet another reincarnation of
+        // binary search...
+        int i0 = 0;
+        int i1 = this.reverseMapping.size();
+
+        while (i1 > i0) {
+
+            int i = i0 + (i1 - i0) / 2;
+
+            Simple8BitChar m = (Simple8BitChar) this.reverseMapping.get(i);
+
+            if (m.unicode == c) {
+                return m;
+            }
+
+            if (m.unicode < c) {
+                i0 = i + 1;
+            } else {
+                i1 = i;
+            }
+        }
+
+        if (i0 >= this.reverseMapping.size()) {
+            return null;
+        }
+
+        Simple8BitChar r = (Simple8BitChar) this.reverseMapping.get(i0);
+
+        if (r.unicode != c) {
+            return null;
+        }
+
+        return r;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#canEncode(java.lang.String)
+     */
+    public boolean canEncode(String name) {
+
+        for (int i=0;i<name.length();++i) {
+
+            char c = name.charAt(i);
+
+            if (!this.canEncodeChar(c)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#encode(java.lang.String)
+     */
+    public ByteBuffer encode(String name) {
+        ByteBuffer out = ByteBuffer.allocate(name.length()
+                                             + 6 + (name.length() + 1) / 2);
+
+        for (int i=0;i<name.length();++i) {
+
+            char c = name.charAt(i);
+
+            if (out.remaining() < 6) {
+                out = ZipEncodingHelper.growBuffer(out,out.position() + 6);
+            }
+
+            if (!this.pushEncodedChar(out,c)) {
+
+                ZipEncodingHelper.appendSurrogate(out,c);
+            }
+        }
+
+        out.limit(out.position());
+        out.rewind();
+        return out;
+    }
+
+    /**
+     * @see
+     * org.apache.tools.zip.ZipEncoding#decode(byte[])
+     */
+    public String decode(byte[] data) throws IOException {
+        char [] ret = new char[data.length];
+
+        for (int i=0;i<data.length;++i) {
+            ret[i] = this.decodeByte(data[i]);
+        }
+
+        return new String(ret);
+    }
+
+
+}
diff --git a/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java b/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
index ca100a548..5535d5d7c 100644
--- a/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
+++ b/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
@@ -42,15 +42,18 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField {
     }
 
     /**
-     * Assemble as unicode comment extension from the comment and
-     * encoding of the orginal zip entry.
+     * Assemble as unicode comment extension from the comment given as
+     * text as well as the encoded bytes actually written to the archive.
      * 
-     * @param comment The file comment
-     * @param zipEncoding The encoding of the comment in the zip file,
-     * usually <code>"CP437"</code>.
+     * @param text The file comment
+     * @param bytes the bytes actually written to the archive
+     * @param off The offset of the encoded comment in <code>bytes</code>.
+     * @param len The length of the encoded comment in
+     * <code>bytes</code>.
      */
-    public UnicodeCommentExtraField(String comment, String zipEncoding) {
-        super(comment, zipEncoding);
+    public UnicodeCommentExtraField(String text, byte[] bytes, int off,
+                                    int len) {
+        super(text, bytes, off, len);
     }
 
     /**
diff --git a/src/main/org/apache/tools/zip/UnicodePathExtraField.java b/src/main/org/apache/tools/zip/UnicodePathExtraField.java
index dd3359a3d..8c26e1557 100644
--- a/src/main/org/apache/tools/zip/UnicodePathExtraField.java
+++ b/src/main/org/apache/tools/zip/UnicodePathExtraField.java
@@ -42,20 +42,22 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField {
     }
 
     /**
-     * Assemble as unicode path extension from the name and encoding
-     * of the orginal zip entry.
+     * Assemble as unicode path extension from the name given as
+     * text as well as the encoded bytes actually written to the archive.
      * 
      * @param name The file name
-     * @param zipEncoding The encoding of the filename in the zip
-     * file, usually <code>"CP437"</code>.
+     * @param bytes the bytes actually written to the archive
+     * @param off The offset of the encoded filename in <code>bytes</code>.
+     * @param len The length of the encoded filename in
+     * <code>bytes</code>.
      */
-    public UnicodePathExtraField(String name, String zipEncoding) {
-        super(name, zipEncoding);
+    public UnicodePathExtraField(String text, byte[] bytes, int off, int len) {
+        super(text, bytes, off, len);
     }
 
     /**
      * Assemble as unicode path extension from the name given as
-     * text as well as the bytes actually written to the archive.
+     * text as well as the encoded bytes actually written to the archive.
      * 
      * @param name The file name
      * @param bytes the bytes actually written to the archive
diff --git a/src/main/org/apache/tools/zip/ZipEncoding.java b/src/main/org/apache/tools/zip/ZipEncoding.java
new file mode 100644
index 000000000..5dc88e4b4
--- /dev/null
+++ b/src/main/org/apache/tools/zip/ZipEncoding.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tools.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * An interface for encoders that do a pretty encoding of ZIP
+ * filenames.
+ *
+ * <p>There are mostly two implementations, one that uses java.nio
+ * {@link java.nio.charset.Charset Charset} and one implementation,
+ * which copes with simple 8 bit charsets, because java-1.4 did not
+ * support Cp437 in java.nio.</p>
+ * 
+ * <p>The main reason for defining an own encoding layer comes from
+ * the problems with {@link java.lang.String#getBytes(String)
+ * String.getBytes}, which encodes unknown characters as ASCII
+ * question marks ('?'), which is per definition an invalid filename
+ * character under some operating systems (Windows, e.g.) leading to
+ * ignored ZIP entries.</p>
+ * 
+ * <p>All implementations should implement this interface in a
+ * reentrant way.</p>
+ */
+interface ZipEncoding {
+    /**
+     * Check, whether the given string may be losslessly encoded using this
+     * encoding.
+     * 
+     * @param name A filename or ZIP comment.
+     * @return Whether the given name may be encoded without any losses.
+     */
+    boolean canEncode(String name);
+
+    /**
+     * Encode a filename or a comment to a byte array suitable for
+     * storing it to a serialized zip entry.
+     * 
+     * <p>Examples for CP 437 (in pseudo-notation, right hand side is
+     * C-style notation):</p>
+     * <pre>
+     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
+     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
+     * </pre>
+     * 
+     * @param name A filename or ZIP comment. 
+     * @return A byte buffer with a backing array containing the
+     *         encoded name.  Unmappable characters or malformed
+     *         character sequences are mapped to a sequence of utf-16
+     *         words encoded in the format <code>%Uxxxx</code>.  It is
+     *         assumed, that the byte buffer is positioned at the
+     *         beginning of the encoded result, the byte buffer has a
+     *         backing array and the limit of the byte buffer points
+     *         to the end of the encoded result.
+     * @throws IOException 
+     */
+    ByteBuffer encode(String name) throws IOException;
+
+    /**
+     * @param data The byte values to decode.
+     * @return The decoded string.
+     * @throws IOException 
+     */
+    String decode(byte [] data) throws IOException;
+}
diff --git a/src/main/org/apache/tools/zip/ZipEncodingHelper.java b/src/main/org/apache/tools/zip/ZipEncodingHelper.java
index 1b5d16e97..e09327341 100644
--- a/src/main/org/apache/tools/zip/ZipEncodingHelper.java
+++ b/src/main/org/apache/tools/zip/ZipEncodingHelper.java
@@ -19,17 +19,119 @@
 package org.apache.tools.zip;
 
 import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
 import java.nio.charset.Charset;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
-import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Static helper functions for robustly encoding filenames in zip files. 
  */
 abstract class ZipEncodingHelper {
 
+    /**
+     * A class, which holds the high characters of a simple encoding
+     * and lazily instantiates a Simple8BitZipEncoding instance in a
+     * thread-safe manner.
+     */
+    private static class SimpleEncodingHolder {
+
+        private final char [] highChars;
+        private Simple8BitZipEncoding encoding;
+
+        /**
+         * Instantiate a simple encoding holder.
+         * 
+         * @param highChars The characters for byte codes 128 to 255.
+         * 
+         * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
+         */
+        SimpleEncodingHolder(char [] highChars) {
+            this.highChars = highChars;
+        }
+
+        /**
+         * @return The associated {@link Simple8BitZipEncoding}, which
+         *         is instantiated if not done so far.
+         */
+        public synchronized Simple8BitZipEncoding getEncoding() {
+            if (this.encoding == null) {
+                this.encoding = new Simple8BitZipEncoding(this.highChars);
+            }
+            return this.encoding;
+        }
+    }
+
+    private static final Map simpleEncodings;
+
+    static {
+        simpleEncodings = new HashMap();
+
+        char[] cp437_high_chars =
+            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
+                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
+                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
+                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
+                         0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
+                         0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
+                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
+                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
+                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
+                         0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
+                         0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
+                         0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
+                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
+                         0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
+                         0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
+                         0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
+                         0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
+                         0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
+                         0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
+                         0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
+                         0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
+                         0x25a0, 0x00a0 };
+
+        SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
+
+        simpleEncodings.put("CP437",cp437);
+        simpleEncodings.put("Cp437",cp437);
+        simpleEncodings.put("cp437",cp437);
+        simpleEncodings.put("IBM437",cp437);
+        simpleEncodings.put("ibm437",cp437);
+
+        char[] cp850_high_chars =
+            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
+                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
+                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
+                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
+                         0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
+                         0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
+                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
+                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
+                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
+                         0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
+                         0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
+                         0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
+                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
+                         0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
+                         0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
+                         0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
+                         0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
+                         0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
+                         0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
+                         0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
+                         0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
+                         0x25a0, 0x00a0 };
+
+        SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
+
+        simpleEncodings.put("CP850",cp850);
+        simpleEncodings.put("Cp850",cp850);
+        simpleEncodings.put("cp850",cp850);
+        simpleEncodings.put("IBM850",cp850);
+        simpleEncodings.put("ibm850",cp850);
+    }
+
     /**
      * Grow a byte buffer, so it has a minimal capacity or at least
      * the double capacity of the original buffer 
@@ -53,7 +155,7 @@ abstract class ZipEncodingHelper {
         return on;
     }
 
-
+ 
     /**
      * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
      * ASCII bytes.
@@ -65,131 +167,79 @@ abstract class ZipEncodingHelper {
     };
 
     /**
-     * Encode a filename or a comment to a byte array suitable for
-     * storing it to a serialized zip entry.
+     * Append <code>%Uxxxx</code> to the given byte buffer.
+     * The caller must ensure that <code>bb.remaining()&gt;=6</code>.
      * 
-     * Examples (in pseudo-notation, right hand side is C-style notation):
-     * <pre>
-     *  encodeName("\u20AC_for_Dollar.txt","CP437") = "%U20AC_for_Dollar.txt"
-     *  encodeName("\u00D6lf\u00E4sser.txt","CP437") = "\231lf\204sser.txt"
-     * </pre>
-     * 
-     * @param name The filename or comment with possible non-ASCII
-     * unicode characters.  Must not be null.
-     * @param encoding A valid encoding name. The standard zip
-     *                 encoding is <code>"CP437"</code>,
-     *                 <code>"UTF-8"</code> is supported in ZIP file
-     *                 version <code>6.3</code> or later.  If null,
-     *                 will use the platform's {@link
-     *                 java.lang.String#getBytes default encoding}.
-     * @return A byte array containing the mapped file
-     *         name. Unmappable characters or malformed character
-     *         sequences are mapped to a sequence of utf-16 words
-     *         encoded in the format <code>%Uxxxx</code>.
+     * @param bb The byte buffer to write to.
+     * @param c The character to write.
      */
-    static final byte[] encodeName(String name, String encoding) {
-        if (encoding == null) {
-            return name.getBytes();
-        }
-
-        Charset cs = Charset.forName(encoding);
-        CharsetEncoder enc = cs.newEncoder();
-
-        enc.onMalformedInput(CodingErrorAction.REPORT);
-        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
-
-        CharBuffer cb = CharBuffer.wrap(name);
-        ByteBuffer out = ByteBuffer.allocate(name.length()
-                                             + (name.length() + 1) / 2);
-
-        while (cb.remaining() > 0) {
-            CoderResult res = enc.encode(cb, out,true);
-
-            if (res.isUnmappable() || res.isMalformed()) {
-
-                // write the unmappable characters in utf-16
-                // pseudo-URL encoding style to ByteBuffer.
-                if (res.length() * 6 > out.remaining()) {
-                    out = growBuffer(out,out.position() + res.length() * 6);
-                }
-
-                for (int i=0; i<res.length(); ++i) {
-                    out.put((byte) '%');
-                    out.put((byte) 'U');
+    static void appendSurrogate(ByteBuffer bb, char c) {
 
-                    char c = cb.get();
+        bb.put((byte) '%');
+        bb.put((byte) 'U');
 
-                    out.put(HEX_DIGITS[(c >> 12)&0x0f]);
-                    out.put(HEX_DIGITS[(c >> 8)&0x0f]);
-                    out.put(HEX_DIGITS[(c >> 4)&0x0f]);
-                    out.put(HEX_DIGITS[c & 0x0f]);
-                }
+        bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
+        bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
+        bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
+        bb.put(HEX_DIGITS[c & 0x0f]);
+    }
 
-            } else if (res.isOverflow()) {
 
-                out = growBuffer(out, 0);
+    /**
+     * name of the encoding UTF-8
+     */
+    static final String UTF8 = "UTF8";
 
-            } else if (res.isUnderflow()) {
+    /**
+     * the zip encoding instance for UTF-8
+     */
+    static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
 
-                enc.flush(out);
-                break;
+    /**
+     * Instantiates a zip encoding.
+     * 
+     * @param name The name of the zip encoding. Specify <code>null</code> for
+     *             the platform's default encoding.
+     * @return A zip encoding for the given encoding name.
+     */
+    static ZipEncoding getZipEncoding(String name) {
+ 
+        // fallback encoding is good enough for utf-8.
+        if (isUTF8(name)) {
+            return UTF8_ZIP_ENCODING;
+        }
 
-            }
+        if (name == null) {
+            return new FallbackZipEncoding();
         }
 
-        byte [] ret = new byte[out.position()];
-        out.rewind();
-        out.get(ret);
+        SimpleEncodingHolder h =
+            (SimpleEncodingHolder) simpleEncodings.get(name);
 
-        return ret;
-    }
-
-    /**
-     * Return, whether a filename or a comment may be encoded to a
-     * byte array suitable for storing it to a serialized zip entry
-     * without any losses.
-     * 
-     * Examples (in pseudo-notation, right hand side is C-style notation):
-     * <pre>
-     *  canEncodeName("\u20AC_for_Dollar.txt","CP437") = false
-     *  canEncodeName("\u20AC_for_Dollar.txt","UTF-8") = true
-     *  canEncodeName("\u00D6lf\u00E4sser.txt","CP437") = true
-     * </pre>
-     * 
-     * @param name The filename or comment with possible non-ASCII
-     * unicode characters.
-     * @param encoding A valid encoding name. The standard zip
-     *                 encoding is <code>"CP437"</code>,
-     *                 <code>"UTF-8"</code> is supported in ZIP file
-     *                 version <code>6.3</code> or later.
-     * @return Whether the given encoding may encode the given name.
-     */
-    static final boolean canEncodeName(String name, String encoding) {
+        if (h!=null) {
+            return h.getEncoding();
+        }
 
-        Charset cs = Charset.forName(encoding);
+        try {
 
-        CharsetEncoder enc = cs.newEncoder();
-        enc.onMalformedInput(CodingErrorAction.REPORT);
-        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+            Charset cs = Charset.forName(name);
+            return new NioZipEncoding(cs);
 
-        return enc.canEncode(name);
+        } catch (UnsupportedCharsetException e) {
+            return new FallbackZipEncoding(name);
+        }
     }
 
     /**
-     * Decode a filename or a comment from a byte array.
-     * 
-     * @param name The filename or comment.
-     * @param encoding A valid encoding name. The standard zip
-     *                 encoding is <code>"CP437"</code>,
-     *                 <code>"UTF-8"</code> is supported in ZIP file
-     *                 version <code>6.3</code> or later.
+     * Whether a given encoding - or the platform's default encoding
+     * if the parameter is null - is UTF-8.
      */
-    static final String decodeName(byte[] name, String encoding)
-        throws java.nio.charset.CharacterCodingException {
-        Charset cs = Charset.forName(encoding);
-        return cs.newDecoder()
-            .onMalformedInput(CodingErrorAction.REPORT)
-            .onUnmappableCharacter(CodingErrorAction.REPORT)
-            .decode(ByteBuffer.wrap(name)).toString();
+    static boolean isUTF8(String encoding) {
+        if (encoding == null) {
+            // check platform's default encoding
+            encoding = System.getProperty("file.encoding");
+        }
+        return UTF8.equalsIgnoreCase(encoding)
+            || "utf-8".equalsIgnoreCase(encoding);
     }
 }
diff --git a/src/main/org/apache/tools/zip/ZipFile.java b/src/main/org/apache/tools/zip/ZipFile.java
index bd83f6c6f..bd6a47df1 100644
--- a/src/main/org/apache/tools/zip/ZipFile.java
+++ b/src/main/org/apache/tools/zip/ZipFile.java
@@ -22,8 +22,6 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.CharacterCodingException;
 import java.util.Calendar;
 import java.util.Collections;
 import java.util.Date;
@@ -98,6 +96,11 @@ public class ZipFile {
      */
     private String encoding = null;
 
+    /**
+     * The zip encoding to use for filenames and the file comment.
+     */
+    private final ZipEncoding zipEncoding;
+
     /**
      * The actual data source.
      */
@@ -164,15 +167,17 @@ public class ZipFile {
      * encoding for file names.
      *
      * @param f the archive.
-     * @param encoding the encoding to use for file names
-     * @param whether to use InfoZIP Unicode Extra Fields (if present)
-     * to set the file names.
+     * @param encoding the encoding to use for file names, use null
+     * for the platform's default encoding
+     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
+     * Extra Fields (if present) to set the file names.
      *
      * @throws IOException if an error occurs while reading the file.
      */
     public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
         throws IOException {
         this.encoding = encoding;
+        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
         this.useUnicodeExtraFields = useUnicodeExtraFields;
         archive = new RandomAccessFile(f, "r");
         boolean success = false;
@@ -247,7 +252,8 @@ public class ZipFile {
      * @param ze the entry to get the stream for.
      * @return a stream to read the entry from.
      * @throws IOException if unable to create an input stream from the zipenty
-     * @throws ZipException if the zipentry has an unsupported compression method
+     * @throws ZipException if the zipentry has an unsupported
+     * compression method
      */
     public InputStream getInputStream(ZipEntry ze)
         throws IOException, ZipException {
@@ -330,8 +336,8 @@ public class ZipFile {
             final int generalPurposeFlag = ZipShort.getValue(cfh, off);
             final boolean hasEFS = 
                 (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0;
-            final String entryEncoding =
-                hasEFS ? ZipOutputStream.UTF8 : encoding;
+            final ZipEncoding entryEncoding =
+                hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
 
             off += SHORT;
 
@@ -373,7 +379,7 @@ public class ZipFile {
 
             byte[] fileName = new byte[fileNameLen];
             archive.readFully(fileName);
-            ze.setName(getString(fileName, entryEncoding));
+            ze.setName(entryEncoding.decode(fileName));
 
             // LFH offset,
             OffsetEntry offset = new OffsetEntry();
@@ -395,7 +401,7 @@ public class ZipFile {
 
             byte[] comment = new byte[commentLen];
             archive.readFully(comment);
-            ze.setComment(getString(comment, entryEncoding));
+            ze.setComment(entryEncoding.decode(comment));
 
             archive.readFully(signatureBytes);
             sig = ZipLong.getValue(signatureBytes);
@@ -529,7 +535,7 @@ public class ZipFile {
                                      + SHORT + SHORT + fileNameLen + extraFieldLen));
             */
             offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
-                                     + SHORT + SHORT + fileNameLen + extraFieldLen;
+                + SHORT + SHORT + fileNameLen + extraFieldLen;
 
             if (entriesWithoutEFS.containsKey(ze)) {
                 setNameAndCommentFromExtraFields(ze,
@@ -576,37 +582,10 @@ public class ZipFile {
      * @throws ZipException if the encoding cannot be recognized.
      */
     protected String getString(byte[] bytes) throws ZipException {
-        return getString(bytes, encoding);
-    }
-
-    /**
-     * Retrieve a String from the given bytes using the encoding set
-     * for this ZipFile.
-     *
-     * @param bytes the byte array to transform
-     * @return String obtained by using the given encoding
-     * @throws ZipException if the encoding cannot be recognized.
-     */
-    protected String getString(byte[] bytes, String enc)
-        throws ZipException {
-        if (enc == null) {
-            return new String(bytes);
-        } else {
-            try {
-                try {
-                    return ZipEncodingHelper.decodeName(bytes, enc);
-                } catch (CharacterCodingException ex) {
-                    throw new ZipException(ex.getMessage());
-                }
-            } catch (java.nio.charset.UnsupportedCharsetException ex) {
-                // Java 1.4's NIO doesn't recognize a few names that
-                // String.getBytes does
-                try {
-                    return new String(bytes, enc);
-                } catch (UnsupportedEncodingException uee) {
-                    throw new ZipException(uee.getMessage());
-                }
-            }
+        try {
+            return ZipEncodingHelper.getZipEncoding(encoding).decode(bytes);
+        } catch (IOException ex) {
+            throw new ZipException("Failed to decode name: " + ex.getMessage());
         }
     }
 
@@ -671,8 +650,8 @@ public class ZipFile {
             if (origCRC32 == f.getNameCRC32()) {
                 try {
                     return ZipEncodingHelper
-                        .decodeName(f.getUnicodeName(), ZipOutputStream.UTF8);
-                } catch (CharacterCodingException ex) {
+                        .UTF8_ZIP_ENCODING.decode(f.getUnicodeName());
+                } catch (IOException ex) {
                     // UTF-8 unsupported?  should be impossible the
                     // Unicode*ExtraField must contain some bad bytes
 
diff --git a/src/main/org/apache/tools/zip/ZipOutputStream.java b/src/main/org/apache/tools/zip/ZipOutputStream.java
index 3dd34afd2..cad9f9a95 100644
--- a/src/main/org/apache/tools/zip/ZipOutputStream.java
+++ b/src/main/org/apache/tools/zip/ZipOutputStream.java
@@ -24,7 +24,7 @@ import java.io.FilterOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.RandomAccessFile;
-import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -92,9 +92,9 @@ public class ZipOutputStream extends FilterOutputStream {
     public static final int STORED = java.util.zip.ZipEntry.STORED;
 
     /**
-     * name of the encoding UTF-8
+     * default encoding for file names and comment (null: platform default).
      */
-    static final String UTF8 = "UTF8";
+    static final String DEFAULT_ENCODING = null;
 
      /**
      * General purpose flag, which indicates that filenames are
@@ -220,7 +220,16 @@ public class ZipOutputStream extends FilterOutputStream {
      */
     private String encoding = null;
 
-    // CheckStyle:VisibilityModifier OFF - bc
+    /**
+     * The zip encoding to use for filenames and the file comment.
+     *
+     * This field is of internal use and will be set in {@link
+     * #setEncoding(String)}.
+     */
+    private ZipEncoding zipEncoding =
+        ZipEncodingHelper.getZipEncoding(DEFAULT_ENCODING);
+
+   // CheckStyle:VisibilityModifier OFF - bc
 
     /**
      * This Deflater object is used for output.
@@ -301,8 +310,8 @@ public class ZipOutputStream extends FilterOutputStream {
     }
 
     /**
-     * This method indicates whether this archive is writing to a seekable stream (i.e., to a random
-     * access file).
+     * This method indicates whether this archive is writing to a
+     * seekable stream (i.e., to a random access file).
      *
      * <p>For seekable streams, you don't need to calculate the CRC or
      * uncompressed size for {@link #STORED} entries before
@@ -325,7 +334,8 @@ public class ZipOutputStream extends FilterOutputStream {
      */
     public void setEncoding(final String encoding) {
         this.encoding = encoding;
-        useEFS &= isUTF8(encoding);
+        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
+        useEFS &= ZipEncodingHelper.isUTF8(encoding);
     }
 
     /**
@@ -346,7 +356,7 @@ public class ZipOutputStream extends FilterOutputStream {
      * <p>Defaults to true.</p>
      */
     public void setUseLanguageEncodingFlag(boolean b) {
-        useEFS = b && isUTF8(encoding);
+        useEFS = b && ZipEncodingHelper.isUTF8(encoding);
     }
 
     /**
@@ -499,14 +509,15 @@ public class ZipOutputStream extends FilterOutputStream {
      *
      * <p>Default is Deflater.DEFAULT_COMPRESSION.</p>
      * @param level the compression level.
-     * @throws IllegalArgumentException if an invalid compression level is specified.
+     * @throws IllegalArgumentException if an invalid compression
+     * level is specified.
      * @since 1.1
      */
     public void setLevel(int level) {
         if (level < Deflater.DEFAULT_COMPRESSION
             || level > Deflater.BEST_COMPRESSION) {
-            throw new IllegalArgumentException(
-                "Invalid compression level: " + level);
+            throw new IllegalArgumentException("Invalid compression level: "
+                                               + level);
         }
         hasCompressionLevelChanged = (this.level != level);
         this.level = level;
@@ -654,13 +665,31 @@ public class ZipOutputStream extends FilterOutputStream {
      */
     protected void writeLocalFileHeader(ZipEntry ze) throws IOException {
 
-        byte[] name = getBytes(ze.getName());
+        boolean encodable = this.zipEncoding.canEncode(ze.getName());
+        ByteBuffer name = this.zipEncoding.encode(ze.getName());
+
         if (createUnicodeExtraFields) {
-            ze.addExtraField(new UnicodePathExtraField(ze.getName(), name));
+
+            /*            if (!encodable) { -- FIXME decide what to do */
+                ze.addExtraField(new UnicodePathExtraField(ze.getName(),
+                                                           name.array(),
+                                                           name.arrayOffset(),
+                                                           name.limit()));
+            /* } */
+
             String comm = ze.getComment();
             if (comm != null && !"".equals(comm)) {
-                byte[] commentB = getBytes(comm);
-                ze.addExtraField(new UnicodeCommentExtraField(comm, commentB));
+
+                boolean commentEncodable = this.zipEncoding.canEncode(comm);
+
+                /*            if (!commentEncodable) { -- FIXME decide what to do */
+                    ByteBuffer commentB = this.zipEncoding.encode(comm);
+                    ze.addExtraField(new UnicodeCommentExtraField(comm,
+                                                                  commentB.array(),
+                                                                  commentB.arrayOffset(),
+                                                                  commentB.limit())
+                                     );
+                /* } */
             }
         }
 
@@ -701,7 +730,7 @@ public class ZipOutputStream extends FilterOutputStream {
         // CheckStyle:MagicNumber ON
 
         // file name length
-        writeOut(ZipShort.getBytes(name.length));
+        writeOut(ZipShort.getBytes(name.limit()));
         written += SHORT;
 
         // extra field length
@@ -710,8 +739,8 @@ public class ZipOutputStream extends FilterOutputStream {
         written += SHORT;
 
         // file name
-        writeOut(name);
-        written += name.length;
+        writeOut(name.array(), name.arrayOffset(), name.limit());
+        written += name.limit();
 
         // extra field
         writeOut(extra);
@@ -779,8 +808,8 @@ public class ZipOutputStream extends FilterOutputStream {
         // CheckStyle:MagicNumber ON
 
         // file name length
-        byte[] name = getBytes(ze.getName());
-        writeOut(ZipShort.getBytes(name.length));
+        ByteBuffer name = this.zipEncoding.encode(ze.getName());
+        writeOut(ZipShort.getBytes(name.limit()));
         written += SHORT;
 
         // extra field length
@@ -793,8 +822,8 @@ public class ZipOutputStream extends FilterOutputStream {
         if (comm == null) {
             comm = "";
         }
-        byte[] commentB = getBytes(comm);
-        writeOut(ZipShort.getBytes(commentB.length));
+        ByteBuffer commentB = this.zipEncoding.encode(comm);
+        writeOut(ZipShort.getBytes(commentB.limit()));
         written += SHORT;
 
         // disk number start
@@ -814,16 +843,16 @@ public class ZipOutputStream extends FilterOutputStream {
         written += WORD;
 
         // file name
-        writeOut(name);
-        written += name.length;
+        writeOut(name.array(), name.arrayOffset(), name.limit());
+        written += name.limit();
 
         // extra field
         writeOut(extra);
         written += extra.length;
 
         // file comment
-        writeOut(commentB);
-        written += commentB.length;
+        writeOut(commentB.array(), commentB.arrayOffset(), commentB.limit());
+        written += commentB.limit();
     }
 
     /**
@@ -849,9 +878,9 @@ public class ZipOutputStream extends FilterOutputStream {
         writeOut(ZipLong.getBytes(cdOffset));
 
         // ZIP file comment
-        byte[] data = getBytes(comment);
-        writeOut(ZipShort.getBytes(data.length));
-        writeOut(data);
+        ByteBuffer data = this.zipEncoding.encode(comment);
+        writeOut(ZipShort.getBytes(data.limit()));
+        writeOut(data.array(), data.arrayOffset(), data.limit());
     }
 
     /**
@@ -908,20 +937,15 @@ public class ZipOutputStream extends FilterOutputStream {
      * @since 1.3
      */
     protected byte[] getBytes(String name) throws ZipException {
-        if (encoding == null) {
-            return name.getBytes();
-        } else {
-            try {
-                return ZipEncodingHelper.encodeName(name, encoding);
-            } catch (java.nio.charset.UnsupportedCharsetException ex) {
-                // Java 1.4's NIO doesn't recognize a few names that
-                // String.getBytes does
-                try {
-                    return name.getBytes(encoding);
-                } catch (UnsupportedEncodingException uee) {
-                    throw new ZipException(uee.getMessage());
-                }
-            }
+        try {
+            ByteBuffer b =
+                ZipEncodingHelper.getZipEncoding(encoding).encode(name);
+            byte[] result = new byte[b.limit()];
+            System.arraycopy(b.array(), b.arrayOffset(), result, 0,
+                             result.length);
+            return result;
+        } catch (IOException ex) {
+            throw new ZipException("Failed to encode name: " + ex.getMessage());
         }
     }
 
@@ -975,19 +999,6 @@ public class ZipOutputStream extends FilterOutputStream {
         }
     }
 
-    /**
-     * Whether a given encoding - or the platform's default encoding
-     * if the parameter is null - is UTF-8.
-     */
-    static boolean isUTF8(String encoding) {
-        if (encoding == null) {
-            // check platform's default encoding
-            encoding = System.getProperty("file.encoding");
-        }
-        return UTF8.equalsIgnoreCase(encoding)
-            || "utf-8".equalsIgnoreCase(encoding);
-    }
-
     private void writeVersionNeededToExtractAndGeneralPurposeBits(final int
                                                                   zipMethod)
         throws IOException {
diff --git a/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java b/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java
index bb8246fe5..3e7edb1dd 100644
--- a/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java
+++ b/src/tests/junit/org/apache/tools/zip/UTF8ZipFilesTest.java
@@ -19,12 +19,14 @@
 package org.apache.tools.zip;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.UnsupportedCharsetException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.ByteBuffer;
 import java.util.Enumeration;
+import java.util.zip.CRC32;
 import junit.framework.TestCase;
 
 public class UTF8ZipFilesTest extends TestCase {
@@ -36,33 +38,70 @@ public class UTF8ZipFilesTest extends TestCase {
     private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
     private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
 
-    public void testUtf8FileRoundtrip() throws IOException {
-        testFileRoundtrip(UTF_8);
+    public void testUtf8FileRoundtripExplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(UTF_8, true, true);
+    }
+
+    public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(UTF_8, false, true);
+    }
+
+    public void testCP437FileRoundtripExplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(CP437, false, true);
+    }
+
+    public void testASCIIFileRoundtripExplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(US_ASCII, false, true);
     }
 
+    public void testUtf8FileRoundtripImplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(UTF_8, true, false);
+    }
 
-    public void testCP437FileRoundtrip() throws IOException {
-        testFileRoundtrip(CP437);
+    public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(UTF_8, false, false);
     }
 
-    public void testASCIIFileRoundtrip() throws IOException {
-        testFileRoundtrip(US_ASCII);
+    public void testCP437FileRoundtripImplicitUnicodeExtra()
+        throws IOException {
+        testFileRoundtrip(CP437, false, false);
     }
 
-    private static void testFileRoundtrip(String encoding)
+    public void testASCIIFileRoundtripImplicitUnicodeExtra()
         throws IOException {
+        testFileRoundtrip(US_ASCII, false, false);
+    }
 
+    public void testZipFileReadsUnicodeFields() throws IOException {
+        File file = File.createTempFile("unicode-test", ".zip");
+        ZipFile zf = null;
         try {
-            Charset.forName(encoding);
-        } catch (UnsupportedCharsetException use) {
-            System.err.println("Skipping testFileRoundtrip for unsupported "
-                               + " encoding " + encoding);
-            return;
+            createTestFile(file, US_ASCII, false, true);
+            zf = new ZipFile(file, US_ASCII, true);
+            assertNotNull(zf.getEntry(ASCII_TXT));
+            assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
+            assertNotNull(zf.getEntry(OIL_BARREL_TXT));
+        } finally {
+            ZipFile.closeQuietly(zf);
+            if (file.exists()) {
+                file.delete();
+            }
         }
+    }
+
+    private static void testFileRoundtrip(String encoding, boolean withEFS,
+                                          boolean withExplicitUnicodeExtra)
+        throws IOException {
 
         File file = File.createTempFile(encoding + "-test", ".zip");
         try {
-            createTestFile(file, encoding);
+            createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
             testFile(file, encoding);
         } finally {
             if (file.exists()) {
@@ -71,19 +110,30 @@ public class UTF8ZipFilesTest extends TestCase {
         }
     }
 
-    private static void createTestFile(File file, String encoding)
+    private static void createTestFile(File file, String encoding,
+                                       boolean withEFS,
+                                       boolean withExplicitUnicodeExtra)
         throws UnsupportedEncodingException, IOException {
 
+        ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
+
         ZipOutputStream zos = null;
         try {
             zos = new ZipOutputStream(file);
             zos.setEncoding(encoding);
+            zos.setUseLanguageEncodingFlag(withEFS);
+            zos.setCreateUnicodeExtraFields(!withExplicitUnicodeExtra);
 
             ZipEntry ze = new ZipEntry(OIL_BARREL_TXT);
-            if (!ZipEncodingHelper.canEncodeName(ze.getName(),
-                                                 zos.getEncoding())) {
+            if (withExplicitUnicodeExtra
+                && !zipEncoding.canEncode(ze.getName())) {
+
+                ByteBuffer en = zipEncoding.encode(ze.getName());
+
                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
-                                                           zos.getEncoding()));
+                                                           en.array(),
+                                                           en.arrayOffset(),
+                                                           en.limit()));
             }
 
             zos.putNextEntry(ze);
@@ -91,10 +141,15 @@ public class UTF8ZipFilesTest extends TestCase {
             zos.closeEntry();
 
             ze = new ZipEntry(EURO_FOR_DOLLAR_TXT);
-            if (!ZipEncodingHelper.canEncodeName(ze.getName(),
-                                                 zos.getEncoding())) {
+            if (withExplicitUnicodeExtra
+                && !zipEncoding.canEncode(ze.getName())) {
+
+                ByteBuffer en = zipEncoding.encode(ze.getName());
+
                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
-                                                           zos.getEncoding()));
+                                                           en.array(),
+                                                           en.arrayOffset(),
+                                                           en.limit()));
             }
 
             zos.putNextEntry(ze);
@@ -103,10 +158,15 @@ public class UTF8ZipFilesTest extends TestCase {
 
             ze = new ZipEntry(ASCII_TXT);
 
-            if (!ZipEncodingHelper.canEncodeName(ze.getName(),
-                                                 zos.getEncoding())) {
+            if (withExplicitUnicodeExtra
+                && !zipEncoding.canEncode(ze.getName())) {
+
+                ByteBuffer en = zipEncoding.encode(ze.getName());
+
                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
-                                                           zos.getEncoding()));
+                                                           en.array(),
+                                                           en.arrayOffset(),
+                                                           en.limit()));
             }
 
             zos.putNextEntry(ze);
@@ -125,7 +185,7 @@ public class UTF8ZipFilesTest extends TestCase {
         throws IOException {
         ZipFile zf = null;
         try {
-            zf = new ZipFile(file, encoding);
+            zf = new ZipFile(file, encoding, false);
 
             Enumeration e = zf.getEntries();
             while (e.hasMoreElements()) {
@@ -147,14 +207,8 @@ public class UTF8ZipFilesTest extends TestCase {
     }
 
     private static UnicodePathExtraField findUniCodePath(ZipEntry ze) {
-
-        ZipExtraField[] efs = ze.getExtraFields();
-        for (int i = 0; i < efs.length; ++i) {
-            if (efs[i].getHeaderId().equals(UnicodePathExtraField.UPATH_ID)) {
-                return (UnicodePathExtraField) efs[i];
-            }
-        }
-        return null;
+        return (UnicodePathExtraField)
+            ze.getExtraField(UnicodePathExtraField.UPATH_ID);
     }
 
     private static void assertUnicodeName(ZipEntry ze,
@@ -165,23 +219,17 @@ public class UTF8ZipFilesTest extends TestCase {
             UnicodePathExtraField ucpf = findUniCodePath(ze);
             assertNotNull(ucpf);
 
-            UnicodePathExtraField ucpe = new UnicodePathExtraField(expectedName,
-                                                                   encoding);
-            assertEquals(ucpe.getNameCRC32(), ucpf.getNameCRC32());
+            ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
+            ByteBuffer ne = enc.encode(ze.getName());
+
+            CRC32 crc = new CRC32();
+            crc.update(ne.array(),ne.arrayOffset(),ne.limit());
+
+            assertEquals(crc.getValue(), ucpf.getNameCRC32());
             assertEquals(expectedName, new String(ucpf.getUnicodeName(),
                                                   UTF_8));
         }
     }
 
-    /*
-    public void testUtf8Interoperability() throws IOException {
-        File file1 = super.getFile("utf8-7zip-test.zip");
-        File file2 = super.getFile("utf8-winzip-test.zip");
-
-        testFile(file1,CP437);
-        testFile(file2,CP437);
-
-    }
-    */
 }
 
diff --git a/src/tests/junit/org/apache/tools/zip/ZipEncodingTest.java b/src/tests/junit/org/apache/tools/zip/ZipEncodingTest.java
new file mode 100644
index 000000000..d935fa988
--- /dev/null
+++ b/src/tests/junit/org/apache/tools/zip/ZipEncodingTest.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tools.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import junit.framework.TestCase;
+
+/**
+ * Test zip encodings.
+ */
+public class ZipEncodingTest extends TestCase {
+    // A character the tests expect none of the encodings to map.
+    private static final String UNENC_STRING = "\u2016";
+    // stress test for internal grow method: only unmappable characters.
+    private static final String BAD_STRING =
+        "\u2016\u2015\u2016\u2015\u2016\u2015\u2016\u2015\u2016\u2015\u2016";
+    // The encoded form of BAD_STRING the tests expect (as US-ASCII).
+    private static final String BAD_STRING_ENC =
+        "%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016";
+
+    public void testSimpleCp437Encoding() throws IOException {
+        // null: doSimpleEncodingTest then uses all 256 byte values.
+        doSimpleEncodingTest("Cp437", null);
+    }
+
+    public void testSimpleCp850Encoding() throws IOException {
+        // null: doSimpleEncodingTest then uses all 256 byte values.
+        doSimpleEncodingTest("Cp850", null);
+    }
+
+    public void testNioCp1252Encoding() throws IOException {
+        // CP1252 leaves some code points undefined (0x81, 0x8D, 0x8F,
+        // 0x90 and 0x9D are absent below); these are the defined ones,
+        // retrieved by
+        //    awk '/^0x/ && NF>2 {print $1;}' CP1252.TXT
+        byte[] b =
+            new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                         0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+                         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+                         0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+                         0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+                         0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+                         0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+                         0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+                         0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+                         0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+                         0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+                         0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+                         0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+                         0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+                         0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+                         0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+                         (byte) 0x80, (byte) 0x82, (byte) 0x83, (byte) 0x84,
+                         (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88,
+                         (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C,
+                         (byte) 0x8E, (byte) 0x91, (byte) 0x92, (byte) 0x93,
+                         (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97,
+                         (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B,
+                         (byte) 0x9C, (byte) 0x9E, (byte) 0x9F, (byte) 0xA0,
+                         (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4,
+                         (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8,
+                         (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC,
+                         (byte) 0xAD, (byte) 0xAE, (byte) 0xAF, (byte) 0xB0,
+                         (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4,
+                         (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8,
+                         (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC,
+                         (byte) 0xBD, (byte) 0xBE, (byte) 0xBF, (byte) 0xC0,
+                         (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4,
+                         (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8,
+                         (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC,
+                         (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, (byte) 0xD0,
+                         (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4,
+                         (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8,
+                         (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC,
+                         (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0xE0,
+                         (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4,
+                         (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8,
+                         (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC,
+                         (byte) 0xED, (byte) 0xEE, (byte) 0xEF, (byte) 0xF0,
+                         (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4,
+                         (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8,
+                         (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC,
+                         (byte) 0xFD, (byte) 0xFE, (byte) 0xFF };
+
+        doSimpleEncodingTest("Cp1252",b);
+    }
+
+    /** Asserts that bytes [0, limit) of actual equal expected. */
+    private static final void assertEquals(byte[] expected, ByteBuffer actual) {
+        assertEquals(expected.length, actual.limit());
+
+        for (int i = 0; i < expected.length; ++i) {
+            // Absolute get: unlike the relative get() it does not advance
+            // the position, so the caller's buffer is left untouched and
+            // a failure can report the offending index.
+            assertEquals("byte at index " + i, expected[i], actual.get(i));
+        }
+    }
+
+    private void doSimpleEncodingTest(String name, byte[] testBytes)
+        throws IOException {
+        // Without explicit test data, exercise every possible byte value.
+        if (testBytes == null) {
+            testBytes = new byte[256];
+            for (int b = 0; b < testBytes.length; ++b) {
+                testBytes[b] = (byte) b;
+            }
+        }
+
+        ZipEncoding encoding = ZipEncodingHelper.getZipEncoding(name);
+
+        // Decoding and re-encoding must reproduce the input bytes.
+        String roundTrip = encoding.decode(testBytes);
+        assertTrue(encoding.canEncode(roundTrip));
+        assertEquals(testBytes, encoding.encode(roundTrip));
+
+        // Unencodable characters must be reported as such and are
+        // expected to be written as %Uxxxx escape sequences.
+        assertFalse(encoding.canEncode(UNENC_STRING));
+        assertEquals("%U2016".getBytes("US-ASCII"),
+                     encoding.encode(UNENC_STRING));
+
+        assertFalse(encoding.canEncode(BAD_STRING));
+        assertEquals(BAD_STRING_ENC.getBytes("US-ASCII"),
+                     encoding.encode(BAD_STRING));
+    }
+
+}