From c5d57a281a3b28f9d711b3f70e900333faa7c457 Mon Sep 17 00:00:00 2001
From: Peter Reilly <peterreilly@apache.org>
Date: Thu, 7 Sep 2006 23:16:18 +0000
Subject: [PATCH] Bugzilla 40300: incorrect handling of exceptions in <copy>

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@441294 13f79535-47bb-0310-9956-ffa450edef68
---
 WHATSNEW                                      |  1 +
 docs/manual/CoreTasks/copy.html               | 53 +++++++++++++++---
 .../org/apache/tools/ant/taskdefs/Copy.java   | 56 ++++++++++++++++---
 .../org/apache/tools/ant/util/FileUtils.java  | 22 ++++++++
 .../apache/tools/ant/util/FileUtilsTest.java  |  5 ++
 5 files changed, 123 insertions(+), 14 deletions(-)
diff --git a/WHATSNEW b/WHATSNEW
index 2fb6fab0d..98661111b 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -15,6 +15,7 @@ Fixed bugs:
 * Macro element did not include top level Text. Bugzilla report 36803.
 * AntClassLoader did not isolate resources when isolate was set. Bugzilla report 38747.
 * Diagnostics broken when using java 1.4. Bugzilla report 40395.
+* Exception reporting in <copy> was broken. Bugzilla report 40300.
 
 Other changes:
 --------------
diff --git a/docs/manual/CoreTasks/copy.html b/docs/manual/CoreTasks/copy.html
index 91924a73a..d8c7a17a8 100644
--- a/docs/manual/CoreTasks/copy.html
+++ b/docs/manual/CoreTasks/copy.html
@@ -20,13 +20,14 @@ Collection</a>s are used to select a group of files to copy.  To use a
 resource collection, the <code>todir</code> attribute must be set.</p>
 
 <p>
-<strong>Note: </strong>If you employ filters in your copy operation, you should
-limit the copy to text files. Binary files will be corrupted by the copy operation.
+<strong>Note: </strong>If you employ filters in your copy operation,
+you should limit the copy to text files. Binary files will be corrupted
+by the copy operation.
 This applies whether the filters are implicitly defined by the
 <a href="filter.html">filter</a> task or explicitly provided to the copy
-operation as <a href="../CoreTypes/filterset.html">filtersets</a>
+operation as <a href="../CoreTypes/filterset.html">filtersets</a>.
+  <em>See <a href="#encoding">encoding note</a></em>.
 </p>
-
 <h3>Parameters</h3>
 <table border="1" cellpadding="2" cellspacing="0">
   <tr>
@@ -278,9 +279,47 @@ the copied file takes on the case of the original. The workaround is to
 <a href="delete.html">delete</a>
 the file in the destination directory before you copy it.
 </p>
+  <p>
+    <strong><a name="encoding">Important Encoding Note:</a></strong>
+    The reason that binary files when filtered get corrupted is that
+    filtering involves reading in the file using a Reader class. This
+    has an encoding specifying how files are encoded. There are a number
+    of different types of encoding - UTF-8, UTF-16, Cp1252, ISO-8859-1,
+    US-ASCII and (lots of) others. On Windows the default character encoding
+    is Cp1252, on Unix it is usually UTF-8. For both of these encodings
+    there are illegal byte sequences (more in UTF-8 than for Cp1252).
+  </p>
+  <p>
+    How the Reader class deals with these illegal sequences is up to the
+    implementation
+    of the character decoder. The current Sun Java implementation is to
+    map them to legal characters. Previous Sun Java (1.3 and lower) threw
+    a MalformedInputException. IBM Java 1.4 also throws this exception.
+    It is the mapping of the characters that causes the corruption.
+  </p>
+  <p>
+    On Unix, where the default is normally UTF-8, this is a <em>big</em>
+    problem, as it is easy to edit a file to contain non-US-ASCII characters
+    from ISO-8859-1, for example the Danish oe character. When this is
+    copied (with filtering) by Ant, the character gets converted to a
+    question mark (or some such thing).
+  </p>
+  <p>
+    There is not much that Ant can do. It cannot figure out which
+    files are binary - a UTF-8 version of Korean will have lots of
+    bytes with the top bit set. It is not informed about illegal
+    character sequences by current Sun Java implementations.
+  </p>
+  <p>
+    One trick for filtering containing only US-ASCII is to
+    use the ISO-8859-1 encoding. This does not seem to contain
+    illegal character sequences, and the lower 7 bits are US-ASCII.
+    Another trick is to change the LANG environment variable from
+    something like "us.utf8" to "us".
+  </p>
+    
 
-<hr><p align="center">Copyright &copy; 2000-2005 The Apache Software Foundation.
+<hr><p align="center">Copyright &copy; 2000-2006 The Apache Software Foundation.
 All rights Reserved.</p>
 
-</body>
-</html>
+</body></html>
diff --git a/src/main/org/apache/tools/ant/taskdefs/Copy.java b/src/main/org/apache/tools/ant/taskdefs/Copy.java
index 0af8ca0e3..94e1f982a 100644
--- a/src/main/org/apache/tools/ant/taskdefs/Copy.java
+++ b/src/main/org/apache/tools/ant/taskdefs/Copy.java
@@ -66,7 +66,7 @@ import org.apache.tools.ant.util.FlatFileNameMapper;
  */
 public class Copy extends Task {
     static final File NULL_FILE_PLACEHOLDER = new File("/NULL_FILE");
-
+    static final String LINE_SEPARATOR = System.getProperty("line.separator");
     protected File file = null;     // the source file
     protected File destFile = null; // the destination file
     protected File destDir = null;  // the destination directory
@@ -451,10 +451,10 @@ public class Copy extends Task {
                         ds = fs.getDirectoryScanner(getProject());
                     } catch (BuildException e) {
                         if (failonerror
-                            || !e.getMessage().endsWith(" not found.")) {
+                            || !getMessage(e).endsWith(" not found.")) {
                             throw e;
                         } else {
-                            log("Warning: " + e.getMessage());
+                            log("Warning: " + getMessage(e));
                             continue;
                         }
                     }
@@ -532,7 +532,7 @@ public class Copy extends Task {
                 doFileOperations();
             } catch (BuildException e) {
                 if (!failonerror) {
-                    log("Warning: " + e.getMessage(), Project.MSG_ERR);
+                    log("Warning: " + getMessage(e), Project.MSG_ERR);
                 } else {
                     throw e;
                 }
@@ -547,7 +547,7 @@ public class Copy extends Task {
                     doResourceOperations(map);
                 } catch (BuildException e) {
                     if (!failonerror) {
-                        log("Warning: " + e.getMessage(), Project.MSG_ERR);
+                        log("Warning: " + getMessage(e), Project.MSG_ERR);
                     } else {
                         throw e;
                     }
@@ -796,7 +796,7 @@ public class Copy extends Task {
                                            outputEncoding, getProject());
                     } catch (IOException ioe) {
                         String msg = "Failed to copy " + fromFile + " to " + toFile
-                            + " due to " + ioe.getMessage();
+                            + " due to " + getDueTo(ioe);
                         File targetFile = new File(toFile);
                         if (targetFile.exists() && !targetFile.delete()) {
                             msg += " and I couldn't delete the corrupt " + toFile;
@@ -886,7 +886,7 @@ public class Copy extends Task {
                     } catch (IOException ioe) {
                         String msg = "Failed to copy " + fromResource
                             + " to " + toFile
-                            + " due to " + ioe.getMessage();
+                            + " due to " + getDueTo(ioe);
                         File targetFile = new File(toFile);
                         if (targetFile.exists() && !targetFile.delete()) {
                             msg += " and I couldn't delete the corrupt " + toFile;
@@ -968,4 +968,46 @@ public class Copy extends Task {
         return mapper;
     }
 
+    /**
+     * Get a message for an exception, falling back to its toString().
+     * @param ex the exception to handle
+     * @return ex.getMessage() if ex.getMessage() is not null
+     *         otherwise return ex.toString()
+     */
+    private String getMessage(Exception ex) {
+        return ex.getMessage() == null ? ex.toString() : ex.getMessage();
+    }
+    
+    /**
+     * Returns a reason for failure based on the exception thrown: the
+     * class name (omitted for a plain IOException that has a message),
+     * the message if any, and for MalformedInput an encoding note.
+     * @param ex the exception that caused the failure
+     * @return the text describing the reason
+     */
+    private String getDueTo(Exception ex) {
+        boolean baseIOException = ex.getClass() == IOException.class;
+        StringBuffer message = new StringBuffer();
+        if (!baseIOException || ex.getMessage() == null) {
+            message.append(ex.getClass().getName());
+        }
+        if (ex.getMessage() != null) {
+            if (!baseIOException) {
+                message.append(" "); // separates class name and message
+            }
+            message.append(ex.getMessage());
+        }
+        if (ex.getClass().getName().indexOf("MalformedInput") != -1) {
+            message.append(LINE_SEPARATOR);
+            message.append(
+                "This is normally due to the input file containing invalid");
+             message.append(LINE_SEPARATOR);
+            message.append("bytes for the character encoding used : ");
+            message.append(
+                (inputEncoding == null
+                 ? fileUtils.getDefaultEncoding() : inputEncoding));
+            message.append(LINE_SEPARATOR);
+        }
+        return message.toString();
+    }
 }
diff --git a/src/main/org/apache/tools/ant/util/FileUtils.java b/src/main/org/apache/tools/ant/util/FileUtils.java
index d0e15717f..0a77df240 100644
--- a/src/main/org/apache/tools/ant/util/FileUtils.java
+++ b/src/main/org/apache/tools/ant/util/FileUtils.java
@@ -21,6 +21,7 @@ package org.apache.tools.ant.util;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 import java.io.Writer;
@@ -1483,4 +1484,25 @@ public class FileUtils {
         return buffer.toString();
     }
 
+    /**
+     * Get the default encoding.
+     * This is done by opening an InputStreamReader on
+     * a dummy (always empty) InputStream and getting the encoding.
+     * Could use System.getProperty("file.encoding"), but cannot
+     * see where this is documented.
+     * @return the default file encoding.
+     */
+    public String getDefaultEncoding() {
+        InputStreamReader is = new InputStreamReader(
+            new InputStream() {
+                public int read() {
+                    return -1; // end of stream: the dummy holds no data
+                }
+            });
+        try {
+            return is.getEncoding();
+        } finally {
+            close(is);
+        }
+    }
 }
diff --git a/src/testcases/org/apache/tools/ant/util/FileUtilsTest.java b/src/testcases/org/apache/tools/ant/util/FileUtilsTest.java
index 4634683a8..e1f4092e6 100644
--- a/src/testcases/org/apache/tools/ant/util/FileUtilsTest.java
+++ b/src/testcases/org/apache/tools/ant/util/FileUtilsTest.java
@@ -540,6 +540,11 @@ public class FileUtilsTest extends TestCase {
                 !FILE_UTILS.isUpToDate(firstTime,-1L));
     }
 
+    public void testGetDefaultEncoding() {
+        // Smoke test: only checks that the call does not throw
+        FILE_UTILS.getDefaultEncoding();
+    }
+
     /**
      * adapt file separators to local conventions
      */