diff --git a/WHATSNEW b/WHATSNEW index ddcc2eaa2..f2aa4f8e8 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -49,6 +49,9 @@ Other changes: Java VMs. Bugzilla Report 52706. + * merged the TAR package from Commons Compress, it can now read + archives using POSIX extension headers and STAR extensions. + Changes from Ant 1.8.3 TO Ant 1.8.4 =================================== diff --git a/src/main/org/apache/tools/tar/TarArchiveSparseEntry.java b/src/main/org/apache/tools/tar/TarArchiveSparseEntry.java new file mode 100644 index 000000000..2e76fb699 --- /dev/null +++ b/src/main/org/apache/tools/tar/TarArchiveSparseEntry.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tools.tar; + +import java.io.IOException; + +/** + * This class represents a sparse entry in a Tar archive. + * + *
+ * The C structure for a sparse entry is: + *
+ * struct sparse_header { + * struct sparse sp[21]; // TarConstants.SPARSELEN_GNU_SPARSE - offset 0 + * char isextended; // TarConstants.ISEXTENDEDLEN_GNU_SPARSE - offset 504 + * }; + *+ * Whereas, "struct sparse" is: + *
+ * struct sparse { + * char offset[12]; // offset 0 + * char numbytes[12]; // offset 12 + * }; + *+ */ + +public class TarArchiveSparseEntry implements TarConstants { + /** If an extension sparse header follows. */ + private boolean isExtended; + + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @throws IOException on unknown format + */ + public TarArchiveSparseEntry(byte[] headerBuf) throws IOException { + int offset = 0; + offset += SPARSELEN_GNU_SPARSE; + isExtended = TarUtils.parseBoolean(headerBuf, offset); + } + + public boolean isExtended() { + return isExtended; + } +} diff --git a/src/main/org/apache/tools/tar/TarBuffer.java b/src/main/org/apache/tools/tar/TarBuffer.java index d34b775e4..de2321033 100644 --- a/src/main/org/apache/tools/tar/TarBuffer.java +++ b/src/main/org/apache/tools/tar/TarBuffer.java @@ -51,12 +51,13 @@ public class TarBuffer { private InputStream inStream; private OutputStream outStream; - private byte[] blockBuffer; + private final int blockSize; + private final int recordSize; + private final int recsPerBlock; + private final byte[] blockBuffer; + private int currBlkIdx; private int currRecIdx; - private int blockSize; - private int recordSize; - private int recsPerBlock; private boolean debug; /** @@ -83,10 +84,7 @@ public class TarBuffer { * @param recordSize the record size to use */ public TarBuffer(InputStream inStream, int blockSize, int recordSize) { - this.inStream = inStream; - this.outStream = null; - - this.initialize(blockSize, recordSize); + this(inStream, null, blockSize, recordSize); } /** @@ -113,16 +111,15 @@ public class TarBuffer { * @param recordSize the record size to use */ public TarBuffer(OutputStream outStream, int blockSize, int recordSize) { - this.inStream = null; - this.outStream = outStream; - - this.initialize(blockSize, recordSize); + this(null, outStream, blockSize, recordSize); } /** - * 
Initialization common to all constructors. + * Private constructor to perform common setup. */ - private void initialize(int blockSize, int recordSize) { + private TarBuffer(InputStream inStream, OutputStream outStream, int blockSize, int recordSize) { + this.inStream = inStream; + this.outStream = outStream; this.debug = false; this.blockSize = blockSize; this.recordSize = recordSize; @@ -194,10 +191,8 @@ public class TarBuffer { throw new IOException("reading (via skip) from an output buffer"); } - if (currRecIdx >= recsPerBlock) { - if (!readBlock()) { - return; // UNDONE - } + if (currRecIdx >= recsPerBlock && !readBlock()) { + return; // UNDONE } currRecIdx++; @@ -216,13 +211,14 @@ public class TarBuffer { } if (inStream == null) { + if (outStream == null) { + throw new IOException("input buffer is closed"); + } throw new IOException("reading from an output buffer"); } - if (currRecIdx >= recsPerBlock) { - if (!readBlock()) { - return null; - } + if (currRecIdx >= recsPerBlock && !readBlock()) { + return null; } byte[] result = new byte[recordSize]; @@ -337,6 +333,9 @@ public class TarBuffer { } if (outStream == null) { + if (inStream == null){ + throw new IOException("Output buffer is closed"); + } throw new IOException("writing to an input buffer"); } @@ -374,6 +373,9 @@ public class TarBuffer { } if (outStream == null) { + if (inStream == null){ + throw new IOException("Output buffer is closed"); + } throw new IOException("writing to an input buffer"); } @@ -454,9 +456,8 @@ public class TarBuffer { } else if (inStream != null) { if (inStream != System.in) { inStream.close(); - - inStream = null; } + inStream = null; } } } diff --git a/src/main/org/apache/tools/tar/TarConstants.java b/src/main/org/apache/tools/tar/TarConstants.java index 2ba5d6667..7992bbbaa 100644 --- a/src/main/org/apache/tools/tar/TarConstants.java +++ b/src/main/org/apache/tools/tar/TarConstants.java @@ -26,10 +26,22 @@ package org.apache.tools.tar; /** * This interface contains all the 
definitions used in the package. * + * For tar formats (FORMAT_OLDGNU, FORMAT_POSIX, etc.) see GNU tar + * tar.h type enum archive_format */ // CheckStyle:InterfaceIsTypeCheck OFF (bc) public interface TarConstants { + /** + * GNU format as per before tar 1.12. + */ + int FORMAT_OLDGNU = 2; + + /** + * Pure Posix format. + */ + int FORMAT_POSIX = 3; + /** * The length of the name field in a header buffer. */ @@ -50,6 +62,12 @@ public interface TarConstants { */ int GIDLEN = 8; + /** + * The maximum value of gid/uid in a tar archive which can + * be expressed in octal char notation (that's 7 sevens, octal). + */ + long MAXID = 07777777L; + /** * The length of the checksum field in a header buffer. */ @@ -57,19 +75,36 @@ public interface TarConstants { /** * The length of the size field in a header buffer. + * Includes the trailing space or NUL. */ int SIZELEN = 12; /** - * The maximum size of a file in a tar archive (That's 11 sevens, octal). + * The maximum size of a file in a tar archive + * which can be expressed in octal char notation (that's 11 sevens, octal). */ long MAXSIZE = 077777777777L; + /** Offset of start of magic field within header record */ + int MAGIC_OFFSET = 257; /** - * The length of the magic field in a header buffer. + * The length of the magic field in a header buffer including the version. */ int MAGICLEN = 8; + /** + * The length of the magic field in a header buffer. + */ + int PURE_MAGICLEN = 6; + + /** Offset of start of version field within header record */ + int VERSION_OFFSET = 263; + /** + * Previously this was regarded as part of "magic" field, but it + * is separate. + */ + int VERSIONLEN = 2; + /** * The length of the modification time field in a header buffer. */ @@ -86,10 +121,76 @@ public interface TarConstants { int GNAMELEN = 32; /** - * The length of the devices field in a header buffer. + * The length of each of the device fields (major and minor) in a header buffer. */ int DEVLEN = 8; + /** + * Length of the prefix field. 
+ * + */ + int PREFIXLEN = 155; + + /** + * The length of the access time field in an old GNU header buffer. + * + */ + int ATIMELEN_GNU = 12; + + /** + * The length of the created time field in an old GNU header buffer. + * + */ + int CTIMELEN_GNU = 12; + + /** + * The length of the multivolume start offset field in an old GNU header buffer. + * + */ + int OFFSETLEN_GNU = 12; + + /** + * The length of the long names field in an old GNU header buffer. + * + */ + int LONGNAMESLEN_GNU = 4; + + /** + * The length of the padding field in an old GNU header buffer. + * + */ + int PAD2LEN_GNU = 1; + + /** + * The sum of the length of all sparse headers in an old GNU header buffer. + * + */ + int SPARSELEN_GNU = 96; + + /** + * The length of the is extension field in an old GNU header buffer. + * + */ + int ISEXTENDEDLEN_GNU = 1; + + /** + * The length of the real size field in an old GNU header buffer. + * + */ + int REALSIZELEN_GNU = 12; + + /** + * The sum of the length of all sparse headers in a sparse header buffer. + * + */ + int SPARSELEN_GNU_SPARSE = 504; + + /** + * The length of the is extension field in a sparse header buffer. + * + */ + int ISEXTENDEDLEN_GNU_SPARSE = 1; + /** * LF_ constants represent the "link flag" of an entry, or more commonly, * the "entry type". This is the "old way" of indicating a normal file. @@ -137,22 +238,51 @@ public interface TarConstants { byte LF_CONTIG = (byte) '7'; /** - * The magic tag representing a POSIX tar archive. + * Identifies the *next* file on the tape as having a long name. */ + byte LF_GNUTYPE_LONGNAME = (byte) 'L'; + + /** + * Sparse file type. + */ + byte LF_GNUTYPE_SPARSE = (byte) 'S'; + + // See "http://www.opengroup.org/onlinepubs/009695399/utilities/pax.html#tag_04_100_13_02" + + /** + * Identifies the entry as a Pax extended header. + */ + byte LF_PAX_EXTENDED_HEADER_LC = (byte) 'x'; + + /** + * Identifies the entry as a Pax extended header (SunOS tar -E). 
+ */ + byte LF_PAX_EXTENDED_HEADER_UC = (byte) 'X'; + + /** + * Identifies the entry as a Pax global extended header. + */ + byte LF_PAX_GLOBAL_EXTENDED_HEADER = (byte) 'g'; + String TMAGIC = "ustar"; + /** + * The magic tag representing a POSIX tar archive. + */ + String MAGIC_POSIX = "ustar\0"; + String VERSION_POSIX = "00"; + /** * The magic tag representing a GNU tar archive. */ String GNU_TMAGIC = "ustar "; + // Appear to be two possible GNU versions + String VERSION_GNU_SPACE = " \0"; + String VERSION_GNU_ZERO = "0\0"; /** - * The namr of the GNU tar entry which contains a long name. + * The name of the GNU tar entry which contains a long name. */ String GNU_LONGLINK = "././@LongLink"; - /** - * Identifies the *next* file on the tape as having a long name. - */ - byte LF_GNUTYPE_LONGNAME = (byte) 'L'; } diff --git a/src/main/org/apache/tools/tar/TarEntry.java b/src/main/org/apache/tools/tar/TarEntry.java index bca54861d..03a699695 100644 --- a/src/main/org/apache/tools/tar/TarEntry.java +++ b/src/main/org/apache/tools/tar/TarEntry.java @@ -24,9 +24,13 @@ package org.apache.tools.tar; import java.io.File; +import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Date; import java.util.Locale; +import org.apache.tools.zip.ZipEncoding; + /** * This class represents an entry in a Tar archive. It consists * of the entry's header, as well as the entry's File. Entries @@ -72,13 +76,44 @@ import java.util.Locale; * char devmajor[8]; * char devminor[8]; * } header; + * All unused bytes are set to null. + * New-style GNU tar files are slightly different from the above. + * For values of size larger than 077777777777L (11 7s) + * or uid and gid larger than 07777777L (7 7s) + * the sign bit of the first byte is set, and the rest of the + * field is the binary representation of the number. + * See TarUtils.parseOctalOrBinary. + * + * + *
+ * The C structure for an old GNU Tar Entry's header is: + *
+ * struct oldgnu_header { + * char unused_pad1[345]; // TarConstants.PAD1LEN_GNU - offset 0 + * char atime[12]; // TarConstants.ATIMELEN_GNU - offset 345 + * char ctime[12]; // TarConstants.CTIMELEN_GNU - offset 357 + * char offset[12]; // TarConstants.OFFSETLEN_GNU - offset 369 + * char longnames[4]; // TarConstants.LONGNAMESLEN_GNU - offset 381 + * char unused_pad2; // TarConstants.PAD2LEN_GNU - offset 385 + * struct sparse sp[4]; // TarConstants.SPARSELEN_GNU - offset 386 + * char isextended; // TarConstants.ISEXTENDEDLEN_GNU - offset 482 + * char realsize[12]; // TarConstants.REALSIZELEN_GNU - offset 483 + * char unused_pad[17]; // TarConstants.PAD3LEN_GNU - offset 495 + * }; + *+ * Whereas, "struct sparse" is: + *
+ * struct sparse { + * char offset[12]; // offset 0 + * char numbytes[12]; // offset 12 + * }; ** */ public class TarEntry implements TarConstants { /** The entry's name. */ - private StringBuffer name; + private String name; /** The entry's permission mode. */ private int mode; @@ -99,16 +134,18 @@ public class TarEntry implements TarConstants { private byte linkFlag; /** The entry's link name. */ - private StringBuffer linkName; + private String linkName; /** The entry's magic tag. */ - private StringBuffer magic; + private String magic; + /** The version of the format */ + private String version; /** The entry's user name. */ - private StringBuffer userName; + private String userName; /** The entry's group name. */ - private StringBuffer groupName; + private String groupName; /** The entry's major device number. */ private int devMajor; @@ -116,6 +153,12 @@ public class TarEntry implements TarConstants { /** The entry's minor device number. */ private int devMinor; + /** If an extension sparse header follows. */ + private boolean isExtended; + + /** The entry's real size in case of a sparse file. */ + private long realSize; + /** The entry's file reference */ private File file; @@ -134,10 +177,11 @@ public class TarEntry implements TarConstants { /** * Construct an empty entry and prepares the header values. 
*/ - private TarEntry () { - this.magic = new StringBuffer(TMAGIC); - this.name = new StringBuffer(); - this.linkName = new StringBuffer(); + private TarEntry() { + this.magic = MAGIC_POSIX; + this.version = VERSION_POSIX; + this.name = ""; + this.linkName = ""; String user = System.getProperty("user.name", ""); @@ -147,8 +191,8 @@ public class TarEntry implements TarConstants { this.userId = 0; this.groupId = 0; - this.userName = new StringBuffer(user); - this.groupName = new StringBuffer(""); + this.userName = user; + this.groupName = ""; this.file = null; } @@ -178,19 +222,16 @@ public class TarEntry implements TarConstants { this.devMajor = 0; this.devMinor = 0; - this.name = new StringBuffer(name); + this.name = name; this.mode = isDir ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE; this.linkFlag = isDir ? LF_DIR : LF_NORMAL; this.userId = 0; this.groupId = 0; this.size = 0; this.modTime = (new Date()).getTime() / MILLIS_PER_SECOND; - this.linkName = new StringBuffer(""); - this.userName = new StringBuffer(""); - this.groupName = new StringBuffer(""); - this.devMajor = 0; - this.devMinor = 0; - + this.linkName = ""; + this.userName = ""; + this.groupName = ""; } /** @@ -203,38 +244,52 @@ public class TarEntry implements TarConstants { this(name); this.linkFlag = linkFlag; if (linkFlag == LF_GNUTYPE_LONGNAME) { - magic = new StringBuffer(GNU_TMAGIC); + magic = GNU_TMAGIC; + version = VERSION_GNU_SPACE; } } /** * Construct an entry for a file. File is set to file, and the * header is constructed from information from the file. + * The name is set from the normalized file path. * * @param file The file that the entry represents. */ public TarEntry(File file) { + this(file, normalizeFileName(file.getPath(), false)); + } + + /** + * Construct an entry for a file. File is set to file, and the + * header is constructed from information from the file. + * + * @param file The file that the entry represents. + * @param fileName the name to be used for the entry. 
+ */ + public TarEntry(File file, String fileName) { this(); this.file = file; - String fileName = normalizeFileName(file.getPath(), false); - this.linkName = new StringBuffer(""); - this.name = new StringBuffer(fileName); + this.linkName = ""; if (file.isDirectory()) { this.mode = DEFAULT_DIR_MODE; this.linkFlag = LF_DIR; - int nameLength = name.length(); - if (nameLength == 0 || name.charAt(nameLength - 1) != '/') { - this.name.append("/"); + int nameLength = fileName.length(); + if (nameLength == 0 || fileName.charAt(nameLength - 1) != '/') { + this.name = fileName + "/"; + } else { + this.name = fileName; } this.size = 0; } else { this.mode = DEFAULT_FILE_MODE; this.linkFlag = LF_NORMAL; this.size = file.length(); + this.name = fileName; } this.modTime = file.lastModified() / MILLIS_PER_SECOND; @@ -247,12 +302,27 @@ public class TarEntry implements TarConstants { * to null. * * @param headerBuf The header bytes from a tar archive entry. + * @throws IllegalArgumentException if any of the numeric fields have an invalid format */ public TarEntry(byte[] headerBuf) { this(); parseTarHeader(headerBuf); } + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @param encoding encoding to use for file names + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + */ + public TarEntry(byte[] headerBuf, ZipEncoding encoding) + throws IOException { + this(); + parseTarHeader(headerBuf, encoding); + } + /** * Determine if the two entries are equal. Equality is determined * by the header names being equal. @@ -271,6 +341,7 @@ public class TarEntry implements TarConstants { * @param it Entry to be checked for equality. * @return True if the entries are equal. 
*/ + @Override public boolean equals(Object it) { if (it == null || getClass() != it.getClass()) { return false; @@ -283,6 +354,7 @@ public class TarEntry implements TarConstants { * * @return the entry hashcode */ + @Override public int hashCode() { return getName().hashCode(); } @@ -314,7 +386,7 @@ public class TarEntry implements TarConstants { * @param name This entry's new name. */ public void setName(String name) { - this.name = new StringBuffer(normalizeFileName(name, false)); + this.name = normalizeFileName(name, false); } /** @@ -335,6 +407,15 @@ public class TarEntry implements TarConstants { return linkName.toString(); } + /** + * Set this entry's link name. + * + * @param link the link name to use. + */ + public void setLinkName(String link) { + this.linkName = link; + } + /** * Get this entry's user id. * @@ -386,7 +467,7 @@ public class TarEntry implements TarConstants { * @param userName This entry's new user name. */ public void setUserName(String userName) { - this.userName = new StringBuffer(userName); + this.userName = userName; } /** @@ -404,7 +485,7 @@ public class TarEntry implements TarConstants { * @param groupName This entry's new group name. */ public void setGroupName(String groupName) { - this.groupName = new StringBuffer(groupName); + this.groupName = groupName; } /** @@ -488,11 +569,88 @@ public class TarEntry implements TarConstants { * Set this entry's file size. * * @param size This entry's new file size. + * @throws IllegalArgumentException if the size is < 0. */ public void setSize(long size) { + if (size < 0){ + throw new IllegalArgumentException("Size is out of range: "+size); + } this.size = size; } + /** + * Get this entry's major device number. + * + * @return This entry's major device number. + */ + public int getDevMajor() { + return devMajor; + } + + /** + * Set this entry's major device number. + * + * @param devNo This entry's major device number. + * @throws IllegalArgumentException if the devNo is < 0. 
+ */ + public void setDevMajor(int devNo) { + if (devNo < 0){ + throw new IllegalArgumentException("Major device number is out of " + + "range: " + devNo); + } + this.devMajor = devNo; + } + + /** + * Get this entry's minor device number. + * + * @return This entry's minor device number. + */ + public int getDevMinor() { + return devMinor; + } + + /** + * Set this entry's minor device number. + * + * @param devNo This entry's minor device number. + * @throws IllegalArgumentException if the devNo is < 0. + */ + public void setDevMinor(int devNo) { + if (devNo < 0){ + throw new IllegalArgumentException("Minor device number is out of " + + "range: " + devNo); + } + this.devMinor = devNo; + } + + /** + * Indicates in case of a sparse file if an extension sparse header + * follows. + * + * @return true if an extension sparse header follows. + */ + public boolean isExtended() { + return isExtended; + } + + /** + * Get this entry's real file size in case of a sparse file. + * + * @return This entry's real file size. + */ + public long getRealSize() { + return realSize; + } + + /** + * Indicate if this entry is a GNU sparse block + * + * @return true if this is a sparse extension provided by GNU tar + */ + public boolean isGNUSparse() { + return linkFlag == LF_GNUTYPE_SPARSE; + } /** * Indicate if this entry is a GNU long name block @@ -501,7 +659,26 @@ public class TarEntry implements TarConstants { */ public boolean isGNULongNameEntry() { return linkFlag == LF_GNUTYPE_LONGNAME - && name.toString().equals(GNU_LONGLINK); + && name.equals(GNU_LONGLINK); + } + + /** + * Check if this is a Pax header. + * + * @return {@code true} if this is a Pax header. + */ + public boolean isPaxHeader(){ + return linkFlag == LF_PAX_EXTENDED_HEADER_LC + || linkFlag == LF_PAX_EXTENDED_HEADER_UC; + } + + /** + * Check if this is a global Pax header. + * + * @return {@code true} if this is a global Pax header. 
+ */ + public boolean isGlobalPaxHeader(){ + return linkFlag == LF_PAX_GLOBAL_EXTENDED_HEADER; } /** @@ -525,6 +702,54 @@ public class TarEntry implements TarConstants { return false; } + /** + * Check if this is a "normal file" + */ + public boolean isFile() { + if (file != null) { + return file.isFile(); + } + if (linkFlag == LF_OLDNORM || linkFlag == LF_NORMAL) { + return true; + } + return !getName().endsWith("/"); + } + + /** + * Check if this is a symbolic link entry. + */ + public boolean isSymbolicLink() { + return linkFlag == LF_SYMLINK; + } + + /** + * Check if this is a link entry. + */ + public boolean isLink() { + return linkFlag == LF_LINK; + } + + /** + * Check if this is a character device entry. + */ + public boolean isCharacterDevice() { + return linkFlag == LF_CHR; + } + + /** + * Check if this is a block device entry. + */ + public boolean isBlockDevice() { + return linkFlag == LF_BLK; + } + + /** + * Check if this is a FIFO (pipe) entry. + */ + public boolean isFIFO() { + return linkFlag == LF_FIFO; + } + /** * If this entry represents a file, and the file is a directory, return * an array of TarEntries for this entry's children. @@ -549,17 +774,46 @@ public class TarEntry implements TarConstants { /** * Write an entry's header information to a header buffer. * + *
This method does not use the star/GNU tar/BSD tar extensions.
+ * * @param outbuf The tar entry header buffer to fill in. */ public void writeEntryHeader(byte[] outbuf) { + try { + writeEntryHeader(outbuf, TarUtils.DEFAULT_ENCODING, false); + } catch (IOException ex) { + try { + writeEntryHeader(outbuf, TarUtils.FALLBACK_ENCODING, false); + } catch (IOException ex2) { + // impossible + throw new RuntimeException(ex2); + } + } + } + + /** + * Write an entry's header information to a header buffer. + * + * @param outbuf The tar entry header buffer to fill in. + * @param encoding encoding to use when writing the file name. + * @param starMode whether to use the star/GNU tar/BSD tar + * extension for numeric fields if their value doesn't fit in the + * maximum size of standard tar archives + */ + public void writeEntryHeader(byte[] outbuf, ZipEncoding encoding, + boolean starMode) throws IOException { int offset = 0; - offset = TarUtils.getNameBytes(name, outbuf, offset, NAMELEN); - offset = TarUtils.getOctalBytes(mode, outbuf, offset, MODELEN); - offset = TarUtils.getOctalBytes(userId, outbuf, offset, UIDLEN); - offset = TarUtils.getOctalBytes(groupId, outbuf, offset, GIDLEN); - offset = TarUtils.getLongOctalBytes(size, outbuf, offset, SIZELEN); - offset = TarUtils.getLongOctalBytes(modTime, outbuf, offset, MODTIMELEN); + offset = TarUtils.formatNameBytes(name, outbuf, offset, NAMELEN, + encoding); + offset = writeEntryHeaderField(mode, outbuf, offset, MODELEN, starMode); + offset = writeEntryHeaderField(userId, outbuf, offset, UIDLEN, + starMode); + offset = writeEntryHeaderField(groupId, outbuf, offset, GIDLEN, + starMode); + offset = writeEntryHeaderField(size, outbuf, offset, SIZELEN, starMode); + offset = writeEntryHeaderField(modTime, outbuf, offset, MODTIMELEN, + starMode); int csOffset = offset; @@ -568,12 +822,18 @@ public class TarEntry implements TarConstants { } outbuf[offset++] = linkFlag; - offset = TarUtils.getNameBytes(linkName, outbuf, offset, NAMELEN); - offset = TarUtils.getNameBytes(magic, outbuf, offset, 
MAGICLEN); - offset = TarUtils.getNameBytes(userName, outbuf, offset, UNAMELEN); - offset = TarUtils.getNameBytes(groupName, outbuf, offset, GNAMELEN); - offset = TarUtils.getOctalBytes(devMajor, outbuf, offset, DEVLEN); - offset = TarUtils.getOctalBytes(devMinor, outbuf, offset, DEVLEN); + offset = TarUtils.formatNameBytes(linkName, outbuf, offset, NAMELEN, + encoding); + offset = TarUtils.formatNameBytes(magic, outbuf, offset, PURE_MAGICLEN); + offset = TarUtils.formatNameBytes(version, outbuf, offset, VERSIONLEN); + offset = TarUtils.formatNameBytes(userName, outbuf, offset, UNAMELEN, + encoding); + offset = TarUtils.formatNameBytes(groupName, outbuf, offset, GNAMELEN, + encoding); + offset = writeEntryHeaderField(devMajor, outbuf, offset, DEVLEN, + starMode); + offset = writeEntryHeaderField(devMinor, outbuf, offset, DEVLEN, + starMode); while (offset < outbuf.length) { outbuf[offset++] = 0; @@ -581,42 +841,122 @@ public class TarEntry implements TarConstants { long chk = TarUtils.computeCheckSum(outbuf); - TarUtils.getCheckSumOctalBytes(chk, outbuf, csOffset, CHKSUMLEN); + TarUtils.formatCheckSumOctalBytes(chk, outbuf, csOffset, CHKSUMLEN); + } + + private int writeEntryHeaderField(long value, byte[] outbuf, int offset, + int length, boolean starMode) { + if (!starMode && (value < 0 + || value >= (1l << (3 * (length - 1))))) { + // value doesn't fit into field when written as octal + // number, will be written to PAX header or causes an + // error + return TarUtils.formatLongOctalBytes(0, outbuf, offset, length); + } + return TarUtils.formatLongOctalOrBinaryBytes(value, outbuf, offset, + length); } /** * Parse an entry's header information from a header buffer. * * @param header The tar entry header buffer to get information from. 
+ * @throws IllegalArgumentException if any of the numeric fields have an invalid format */ public void parseTarHeader(byte[] header) { + try { + parseTarHeader(header, TarUtils.DEFAULT_ENCODING); + } catch (IOException ex) { + try { + parseTarHeader(header, TarUtils.DEFAULT_ENCODING, true); + } catch (IOException ex2) { + // not really possible + throw new RuntimeException(ex2); + } + } + } + + /** + * Parse an entry's header information from a header buffer. + * + * @param header The tar entry header buffer to get information from. + * @param encoding encoding to use for file names + * @throws IllegalArgumentException if any of the numeric fields + * have an invalid format + */ + public void parseTarHeader(byte[] header, ZipEncoding encoding) + throws IOException { + parseTarHeader(header, encoding, false); + } + + private void parseTarHeader(byte[] header, ZipEncoding encoding, + final boolean oldStyle) + throws IOException { int offset = 0; - name = TarUtils.parseName(header, offset, NAMELEN); + name = oldStyle ? 
TarUtils.parseName(header, offset, NAMELEN) + : TarUtils.parseName(header, offset, NAMELEN, encoding); offset += NAMELEN; - mode = (int) TarUtils.parseOctal(header, offset, MODELEN); + mode = (int) TarUtils.parseOctalOrBinary(header, offset, MODELEN); offset += MODELEN; - userId = (int) TarUtils.parseOctal(header, offset, UIDLEN); + userId = (int) TarUtils.parseOctalOrBinary(header, offset, UIDLEN); offset += UIDLEN; - groupId = (int) TarUtils.parseOctal(header, offset, GIDLEN); + groupId = (int) TarUtils.parseOctalOrBinary(header, offset, GIDLEN); offset += GIDLEN; - size = TarUtils.parseOctal(header, offset, SIZELEN); + size = TarUtils.parseOctalOrBinary(header, offset, SIZELEN); offset += SIZELEN; - modTime = TarUtils.parseOctal(header, offset, MODTIMELEN); + modTime = TarUtils.parseOctalOrBinary(header, offset, MODTIMELEN); offset += MODTIMELEN; offset += CHKSUMLEN; linkFlag = header[offset++]; - linkName = TarUtils.parseName(header, offset, NAMELEN); + linkName = oldStyle ? TarUtils.parseName(header, offset, NAMELEN) + : TarUtils.parseName(header, offset, NAMELEN, encoding); offset += NAMELEN; - magic = TarUtils.parseName(header, offset, MAGICLEN); - offset += MAGICLEN; - userName = TarUtils.parseName(header, offset, UNAMELEN); + magic = TarUtils.parseName(header, offset, PURE_MAGICLEN); + offset += PURE_MAGICLEN; + version = TarUtils.parseName(header, offset, VERSIONLEN); + offset += VERSIONLEN; + userName = oldStyle ? TarUtils.parseName(header, offset, UNAMELEN) + : TarUtils.parseName(header, offset, UNAMELEN, encoding); offset += UNAMELEN; - groupName = TarUtils.parseName(header, offset, GNAMELEN); + groupName = oldStyle ? 
TarUtils.parseName(header, offset, GNAMELEN) + : TarUtils.parseName(header, offset, GNAMELEN, encoding); offset += GNAMELEN; - devMajor = (int) TarUtils.parseOctal(header, offset, DEVLEN); + devMajor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN); offset += DEVLEN; - devMinor = (int) TarUtils.parseOctal(header, offset, DEVLEN); + devMinor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN); + offset += DEVLEN; + + int type = evaluateType(header); + switch (type) { + case FORMAT_OLDGNU: { + offset += ATIMELEN_GNU; + offset += CTIMELEN_GNU; + offset += OFFSETLEN_GNU; + offset += LONGNAMESLEN_GNU; + offset += PAD2LEN_GNU; + offset += SPARSELEN_GNU; + isExtended = TarUtils.parseBoolean(header, offset); + offset += ISEXTENDEDLEN_GNU; + realSize = TarUtils.parseOctal(header, offset, REALSIZELEN_GNU); + offset += REALSIZELEN_GNU; + break; + } + case FORMAT_POSIX: + default: { + String prefix = oldStyle + ? TarUtils.parseName(header, offset, PREFIXLEN) + : TarUtils.parseName(header, offset, PREFIXLEN, encoding); + // SunOS tar -E does not add / to directory names, so fix + // up to be consistent + if (isDirectory() && !name.endsWith("/")){ + name = name + "/"; + } + if (prefix.length() > 0){ + name = prefix + "/" + name; + } + } + } } /** @@ -661,4 +1001,85 @@ public class TarEntry implements TarConstants { } return fileName; } + + /** + * Evaluate an entry's header format from a header buffer. + * + * @param header The tar entry header buffer to evaluate the format for. + * @return format type + */ + private int evaluateType(byte[] header) { + if (matchAsciiBuffer(GNU_TMAGIC, header, MAGIC_OFFSET, PURE_MAGICLEN)) { + return FORMAT_OLDGNU; + } + if (matchAsciiBuffer(MAGIC_POSIX, header, MAGIC_OFFSET, PURE_MAGICLEN)) { + return FORMAT_POSIX; + } + return 0; + } + + /** + * Check if buffer contents matches Ascii String. 
+ * + * @param expected + * @param buffer + * @param offset + * @param length + * @return {@code true} if buffer is the same as the expected string + */ + private static boolean matchAsciiBuffer(String expected, byte[] buffer, + int offset, int length){ + byte[] buffer1; + try { + buffer1 = expected.getBytes("ASCII"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Should not happen + } + return isEqual(buffer1, 0, buffer1.length, buffer, offset, length, + false); + } + + /** + * Compare byte buffers, optionally ignoring trailing nulls + * + * @param buffer1 + * @param offset1 + * @param length1 + * @param buffer2 + * @param offset2 + * @param length2 + * @param ignoreTrailingNulls + * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls + */ + private static boolean isEqual( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2, + boolean ignoreTrailingNulls){ + int minLen=length1 < length2 ? 
length1 : length2; + for (int i=0; i < minLen; i++){ + if (buffer1[offset1+i] != buffer2[offset2+i]){ + return false; + } + } + if (length1 == length2){ + return true; + } + if (ignoreTrailingNulls){ + if (length1 > length2){ + for(int i = length2; i < length1; i++){ + if (buffer1[offset1+i] != 0){ + return false; + } + } + } else { + for(int i = length1; i < length2; i++){ + if (buffer2[offset2+i] != 0){ + return false; + } + } + } + return true; + } + return false; + } } diff --git a/src/main/org/apache/tools/tar/TarInputStream.java b/src/main/org/apache/tools/tar/TarInputStream.java index 00f96746b..73e90e955 100644 --- a/src/main/org/apache/tools/tar/TarInputStream.java +++ b/src/main/org/apache/tools/tar/TarInputStream.java @@ -23,10 +23,17 @@ package org.apache.tools.tar; +import java.io.ByteArrayOutputStream; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.tools.zip.ZipEncoding; +import org.apache.tools.zip.ZipEncodingHelper; /** * The TarInputStream reads a UNIX tar archive as an InputStream. @@ -59,6 +66,8 @@ public class TarInputStream extends FilterInputStream { // CheckStyle:VisibilityModifier ON + private final ZipEncoding encoding; + /** * Constructor for TarInputStream. * @param is the input stream to use @@ -67,6 +76,15 @@ public class TarInputStream extends FilterInputStream { this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); } + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param encoding name of the encoding to use for file names + */ + public TarInputStream(InputStream is, String encoding) { + this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + /** * Constructor for TarInputStream. 
* @param is the input stream to use @@ -76,6 +94,16 @@ public class TarInputStream extends FilterInputStream { this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE); } + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use + * @param encoding name of the encoding to use for file names + */ + public TarInputStream(InputStream is, int blockSize, String encoding) { + this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + /** * Constructor for TarInputStream. * @param is the input stream to use @@ -83,13 +111,25 @@ public class TarInputStream extends FilterInputStream { * @param recordSize the record size to use */ public TarInputStream(InputStream is, int blockSize, int recordSize) { - super(is); + this(is, blockSize, recordSize, null); + } + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use + * @param encoding name of the encoding to use for file names + */ + public TarInputStream(InputStream is, int blockSize, int recordSize, + String encoding) { + super(is); this.buffer = new TarBuffer(is, blockSize, recordSize); this.readBuf = null; this.oneBuf = new byte[1]; this.debug = false; this.hasHitEOF = false; + this.encoding = ZipEncodingHelper.getZipEncoding(encoding); } /** @@ -106,6 +146,7 @@ public class TarInputStream extends FilterInputStream { * Closes this stream. Calls the TarBuffer's close() method. * @throws IOException on error */ + @Override public void close() throws IOException { buffer.close(); } @@ -131,6 +172,7 @@ public class TarInputStream extends FilterInputStream { * @return The number of available bytes for the current entry. 
* @throws IOException for signature */ + @Override public int available() throws IOException { if (entrySize - entryOffset > Integer.MAX_VALUE) { return Integer.MAX_VALUE; @@ -148,6 +190,7 @@ public class TarInputStream extends FilterInputStream { * @return the number actually skipped * @throws IOException on error */ + @Override public long skip(long numToSkip) throws IOException { // REVIEW // This is horribly inefficient, but it ensures that we @@ -171,6 +214,7 @@ public class TarInputStream extends FilterInputStream { * * @return False. */ + @Override public boolean markSupported() { return false; } @@ -180,12 +224,14 @@ public class TarInputStream extends FilterInputStream { * * @param markLimit The limit to mark. */ + @Override public void mark(int markLimit) { } /** * Since we do not support marking just yet, we do nothing. */ + @Override public void reset() { } @@ -230,44 +276,37 @@ public class TarInputStream extends FilterInputStream { readBuf = null; } - byte[] headerBuf = buffer.readRecord(); - - if (headerBuf == null) { - if (debug) { - System.err.println("READ NULL RECORD"); - } - hasHitEOF = true; - } else if (buffer.isEOFRecord(headerBuf)) { - if (debug) { - System.err.println("READ EOF RECORD"); - } - hasHitEOF = true; - } + byte[] headerBuf = getRecord(); if (hasHitEOF) { currEntry = null; - } else { - currEntry = new TarEntry(headerBuf); - - if (debug) { - System.err.println("TarInputStream: SET CURRENTRY '" - + currEntry.getName() - + "' size = " - + currEntry.getSize()); - } - - entryOffset = 0; + return null; + } - entrySize = currEntry.getSize(); + try { + currEntry = new TarEntry(headerBuf, encoding); + } catch (IllegalArgumentException e) { + IOException ioe = new IOException("Error detected parsing the header"); + ioe.initCause(e); + throw ioe; } + if (debug) { + System.err.println("TarInputStream: SET CURRENTRY '" + + currEntry.getName() + + "' size = " + + currEntry.getSize()); + } + + entryOffset = 0; + entrySize = currEntry.getSize(); 
- if (currEntry != null && currEntry.isGNULongNameEntry()) { + if (currEntry.isGNULongNameEntry()) { // read in the name StringBuffer longName = new StringBuffer(); byte[] buf = new byte[SMALL_BUFFER_SIZE]; int length = 0; while ((length = read(buf)) >= 0) { - longName.append(new String(buf, 0, length)); + longName.append(new String(buf, 0, length)); // TODO default charset? } getNextEntry(); if (currEntry == null) { @@ -283,9 +322,176 @@ public class TarInputStream extends FilterInputStream { currEntry.setName(longName.toString()); } + if (currEntry.isPaxHeader()){ // Process Pax headers + paxHeaders(); + } + + if (currEntry.isGNUSparse()){ // Process sparse files + readGNUSparse(); + } + + // If the size of the next element in the archive has changed + // due to a new size being reported in the posix header + // information, we update entrySize here so that it contains + // the correct value. + entrySize = currEntry.getSize(); return currEntry; } + /** + * Get the next record in this tar archive. This will skip + * over any remaining data in the current entry, if there + * is one, and place the input stream at the header of the + * next entry. + * If there are no more entries in the archive, null will + * be returned to indicate that the end of the archive has + * been reached. + * + * @return The next header in the archive, or null. + * @throws IOException on error + */ + private byte[] getRecord() throws IOException { + if (hasHitEOF) { + return null; + } + + byte[] headerBuf = buffer.readRecord(); + + if (headerBuf == null) { + if (debug) { + System.err.println("READ NULL RECORD"); + } + hasHitEOF = true; + } else if (buffer.isEOFRecord(headerBuf)) { + if (debug) { + System.err.println("READ EOF RECORD"); + } + hasHitEOF = true; + } + + return hasHitEOF ? null : headerBuf; + } + + private void paxHeaders() throws IOException{ + MapMay return false if the current entry is a sparse file.
+ */ + public boolean canReadEntryData(TarEntry te) { + return !te.isGNUSparse(); + } } + diff --git a/src/main/org/apache/tools/tar/TarOutputStream.java b/src/main/org/apache/tools/tar/TarOutputStream.java index defab0de9..350801ea1 100644 --- a/src/main/org/apache/tools/tar/TarOutputStream.java +++ b/src/main/org/apache/tools/tar/TarOutputStream.java @@ -23,9 +23,15 @@ package org.apache.tools.tar; +import java.io.File; import java.io.FilterOutputStream; -import java.io.OutputStream; import java.io.IOException; +import java.io.OutputStream; +import java.io.StringWriter; +import java.util.HashMap; +import java.util.Map; +import org.apache.tools.zip.ZipEncoding; +import org.apache.tools.zip.ZipEncodingHelper; /** * The TarOutputStream writes a UNIX tar archive as an OutputStream. @@ -43,6 +49,18 @@ public class TarOutputStream extends FilterOutputStream { /** GNU tar extensions are used to store long file names in the archive. */ public static final int LONGFILE_GNU = 2; + /** POSIX/PAX extensions are used to store long file names in the archive. */ + public static final int LONGFILE_POSIX = 3; + + /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */ + public static final int BIGNUMBER_ERROR = 0; + + /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */ + public static final int BIGNUMBER_STAR = 1; + + /** POSIX/PAX extensions are used to store big numbers in the archive. 
*/ + public static final int BIGNUMBER_POSIX = 2; + // CheckStyle:VisibilityModifier OFF - bc protected boolean debug; protected long currSize; @@ -56,8 +74,22 @@ public class TarOutputStream extends FilterOutputStream { protected int longFileMode = LONGFILE_ERROR; // CheckStyle:VisibilityModifier ON + private int bigNumberMode = BIGNUMBER_ERROR; + private boolean closed = false; + /** Indicates if putNextEntry has been called without closeEntry */ + private boolean haveUnclosedEntry = false; + + /** indicates if this archive is finished */ + private boolean finished = false; + + private final ZipEncoding encoding; + + private boolean addPaxHeadersForNonAsciiNames = false; + private static final ZipEncoding ASCII = + ZipEncodingHelper.getZipEncoding("ASCII"); + /** * Constructor for TarInputStream. * @param os the output stream to use @@ -66,6 +98,15 @@ public class TarOutputStream extends FilterOutputStream { this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); } + /** + * Constructor for TarInputStream. + * @param os the output stream to use + * @param encoding name of the encoding to use for file names + */ + public TarOutputStream(OutputStream os, String encoding) { + this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + /** * Constructor for TarInputStream. * @param os the output stream to use @@ -75,6 +116,16 @@ public class TarOutputStream extends FilterOutputStream { this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE); } + /** + * Constructor for TarInputStream. + * @param os the output stream to use + * @param blockSize the block size to use + * @param encoding name of the encoding to use for file names + */ + public TarOutputStream(OutputStream os, int blockSize, String encoding) { + this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + /** * Constructor for TarInputStream. 
* @param os the output stream to use @@ -82,7 +133,20 @@ public class TarOutputStream extends FilterOutputStream { * @param recordSize the record size to use */ public TarOutputStream(OutputStream os, int blockSize, int recordSize) { + this(os, blockSize, recordSize, null); + } + + /** + * Constructor for TarOutputStream. + * @param os the output stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use + * @param encoding name of the encoding to use for file names + */ + public TarOutputStream(OutputStream os, int blockSize, int recordSize, + String encoding) { super(os); + this.encoding = ZipEncodingHelper.getZipEncoding(encoding); this.buffer = new TarBuffer(os, blockSize, recordSize); this.debug = false; @@ -103,6 +167,23 @@ public class TarOutputStream extends FilterOutputStream { this.longFileMode = longFileMode; } + /** + * Set the big number mode. + * This can be BIGNUMBER_ERROR(0), BIGNUMBER_STAR(1) or BIGNUMBER_POSIX(2). + * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values too big to fit into a traditional tar header. + * Default is BIGNUMBER_ERROR. + * @param bigNumberMode the mode to use + */ + public void setBigNumberMode(int bigNumberMode) { + this.bigNumberMode = bigNumberMode; + } + + /** + * Whether to add a PAX extension header for non-ASCII file names. + */ + public void setAddPaxHeadersForNonAsciiNames(boolean b) { + addPaxHeadersForNonAsciiNames = b; + } /** * Sets the debugging flag. @@ -124,15 +205,25 @@ public class TarOutputStream extends FilterOutputStream { /** * Ends the TAR archive without closing the underlying OutputStream. - * The result is that the two EOF records of nulls are written. + * + * An archive consists of a series of file entries terminated by an + * end-of-archive entry, which consists of two 512 blocks of zero bytes. + * POSIX.1 requires two EOF records, like some other implementations. 
+ * * @throws IOException on error */ public void finish() throws IOException { - // See Bugzilla 28776 for a discussion on this - // http://issues.apache.org/bugzilla/show_bug.cgi?id=28776 + if (finished) { + throw new IOException("This archive has already been finished"); + } + + if (haveUnclosedEntry) { + throw new IOException("This archives contains unclosed entries."); + } writeEOFRecord(); writeEOFRecord(); buffer.flushBlock(); + finished = true; } /** @@ -141,9 +232,13 @@ public class TarOutputStream extends FilterOutputStream { * TarBuffer's close(). * @throws IOException on error */ + @Override public void close() throws IOException { - if (!closed) { + if(!finished) { finish(); + } + + if (!closed) { buffer.close(); out.close(); closed = true; @@ -172,27 +267,59 @@ public class TarOutputStream extends FilterOutputStream { * @throws IOException on error */ public void putNextEntry(TarEntry entry) throws IOException { - if (entry.getName().length() >= TarConstants.NAMELEN) { - - if (longFileMode == LONGFILE_GNU) { + if(finished) { + throw new IOException("Stream has already been finished"); + } + MapLeading spaces are ignored. + * The buffer must contain a trailing space or NUL, + * and may contain an additional trailing space or NUL.
+ * + *The input buffer is allowed to contain all NULs, + * in which case the method returns 0L + * (this allows for missing fields).
* - * @param header The header buffer from which to parse. + *To work-around some tar implementations that insert a + * leading NUL this method returns 0 if it detects a leading NUL + * since Ant 1.9.
+ * + * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. - * @param length The number of header bytes to parse. + * @param length The maximum number of bytes to parse - must be at least 2 bytes. * @return The long value of the octal string. + * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. */ - public static long parseOctal(byte[] header, int offset, int length) { + public static long parseOctal(final byte[] buffer, final int offset, final int length) { long result = 0; - boolean stillPadding = true; int end = offset + length; + int start = offset; - for (int i = offset; i < end; ++i) { - if (header[i] == 0) { - break; - } + if (length < 2){ + throw new IllegalArgumentException("Length "+length+" must be at least 2"); + } - if (header[i] == (byte) ' ' || header[i] == '0') { - if (stillPadding) { - continue; - } + if (buffer[start] == 0) { + return 0L; + } - if (header[i] == (byte) ' ') { - break; - } + // Skip leading spaces + while (start < end){ + if (buffer[start] == ' '){ + start++; + } else { + break; } + } - stillPadding = false; + // Must have trailing NUL or space + byte trailer; + trailer = buffer[end-1]; + if (trailer == 0 || trailer == ' '){ + end--; + } else { + throw new IllegalArgumentException( + exceptionMessage(buffer, offset, length, end-1, trailer)); + } + // May have additional NUL or space + trailer = buffer[end-1]; + if (trailer == 0 || trailer == ' '){ + end--; + } + + for ( ;start < end; start++) { + final byte currentByte = buffer[start]; // CheckStyle:MagicNumber OFF - result = (result << 3) + (header[i] - '0'); + if (currentByte < '0' || currentByte > '7'){ + throw new IllegalArgumentException( + exceptionMessage(buffer, offset, length, start, currentByte)); + } + result = (result << 3) + (currentByte - '0'); // convert from ASCII // CheckStyle:MagicNumber ON } return result; } - /** - * Parse an entry name from a header 
buffer. + /** + * Compute the value contained in a byte buffer. If the most + * significant bit of the first byte in the buffer is set, this + * bit is ignored and the rest of the buffer is interpreted as a + * binary number. Otherwise, the buffer is interpreted as an + * octal number as per the parseOctal function above. * - * @param header The header buffer from which to parse. + * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. - * @param length The number of header bytes to parse. - * @return The header's entry name. + * @param length The maximum number of bytes to parse. + * @return The long value of the octal or binary string. + * @throws IllegalArgumentException if the trailing space/NUL is + * missing or an invalid byte is detected in an octal number, or + * if a binary number would exceed the size of a signed long + * 64-bit integer. */ - public static StringBuffer parseName(byte[] header, int offset, int length) { - StringBuffer result = new StringBuffer(length); - int end = offset + length; + public static long parseOctalOrBinary(final byte[] buffer, final int offset, + final int length) { - for (int i = offset; i < end; ++i) { - if (header[i] == 0) { - break; - } + if ((buffer[offset] & 0x80) == 0) { + return parseOctal(buffer, offset, length); + } + final boolean negative = buffer[offset] == (byte) 0xff; + if (length < 9) { + return parseBinaryLong(buffer, offset, length, negative); + } + return parseBinaryBigInteger(buffer, offset, length, negative); + } - result.append((char) header[i]); + private static long parseBinaryLong(final byte[] buffer, final int offset, + final int length, + final boolean negative) { + if (length >= 9) { + throw new IllegalArgumentException("At offset " + offset + ", " + + length + " byte binary number" + + " exceeds maximum signed long" + + " value"); + } + long val = 0; + for (int i = 1; i < length; i++) { + val = (val << 8) + (buffer[offset + i] & 0xff); } + 
if (negative) { + // 2's complement + val--; + val ^= ((long) Math.pow(2, (length - 1) * 8) - 1); + } + return negative ? -val : val; + } - return result; + private static long parseBinaryBigInteger(final byte[] buffer, + final int offset, + final int length, + final boolean negative) { + byte[] remainder = new byte[length - 1]; + System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); + BigInteger val = new BigInteger(remainder); + if (negative) { + // 2's complement + val = val.add(BigInteger.valueOf(-1)).not(); + } + if (val.bitLength() > 63) { + throw new IllegalArgumentException("At offset " + offset + ", " + + length + " byte binary number" + + " exceeds maximum signed long" + + " value"); + } + return negative ? -val.longValue() : val.longValue(); } /** - * Determine the number of bytes in an entry name. + * Parse a boolean byte from a buffer. + * Only the single byte at {@code offset} is examined. + * No other bytes are read, skipped or validated. * - * @param name The header name from which to parse. - * @param buf The buffer from which to parse. + * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. - * @param length The number of header bytes to parse. - * @return The number of bytes in a header's entry name. + * @return {@code true} if the byte at {@code offset} equals 1, {@code false} otherwise. + * @throws ArrayIndexOutOfBoundsException if {@code offset} lies outside the buffer. */ - public static int getNameBytes(StringBuffer name, byte[] buf, int offset, int length) { - int i; + public static boolean parseBoolean(final byte[] buffer, final int offset) { + return buffer[offset] == 1; + } - for (i = 0; i < length && i < name.length(); ++i) { - buf[offset + i] = (byte) name.charAt(i); + // Helper method to generate the exception message + private static String exceptionMessage(byte[] buffer, final int offset, + final int length, int current, final byte currentByte) { + String string = new String(buffer, offset, length); // TODO default charset? 
+ string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed + final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; + return s; + } + + /** + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @return The entry name. + */ + public static String parseName(byte[] buffer, final int offset, final int length) { + try { + return parseName(buffer, offset, length, DEFAULT_ENCODING); + } catch (IOException ex) { + try { + return parseName(buffer, offset, length, FALLBACK_ENCODING); + } catch (IOException ex2) { + // impossible + throw new RuntimeException(ex2); + } } + } - for (; i < length; ++i) { - buf[offset + i] = 0; + /** + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @param encoding name of the encoding to use for file names + * @return The entry name. + */ + public static String parseName(byte[] buffer, final int offset, + final int length, + final ZipEncoding encoding) + throws IOException { + + int len = length; + for (; len > 0; len--) { + if (buffer[offset + len - 1] != 0) { + break; + } + } + if (len > 0) { + byte[] b = new byte[len]; + System.arraycopy(buffer, offset, b, 0, len); + return encoding.decode(b); } + return ""; + } - return offset + length; + /** + * Copy a name into a buffer. + * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. 
+ * If the name is longer than the buffer, + * the output is truncated. + * + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @return The updated offset, i.e. offset + length + */ + public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { + try { + return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); + } catch (IOException ex) { + try { + return formatNameBytes(name, buf, offset, length, + FALLBACK_ENCODING); + } catch (IOException ex2) { + // impossible + throw new RuntimeException(ex2); + } + } } /** - * Parse an octal integer from a header buffer. + * Copy a name into a buffer. + * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. + * If the name is longer than the buffer, + * the output is truncated. * - * @param value The header value - * @param buf The buffer from which to parse. - * @param offset The offset into the buffer from which to parse. - * @param length The number of header bytes to parse. - * @return The integer value of the octal bytes. + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @param encoding name of the encoding to use for file names + * @return The updated offset, i.e. 
offset + length */ - public static int getOctalBytes(long value, byte[] buf, int offset, int length) { - int idx = length - 1; + public static int formatNameBytes(String name, byte[] buf, final int offset, + final int length, + final ZipEncoding encoding) + throws IOException { + int len = name.length(); + ByteBuffer b = encoding.encode(name); + while (b.limit() > length && len > 0) { + b = encoding.encode(name.substring(0, --len)); + } + final int limit = b.limit(); + System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); - buf[offset + idx] = 0; - --idx; - buf[offset + idx] = (byte) ' '; - --idx; + // Pad any remaining output bytes with NUL + for (int i = limit; i < length; ++i) { + buf[offset + i] = 0; + } + return offset + length; + } + + /** + * Fill buffer with unsigned octal number, padded with leading zeroes. + * + * @param value number to convert to octal - treated as unsigned + * @param buffer destination buffer + * @param offset starting offset in buffer + * @param length length of buffer to fill + * @throws IllegalArgumentException if the value will not fit in the buffer + */ + public static void formatUnsignedOctalString(final long value, byte[] buffer, + final int offset, final int length) { + int remaining = length; + remaining--; if (value == 0) { - buf[offset + idx] = (byte) '0'; - --idx; + buffer[offset + remaining--] = (byte) '0'; } else { - for (long val = value; idx >= 0 && val > 0; --idx) { + long val = value; + for (; remaining >= 0 && val != 0; --remaining) { // CheckStyle:MagicNumber OFF - buf[offset + idx] = (byte) ((byte) '0' + (byte) (val & 7)); - val = val >> 3; + buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); + val = val >>> 3; // CheckStyle:MagicNumber ON } + if (val != 0){ + throw new IllegalArgumentException + (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); + } } - for (; idx >= 0; --idx) { - buf[offset + idx] = (byte) ' '; + for (; remaining >= 0; 
--remaining) { // leading zeros + buffer[offset + remaining] = (byte) '0'; } + } + + /** + * Write an octal integer into a buffer. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by space and NUL + * + * @param value The value to write + * @param buf The buffer to receive the output + * @param offset The starting offset into the buffer + * @param length The size of the output buffer + * @return The updated offset, i.e offset+length + * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer + */ + public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) { + + int idx=length-2; // For space and trailing null + formatUnsignedOctalString(value, buf, offset, idx); + + buf[offset + idx++] = (byte) ' '; // Trailing space + buf[offset + idx] = 0; // Trailing null return offset + length; } /** - * Parse an octal long integer from a header buffer. - * - * @param value The header value - * @param buf The buffer from which to parse. - * @param offset The offset into the buffer from which to parse. - * @param length The number of header bytes to parse. - * @return The long value of the octal bytes. + * Write an octal long integer into a buffer. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by a space. + * + * @param value The value to write as octal + * @param buf The destinationbuffer. + * @param offset The starting offset into the buffer. 
+ * @param length The length of the buffer + * @return The updated offset + * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer */ - public static int getLongOctalBytes(long value, byte[] buf, int offset, int length) { - byte[] temp = new byte[length + 1]; + public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) { - getOctalBytes(value, temp, 0, length + 1); - System.arraycopy(temp, 0, buf, offset, length); + int idx=length-1; // For space + + formatUnsignedOctalString(value, buf, offset, idx); + buf[offset + idx] = (byte) ' '; // Trailing space return offset + length; } /** - * Parse the checksum octal integer from a header buffer. + * Write a long integer into a buffer as an octal string if this + * will fit, or as a binary number otherwise. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by a space. + * + * @param value The value to write into the buffer. + * @param buf The destination buffer. + * @param offset The starting offset into the buffer. + * @param length The length of the buffer. + * @return The updated offset. + * @throws IllegalArgumentException if the value (and trailer) + * will not fit in the buffer. + */ + public static int formatLongOctalOrBinaryBytes( + final long value, byte[] buf, final int offset, final int length) { + + // Check whether we are dealing with UID/GID or SIZE field + final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; + + final boolean negative = value < 0; + if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars + return formatLongOctalBytes(value, buf, offset, length); + } + + // Binary form: exactly one encoder must run, chosen by field width, + // otherwise the second call overwrites what the first one wrote. + if (length < 9) { + formatLongBinary(value, buf, offset, length, negative); + } else { + formatBigIntegerBinary(value, buf, offset, length, negative); + } + + buf[offset] = (byte) (negative ? 
0xff : 0x80); + return offset + length; + } + + // Writes value as a two's-complement binary number into all length bytes + // of buf starting at offset; the caller overwrites buf[offset] afterwards. + private static void formatLongBinary(final long value, byte[] buf, + final int offset, final int length, + final boolean negative) { + final int bits = (length - 1) * 8; + final long max = 1L << bits; + long val = Math.abs(value); + if (val >= max) { + throw new IllegalArgumentException("Value " + value + + " is too large for " + length + " byte field."); + } + if (negative) { + val ^= max - 1; + // long literal required: an int 0xff is shifted modulo 32 and + // would set the wrong bits for fields longer than 4 bytes + val |= 0xffL << bits; + val++; + } + for (int i = offset + length - 1; i >= offset; i--) { + buf[i] = (byte) val; + val >>= 8; + } + } + + private static void formatBigIntegerBinary(final long value, byte[] buf, + final int offset, + final int length, + final boolean negative) { + BigInteger val = BigInteger.valueOf(value); + final byte[] b = val.toByteArray(); + final int len = b.length; + final int off = offset + length - len; + System.arraycopy(b, 0, buf, off, len); + final byte fill = (byte) (negative ? 0xff : 0); + for (int i = offset + 1; i < off; i++) { + buf[i] = fill; + } + } + + /** + * Writes an octal value into a buffer. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by NUL and then space. * - * @param value The header value - * @param buf The buffer from which to parse. - * @param offset The offset into the buffer from which to parse. - * @param length The number of header bytes to parse. - * @return The integer value of the entry's checksum. + * @param value The value to convert + * @param buf The destination buffer + * @param offset The starting offset into the buffer. + * @param length The size of the buffer. + * @return The updated value of offset, i.e. 
offset+length + * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer */ - public static int getCheckSumOctalBytes(long value, byte[] buf, int offset, int length) { - getOctalBytes(value, buf, offset, length); + public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) { + + int idx=length-2; // for NUL and space + formatUnsignedOctalString(value, buf, offset, idx); - buf[offset + length - 1] = (byte) ' '; - buf[offset + length - 2] = 0; + buf[offset + idx++] = 0; // Trailing null + buf[offset + idx] = (byte) ' '; // Trailing space return offset + length; } @@ -194,7 +547,7 @@ public class TarUtils { * @param buf The tar entry's header buffer. * @return The computed checksum. */ - public static long computeCheckSum(byte[] buf) { + public static long computeCheckSum(final byte[] buf) { long sum = 0; for (int i = 0; i < buf.length; ++i) { @@ -203,4 +556,5 @@ public class TarUtils { return sum; } + } diff --git a/src/main/org/apache/tools/zip/ZipEncoding.java b/src/main/org/apache/tools/zip/ZipEncoding.java index 3df6329ec..08524735a 100644 --- a/src/main/org/apache/tools/zip/ZipEncoding.java +++ b/src/main/org/apache/tools/zip/ZipEncoding.java @@ -41,7 +41,7 @@ import java.nio.ByteBuffer; *All implementations should implement this interface in a * reentrant way.
*/ -interface ZipEncoding { +public interface ZipEncoding { /** * Check, whether the given string may be losslessly encoded using this * encoding. diff --git a/src/main/org/apache/tools/zip/ZipEncodingHelper.java b/src/main/org/apache/tools/zip/ZipEncodingHelper.java index 1c6a0688d..9c17c9ec8 100644 --- a/src/main/org/apache/tools/zip/ZipEncodingHelper.java +++ b/src/main/org/apache/tools/zip/ZipEncodingHelper.java @@ -27,7 +27,7 @@ import java.util.Map; /** * Static helper functions for robustly encoding filenames in zip files. */ -abstract class ZipEncodingHelper { +public abstract class ZipEncodingHelper { /** * A class, which holds the high characters of a simple encoding @@ -207,7 +207,7 @@ abstract class ZipEncodingHelper { * the platform's default encoding. * @return A zip encoding for the given encoding name. */ - static ZipEncoding getZipEncoding(String name) { + public static ZipEncoding getZipEncoding(String name) { // fallback encoding is good enough for utf-8. if (isUTF8(name)) {