You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

TarInputStream.java 22 kB

11 years ago
11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. /*
  19. * This package is based on the work done by Timothy Gerard Endres
  20. * (time@ice.com) to whom the Ant project is very grateful for his great code.
  21. */
  22. package org.apache.tools.tar;
  23. import java.io.ByteArrayOutputStream;
  24. import java.io.FilterInputStream;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.io.OutputStream;
  28. import java.util.HashMap;
  29. import java.util.Map;
  30. import java.util.Map.Entry;
  31. import org.apache.tools.zip.ZipEncoding;
  32. import org.apache.tools.zip.ZipEncodingHelper;
  33. /**
  34. * The TarInputStream reads a UNIX tar archive as an InputStream.
  35. * Methods are provided to position at each successive entry in
  36. * the archive, and then read each entry as a normal input stream
  37. * using read().
  38. *
  39. */
  40. public class TarInputStream extends FilterInputStream {
  41. private static final int SMALL_BUFFER_SIZE = 256;
  42. private static final int BUFFER_SIZE = 8 * 1024;
  43. private static final int LARGE_BUFFER_SIZE = 32 * 1024;
  44. private static final int BYTE_MASK = 0xFF;
  45. private final byte[] SKIP_BUF = new byte[BUFFER_SIZE];
  46. private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE];
  47. // CheckStyle:VisibilityModifier OFF - bc
  48. protected boolean debug;
  49. protected boolean hasHitEOF;
  50. protected long entrySize;
  51. protected long entryOffset;
  52. protected byte[] readBuf;
  53. protected TarBuffer buffer;
  54. protected TarEntry currEntry;
  55. /**
  56. * This contents of this array is not used at all in this class,
  57. * it is only here to avoid repreated object creation during calls
  58. * to the no-arg read method.
  59. */
  60. protected byte[] oneBuf;
  61. // CheckStyle:VisibilityModifier ON
  62. private final ZipEncoding encoding;
  /**
   * Creates a tar input stream that uses the default block size and the
   * default record size of {@link TarBuffer}.
   *
   * @param is the input stream to use
   */
  public TarInputStream(InputStream is) {
    // Goes through the (stream, blockSize) constructor, which supplies the
    // default record size itself.
    this(is, TarBuffer.DEFAULT_BLKSIZE);
  }
  /**
   * Creates a tar input stream that uses the default block and record
   * sizes of {@link TarBuffer} and the given file name encoding.
   *
   * @param is the input stream to use
   * @param encoding name of the encoding to use for file names
   */
  public TarInputStream(InputStream is, String encoding) {
    // Goes through the (stream, blockSize, encoding) constructor, which
    // supplies the default record size itself.
    this(is, TarBuffer.DEFAULT_BLKSIZE, encoding);
  }
  /**
   * Creates a tar input stream with a caller-chosen block size, the
   * default record size of {@link TarBuffer} and no explicit encoding name.
   *
   * @param is the input stream to use
   * @param blockSize the block size to use
   */
  public TarInputStream(InputStream is, int blockSize) {
    // A null encoding name is what the three-int-argument constructor passes too.
    this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, (String) null);
  }
  86. /**
  87. * Constructor for TarInputStream.
  88. * @param is the input stream to use
  89. * @param blockSize the block size to use
  90. * @param encoding name of the encoding to use for file names
  91. */
  92. public TarInputStream(InputStream is, int blockSize, String encoding) {
  93. this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
  94. }
  /**
   * Creates a tar input stream with caller-chosen block and record sizes
   * and no explicit encoding name.
   *
   * @param is the input stream to use
   * @param blockSize the block size to use
   * @param recordSize the record size to use
   */
  public TarInputStream(InputStream is, int blockSize, int recordSize) {
    this(is, blockSize, recordSize, (String) null);
  }
  /**
   * Creates a tar input stream with caller-chosen block size, record size
   * and file name encoding. All other constructors end up here.
   *
   * @param is the input stream to use
   * @param blockSize the block size to use
   * @param recordSize the record size to use
   * @param encoding name of the encoding to use for file names
   */
  public TarInputStream(InputStream is, int blockSize, int recordSize,
      String encoding) {
    super(is);
    buffer = new TarBuffer(is, blockSize, recordSize);
    // Start with no current entry data buffered and EOF not yet seen.
    hasHitEOF = false;
    debug = false;
    readBuf = null;
    oneBuf = new byte[1];
    this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
  }
  121. /**
  122. * Sets the debugging flag.
  123. *
  124. * @param debug True to turn on debugging.
  125. */
  126. public void setDebug(boolean debug) {
  127. this.debug = debug;
  128. buffer.setDebug(debug);
  129. }
  130. /**
  131. * Closes this stream. Calls the TarBuffer's close() method.
  132. * @throws IOException on error
  133. */
  134. @Override
  135. public void close() throws IOException {
  136. buffer.close();
  137. }
  138. /**
  139. * Get the record size being used by this stream's TarBuffer.
  140. *
  141. * @return The TarBuffer record size.
  142. */
  143. public int getRecordSize() {
  144. return buffer.getRecordSize();
  145. }
  146. /**
  147. * Get the available data that can be read from the current
  148. * entry in the archive. This does not indicate how much data
  149. * is left in the entire archive, only in the current entry.
  150. * This value is determined from the entry's size header field
  151. * and the amount of data already read from the current entry.
  152. * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
  153. * bytes are left in the current entry in the archive.
  154. *
  155. * @return The number of available bytes for the current entry.
  156. * @throws IOException for signature
  157. */
  158. @Override
  159. public int available() throws IOException {
  160. if (entrySize - entryOffset > Integer.MAX_VALUE) {
  161. return Integer.MAX_VALUE;
  162. }
  163. return (int) (entrySize - entryOffset);
  164. }
  165. /**
  166. * Skip bytes in the input buffer. This skips bytes in the
  167. * current entry's data, not the entire archive, and will
  168. * stop at the end of the current entry's data if the number
  169. * to skip extends beyond that point.
  170. *
  171. * @param numToSkip The number of bytes to skip.
  172. * @return the number actually skipped
  173. * @throws IOException on error
  174. */
  175. @Override
  176. public long skip(long numToSkip) throws IOException {
  177. // REVIEW
  178. // This is horribly inefficient, but it ensures that we
  179. // properly skip over bytes via the TarBuffer...
  180. //
  181. long skip = numToSkip;
  182. while (skip > 0) {
  183. int realSkip = (int) (skip > SKIP_BUF.length
  184. ? SKIP_BUF.length : skip);
  185. int numRead = read(SKIP_BUF, 0, realSkip);
  186. if (numRead == -1) {
  187. break;
  188. }
  189. skip -= numRead;
  190. }
  191. return (numToSkip - skip);
  192. }
  193. /**
  194. * Since we do not support marking just yet, we return false.
  195. *
  196. * @return False.
  197. */
  198. @Override
  199. public boolean markSupported() {
  200. return false;
  201. }
  202. /**
  203. * Since we do not support marking just yet, we do nothing.
  204. *
  205. * @param markLimit The limit to mark.
  206. */
  207. @Override
  208. public void mark(int markLimit) {
  209. }
  210. /**
  211. * Since we do not support marking just yet, we do nothing.
  212. */
  213. @Override
  214. public void reset() {
  215. }
  216. /**
  217. * Get the next entry in this tar archive. This will skip
  218. * over any remaining data in the current entry, if there
  219. * is one, and place the input stream at the header of the
  220. * next entry, and read the header and instantiate a new
  221. * TarEntry from the header bytes and return that entry.
  222. * If there are no more entries in the archive, null will
  223. * be returned to indicate that the end of the archive has
  224. * been reached.
  225. *
  226. * @return The next TarEntry in the archive, or null.
  227. * @throws IOException on error
  228. */
  229. public TarEntry getNextEntry() throws IOException {
  230. if (hasHitEOF) {
  231. return null;
  232. }
  233. if (currEntry != null) {
  234. long numToSkip = entrySize - entryOffset;
  235. if (debug) {
  236. System.err.println("TarInputStream: SKIP currENTRY '"
  237. + currEntry.getName() + "' SZ "
  238. + entrySize + " OFF "
  239. + entryOffset + " skipping "
  240. + numToSkip + " bytes");
  241. }
  242. while (numToSkip > 0) {
  243. long skipped = skip(numToSkip);
  244. if (skipped <= 0) {
  245. throw new RuntimeException("failed to skip current tar"
  246. + " entry");
  247. }
  248. numToSkip -= skipped;
  249. }
  250. readBuf = null;
  251. }
  252. byte[] headerBuf = getRecord();
  253. if (hasHitEOF) {
  254. currEntry = null;
  255. return null;
  256. }
  257. try {
  258. currEntry = new TarEntry(headerBuf, encoding);
  259. } catch (IllegalArgumentException e) {
  260. IOException ioe = new IOException("Error detected parsing the header");
  261. ioe.initCause(e);
  262. throw ioe;
  263. }
  264. if (debug) {
  265. System.err.println("TarInputStream: SET CURRENTRY '"
  266. + currEntry.getName()
  267. + "' size = "
  268. + currEntry.getSize());
  269. }
  270. entryOffset = 0;
  271. entrySize = currEntry.getSize();
  272. if (currEntry.isGNULongLinkEntry()) {
  273. byte[] longLinkData = getLongNameData();
  274. if (longLinkData == null) {
  275. // Bugzilla: 40334
  276. // Malformed tar file - long link entry name not followed by
  277. // entry
  278. return null;
  279. }
  280. currEntry.setLinkName(encoding.decode(longLinkData));
  281. }
  282. if (currEntry.isGNULongNameEntry()) {
  283. byte[] longNameData = getLongNameData();
  284. if (longNameData == null) {
  285. // Bugzilla: 40334
  286. // Malformed tar file - long entry name not followed by
  287. // entry
  288. return null;
  289. }
  290. currEntry.setName(encoding.decode(longNameData));
  291. }
  292. if (currEntry.isPaxHeader()){ // Process Pax headers
  293. paxHeaders();
  294. }
  295. if (currEntry.isGNUSparse()){ // Process sparse files
  296. readGNUSparse();
  297. }
  298. // If the size of the next element in the archive has changed
  299. // due to a new size being reported in the posix header
  300. // information, we update entrySize here so that it contains
  301. // the correct value.
  302. entrySize = currEntry.getSize();
  303. return currEntry;
  304. }
  305. /**
  306. * Get the next entry in this tar archive as longname data.
  307. *
  308. * @return The next entry in the archive as longname data, or null.
  309. * @throws IOException on error
  310. */
  311. protected byte[] getLongNameData() throws IOException {
  312. // read in the name
  313. ByteArrayOutputStream longName = new ByteArrayOutputStream();
  314. int length = 0;
  315. while ((length = read(SMALL_BUF)) >= 0) {
  316. longName.write(SMALL_BUF, 0, length);
  317. }
  318. getNextEntry();
  319. if (currEntry == null) {
  320. // Bugzilla: 40334
  321. // Malformed tar file - long entry name not followed by entry
  322. return null;
  323. }
  324. byte[] longNameData = longName.toByteArray();
  325. // remove trailing null terminator(s)
  326. length = longNameData.length;
  327. while (length > 0 && longNameData[length - 1] == 0) {
  328. --length;
  329. }
  330. if (length != longNameData.length) {
  331. byte[] l = new byte[length];
  332. System.arraycopy(longNameData, 0, l, 0, length);
  333. longNameData = l;
  334. }
  335. return longNameData;
  336. }
  337. /**
  338. * Get the next record in this tar archive. This will skip
  339. * over any remaining data in the current entry, if there
  340. * is one, and place the input stream at the header of the
  341. * next entry.
  342. * If there are no more entries in the archive, null will
  343. * be returned to indicate that the end of the archive has
  344. * been reached.
  345. *
  346. * @return The next header in the archive, or null.
  347. * @throws IOException on error
  348. */
  349. private byte[] getRecord() throws IOException {
  350. if (hasHitEOF) {
  351. return null;
  352. }
  353. byte[] headerBuf = buffer.readRecord();
  354. if (headerBuf == null) {
  355. if (debug) {
  356. System.err.println("READ NULL RECORD");
  357. }
  358. hasHitEOF = true;
  359. } else if (buffer.isEOFRecord(headerBuf)) {
  360. if (debug) {
  361. System.err.println("READ EOF RECORD");
  362. }
  363. hasHitEOF = true;
  364. }
  365. return hasHitEOF ? null : headerBuf;
  366. }
  367. private void paxHeaders() throws IOException{
  368. Map<String, String> headers = parsePaxHeaders(this);
  369. getNextEntry(); // Get the actual file entry
  370. applyPaxHeadersToCurrentEntry(headers);
  371. }
  372. Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
  373. Map<String, String> headers = new HashMap<String, String>();
  374. // Format is "length keyword=value\n";
  375. while(true){ // get length
  376. int ch;
  377. int len = 0;
  378. int read = 0;
  379. while((ch = i.read()) != -1) {
  380. read++;
  381. if (ch == ' '){ // End of length string
  382. // Get keyword
  383. ByteArrayOutputStream coll = new ByteArrayOutputStream();
  384. while((ch = i.read()) != -1) {
  385. read++;
  386. if (ch == '='){ // end of keyword
  387. String keyword = coll.toString("UTF-8");
  388. // Get rest of entry
  389. final int restLen = len - read;
  390. byte[] rest = new byte[restLen];
  391. int got = 0;
  392. while (got < restLen && (ch = i.read()) != -1) {
  393. rest[got++] = (byte) ch;
  394. }
  395. if (got != restLen) {
  396. throw new IOException("Failed to read "
  397. + "Paxheader. Expected "
  398. + restLen
  399. + " bytes, read "
  400. + got);
  401. }
  402. // Drop trailing NL
  403. String value = new String(rest, 0,
  404. restLen - 1, "UTF-8");
  405. headers.put(keyword, value);
  406. break;
  407. }
  408. coll.write((byte) ch);
  409. }
  410. break; // Processed single header
  411. }
  412. len *= 10;
  413. len += ch - '0';
  414. }
  415. if (ch == -1){ // EOF
  416. break;
  417. }
  418. }
  419. return headers;
  420. }
  421. private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) {
  422. /*
  423. * The following headers are defined for Pax.
  424. * atime, ctime, charset: cannot use these without changing TarEntry fields
  425. * mtime
  426. * comment
  427. * gid, gname
  428. * linkpath
  429. * size
  430. * uid,uname
  431. * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those
  432. */
  433. for (Entry<String, String> ent : headers.entrySet()){
  434. String key = ent.getKey();
  435. String val = ent.getValue();
  436. if ("path".equals(key)){
  437. currEntry.setName(val);
  438. } else if ("linkpath".equals(key)){
  439. currEntry.setLinkName(val);
  440. } else if ("gid".equals(key)){
  441. currEntry.setGroupId(Long.parseLong(val));
  442. } else if ("gname".equals(key)){
  443. currEntry.setGroupName(val);
  444. } else if ("uid".equals(key)){
  445. currEntry.setUserId(Long.parseLong(val));
  446. } else if ("uname".equals(key)){
  447. currEntry.setUserName(val);
  448. } else if ("size".equals(key)){
  449. currEntry.setSize(Long.parseLong(val));
  450. } else if ("mtime".equals(key)){
  451. currEntry.setModTime((long) (Double.parseDouble(val) * 1000));
  452. } else if ("SCHILY.devminor".equals(key)){
  453. currEntry.setDevMinor(Integer.parseInt(val));
  454. } else if ("SCHILY.devmajor".equals(key)){
  455. currEntry.setDevMajor(Integer.parseInt(val));
  456. }
  457. }
  458. }
  459. /**
  460. * Adds the sparse chunks from the current entry to the sparse chunks,
  461. * including any additional sparse entries following the current entry.
  462. *
  463. * @throws IOException on error
  464. *
  465. * @todo Sparse files get not yet really processed.
  466. */
  467. private void readGNUSparse() throws IOException {
  468. /* we do not really process sparse files yet
  469. sparses = new ArrayList();
  470. sparses.addAll(currEntry.getSparses());
  471. */
  472. if (currEntry.isExtended()) {
  473. TarArchiveSparseEntry entry;
  474. do {
  475. byte[] headerBuf = getRecord();
  476. if (hasHitEOF) {
  477. currEntry = null;
  478. break;
  479. }
  480. entry = new TarArchiveSparseEntry(headerBuf);
  481. /* we do not really process sparse files yet
  482. sparses.addAll(entry.getSparses());
  483. */
  484. } while (entry.isExtended());
  485. }
  486. }
  487. /**
  488. * Reads a byte from the current tar archive entry.
  489. *
  490. * This method simply calls read( byte[], int, int ).
  491. *
  492. * @return The byte read, or -1 at EOF.
  493. * @throws IOException on error
  494. */
  495. @Override
  496. public int read() throws IOException {
  497. int num = read(oneBuf, 0, 1);
  498. return num == -1 ? -1 : (oneBuf[0]) & BYTE_MASK;
  499. }
  500. /**
  501. * Reads bytes from the current tar archive entry.
  502. *
  503. * This method is aware of the boundaries of the current
  504. * entry in the archive and will deal with them as if they
  505. * were this stream's start and EOF.
  506. *
  507. * @param buf The buffer into which to place bytes read.
  508. * @param offset The offset at which to place bytes read.
  509. * @param numToRead The number of bytes to read.
  510. * @return The number of bytes read, or -1 at EOF.
  511. * @throws IOException on error
  512. */
  513. @Override
  514. public int read(byte[] buf, int offset, int numToRead) throws IOException {
  515. int totalRead = 0;
  516. if (entryOffset >= entrySize) {
  517. return -1;
  518. }
  519. if ((numToRead + entryOffset) > entrySize) {
  520. numToRead = (int) (entrySize - entryOffset);
  521. }
  522. if (readBuf != null) {
  523. int sz = (numToRead > readBuf.length) ? readBuf.length
  524. : numToRead;
  525. System.arraycopy(readBuf, 0, buf, offset, sz);
  526. if (sz >= readBuf.length) {
  527. readBuf = null;
  528. } else {
  529. int newLen = readBuf.length - sz;
  530. byte[] newBuf = new byte[newLen];
  531. System.arraycopy(readBuf, sz, newBuf, 0, newLen);
  532. readBuf = newBuf;
  533. }
  534. totalRead += sz;
  535. numToRead -= sz;
  536. offset += sz;
  537. }
  538. while (numToRead > 0) {
  539. byte[] rec = buffer.readRecord();
  540. if (rec == null) {
  541. // Unexpected EOF!
  542. throw new IOException("unexpected EOF with " + numToRead
  543. + " bytes unread");
  544. }
  545. int sz = numToRead;
  546. int recLen = rec.length;
  547. if (recLen > sz) {
  548. System.arraycopy(rec, 0, buf, offset, sz);
  549. readBuf = new byte[recLen - sz];
  550. System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
  551. } else {
  552. sz = recLen;
  553. System.arraycopy(rec, 0, buf, offset, recLen);
  554. }
  555. totalRead += sz;
  556. numToRead -= sz;
  557. offset += sz;
  558. }
  559. entryOffset += totalRead;
  560. return totalRead;
  561. }
  562. /**
  563. * Copies the contents of the current tar archive entry directly into
  564. * an output stream.
  565. *
  566. * @param out The OutputStream into which to write the entry's data.
  567. * @throws IOException on error
  568. */
  569. public void copyEntryContents(OutputStream out) throws IOException {
  570. byte[] buf = new byte[LARGE_BUFFER_SIZE];
  571. while (true) {
  572. int numRead = read(buf, 0, buf.length);
  573. if (numRead == -1) {
  574. break;
  575. }
  576. out.write(buf, 0, numRead);
  577. }
  578. }
  579. /**
  580. * Whether this class is able to read the given entry.
  581. *
  582. * <p>May return false if the current entry is a sparse file.</p>
  583. */
  584. public boolean canReadEntryData(TarEntry te) {
  585. return !te.isGNUSparse();
  586. }
  587. }