001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one or more
003     *  contributor license agreements.  See the NOTICE file distributed with
004     *  this work for additional information regarding copyright ownership.
005     *  The ASF licenses this file to You under the Apache License, Version 2.0
006     *  (the "License"); you may not use this file except in compliance with
007     *  the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     *
017     */
018    
019    /*
020     * This package is based on the work done by Timothy Gerard Endres
021     * (time@ice.com) to whom the Ant project is very grateful for his great code.
022     */
023    
024    package org.apache.commons.compress.archivers.tar;
025    
026    import java.io.IOException;
027    import java.io.InputStream;
028    import org.apache.commons.compress.archivers.ArchiveEntry;
029    import org.apache.commons.compress.archivers.ArchiveInputStream;
030    import org.apache.commons.compress.utils.ArchiveUtils;
031    
032    /**
033     * The TarInputStream reads a UNIX tar archive as an InputStream.
034     * methods are provided to position at each successive entry in
035     * the archive, and the read each entry as a normal input stream
036     * using read().
037     * @NotThreadSafe
038     */
039    public class TarArchiveInputStream extends ArchiveInputStream {
040        private static final int SMALL_BUFFER_SIZE = 256;
041        private static final int BUFFER_SIZE = 8 * 1024;
042    
043        private boolean hasHitEOF;
044        private long entrySize;
045        private long entryOffset;
046        private byte[] readBuf;
047        protected final TarBuffer buffer;
048        private TarArchiveEntry currEntry;
049    
050        /**
051         * Constructor for TarInputStream.
052         * @param is the input stream to use
053         */
054        public TarArchiveInputStream(InputStream is) {
055            this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
056        }
057    
058        /**
059         * Constructor for TarInputStream.
060         * @param is the input stream to use
061         * @param blockSize the block size to use
062         */
063        public TarArchiveInputStream(InputStream is, int blockSize) {
064            this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
065        }
066    
067        /**
068         * Constructor for TarInputStream.
069         * @param is the input stream to use
070         * @param blockSize the block size to use
071         * @param recordSize the record size to use
072         */
073        public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
074            this.buffer = new TarBuffer(is, blockSize, recordSize);
075            this.readBuf = null;
076            this.hasHitEOF = false;
077        }
078    
079        /**
080         * Closes this stream. Calls the TarBuffer's close() method.
081         * @throws IOException on error
082         */
083        public void close() throws IOException {
084            buffer.close();
085        }
086    
087        /**
088         * Get the record size being used by this stream's TarBuffer.
089         *
090         * @return The TarBuffer record size.
091         */
092        public int getRecordSize() {
093            return buffer.getRecordSize();
094        }
095    
096        /**
097         * Get the available data that can be read from the current
098         * entry in the archive. This does not indicate how much data
099         * is left in the entire archive, only in the current entry.
100         * This value is determined from the entry's size header field
101         * and the amount of data already read from the current entry.
102         * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
103         * bytes are left in the current entry in the archive.
104         *
105         * @return The number of available bytes for the current entry.
106         * @throws IOException for signature
107         */
108        public int available() throws IOException {
109            if (entrySize - entryOffset > Integer.MAX_VALUE) {
110                return Integer.MAX_VALUE;
111            }
112            return (int) (entrySize - entryOffset);
113        }
114    
115        /**
116         * Skip bytes in the input buffer. This skips bytes in the
117         * current entry's data, not the entire archive, and will
118         * stop at the end of the current entry's data if the number
119         * to skip extends beyond that point.
120         *
121         * @param numToSkip The number of bytes to skip.
122         * @return the number actually skipped
123         * @throws IOException on error
124         */
125        public long skip(long numToSkip) throws IOException {
126            // REVIEW
127            // This is horribly inefficient, but it ensures that we
128            // properly skip over bytes via the TarBuffer...
129            //
130            byte[] skipBuf = new byte[BUFFER_SIZE];
131            long skip = numToSkip;
132            while (skip > 0) {
133                int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
134                int numRead = read(skipBuf, 0, realSkip);
135                if (numRead == -1) {
136                    break;
137                }
138                skip -= numRead;
139            }
140            return (numToSkip - skip);
141        }
142    
143        /**
144         * Since we do not support marking just yet, we do nothing.
145         */
146        public void reset() {
147        }
148    
149        /**
150         * Get the next entry in this tar archive. This will skip
151         * over any remaining data in the current entry, if there
152         * is one, and place the input stream at the header of the
153         * next entry, and read the header and instantiate a new
154         * TarEntry from the header bytes and return that entry.
155         * If there are no more entries in the archive, null will
156         * be returned to indicate that the end of the archive has
157         * been reached.
158         *
159         * @return The next TarEntry in the archive, or null.
160         * @throws IOException on error
161         */
162        public TarArchiveEntry getNextTarEntry() throws IOException {
163            if (hasHitEOF) {
164                return null;
165            }
166    
167            if (currEntry != null) {
168                long numToSkip = entrySize - entryOffset;
169    
170                while (numToSkip > 0) {
171                    long skipped = skip(numToSkip);
172                    if (skipped <= 0) {
173                        throw new RuntimeException("failed to skip current tar entry");
174                    }
175                    numToSkip -= skipped;
176                }
177    
178                readBuf = null;
179            }
180    
181            byte[] headerBuf = buffer.readRecord();
182    
183            if (headerBuf == null) {
184                hasHitEOF = true;
185            } else if (buffer.isEOFRecord(headerBuf)) {
186                hasHitEOF = true;
187            }
188    
189            if (hasHitEOF) {
190                currEntry = null;
191            } else {
192                currEntry = new TarArchiveEntry(headerBuf);
193                entryOffset = 0;
194                entrySize = currEntry.getSize();
195            }
196    
197            if (currEntry != null && currEntry.isGNULongNameEntry()) {
198                // read in the name
199                StringBuffer longName = new StringBuffer();
200                byte[] buf = new byte[SMALL_BUFFER_SIZE];
201                int length = 0;
202                while ((length = read(buf)) >= 0) {
203                    longName.append(new String(buf, 0, length));
204                }
205                getNextEntry();
206                if (currEntry == null) {
207                    // Bugzilla: 40334
208                    // Malformed tar file - long entry name not followed by entry
209                    return null;
210                }
211                // remove trailing null terminator
212                if (longName.length() > 0
213                    && longName.charAt(longName.length() - 1) == 0) {
214                    longName.deleteCharAt(longName.length() - 1);
215                }
216                currEntry.setName(longName.toString());
217            }
218    
219            return currEntry;
220        }
221    
222        public ArchiveEntry getNextEntry() throws IOException {
223            return getNextTarEntry();
224        }
225    
226        /**
227         * Reads bytes from the current tar archive entry.
228         *
229         * This method is aware of the boundaries of the current
230         * entry in the archive and will deal with them as if they
231         * were this stream's start and EOF.
232         *
233         * @param buf The buffer into which to place bytes read.
234         * @param offset The offset at which to place bytes read.
235         * @param numToRead The number of bytes to read.
236         * @return The number of bytes read, or -1 at EOF.
237         * @throws IOException on error
238         */
239        public int read(byte[] buf, int offset, int numToRead) throws IOException {
240            int totalRead = 0;
241    
242            if (entryOffset >= entrySize) {
243                return -1;
244            }
245    
246            if ((numToRead + entryOffset) > entrySize) {
247                numToRead = (int) (entrySize - entryOffset);
248            }
249    
250            if (readBuf != null) {
251                int sz = (numToRead > readBuf.length) ? readBuf.length
252                    : numToRead;
253    
254                System.arraycopy(readBuf, 0, buf, offset, sz);
255    
256                if (sz >= readBuf.length) {
257                    readBuf = null;
258                } else {
259                    int newLen = readBuf.length - sz;
260                    byte[] newBuf = new byte[newLen];
261    
262                    System.arraycopy(readBuf, sz, newBuf, 0, newLen);
263    
264                    readBuf = newBuf;
265                }
266    
267                totalRead += sz;
268                numToRead -= sz;
269                offset += sz;
270            }
271    
272            while (numToRead > 0) {
273                byte[] rec = buffer.readRecord();
274    
275                if (rec == null) {
276                    // Unexpected EOF!
277                    throw new IOException("unexpected EOF with " + numToRead
278                                          + " bytes unread. Occured at byte: " + getCount());
279                }
280                count(rec.length);
281                int sz = numToRead;
282                int recLen = rec.length;
283    
284                if (recLen > sz) {
285                    System.arraycopy(rec, 0, buf, offset, sz);
286    
287                    readBuf = new byte[recLen - sz];
288    
289                    System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
290                } else {
291                    sz = recLen;
292    
293                    System.arraycopy(rec, 0, buf, offset, recLen);
294                }
295    
296                totalRead += sz;
297                numToRead -= sz;
298                offset += sz;
299            }
300    
301            entryOffset += totalRead;
302    
303            return totalRead;
304        }
305    
306        protected final TarArchiveEntry getCurrentEntry() {
307            return currEntry;
308        }
309    
310        protected final void setCurrentEntry(TarArchiveEntry e) {
311            currEntry = e;
312        }
313    
314        protected final boolean isAtEOF() {
315            return hasHitEOF;
316        }
317    
318        protected final void setAtEOF(boolean b) {
319            hasHitEOF = b;
320        }
321    
322        // ArchiveInputStream
323    
324        public static boolean matches(byte[] signature, int length) {
325            if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
326                return false;
327            }
328    
329            if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, 
330                    signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
331                &&
332                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, 
333                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
334                    ){
335                return true;
336            }
337            if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, 
338                    signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
339                &&
340                (
341                 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, 
342                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
343                ||
344                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, 
345                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
346                )
347                    ){
348                return true;
349            }
350            return false;
351        }
352    
353    }