001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.math.BigInteger;
028import java.nio.ByteBuffer;
029import java.util.Arrays;
030import java.util.zip.CRC32;
031import java.util.zip.DataFormatException;
032import java.util.zip.Inflater;
033import java.util.zip.ZipEntry;
034import java.util.zip.ZipException;
035
036import org.apache.commons.compress.archivers.ArchiveEntry;
037import org.apache.commons.compress.archivers.ArchiveInputStream;
038import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
039import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
040import org.apache.commons.compress.utils.ArchiveUtils;
041import org.apache.commons.compress.utils.IOUtils;
042import org.apache.commons.compress.utils.InputStreamStatistics;
043
044import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
045import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
046import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
047import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
048
049/**
050 * Implements an input stream that can read Zip archives.
051 *
 * <p>This class transparently supports Zip64 extensions and thus
 * individual entries and archives larger than 4 GB or with more
 * than 65536 entries.</p>
055 *
056 * <p>The {@link ZipFile} class is preferred when reading from files
057 * as {@link ZipArchiveInputStream} is limited by not being able to
058 * read the central directory header before returning entries.  In
059 * particular {@link ZipArchiveInputStream}</p>
060 *
061 * <ul>
062 *
063 *  <li>may return entries that are not part of the central directory
064 *  at all and shouldn't be considered part of the archive.</li>
065 *
066 *  <li>may return several entries with the same name.</li>
067 *
068 *  <li>will not return internal or external attributes.</li>
069 *
070 *  <li>may return incomplete extra field data.</li>
071 *
072 *  <li>may return unknown sizes and CRC values for entries until the
073 *  next entry has been reached if the archive uses the data
074 *  descriptor feature.</li>
075 *
076 * </ul>
077 *
078 * @see ZipFile
079 * @NotThreadSafe
080 */
081public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
082
    /** The zip encoding to use for file names and the file comment. */
    private final ZipEncoding zipEncoding;

    /**
     * The encoding name exactly as it was passed to the constructor.
     * Package-private so that unit tests can inspect it.
     */
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /**
     * Wrapped stream, will always be a PushbackInputStream.  Its
     * pushback capacity equals the capacity of {@link #buf}, so a
     * complete buffer that was read ahead can be handed back.
     */
    private final InputStream in;

    /**
     * Inflater used for all deflated entries.  It is created in
     * "nowrap" mode and reset whenever an entry is closed.
     */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read, null while no entry is open. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.  It is cleared again when the entry is closed.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry; reset when the next entry is requested. */
    private long uncompressedCount = 0;

    /** Size in bytes of the fixed part of a local file header, see the layout below. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Size in bytes of the fixed part of a central directory file header, see the layout below. */
    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    /**
     * One more than {@code ZIP64_MAGIC}; used as the wrap-around step
     * when correcting the Inflater's byte counter.
     * NOTE(review): presumably 2^32, as the name says - confirm against ZipConstants.
     */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // lfhBuf receives the fixed part of each local file header
    private final byte[] lfhBuf = new byte[LFH_LEN];
    // scratch target for bytes that are read only to be discarded by skip()
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    /** Number of local file headers that have been parsed successfully so far. */
    private int entriesRead = 0;
169
170    /**
171     * Create an instance using UTF-8 encoding
172     * @param inputStream the stream to wrap
173     */
174    public ZipArchiveInputStream(final InputStream inputStream) {
175        this(inputStream, ZipEncodingHelper.UTF8);
176    }
177
178    /**
179     * Create an instance using the specified encoding
180     * @param inputStream the stream to wrap
181     * @param encoding the encoding to use for file names, use null
182     * for the platform's default encoding
183     * @since 1.5
184     */
185    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
186        this(inputStream, encoding, true);
187    }
188
189    /**
190     * Create an instance using the specified encoding
191     * @param inputStream the stream to wrap
192     * @param encoding the encoding to use for file names, use null
193     * for the platform's default encoding
194     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
195     * Extra Fields (if present) to set the file names.
196     */
197    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
198        this(inputStream, encoding, useUnicodeExtraFields, false);
199    }
200
201    /**
202     * Create an instance using the specified encoding
203     * @param inputStream the stream to wrap
204     * @param encoding the encoding to use for file names, use null
205     * for the platform's default encoding
206     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
207     * Extra Fields (if present) to set the file names.
208     * @param allowStoredEntriesWithDataDescriptor whether the stream
209     * will try to read STORED entries that use a data descriptor
210     * @since 1.1
211     */
212    public ZipArchiveInputStream(final InputStream inputStream,
213                                 final String encoding,
214                                 final boolean useUnicodeExtraFields,
215                                 final boolean allowStoredEntriesWithDataDescriptor) {
216        this.encoding = encoding;
217        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
218        this.useUnicodeExtraFields = useUnicodeExtraFields;
219        in = new PushbackInputStream(inputStream, buf.capacity());
220        this.allowStoredEntriesWithDataDescriptor =
221            allowStoredEntriesWithDataDescriptor;
222        // haven't read anything so far
223        buf.limit(0);
224    }
225
226    public ZipArchiveEntry getNextZipEntry() throws IOException {
227        uncompressedCount = 0;
228
229        boolean firstEntry = true;
230        if (closed || hitCentralDirectory) {
231            return null;
232        }
233        if (current != null) {
234            closeEntry();
235            firstEntry = false;
236        }
237
238        long currentHeaderOffset = getBytesRead();
239        try {
240            if (firstEntry) {
241                // split archives have a special signature before the
242                // first local file header - look for it and fail with
243                // the appropriate error message if this is a split
244                // archive.
245                readFirstLocalFileHeader(lfhBuf);
246            } else {
247                readFully(lfhBuf);
248            }
249        } catch (final EOFException e) { //NOSONAR
250            return null;
251        }
252
253        final ZipLong sig = new ZipLong(lfhBuf);
254        if (!sig.equals(ZipLong.LFH_SIG)) {
255            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
256                hitCentralDirectory = true;
257                skipRemainderOfArchive();
258                return null;
259            }
260            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
261        }
262
263        int off = WORD;
264        current = new CurrentEntry();
265
266        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
267        off += SHORT;
268        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
269
270        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
271        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
272        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
273        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
274        current.entry.setGeneralPurposeBit(gpFlag);
275
276        off += SHORT;
277
278        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
279        off += SHORT;
280
281        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
282        current.entry.setTime(time);
283        off += WORD;
284
285        ZipLong size = null, cSize = null;
286        if (!current.hasDataDescriptor) {
287            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
288            off += WORD;
289
290            cSize = new ZipLong(lfhBuf, off);
291            off += WORD;
292
293            size = new ZipLong(lfhBuf, off);
294            off += WORD;
295        } else {
296            off += 3 * WORD;
297        }
298
299        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
300
301        off += SHORT;
302
303        final int extraLen = ZipShort.getValue(lfhBuf, off);
304        off += SHORT; // NOSONAR - assignment as documentation
305
306        final byte[] fileName = new byte[fileNameLen];
307        readFully(fileName);
308        current.entry.setName(entryEncoding.decode(fileName), fileName);
309        if (hasUTF8Flag) {
310            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
311        }
312
313        final byte[] extraData = new byte[extraLen];
314        readFully(extraData);
315        current.entry.setExtra(extraData);
316
317        if (!hasUTF8Flag && useUnicodeExtraFields) {
318            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
319        }
320
321        processZip64Extra(size, cSize);
322
323        current.entry.setLocalHeaderOffset(currentHeaderOffset);
324        current.entry.setDataOffset(getBytesRead());
325        current.entry.setStreamContiguous(true);
326
327        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
328        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
329            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
330                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
331                switch (m) {
332                case UNSHRINKING:
333                    current.in = new UnshrinkingInputStream(bis);
334                    break;
335                case IMPLODING:
336                    current.in = new ExplodingInputStream(
337                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
338                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
339                        bis);
340                    break;
341                case BZIP2:
342                    current.in = new BZip2CompressorInputStream(bis);
343                    break;
344                case ENHANCED_DEFLATED:
345                    current.in = new Deflate64CompressorInputStream(bis);
346                    break;
347                default:
348                    // we should never get here as all supported methods have been covered
349                    // will cause an error when read is invoked, don't throw an exception here so people can
350                    // skip unsupported entries
351                    break;
352                }
353            }
354        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
355            current.in = new Deflate64CompressorInputStream(in);
356        }
357
358        entriesRead++;
359        return current.entry;
360    }
361
362    /**
363     * Fills the given array with the first local file header and
364     * deals with splitting/spanning markers that may prefix the first
365     * LFH.
366     */
367    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
368        readFully(lfh);
369        final ZipLong sig = new ZipLong(lfh);
370        if (sig.equals(ZipLong.DD_SIG)) {
371            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
372        }
373
374        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
375            // The archive is not really split as only one segment was
376            // needed in the end.  Just skip over the marker.
377            final byte[] missedLfhBytes = new byte[4];
378            readFully(missedLfhBytes);
379            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
380            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
381        }
382    }
383
384    /**
385     * Records whether a Zip64 extra is present and sets the size
386     * information from it if sizes are 0xFFFFFFFF and the entry
387     * doesn't use a data descriptor.
388     */
389    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
390        final Zip64ExtendedInformationExtraField z64 =
391            (Zip64ExtendedInformationExtraField)
392            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
393        current.usesZip64 = z64 != null;
394        if (!current.hasDataDescriptor) {
395            if (z64 != null // same as current.usesZip64 but avoids NPE warning
396                    && (ZipLong.ZIP64_MAGIC.equals(cSize) || ZipLong.ZIP64_MAGIC.equals(size)) ) {
397                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
398                current.entry.setSize(z64.getSize().getLongValue());
399            } else if (cSize != null && size != null) {
400                current.entry.setCompressedSize(cSize.getValue());
401                current.entry.setSize(size.getValue());
402            }
403        }
404    }
405
406    @Override
407    public ArchiveEntry getNextEntry() throws IOException {
408        return getNextZipEntry();
409    }
410
411    /**
412     * Whether this class is able to read the given entry.
413     *
414     * <p>May return false if it is set up to use encryption or a
415     * compression method that hasn't been implemented yet.</p>
416     * @since 1.1
417     */
418    @Override
419    public boolean canReadEntryData(final ArchiveEntry ae) {
420        if (ae instanceof ZipArchiveEntry) {
421            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
422            return ZipUtil.canHandleEntryData(ze)
423                && supportsDataDescriptorFor(ze)
424                && supportsCompressedSizeFor(ze);
425        }
426        return false;
427    }
428
429    @Override
430    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
431        if (closed) {
432            throw new IOException("The stream is closed");
433        }
434
435        if (current == null) {
436            return -1;
437        }
438
439        // avoid int overflow, check null buffer
440        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
441            throw new ArrayIndexOutOfBoundsException();
442        }
443
444        ZipUtil.checkRequestedFeatures(current.entry);
445        if (!supportsDataDescriptorFor(current.entry)) {
446            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
447                    current.entry);
448        }
449        if (!supportsCompressedSizeFor(current.entry)) {
450            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
451                    current.entry);
452        }
453
454        int read;
455        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
456            read = readStored(buffer, offset, length);
457        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
458            read = readDeflated(buffer, offset, length);
459        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
460                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
461                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
462                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
463            read = current.in.read(buffer, offset, length);
464        } else {
465            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
466                    current.entry);
467        }
468
469        if (read >= 0) {
470            current.crc.update(buffer, offset, read);
471            uncompressedCount += read;
472        }
473
474        return read;
475    }
476
477    /**
478     * @since 1.17
479     */
480    @Override
481    public long getCompressedCount() {
482        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
483            return current.bytesRead;
484        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
485            return getBytesInflated();
486        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
487            return ((UnshrinkingInputStream) current.in).getCompressedCount();
488        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
489            return ((ExplodingInputStream) current.in).getCompressedCount();
490        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
491            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
492        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
493            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
494        } else {
495            return -1;
496        }
497    }
498
499    /**
500     * @since 1.17
501     */
502    @Override
503    public long getUncompressedCount() {
504        return uncompressedCount;
505    }
506
507    /**
508     * Implementation of read for STORED entries.
509     */
510    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
511
512        if (current.hasDataDescriptor) {
513            if (lastStoredEntry == null) {
514                readStoredEntry();
515            }
516            return lastStoredEntry.read(buffer, offset, length);
517        }
518
519        final long csize = current.entry.getSize();
520        if (current.bytesRead >= csize) {
521            return -1;
522        }
523
524        if (buf.position() >= buf.limit()) {
525            buf.position(0);
526            final int l = in.read(buf.array());
527            if (l == -1) {
528                buf.limit(0);
529                throw new IOException("Truncated ZIP file");
530            }
531            buf.limit(l);
532
533            count(l);
534            current.bytesReadFromStream += l;
535        }
536
537        int toRead = Math.min(buf.remaining(), length);
538        if ((csize - current.bytesRead) < toRead) {
539            // if it is smaller than toRead then it fits into an int
540            toRead = (int) (csize - current.bytesRead);
541        }
542        buf.get(buffer, offset, toRead);
543        current.bytesRead += toRead;
544        return toRead;
545    }
546
547    /**
548     * Implementation of read for DEFLATED entries.
549     */
550    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
551        final int read = readFromInflater(buffer, offset, length);
552        if (read <= 0) {
553            if (inf.finished()) {
554                return -1;
555            } else if (inf.needsDictionary()) {
556                throw new ZipException("This archive needs a preset dictionary"
557                                       + " which is not supported by Commons"
558                                       + " Compress.");
559            } else if (read == -1) {
560                throw new IOException("Truncated ZIP file");
561            }
562        }
563        return read;
564    }
565
566    /**
567     * Potentially reads more bytes to fill the inflater's buffer and
568     * reads from it.
569     */
570    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
571        int read = 0;
572        do {
573            if (inf.needsInput()) {
574                final int l = fill();
575                if (l > 0) {
576                    current.bytesReadFromStream += buf.limit();
577                } else if (l == -1) {
578                    return -1;
579                } else {
580                    break;
581                }
582            }
583            try {
584                read = inf.inflate(buffer, offset, length);
585            } catch (final DataFormatException e) {
586                throw (IOException) new ZipException(e.getMessage()).initCause(e);
587            }
588        } while (read == 0 && inf.needsInput());
589        return read;
590    }
591
592    @Override
593    public void close() throws IOException {
594        if (!closed) {
595            closed = true;
596            try {
597                in.close();
598            } finally {
599                inf.end();
600            }
601        }
602    }
603
604    /**
605     * Skips over and discards value bytes of data from this input
606     * stream.
607     *
608     * <p>This implementation may end up skipping over some smaller
609     * number of bytes, possibly 0, if and only if it reaches the end
610     * of the underlying stream.</p>
611     *
612     * <p>The actual number of bytes skipped is returned.</p>
613     *
614     * @param value the number of bytes to be skipped.
615     * @return the actual number of bytes skipped.
616     * @throws IOException - if an I/O error occurs.
617     * @throws IllegalArgumentException - if value is negative.
618     */
619    @Override
620    public long skip(final long value) throws IOException {
621        if (value >= 0) {
622            long skipped = 0;
623            while (skipped < value) {
624                final long rem = value - skipped;
625                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
626                if (x == -1) {
627                    return skipped;
628                }
629                skipped += x;
630            }
631            return skipped;
632        }
633        throw new IllegalArgumentException();
634    }
635
636    /**
637     * Checks if the signature matches what is expected for a zip file.
638     * Does not currently handle self-extracting zips which may have arbitrary
639     * leading content.
640     *
641     * @param signature the bytes to check
642     * @param length    the number of bytes to check
643     * @return true, if this stream is a zip archive stream, false otherwise
644     */
645    public static boolean matches(final byte[] signature, final int length) {
646        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
647            return false;
648        }
649
650        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
651            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
652            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
653            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
654    }
655
656    private static boolean checksig(final byte[] signature, final byte[] expected) {
657        for (int i = 0; i < expected.length; i++) {
658            if (signature[i] != expected[i]) {
659                return false;
660            }
661        }
662        return true;
663    }
664
665    /**
666     * Closes the current ZIP archive entry and positions the underlying
667     * stream to the beginning of the next entry. All per-entry variables
668     * and data structures are cleared.
669     * <p>
670     * If the compressed size of this entry is included in the entry header,
671     * then any outstanding bytes are simply skipped from the underlying
672     * stream without uncompressing them. This allows an entry to be safely
673     * closed even if the compression method is unsupported.
674     * <p>
675     * In case we don't know the compressed size of this entry or have
676     * already buffered too much data from the underlying stream to support
677     * uncompression, then the uncompression process is completed and the
678     * end position of the stream is adjusted based on the result of that
679     * process.
680     *
681     * @throws IOException if an error occurs
682     */
683    private void closeEntry() throws IOException {
684        if (closed) {
685            throw new IOException("The stream is closed");
686        }
687        if (current == null) {
688            return;
689        }
690
691        // Ensure all entry bytes are read
692        if (currentEntryHasOutstandingBytes()) {
693            drainCurrentEntryData();
694        } else {
695            // this is guaranteed to exhaust the stream
696            skip(Long.MAX_VALUE); //NOSONAR
697
698            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
699                       ? getBytesInflated() : current.bytesRead;
700
701            // this is at most a single read() operation and can't
702            // exceed the range of int
703            final int diff = (int) (current.bytesReadFromStream - inB);
704
705            // Pushback any required bytes
706            if (diff > 0) {
707                pushback(buf.array(), buf.limit() - diff, diff);
708                current.bytesReadFromStream -= diff;
709            }
710
711            // Drain remainder of entry if not all data bytes were required
712            if (currentEntryHasOutstandingBytes()) {
713                drainCurrentEntryData();
714            }
715        }
716
717        if (lastStoredEntry == null && current.hasDataDescriptor) {
718            readDataDescriptor();
719        }
720
721        inf.reset();
722        buf.clear().flip();
723        current = null;
724        lastStoredEntry = null;
725    }
726
727    /**
728     * If the compressed size of the current entry is included in the entry header
729     * and there are any outstanding bytes in the underlying stream, then
730     * this returns true.
731     *
732     * @return true, if current entry is determined to have outstanding bytes, false otherwise
733     */
734    private boolean currentEntryHasOutstandingBytes() {
735        return current.bytesReadFromStream <= current.entry.getCompressedSize()
736                && !current.hasDataDescriptor;
737    }
738
739    /**
740     * Read all data of the current entry from the underlying stream
741     * that hasn't been read, yet.
742     */
743    private void drainCurrentEntryData() throws IOException {
744        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
745        while (remaining > 0) {
746            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
747            if (n < 0) {
748                throw new EOFException("Truncated ZIP entry: "
749                                       + ArchiveUtils.sanitize(current.entry.getName()));
750            }
751            count(n);
752            remaining -= n;
753        }
754    }
755
756    /**
757     * Get the number of bytes Inflater has actually processed.
758     *
759     * <p>for Java &lt; Java7 the getBytes* methods in
760     * Inflater/Deflater seem to return unsigned ints rather than
761     * longs that start over with 0 at 2^32.</p>
762     *
763     * <p>The stream knows how many bytes it has read, but not how
764     * many the Inflater actually consumed - it should be between the
765     * total number of bytes read for the entry and the total number
766     * minus the last read operation.  Here we just try to make the
767     * value close enough to the bytes we've read by assuming the
768     * number of bytes consumed must be smaller than (or equal to) the
769     * number of bytes read but not smaller by more than 2^32.</p>
770     */
771    private long getBytesInflated() {
772        long inB = inf.getBytesRead();
773        if (current.bytesReadFromStream >= TWO_EXP_32) {
774            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
775                inB += TWO_EXP_32;
776            }
777        }
778        return inB;
779    }
780
781    private int fill() throws IOException {
782        if (closed) {
783            throw new IOException("The stream is closed");
784        }
785        final int length = in.read(buf.array());
786        if (length > 0) {
787            buf.limit(length);
788            count(buf.limit());
789            inf.setInput(buf.array(), 0, buf.limit());
790        }
791        return length;
792    }
793
794    private void readFully(final byte[] b) throws IOException {
795        readFully(b, 0);
796    }
797
798    private void readFully(final byte[] b, final int off) throws IOException {
799        final int len = b.length - off;
800        final int count = IOUtils.readFully(in, b, off, len);
801        count(count);
802        if (count < len) {
803            throw new EOFException();
804        }
805    }
806
807    private void readDataDescriptor() throws IOException {
808        readFully(wordBuf);
809        ZipLong val = new ZipLong(wordBuf);
810        if (ZipLong.DD_SIG.equals(val)) {
811            // data descriptor with signature, skip sig
812            readFully(wordBuf);
813            val = new ZipLong(wordBuf);
814        }
815        current.entry.setCrc(val.getValue());
816
817        // if there is a ZIP64 extra field, sizes are eight bytes
818        // each, otherwise four bytes each.  Unfortunately some
819        // implementations - namely Java7 - use eight bytes without
820        // using a ZIP64 extra field -
821        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
822
823        // just read 16 bytes and check whether bytes nine to twelve
824        // look like one of the signatures of what could follow a data
825        // descriptor (ignoring archive decryption headers for now).
826        // If so, push back eight bytes and assume sizes are four
827        // bytes, otherwise sizes are eight bytes each.
828        readFully(twoDwordBuf);
829        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
830        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
831            pushback(twoDwordBuf, DWORD, DWORD);
832            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
833            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
834        } else {
835            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
836            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
837        }
838    }
839
840    /**
841     * Whether this entry requires a data descriptor this library can work with.
842     *
843     * @return true if allowStoredEntriesWithDataDescriptor is true,
844     * the entry doesn't require any data descriptor or the method is
845     * DEFLATED or ENHANCED_DEFLATED.
846     */
847    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
848        return !entry.getGeneralPurposeBit().usesDataDescriptor()
849
850                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
851                || entry.getMethod() == ZipEntry.DEFLATED
852                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
853    }
854
855    /**
856     * Whether the compressed size for the entry is either known or
857     * not required by the compression method being used.
858     */
859    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
860        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
861            || entry.getMethod() == ZipEntry.DEFLATED
862            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
863            || (entry.getGeneralPurposeBit().usesDataDescriptor()
864                && allowStoredEntriesWithDataDescriptor
865                && entry.getMethod() == ZipEntry.STORED);
866    }
867
868    private static final String USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER =
869        " while reading a stored entry using data descriptor. Either the archive is broken"
870        + " or it can not be read using ZipArchiveInputStream and you must use ZipFile."
871        + " A common cause for this is a ZIP archive containing a ZIP archive."
872        + " See http://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile";
873
874    /**
875     * Caches a stored entry that uses the data descriptor.
876     *
877     * <ul>
878     *   <li>Reads a stored entry until the signature of a local file
879     *     header, central directory header or data descriptor has been
880     *     found.</li>
881     *   <li>Stores all entry data in lastStoredEntry.</p>
882     *   <li>Rewinds the stream to position at the data
883     *     descriptor.</li>
884     *   <li>reads the data descriptor</li>
885     * </ul>
886     *
887     * <p>After calling this method the entry should know its size,
888     * the entry's data is cached and the stream is positioned at the
889     * next local file or central directory header.</p>
890     */
891    private void readStoredEntry() throws IOException {
892        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
893        int off = 0;
894        boolean done = false;
895
896        // length of DD without signature
897        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
898
899        while (!done) {
900            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
901            if (r <= 0) {
902                // read the whole archive without ever finding a
903                // central directory
904                throw new IOException("Truncated ZIP file");
905            }
906            if (r + off < 4) {
907                // buffer too small to check for a signature, loop
908                off += r;
909                continue;
910            }
911
912            done = bufferContainsSignature(bos, off, r, ddLen);
913            if (!done) {
914                off = cacheBytesRead(bos, off, r, ddLen);
915            }
916        }
917        if (current.entry.getCompressedSize() != current.entry.getSize()) {
918            throw new ZipException("compressed and uncompressed size don't match"
919                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
920        }
921        final byte[] b = bos.toByteArray();
922        if (b.length != current.entry.getSize()) {
923            throw new ZipException("actual and claimed size don't match"
924                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
925        }
926        lastStoredEntry = new ByteArrayInputStream(b);
927    }
928
929    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
930    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
931    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
932
933    /**
934     * Checks whether the current buffer contains the signature of a
935     * &quot;data descriptor&quot;, &quot;local file header&quot; or
936     * &quot;central directory entry&quot;.
937     *
938     * <p>If it contains such a signature, reads the data descriptor
939     * and positions the stream right after the data descriptor.</p>
940     */
941    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
942            throws IOException {
943
944        boolean done = false;
945        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
946            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
947                int expectDDPos = i;
948                if (i >= expectedDDLen &&
949                    (buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
950                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
951                    // found a LFH or CFH:
952                    expectDDPos = i - expectedDDLen;
953                    done = true;
954                }
955                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
956                    // found DD:
957                    done = true;
958                }
959                if (done) {
960                    // * push back bytes read in excess as well as the data
961                    //   descriptor
962                    // * copy the remaining bytes to cache
963                    // * read data descriptor
964                    pushback(buf.array(), expectDDPos, offset + lastRead - expectDDPos);
965                    bos.write(buf.array(), 0, expectDDPos);
966                    readDataDescriptor();
967                }
968            }
969        }
970        return done;
971    }
972
973    /**
974     * If the last read bytes could hold a data descriptor and an
975     * incomplete signature then save the last bytes to the front of
976     * the buffer and cache everything in front of the potential data
977     * descriptor into the given ByteArrayOutputStream.
978     *
979     * <p>Data descriptor plus incomplete signature (3 bytes in the
980     * worst case) can be 20 bytes max.</p>
981     */
982    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
983        final int cacheable = offset + lastRead - expecteDDLen - 3;
984        if (cacheable > 0) {
985            bos.write(buf.array(), 0, cacheable);
986            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
987            offset = expecteDDLen + 3;
988        } else {
989            offset += lastRead;
990        }
991        return offset;
992    }
993
994    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
995        ((PushbackInputStream) in).unread(buf, offset, length);
996        pushedBackBytes(length);
997    }
998
999    // End of Central Directory Record
1000    //   end of central dir signature    WORD
1001    //   number of this disk             SHORT
1002    //   number of the disk with the
1003    //   start of the central directory  SHORT
1004    //   total number of entries in the
1005    //   central directory on this disk  SHORT
1006    //   total number of entries in
1007    //   the central directory           SHORT
1008    //   size of the central directory   WORD
1009    //   offset of start of central
1010    //   directory with respect to
1011    //   the starting disk number        WORD
1012    //   .ZIP file comment length        SHORT
1013    //   .ZIP file comment               up to 64KB
1014    //
1015
1016    /**
1017     * Reads the stream until it find the "End of central directory
1018     * record" and consumes it as well.
1019     */
1020    private void skipRemainderOfArchive() throws IOException {
1021        // skip over central directory. One LFH has been read too much
1022        // already.  The calculation discounts file names and extra
1023        // data so it will be too short.
1024        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1025        findEocdRecord();
1026        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1027        readFully(shortBuf);
1028        // file comment
1029        realSkip(ZipShort.getValue(shortBuf));
1030    }
1031
1032    /**
1033     * Reads forward until the signature of the &quot;End of central
1034     * directory&quot; record is found.
1035     */
1036    private void findEocdRecord() throws IOException {
1037        int currentByte = -1;
1038        boolean skipReadCall = false;
1039        while (skipReadCall || (currentByte = readOneByte()) > -1) {
1040            skipReadCall = false;
1041            if (!isFirstByteOfEocdSig(currentByte)) {
1042                continue;
1043            }
1044            currentByte = readOneByte();
1045            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
1046                if (currentByte == -1) {
1047                    break;
1048                }
1049                skipReadCall = isFirstByteOfEocdSig(currentByte);
1050                continue;
1051            }
1052            currentByte = readOneByte();
1053            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
1054                if (currentByte == -1) {
1055                    break;
1056                }
1057                skipReadCall = isFirstByteOfEocdSig(currentByte);
1058                continue;
1059            }
1060            currentByte = readOneByte();
1061            if (currentByte == -1
1062                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1063                break;
1064            }
1065            skipReadCall = isFirstByteOfEocdSig(currentByte);
1066        }
1067    }
1068
1069    /**
1070     * Skips bytes by reading from the underlying stream rather than
1071     * the (potentially inflating) archive stream - which {@link
1072     * #skip} would do.
1073     *
1074     * Also updates bytes-read counter.
1075     */
1076    private void realSkip(final long value) throws IOException {
1077        if (value >= 0) {
1078            long skipped = 0;
1079            while (skipped < value) {
1080                final long rem = value - skipped;
1081                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1082                if (x == -1) {
1083                    return;
1084                }
1085                count(x);
1086                skipped += x;
1087            }
1088            return;
1089        }
1090        throw new IllegalArgumentException();
1091    }
1092
1093    /**
1094     * Reads bytes by reading from the underlying stream rather than
1095     * the (potentially inflating) archive stream - which {@link #read} would do.
1096     *
1097     * Also updates bytes-read counter.
1098     */
1099    private int readOneByte() throws IOException {
1100        final int b = in.read();
1101        if (b != -1) {
1102            count(1);
1103        }
1104        return b;
1105    }
1106
1107    private boolean isFirstByteOfEocdSig(final int b) {
1108        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1109    }
1110
1111    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
1112        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
1113    };
1114    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
1115
1116    /**
1117     * Checks whether this might be an APK Signing Block.
1118     *
1119     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
1120     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
1121     * and if we've found it, return true.</p>
1122     *
1123     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
1124     * the local file header of the next entry.
1125     *
1126     * @return true if this looks like a APK signing block
1127     *
1128     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
1129     */
1130    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
1131        // length of block excluding the size field itself
1132        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
1133        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
1134        // also subtract 16 bytes in order to position us at the magic string
1135        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
1136            - (long) APK_SIGNING_BLOCK_MAGIC.length));
1137        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];
1138
1139        try {
1140            if (toSkip.signum() < 0) {
1141                // suspectLocalFileHeader contains the start of suspect magic string
1142                int off = suspectLocalFileHeader.length + toSkip.intValue();
1143                // length was shorter than magic length
1144                if (off < DWORD) {
1145                    return false;
1146                }
1147                int bytesInBuffer = Math.abs(toSkip.intValue());
1148                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
1149                if (bytesInBuffer < magic.length) {
1150                    readFully(magic, bytesInBuffer);
1151                }
1152            } else {
1153                while (toSkip.compareTo(LONG_MAX) > 0) {
1154                    realSkip(Long.MAX_VALUE);
1155                    toSkip = toSkip.add(LONG_MAX.negate());
1156                }
1157                realSkip(toSkip.longValue());
1158                readFully(magic);
1159            }
1160        } catch (EOFException ex) { //NOSONAR
1161            // length was invalid
1162            return false;
1163        }
1164        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
1165    }
1166
1167    /**
1168     * Structure collecting information for the entry that is
1169     * currently being read.
1170     */
1171    private static final class CurrentEntry {
1172
1173        /**
1174         * Current ZIP entry.
1175         */
1176        private final ZipArchiveEntry entry = new ZipArchiveEntry();
1177
1178        /**
1179         * Does the entry use a data descriptor?
1180         */
1181        private boolean hasDataDescriptor;
1182
1183        /**
1184         * Does the entry have a ZIP64 extended information extra field.
1185         */
1186        private boolean usesZip64;
1187
1188        /**
1189         * Number of bytes of entry content read by the client if the
1190         * entry is STORED.
1191         */
1192        private long bytesRead;
1193
1194        /**
1195         * Number of bytes of entry content read from the stream.
1196         *
1197         * <p>This may be more than the actual entry's length as some
1198         * stuff gets buffered up and needs to be pushed back when the
1199         * end of the entry has been reached.</p>
1200         */
1201        private long bytesReadFromStream;
1202
1203        /**
1204         * The checksum calculated as the current entry is read.
1205         */
1206        private final CRC32 crc = new CRC32();
1207
1208        /**
1209         * The input stream decompressing the data for shrunk and imploded entries.
1210         */
1211        private InputStream in;
1212    }
1213
1214    /**
1215     * Bounded input stream adapted from commons-io
1216     */
1217    private class BoundedInputStream extends InputStream {
1218
1219        /** the wrapped input stream */
1220        private final InputStream in;
1221
1222        /** the max length to provide */
1223        private final long max;
1224
1225        /** the number of bytes already returned */
1226        private long pos = 0;
1227
1228        /**
1229         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1230         * stream and limits it to a certain size.
1231         *
1232         * @param in The wrapped input stream
1233         * @param size The maximum number of bytes to return
1234         */
1235        public BoundedInputStream(final InputStream in, final long size) {
1236            this.max = size;
1237            this.in = in;
1238        }
1239
1240        @Override
1241        public int read() throws IOException {
1242            if (max >= 0 && pos >= max) {
1243                return -1;
1244            }
1245            final int result = in.read();
1246            pos++;
1247            count(1);
1248            current.bytesReadFromStream++;
1249            return result;
1250        }
1251
1252        @Override
1253        public int read(final byte[] b) throws IOException {
1254            return this.read(b, 0, b.length);
1255        }
1256
1257        @Override
1258        public int read(final byte[] b, final int off, final int len) throws IOException {
1259            if (max >= 0 && pos >= max) {
1260                return -1;
1261            }
1262            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1263            final int bytesRead = in.read(b, off, (int) maxRead);
1264
1265            if (bytesRead == -1) {
1266                return -1;
1267            }
1268
1269            pos += bytesRead;
1270            count(bytesRead);
1271            current.bytesReadFromStream += bytesRead;
1272            return bytesRead;
1273        }
1274
1275        @Override
1276        public long skip(final long n) throws IOException {
1277            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1278            final long skippedBytes = IOUtils.skip(in, toSkip);
1279            pos += skippedBytes;
1280            return skippedBytes;
1281        }
1282
1283        @Override
1284        public int available() throws IOException {
1285            if (max >= 0 && pos >= max) {
1286                return 0;
1287            }
1288            return in.available();
1289        }
1290    }
1291}