001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.nio.ByteBuffer; 027import java.nio.channels.FileChannel; 028import java.nio.channels.SeekableByteChannel; 029import java.nio.file.Files; 030import java.nio.file.StandardOpenOption; 031import java.util.Arrays; 032import java.util.Collections; 033import java.util.Comparator; 034import java.util.Enumeration; 035import java.util.EnumSet; 036import java.util.HashMap; 037import java.util.LinkedList; 038import java.util.List; 039import java.util.Map; 040import java.util.zip.Inflater; 041import java.util.zip.InflaterInputStream; 042import java.util.zip.ZipException; 043 044import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 045import org.apache.commons.compress.utils.IOUtils; 046 047import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 048import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 049import static 
org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 050import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 051import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 052 053/** 054 * Replacement for <code>java.util.ZipFile</code>. 055 * 056 * <p>This class adds support for file name encodings other than UTF-8 057 * (which is required to work on ZIP files created by native zip tools 058 * and is able to skip a preamble like the one found in self 059 * extracting archives. Furthermore it returns instances of 060 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 061 * instead of <code>java.util.zip.ZipEntry</code>.</p> 062 * 063 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 064 * have to reimplement all methods anyway. Like 065 * <code>java.util.ZipFile</code>, it uses SeekableByteChannel under the 066 * covers and supports compressed and uncompressed entries. As of 067 * Apache Commons Compress 1.3 it also transparently supports Zip64 068 * extensions and thus individual entries and archives larger than 4 069 * GB or with more than 65536 entries.</p> 070 * 071 * <p>The method signatures mimic the ones of 072 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 073 * 074 * <ul> 075 * <li>There is no getName method.</li> 076 * <li>entries has been renamed to getEntries.</li> 077 * <li>getEntries and getEntry return 078 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 079 * instances.</li> 080 * <li>close is allowed to throw IOException.</li> 081 * </ul> 082 * 083 */ 084public class ZipFile implements Closeable { 085 private static final int HASH_SIZE = 509; 086 static final int NIBLET_MASK = 0x0f; 087 static final int BYTE_SHIFT = 8; 088 private static final int POS_0 = 0; 089 private static final int POS_1 = 1; 090 private static final int POS_2 = 2; 091 private static final int POS_3 = 3; 092 093 /** 094 * List of 
entries in the order they appear inside the central 095 * directory. 096 */ 097 private final List<ZipArchiveEntry> entries = 098 new LinkedList<>(); 099 100 /** 101 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 102 */ 103 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 104 new HashMap<>(HASH_SIZE); 105 106 /** 107 * The encoding to use for filenames and the file comment. 108 * 109 * <p>For a list of possible values see <a 110 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 111 * Defaults to UTF-8.</p> 112 */ 113 private final String encoding; 114 115 /** 116 * The zip encoding to use for filenames and the file comment. 117 */ 118 private final ZipEncoding zipEncoding; 119 120 /** 121 * File name of actual source. 122 */ 123 private final String archiveName; 124 125 /** 126 * The actual data source. 127 */ 128 private final SeekableByteChannel archive; 129 130 /** 131 * Whether to look for and use Unicode extra fields. 132 */ 133 private final boolean useUnicodeExtraFields; 134 135 /** 136 * Whether the file is closed. 137 */ 138 private volatile boolean closed = true; 139 140 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 141 private final byte[] dwordBuf = new byte[DWORD]; 142 private final byte[] wordBuf = new byte[WORD]; 143 private final byte[] cfhBuf = new byte[CFH_LEN]; 144 private final byte[] shortBuf = new byte[SHORT]; 145 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 146 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 147 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 148 149 /** 150 * Opens the given file for reading, assuming "UTF8" for file names. 151 * 152 * @param f the archive. 153 * 154 * @throws IOException if an error occurs while reading the file. 
155 */ 156 public ZipFile(final File f) throws IOException { 157 this(f, ZipEncodingHelper.UTF8); 158 } 159 160 /** 161 * Opens the given file for reading, assuming "UTF8". 162 * 163 * @param name name of the archive. 164 * 165 * @throws IOException if an error occurs while reading the file. 166 */ 167 public ZipFile(final String name) throws IOException { 168 this(new File(name), ZipEncodingHelper.UTF8); 169 } 170 171 /** 172 * Opens the given file for reading, assuming the specified 173 * encoding for file names, scanning unicode extra fields. 174 * 175 * @param name name of the archive. 176 * @param encoding the encoding to use for file names, use null 177 * for the platform's default encoding 178 * 179 * @throws IOException if an error occurs while reading the file. 180 */ 181 public ZipFile(final String name, final String encoding) throws IOException { 182 this(new File(name), encoding, true); 183 } 184 185 /** 186 * Opens the given file for reading, assuming the specified 187 * encoding for file names and scanning for unicode extra fields. 188 * 189 * @param f the archive. 190 * @param encoding the encoding to use for file names, use null 191 * for the platform's default encoding 192 * 193 * @throws IOException if an error occurs while reading the file. 194 */ 195 public ZipFile(final File f, final String encoding) throws IOException { 196 this(f, encoding, true); 197 } 198 199 /** 200 * Opens the given file for reading, assuming the specified 201 * encoding for file names. 202 * 203 * @param f the archive. 204 * @param encoding the encoding to use for file names, use null 205 * for the platform's default encoding 206 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 207 * Extra Fields (if present) to set the file names. 208 * 209 * @throws IOException if an error occurs while reading the file. 
210 */ 211 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 212 throws IOException { 213 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 214 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true); 215 } 216 217 /** 218 * Opens the given channel for reading, assuming "UTF8" for file names. 219 * 220 * <p>{@link 221 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 222 * allows you to read from an in-memory archive.</p> 223 * 224 * @param channel the archive. 225 * 226 * @throws IOException if an error occurs while reading the file. 227 * @since 1.13 228 */ 229 public ZipFile(final SeekableByteChannel channel) 230 throws IOException { 231 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 232 } 233 234 /** 235 * Opens the given channel for reading, assuming the specified 236 * encoding for file names. 237 * 238 * <p>{@link 239 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 240 * allows you to read from an in-memory archive.</p> 241 * 242 * @param channel the archive. 243 * @param encoding the encoding to use for file names, use null 244 * for the platform's default encoding 245 * 246 * @throws IOException if an error occurs while reading the file. 247 * @since 1.13 248 */ 249 public ZipFile(final SeekableByteChannel channel, final String encoding) 250 throws IOException { 251 this(channel, "unknown archive", encoding, true); 252 } 253 254 /** 255 * Opens the given channel for reading, assuming the specified 256 * encoding for file names. 257 * 258 * <p>{@link 259 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 260 * allows you to read from an in-memory archive.</p> 261 * 262 * @param channel the archive. 263 * @param archiveName name of the archive, used for error messages only. 
264 * @param encoding the encoding to use for file names, use null 265 * for the platform's default encoding 266 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 267 * Extra Fields (if present) to set the file names. 268 * 269 * @throws IOException if an error occurs while reading the file. 270 * @since 1.13 271 */ 272 public ZipFile(final SeekableByteChannel channel, final String archiveName, 273 final String encoding, final boolean useUnicodeExtraFields) 274 throws IOException { 275 this(channel, archiveName, encoding, useUnicodeExtraFields, false); 276 } 277 278 private ZipFile(final SeekableByteChannel channel, final String archiveName, 279 final String encoding, final boolean useUnicodeExtraFields, 280 final boolean closeOnError) 281 throws IOException { 282 this.archiveName = archiveName; 283 this.encoding = encoding; 284 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 285 this.useUnicodeExtraFields = useUnicodeExtraFields; 286 archive = channel; 287 boolean success = false; 288 try { 289 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 290 populateFromCentralDirectory(); 291 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 292 success = true; 293 } finally { 294 closed = !success; 295 if (!success && closeOnError) { 296 IOUtils.closeQuietly(archive); 297 } 298 } 299 } 300 301 /** 302 * The encoding to use for filenames and the file comment. 303 * 304 * @return null if using the platform's default character encoding. 305 */ 306 public String getEncoding() { 307 return encoding; 308 } 309 310 /** 311 * Closes the archive. 312 * @throws IOException if an error occurs closing the archive. 313 */ 314 @Override 315 public void close() throws IOException { 316 // this flag is only written here and read in finalize() which 317 // can never be run in parallel. 318 // no synchronization needed. 
319 closed = true; 320 321 archive.close(); 322 } 323 324 /** 325 * close a zipfile quietly; throw no io fault, do nothing 326 * on a null parameter 327 * @param zipfile file to close, can be null 328 */ 329 public static void closeQuietly(final ZipFile zipfile) { 330 IOUtils.closeQuietly(zipfile); 331 } 332 333 /** 334 * Returns all entries. 335 * 336 * <p>Entries will be returned in the same order they appear 337 * within the archive's central directory.</p> 338 * 339 * @return all entries as {@link ZipArchiveEntry} instances 340 */ 341 public Enumeration<ZipArchiveEntry> getEntries() { 342 return Collections.enumeration(entries); 343 } 344 345 /** 346 * Returns all entries in physical order. 347 * 348 * <p>Entries will be returned in the same order their contents 349 * appear within the archive.</p> 350 * 351 * @return all entries as {@link ZipArchiveEntry} instances 352 * 353 * @since 1.1 354 */ 355 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 356 final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 357 Arrays.sort(allEntries, offsetComparator); 358 return Collections.enumeration(Arrays.asList(allEntries)); 359 } 360 361 /** 362 * Returns a named entry - or {@code null} if no entry by 363 * that name exists. 364 * 365 * <p>If multiple entries with the same name exist the first entry 366 * in the archive's central directory by that name is 367 * returned.</p> 368 * 369 * @param name name of the entry. 370 * @return the ZipArchiveEntry corresponding to the given name - or 371 * {@code null} if not present. 372 */ 373 public ZipArchiveEntry getEntry(final String name) { 374 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 375 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 376 } 377 378 /** 379 * Returns all named entries in the same order they appear within 380 * the archive's central directory. 381 * 382 * @param name name of the entry. 
383 * @return the Iterable<ZipArchiveEntry> corresponding to the 384 * given name 385 * @since 1.6 386 */ 387 public Iterable<ZipArchiveEntry> getEntries(final String name) { 388 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 389 return entriesOfThatName != null ? entriesOfThatName 390 : Collections.<ZipArchiveEntry>emptyList(); 391 } 392 393 /** 394 * Returns all named entries in the same order their contents 395 * appear within the archive. 396 * 397 * @param name name of the entry. 398 * @return the Iterable<ZipArchiveEntry> corresponding to the 399 * given name 400 * @since 1.6 401 */ 402 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 403 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 404 if (nameMap.containsKey(name)) { 405 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 406 Arrays.sort(entriesOfThatName, offsetComparator); 407 } 408 return Arrays.asList(entriesOfThatName); 409 } 410 411 /** 412 * Whether this class is able to read the given entry. 413 * 414 * <p>May return false if it is set up to use encryption or a 415 * compression method that hasn't been implemented yet.</p> 416 * @since 1.1 417 * @param ze the entry 418 * @return whether this class is able to read the given entry. 419 */ 420 public boolean canReadEntryData(final ZipArchiveEntry ze) { 421 return ZipUtil.canHandleEntryData(ze); 422 } 423 424 /** 425 * Expose the raw stream of the archive entry (compressed form). 426 * 427 * <p>This method does not relate to how/if we understand the payload in the 428 * stream, since we really only intend to move it on to somewhere else.</p> 429 * 430 * @param ze The entry to get the stream for 431 * @return The raw input stream containing (possibly) compressed data. 
432 * @since 1.11 433 */ 434 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 435 if (!(ze instanceof Entry)) { 436 return null; 437 } 438 final long start = ze.getDataOffset(); 439 return createBoundedInputStream(start, ze.getCompressedSize()); 440 } 441 442 443 /** 444 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 445 * Compression and all other attributes will be as in this file. 446 * <p>This method transfers entries based on the central directory of the zip file.</p> 447 * 448 * @param target The zipArchiveOutputStream to write the entries to 449 * @param predicate A predicate that selects which entries to write 450 * @throws IOException on error 451 */ 452 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 453 throws IOException { 454 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 455 while (src.hasMoreElements()) { 456 final ZipArchiveEntry entry = src.nextElement(); 457 if (predicate.test( entry)) { 458 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 459 } 460 } 461 } 462 463 /** 464 * Returns an InputStream for reading the contents of the given entry. 465 * 466 * @param ze the entry to get the stream for. 467 * @return a stream to read the entry from. 
468 * @throws IOException if unable to create an input stream from the zipentry 469 * @throws ZipException if the zipentry uses an unsupported feature 470 */ 471 public InputStream getInputStream(final ZipArchiveEntry ze) 472 throws IOException, ZipException { 473 if (!(ze instanceof Entry)) { 474 return null; 475 } 476 // cast valididty is checked just above 477 ZipUtil.checkRequestedFeatures(ze); 478 final long start = ze.getDataOffset(); 479 // doesn't get closed if the method is not supported, but doesn't hold any resources either 480 final BoundedInputStream bis = 481 createBoundedInputStream(start, ze.getCompressedSize()); //NOSONAR 482 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 483 case STORED: 484 return bis; 485 case UNSHRINKING: 486 return new UnshrinkingInputStream(bis); 487 case IMPLODING: 488 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 489 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis)); 490 case DEFLATED: 491 bis.addDummy(); 492 final Inflater inflater = new Inflater(true); 493 return new InflaterInputStream(bis, inflater) { 494 @Override 495 public void close() throws IOException { 496 try { 497 super.close(); 498 } finally { 499 inflater.end(); 500 } 501 } 502 }; 503 case BZIP2: 504 return new BZip2CompressorInputStream(bis); 505 case AES_ENCRYPTED: 506 case ENHANCED_DEFLATED: 507 case EXPANDING_LEVEL_1: 508 case EXPANDING_LEVEL_2: 509 case EXPANDING_LEVEL_3: 510 case EXPANDING_LEVEL_4: 511 case JPEG: 512 case LZMA: 513 case PKWARE_IMPLODING: 514 case PPMD: 515 case TOKENIZATION: 516 case UNKNOWN: 517 case WAVPACK: 518 default: 519 throw new ZipException("Found unsupported compression method " 520 + ze.getMethod()); 521 } 522 } 523 524 /** 525 * <p> 526 * Convenience method to return the entry's content as a String if isUnixSymlink() 527 * returns true for it, otherwise returns null. 
528 * </p> 529 * 530 * <p>This method assumes the symbolic link's file name uses the 531 * same encoding that as been specified for this ZipFile.</p> 532 * 533 * @param entry ZipArchiveEntry object that represents the symbolic link 534 * @return entry's content as a String 535 * @throws IOException problem with content's input stream 536 * @since 1.5 537 */ 538 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 539 if (entry != null && entry.isUnixSymlink()) { 540 try (InputStream in = getInputStream(entry)) { 541 return zipEncoding.decode(IOUtils.toByteArray(in)); 542 } 543 } 544 return null; 545 } 546 547 /** 548 * Ensures that the close method of this zipfile is called when 549 * there are no more references to it. 550 * @see #close() 551 */ 552 @Override 553 protected void finalize() throws Throwable { 554 try { 555 if (!closed) { 556 System.err.println("Cleaning up unclosed ZipFile for archive " 557 + archiveName); 558 close(); 559 } 560 } finally { 561 super.finalize(); 562 } 563 } 564 565 /** 566 * Length of a "central directory" entry structure without file 567 * name, extra fields or comment. 
568 */ 569 private static final int CFH_LEN = 570 /* version made by */ SHORT 571 /* version needed to extract */ + SHORT 572 /* general purpose bit flag */ + SHORT 573 /* compression method */ + SHORT 574 /* last mod file time */ + SHORT 575 /* last mod file date */ + SHORT 576 /* crc-32 */ + WORD 577 /* compressed size */ + WORD 578 /* uncompressed size */ + WORD 579 /* filename length */ + SHORT 580 /* extra field length */ + SHORT 581 /* file comment length */ + SHORT 582 /* disk number start */ + SHORT 583 /* internal file attributes */ + SHORT 584 /* external file attributes */ + WORD 585 /* relative offset of local header */ + WORD; 586 587 private static final long CFH_SIG = 588 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 589 590 /** 591 * Reads the central directory of the given archive and populates 592 * the internal tables with ZipArchiveEntry instances. 593 * 594 * <p>The ZipArchiveEntrys will know all data that can be obtained from 595 * the central directory alone, but not the data that requires the 596 * local file header or additional data to be read.</p> 597 * 598 * @return a map of zipentries that didn't have the language 599 * encoding flag set when read. 
600 */ 601 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 602 throws IOException { 603 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 604 new HashMap<>(); 605 606 positionAtCentralDirectory(); 607 608 wordBbuf.rewind(); 609 IOUtils.readFully(archive, wordBbuf); 610 long sig = ZipLong.getValue(wordBuf); 611 612 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 613 throw new IOException("central directory is empty, can't expand" 614 + " corrupt archive."); 615 } 616 617 while (sig == CFH_SIG) { 618 readCentralDirectoryEntry(noUTF8Flag); 619 wordBbuf.rewind(); 620 IOUtils.readFully(archive, wordBbuf); 621 sig = ZipLong.getValue(wordBuf); 622 } 623 return noUTF8Flag; 624 } 625 626 /** 627 * Reads an individual entry of the central directory, creats an 628 * ZipArchiveEntry from it and adds it to the global maps. 629 * 630 * @param noUTF8Flag map used to collect entries that don't have 631 * their UTF-8 flag set and whose name will be set by data read 632 * from the local file header later. The current entry may be 633 * added to this map. 634 */ 635 private void 636 readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 637 throws IOException { 638 cfhBbuf.rewind(); 639 IOUtils.readFully(archive, cfhBbuf); 640 int off = 0; 641 final Entry ze = new Entry(); 642 643 final int versionMadeBy = ZipShort.getValue(cfhBuf, off); 644 off += SHORT; 645 ze.setVersionMadeBy(versionMadeBy); 646 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 647 648 ze.setVersionRequired(ZipShort.getValue(cfhBuf, off)); 649 off += SHORT; // version required 650 651 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off); 652 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 653 final ZipEncoding entryEncoding = 654 hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 655 ze.setGeneralPurposeBit(gpFlag); 656 ze.setRawFlag(ZipShort.getValue(cfhBuf, off)); 657 658 off += SHORT; 659 660 //noinspection MagicConstant 661 ze.setMethod(ZipShort.getValue(cfhBuf, off)); 662 off += SHORT; 663 664 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off)); 665 ze.setTime(time); 666 off += WORD; 667 668 ze.setCrc(ZipLong.getValue(cfhBuf, off)); 669 off += WORD; 670 671 ze.setCompressedSize(ZipLong.getValue(cfhBuf, off)); 672 off += WORD; 673 674 ze.setSize(ZipLong.getValue(cfhBuf, off)); 675 off += WORD; 676 677 final int fileNameLen = ZipShort.getValue(cfhBuf, off); 678 off += SHORT; 679 680 final int extraLen = ZipShort.getValue(cfhBuf, off); 681 off += SHORT; 682 683 final int commentLen = ZipShort.getValue(cfhBuf, off); 684 off += SHORT; 685 686 final int diskStart = ZipShort.getValue(cfhBuf, off); 687 off += SHORT; 688 689 ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off)); 690 off += SHORT; 691 692 ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off)); 693 off += WORD; 694 695 final byte[] fileName = new byte[fileNameLen]; 696 IOUtils.readFully(archive, ByteBuffer.wrap(fileName)); 697 ze.setName(entryEncoding.decode(fileName), fileName); 698 699 // LFH offset, 700 ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off)); 701 // data offset will be filled later 702 entries.add(ze); 703 704 final byte[] cdExtraData = new byte[extraLen]; 705 IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData)); 706 ze.setCentralDirectoryExtra(cdExtraData); 707 708 setSizesAndOffsetFromZip64Extra(ze, diskStart); 709 710 final byte[] comment = new byte[commentLen]; 711 IOUtils.readFully(archive, ByteBuffer.wrap(comment)); 712 ze.setComment(entryEncoding.decode(comment)); 713 714 if (!hasUTF8Flag && useUnicodeExtraFields) { 715 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 716 } 717 } 718 719 /** 720 * If the entry holds a Zip64 extended information extra field, 721 * read 
sizes from there if the entry's sizes are set to 722 * 0xFFFFFFFFF, do the same for the offset of the local file 723 * header. 724 * 725 * <p>Ensures the Zip64 extra either knows both compressed and 726 * uncompressed size or neither of both as the internal logic in 727 * ExtraFieldUtils forces the field to create local header data 728 * even if they are never used - and here a field with only one 729 * size would be invalid.</p> 730 */ 731 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze, 732 final int diskStart) 733 throws IOException { 734 final Zip64ExtendedInformationExtraField z64 = 735 (Zip64ExtendedInformationExtraField) 736 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 737 if (z64 != null) { 738 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 739 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 740 final boolean hasRelativeHeaderOffset = 741 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 742 z64.reparseCentralDirectoryData(hasUncompressedSize, 743 hasCompressedSize, 744 hasRelativeHeaderOffset, 745 diskStart == ZIP64_MAGIC_SHORT); 746 747 if (hasUncompressedSize) { 748 ze.setSize(z64.getSize().getLongValue()); 749 } else if (hasCompressedSize) { 750 z64.setSize(new ZipEightByteInteger(ze.getSize())); 751 } 752 753 if (hasCompressedSize) { 754 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 755 } else if (hasUncompressedSize) { 756 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 757 } 758 759 if (hasRelativeHeaderOffset) { 760 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 761 } 762 } 763 } 764 765 /** 766 * Length of the "End of central directory record" - which is 767 * supposed to be the last structure of the archive - without file 768 * comment. 
769 */ 770 static final int MIN_EOCD_SIZE = 771 /* end of central dir signature */ WORD 772 /* number of this disk */ + SHORT 773 /* number of the disk with the */ 774 /* start of the central directory */ + SHORT 775 /* total number of entries in */ 776 /* the central dir on this disk */ + SHORT 777 /* total number of entries in */ 778 /* the central dir */ + SHORT 779 /* size of the central directory */ + WORD 780 /* offset of start of central */ 781 /* directory with respect to */ 782 /* the starting disk number */ + WORD 783 /* zipfile comment length */ + SHORT; 784 785 /** 786 * Maximum length of the "End of central directory record" with a 787 * file comment. 788 */ 789 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 790 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 791 792 /** 793 * Offset of the field that holds the location of the first 794 * central directory entry inside the "End of central directory 795 * record" relative to the start of the "End of central directory 796 * record". 797 */ 798 private static final int CFD_LOCATOR_OFFSET = 799 /* end of central dir signature */ WORD 800 /* number of this disk */ + SHORT 801 /* number of the disk with the */ 802 /* start of the central directory */ + SHORT 803 /* total number of entries in */ 804 /* the central dir on this disk */ + SHORT 805 /* total number of entries in */ 806 /* the central dir */ + SHORT 807 /* size of the central directory */ + WORD; 808 809 /** 810 * Length of the "Zip64 end of central directory locator" - which 811 * should be right in front of the "end of central directory 812 * record" if one is present at all. 
813 */ 814 private static final int ZIP64_EOCDL_LENGTH = 815 /* zip64 end of central dir locator sig */ WORD 816 /* number of the disk with the start */ 817 /* start of the zip64 end of */ 818 /* central directory */ + WORD 819 /* relative offset of the zip64 */ 820 /* end of central directory record */ + DWORD 821 /* total number of disks */ + WORD; 822 823 /** 824 * Offset of the field that holds the location of the "Zip64 end 825 * of central directory record" inside the "Zip64 end of central 826 * directory locator" relative to the start of the "Zip64 end of 827 * central directory locator". 828 */ 829 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 830 /* zip64 end of central dir locator sig */ WORD 831 /* number of the disk with the start */ 832 /* start of the zip64 end of */ 833 /* central directory */ + WORD; 834 835 /** 836 * Offset of the field that holds the location of the first 837 * central directory entry inside the "Zip64 end of central 838 * directory record" relative to the start of the "Zip64 end of 839 * central directory record". 840 */ 841 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 842 /* zip64 end of central dir */ 843 /* signature */ WORD 844 /* size of zip64 end of central */ 845 /* directory record */ + DWORD 846 /* version made by */ + SHORT 847 /* version needed to extract */ + SHORT 848 /* number of this disk */ + WORD 849 /* number of the disk with the */ 850 /* start of the central directory */ + WORD 851 /* total number of entries in the */ 852 /* central directory on this disk */ + DWORD 853 /* total number of entries in the */ 854 /* central directory */ + DWORD 855 /* size of the central directory */ + DWORD; 856 857 /** 858 * Searches for either the "Zip64 end of central directory 859 * locator" or the "End of central dir record", parses 860 * it and positions the stream at the first central directory 861 * record. 
862 */ 863 private void positionAtCentralDirectory() 864 throws IOException { 865 positionAtEndOfCentralDirectoryRecord(); 866 boolean found = false; 867 final boolean searchedForZip64EOCD = 868 archive.position() > ZIP64_EOCDL_LENGTH; 869 if (searchedForZip64EOCD) { 870 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 871 wordBbuf.rewind(); 872 IOUtils.readFully(archive, wordBbuf); 873 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 874 wordBuf); 875 } 876 if (!found) { 877 // not a ZIP64 archive 878 if (searchedForZip64EOCD) { 879 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 880 } 881 positionAtCentralDirectory32(); 882 } else { 883 positionAtCentralDirectory64(); 884 } 885 } 886 887 /** 888 * Parses the "Zip64 end of central directory locator", 889 * finds the "Zip64 end of central directory record" using the 890 * parsed information, parses that and positions the stream at the 891 * first central directory record. 892 * 893 * Expects stream to be positioned right behind the "Zip64 894 * end of central directory locator"'s signature. 895 */ 896 private void positionAtCentralDirectory64() 897 throws IOException { 898 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 899 - WORD /* signature has already been read */); 900 dwordBbuf.rewind(); 901 IOUtils.readFully(archive, dwordBbuf); 902 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 903 wordBbuf.rewind(); 904 IOUtils.readFully(archive, wordBbuf); 905 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 906 throw new ZipException("archive's ZIP64 end of central " 907 + "directory locator is corrupt."); 908 } 909 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 910 - WORD /* signature has already been read */); 911 dwordBbuf.rewind(); 912 IOUtils.readFully(archive, dwordBbuf); 913 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 914 } 915 916 /** 917 * Parses the "End of central dir record" and positions 918 * the stream at the first central directory record. 
919 * 920 * Expects stream to be positioned at the beginning of the 921 * "End of central dir record". 922 */ 923 private void positionAtCentralDirectory32() 924 throws IOException { 925 skipBytes(CFD_LOCATOR_OFFSET); 926 wordBbuf.rewind(); 927 IOUtils.readFully(archive, wordBbuf); 928 archive.position(ZipLong.getValue(wordBuf)); 929 } 930 931 /** 932 * Searches for the and positions the stream at the start of the 933 * "End of central dir record". 934 */ 935 private void positionAtEndOfCentralDirectoryRecord() 936 throws IOException { 937 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 938 ZipArchiveOutputStream.EOCD_SIG); 939 if (!found) { 940 throw new ZipException("archive is not a ZIP archive"); 941 } 942 } 943 944 /** 945 * Searches the archive backwards from minDistance to maxDistance 946 * for the given signature, positions the RandomaccessFile right 947 * at the signature if it has been found. 948 */ 949 private boolean tryToLocateSignature(final long minDistanceFromEnd, 950 final long maxDistanceFromEnd, 951 final byte[] sig) throws IOException { 952 boolean found = false; 953 long off = archive.size() - minDistanceFromEnd; 954 final long stopSearching = 955 Math.max(0L, archive.size() - maxDistanceFromEnd); 956 if (off >= 0) { 957 for (; off >= stopSearching; off--) { 958 archive.position(off); 959 try { 960 wordBbuf.rewind(); 961 IOUtils.readFully(archive, wordBbuf); 962 wordBbuf.flip(); 963 } catch (EOFException ex) { 964 break; 965 } 966 int curr = wordBbuf.get(); 967 if (curr == sig[POS_0]) { 968 curr = wordBbuf.get(); 969 if (curr == sig[POS_1]) { 970 curr = wordBbuf.get(); 971 if (curr == sig[POS_2]) { 972 curr = wordBbuf.get(); 973 if (curr == sig[POS_3]) { 974 found = true; 975 break; 976 } 977 } 978 } 979 } 980 } 981 } 982 if (found) { 983 archive.position(off); 984 } 985 return found; 986 } 987 988 /** 989 * Skips the given number of bytes or throws an EOFException if 990 * skipping failed. 
991 */ 992 private void skipBytes(final int count) throws IOException { 993 long currentPosition = archive.position(); 994 long newPosition = currentPosition + count; 995 if (newPosition > archive.size()) { 996 throw new EOFException(); 997 } 998 archive.position(newPosition); 999 } 1000 1001 /** 1002 * Number of bytes in local file header up to the "length of 1003 * filename" entry. 1004 */ 1005 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1006 /* local file header signature */ WORD 1007 /* version needed to extract */ + SHORT 1008 /* general purpose bit flag */ + SHORT 1009 /* compression method */ + SHORT 1010 /* last mod file time */ + SHORT 1011 /* last mod file date */ + SHORT 1012 /* crc-32 */ + WORD 1013 /* compressed size */ + WORD 1014 /* uncompressed size */ + (long) WORD; 1015 1016 /** 1017 * Walks through all recorded entries and adds the data available 1018 * from the local file header. 1019 * 1020 * <p>Also records the offsets for the data to read from the 1021 * entries.</p> 1022 */ 1023 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1024 entriesWithoutUTF8Flag) 1025 throws IOException { 1026 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1027 // entries is filled in populateFromCentralDirectory and 1028 // never modified 1029 final Entry ze = (Entry) zipArchiveEntry; 1030 final long offset = ze.getLocalHeaderOffset(); 1031 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1032 wordBbuf.rewind(); 1033 IOUtils.readFully(archive, wordBbuf); 1034 wordBbuf.flip(); 1035 wordBbuf.get(shortBuf); 1036 final int fileNameLen = ZipShort.getValue(shortBuf); 1037 wordBbuf.get(shortBuf); 1038 final int extraFieldLen = ZipShort.getValue(shortBuf); 1039 skipBytes(fileNameLen); 1040 final byte[] localExtraData = new byte[extraFieldLen]; 1041 IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); 1042 ze.setExtra(localExtraData); 1043 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 
1044 + SHORT + SHORT + fileNameLen + extraFieldLen); 1045 ze.setStreamContiguous(true); 1046 1047 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1048 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1049 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1050 nc.comment); 1051 } 1052 1053 final String name = ze.getName(); 1054 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 1055 if (entriesOfThatName == null) { 1056 entriesOfThatName = new LinkedList<>(); 1057 nameMap.put(name, entriesOfThatName); 1058 } 1059 entriesOfThatName.addLast(ze); 1060 } 1061 } 1062 1063 /** 1064 * Checks whether the archive starts with a LFH. If it doesn't, 1065 * it may be an empty archive. 1066 */ 1067 private boolean startsWithLocalFileHeader() throws IOException { 1068 archive.position(0); 1069 wordBbuf.rewind(); 1070 IOUtils.readFully(archive, wordBbuf); 1071 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1072 } 1073 1074 /** 1075 * Creates new BoundedInputStream, according to implementation of 1076 * underlying archive channel. 1077 */ 1078 private BoundedInputStream createBoundedInputStream(long start, long remaining) { 1079 return archive instanceof FileChannel ? 1080 new BoundedFileChannelInputStream(start, remaining) : 1081 new BoundedInputStream(start, remaining); 1082 } 1083 1084 /** 1085 * InputStream that delegates requests to the underlying 1086 * SeekableByteChannel, making sure that only bytes from a certain 1087 * range can be read. 
1088 */ 1089 private class BoundedInputStream extends InputStream { 1090 private ByteBuffer singleByteBuffer; 1091 private final long end; 1092 private long loc; 1093 private boolean addDummy = false; 1094 1095 BoundedInputStream(final long start, final long remaining) { 1096 this.end = start+remaining; 1097 if (this.end < start) { 1098 // check for potential vulnerability due to overflow 1099 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining); 1100 } 1101 loc = start; 1102 } 1103 1104 @Override 1105 public synchronized int read() throws IOException { 1106 if (loc >= end) { 1107 if (loc == end && addDummy) { 1108 addDummy = false; 1109 return 0; 1110 } 1111 return -1; 1112 } 1113 if (singleByteBuffer == null) { 1114 singleByteBuffer = ByteBuffer.allocate(1); 1115 } 1116 else { 1117 singleByteBuffer.rewind(); 1118 } 1119 int read = read(loc, singleByteBuffer); 1120 if (read < 0) { 1121 return read; 1122 } 1123 loc++; 1124 return singleByteBuffer.get() & 0xff; 1125 } 1126 1127 @Override 1128 public synchronized int read(final byte[] b, final int off, int len) throws IOException { 1129 if (len <= 0) { 1130 return 0; 1131 } 1132 1133 if (len > end-loc) { 1134 if (loc >= end) { 1135 if (loc == end && addDummy) { 1136 addDummy = false; 1137 b[off] = 0; 1138 return 1; 1139 } 1140 return -1; 1141 } 1142 len = (int)(end-loc); 1143 } 1144 1145 ByteBuffer buf; 1146 buf = ByteBuffer.wrap(b, off, len); 1147 int ret = read(loc, buf); 1148 if (ret > 0) { 1149 loc += ret; 1150 return ret; 1151 } 1152 return ret; 1153 } 1154 1155 protected int read(long pos, ByteBuffer buf) throws IOException { 1156 int read; 1157 synchronized (archive) { 1158 archive.position(pos); 1159 read = archive.read(buf); 1160 } 1161 buf.flip(); 1162 return read; 1163 } 1164 1165 synchronized void addDummy() { 1166 this.addDummy = true; 1167 } 1168 } 1169 1170 /** 1171 * Lock-free implementation of BoundedInputStream. 
The 1172 * implementation uses positioned reads on the underlying archive 1173 * file channel and therefore performs significantly faster in 1174 * concurrent environment. 1175 */ 1176 private class BoundedFileChannelInputStream extends BoundedInputStream { 1177 private final FileChannel archive; 1178 1179 BoundedFileChannelInputStream(final long start, final long remaining) { 1180 super(start, remaining); 1181 archive = (FileChannel)ZipFile.this.archive; 1182 } 1183 1184 @Override 1185 protected int read(long pos, ByteBuffer buf) throws IOException { 1186 int read = archive.read(buf, pos); 1187 buf.flip(); 1188 return read; 1189 } 1190 } 1191 1192 private static final class NameAndComment { 1193 private final byte[] name; 1194 private final byte[] comment; 1195 private NameAndComment(final byte[] name, final byte[] comment) { 1196 this.name = name; 1197 this.comment = comment; 1198 } 1199 } 1200 1201 /** 1202 * Compares two ZipArchiveEntries based on their offset within the archive. 1203 * 1204 * <p>Won't return any meaningful results if one of the entries 1205 * isn't part of the archive at all.</p> 1206 * 1207 * @since 1.1 1208 */ 1209 private final Comparator<ZipArchiveEntry> offsetComparator = 1210 new Comparator<ZipArchiveEntry>() { 1211 @Override 1212 public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { 1213 if (e1 == e2) { 1214 return 0; 1215 } 1216 1217 final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1218 final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1219 if (ent1 == null) { 1220 return 1; 1221 } 1222 if (ent2 == null) { 1223 return -1; 1224 } 1225 final long val = (ent1.getLocalHeaderOffset() 1226 - ent2.getLocalHeaderOffset()); 1227 return val == 0 ? 0 : val < 0 ? -1 : +1; 1228 } 1229 }; 1230 1231 /** 1232 * Extends ZipArchiveEntry to store the offset within the archive. 
*/
    private static class Entry extends ZipArchiveEntry {

        Entry() {
        }

        /**
         * Mixes the hash code of the superclass with both halves of
         * the local header offset.
         */
        @Override
        public int hashCode() {
            final int inherited = 3 * super.hashCode();
            final long headerOffset = getLocalHeaderOffset();
            final int lowerHalf = (int) headerOffset;
            final int upperHalf = (int) (headerOffset >> 32);
            return inherited + lowerHalf + upperHalf;
        }

        /**
         * Two entries are equal if the superclass considers them
         * equal and they share the same local header offset and the
         * same data offset.
         */
        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            final boolean sameHeaderOffset =
                getLocalHeaderOffset() == that.getLocalHeaderOffset();
            return sameHeaderOffset && getDataOffset() == that.getDataOffset();
        }
    }
}