001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.io; 003 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.InputStreamReader; 007import java.io.PushbackInputStream; 008import java.io.UnsupportedEncodingException; 009 010/** 011 * Detects the different UTF encodings from byte order mark. 012 * @since 3372 013 */ 014public final class UTFInputStreamReader extends InputStreamReader { 015 016 private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException { 017 super(in, cs); 018 } 019 020 /** 021 * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding. 022 * @param input input stream 023 * @return A reader with the correct encoding. Starts to read after the BOM. 024 * @throws IOException if any I/O error occurs 025 * @see #create(java.io.InputStream, String) 026 */ 027 public static UTFInputStreamReader create(InputStream input) throws IOException { 028 return create(input, "UTF-8"); 029 } 030 031 /** 032 * Creates a new {@link InputStreamReader} from the {@link InputStream}. 033 * @param input input stream 034 * @param defaultEncoding Used, when no BOM was recognized. Can be null. 035 * @return A reader with the correct encoding. Starts to read after the BOM. 036 * @throws IOException if any I/O error occurs 037 */ 038 public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException { 039 byte[] bom = new byte[4]; 040 String encoding = defaultEncoding; 041 int unread; 042 PushbackInputStream pushbackStream = new PushbackInputStream(input, 4); 043 int n = pushbackStream.read(bom, 0, 4); 044 045 if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { 046 encoding = "UTF-8"; 047 unread = n - 3; 048 } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { 049 encoding = "UTF-32BE"; 050 unread = n - 4; 051 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { 052 encoding = "UTF-32LE"; 053 unread = n - 4; 054 } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { 055 encoding = "UTF-16BE"; 056 unread = n - 2; 057 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { 058 encoding = "UTF-16LE"; 059 unread = n - 2; 060 } else { 061 unread = n; 062 } 063 064 if (unread > 0) { 065 pushbackStream.unread(bom, n - unread, unread); 066 } else if (unread < -1) { 067 pushbackStream.unread(bom, 0, 0); 068 } 069 070 if (encoding == null) { 071 encoding = "UTF-8"; 072 } 073 return new UTFInputStreamReader(pushbackStream, encoding); 074 } 075}