001/*
002 * Copyright 2012-2017 UnboundID Corp.
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2012-2017 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.util;
022
023
024
025import java.io.IOException;
026import java.text.ParseException;
027
028import static com.unboundid.util.UtilityMessages.*;
029import static com.unboundid.util.Validator.*;
030
031
032
033/**
034 * This class provides methods for encoding and decoding data in base32 as
035 * defined in <A HREF="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</A>.  It
036 * provides a somewhat compact way of representing binary data using only
037 * printable characters (a subset of ASCII letters and numeric digits selected
038 * to avoid ambiguity, like confusion between the number 1 and the uppercase
039 * letter I, and between the number 0 and the uppercase letter O).  It uses a
040 * five-bit encoding mechanism in which every five bytes of raw data is
041 * converted into eight bytes of base32-encoded data.
042 * <BR><BR>
043 * <H2>Example</H2>
044 * The following examples demonstrate the process for base32-encoding raw data,
045 * and for decoding a string containing base32-encoded data back to the raw
046 * data used to create it:
047 * <PRE>
048 * // Base32-encode some raw data:
049 * String base32String = Base32.encode(rawDataBytes);
050 *
051 * // Decode a base32 string back to raw data:
052 * byte[] decodedRawDataBytes;
053 * try
054 * {
055 *   decodedRawDataBytes = Base32.decode(base32String);
056 * }
057 * catch (ParseException pe)
058 * {
059 *   // The string did not represent a valid base32 encoding.
060 *   decodedRawDataBytes = null;
061 * }
062 * </PRE>
063 */
064@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
065public final class Base32
066{
067  /**
068   * The set of characters in the base32 alphabet.
069   */
070  private static final char[] BASE32_ALPHABET =
071       ("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567").toCharArray();
072
073
074
075  /**
076   * Prevent this class from being instantiated.
077   */
078  private Base32()
079  {
080    // No implementation is required.
081  }
082
083
084
085  /**
086   * Encodes the UTF-8 representation of the provided string in base32 format.
087   *
088   * @param  data  The raw data to be encoded.  It must not be {@code null}.
089   *
090   * @return  The base32-encoded representation of the provided data.
091   */
092  public static String encode(final String data)
093  {
094    ensureNotNull(data);
095
096    return encode(StaticUtils.getBytes(data));
097  }
098
099
100
101  /**
102   * Encodes the provided data in base32 format.
103   *
104   * @param  data  The raw data to be encoded.  It must not be {@code null}.
105   *
106   * @return  The base32-encoded representation of the provided data.
107   */
108  public static String encode(final byte[] data)
109  {
110    ensureNotNull(data);
111
112    final StringBuilder buffer = new StringBuilder(4*data.length/3+1);
113    encodeInternal(data, 0, data.length, buffer);
114    return buffer.toString();
115  }
116
117
118
119  /**
120   * Appends a base32-encoded version of the contents of the provided buffer
121   * (using a UTF-8 representation) to the given buffer.
122   *
123   * @param  data    The raw data to be encoded.  It must not be {@code null}.
124   * @param  buffer  The buffer to which the base32-encoded data is to be
125   *                 written.
126   */
127  public static void encode(final String data, final StringBuilder buffer)
128  {
129    ensureNotNull(data);
130
131    encode(StaticUtils.getBytes(data), buffer);
132  }
133
134
135
136  /**
137   * Appends a base32-encoded version of the contents of the provided buffer
138   * (using a UTF-8 representation) to the given buffer.
139   *
140   * @param  data    The raw data to be encoded.  It must not be {@code null}.
141   * @param  buffer  The buffer to which the base32-encoded data is to be
142   *                 written.
143   */
144  public static void encode(final String data, final ByteStringBuffer buffer)
145  {
146    ensureNotNull(data);
147
148    encode(StaticUtils.getBytes(data), buffer);
149  }
150
151
152
153  /**
154   * Appends a base32-encoded representation of the provided data to the given
155   * buffer.
156   *
157   * @param  data    The raw data to be encoded.  It must not be {@code null}.
158   * @param  buffer  The buffer to which the base32-encoded data is to be
159   *                 written.
160   */
161  public static void encode(final byte[] data, final StringBuilder buffer)
162  {
163    encodeInternal(data, 0, data.length, buffer);
164  }
165
166
167
168  /**
169   * Appends a base32-encoded representation of the provided data to the given
170   * buffer.
171   *
172   * @param  data    The array containing the raw data to be encoded.  It must
173   *                 not be {@code null}.
174   * @param  off     The offset in the array at which the data to encode begins.
175   * @param  length  The number of bytes to be encoded.
176   * @param  buffer  The buffer to which the base32-encoded data is to be
177   *                 written.
178   */
179  public static void encode(final byte[] data, final int off, final int length,
180                            final StringBuilder buffer)
181  {
182    encodeInternal(data, off, length, buffer);
183  }
184
185
186
187  /**
188   * Appends a base32-encoded representation of the provided data to the given
189   * buffer.
190   *
191   * @param  data    The raw data to be encoded.  It must not be {@code null}.
192   * @param  buffer  The buffer to which the base32-encoded data is to be
193   *                 written.
194   */
195  public static void encode(final byte[] data, final ByteStringBuffer buffer)
196  {
197    encodeInternal(data, 0, data.length, buffer);
198  }
199
200
201
202  /**
203   * Appends a base32-encoded representation of the provided data to the given
204   * buffer.
205   *
206   * @param  data    The raw data to be encoded.  It must not be {@code null}.
207   * @param  off     The offset in the array at which the data to encode begins.
208   * @param  length  The number of bytes to be encoded.
209   * @param  buffer  The buffer to which the base32-encoded data is to be
210   *                 written.
211   */
212  public static void encode(final byte[] data, final int off, final int length,
213                            final ByteStringBuffer buffer)
214  {
215    encodeInternal(data, off, length, buffer);
216  }
217
218
219
220  /**
221   * Appends a base32-encoded representation of the provided data to the given
222   * buffer.
223   *
224   * @param  data    The raw data to be encoded.  It must not be {@code null}.
225   * @param  off     The offset in the array at which the data to encode begins.
226   * @param  length  The number of bytes to be encoded.
227   * @param  buffer  The buffer to which the base32-encoded data is to be
228   *                 written.
229   */
230  private static void encodeInternal(final byte[] data, final int off,
231                                     final int length, final Appendable buffer)
232  {
233    ensureNotNull(data);
234    ensureTrue(data.length >= off);
235    ensureTrue(data.length >= (off+length));
236
237    if (length == 0)
238    {
239      return;
240    }
241
242    try
243    {
244      int pos = off;
245      for (int i=0; i < (length / 5); i++)
246      {
247        final long longValue =
248             (((data[pos++] & 0xFFL) << 32) |
249              ((data[pos++] & 0xFFL) << 24) |
250              ((data[pos++] & 0xFFL) << 16) |
251              ((data[pos++] & 0xFFL) << 8) |
252               (data[pos++] & 0xFFL));
253
254        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
255        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
256        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
257        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
258        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
259        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
260        buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
261        buffer.append(BASE32_ALPHABET[(int) (longValue & 0x1FL)]);
262      }
263
264      switch ((off+length) - pos)
265      {
266        case 1:
267          long longValue = ((data[pos] & 0xFFL) << 32);
268          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
269          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
270          buffer.append("======");
271          return;
272
273        case 2:
274          longValue = (((data[pos++] & 0xFFL) << 32) |
275                       ((data[pos] & 0xFFL) << 24));
276          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
277          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
278          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
279          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
280          buffer.append("====");
281          return;
282
283        case 3:
284          longValue = (((data[pos++] & 0xFFL) << 32) |
285                       ((data[pos++] & 0xFFL) << 24) |
286                       ((data[pos] & 0xFFL) << 16));
287          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
288          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
289          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
290          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
291          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
292          buffer.append("===");
293          return;
294
295        case 4:
296          longValue = (((data[pos++] & 0xFFL) << 32) |
297                       ((data[pos++] & 0xFFL) << 24) |
298                       ((data[pos++] & 0xFFL) << 16) |
299                       ((data[pos] & 0xFFL) << 8));
300          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
301          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
302          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
303          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
304          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
305          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
306          buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
307          buffer.append("=");
308          return;
309      }
310    }
311    catch (final IOException ioe)
312    {
313      Debug.debugException(ioe);
314
315      // This should never happen.
316      throw new RuntimeException(ioe.getMessage(), ioe);
317    }
318  }
319
320
321
322  /**
323   * Decodes the contents of the provided base32-encoded string.
324   *
325   * @param  data  The base32-encoded string to decode.  It must not be
326   *               {@code null}.
327   *
328   * @return  A byte array containing the decoded data.
329   *
330   * @throws  ParseException  If the contents of the provided string cannot be
331   *                          parsed as base32-encoded data.
332   */
333  public static byte[] decode(final String data)
334         throws ParseException
335  {
336    ensureNotNull(data);
337
338    final int length = data.length();
339    if (length == 0)
340    {
341      return new byte[0];
342    }
343
344    if ((length % 8) != 0)
345    {
346      throw new ParseException(ERR_BASE32_DECODE_INVALID_LENGTH.get(), length);
347    }
348
349    final ByteStringBuffer buffer = new ByteStringBuffer(5 * (length / 8));
350
351    int stringPos = 0;
352    while (stringPos < length)
353    {
354      long longValue = 0x00;
355      for (int i=0; i < 8; i++)
356      {
357        longValue <<= 5;
358        switch (data.charAt(stringPos++))
359        {
360          case 'A':
361          case 'a':
362            longValue |= 0x00L;
363            break;
364          case 'B':
365          case 'b':
366            longValue |= 0x01L;
367            break;
368          case 'C':
369          case 'c':
370            longValue |= 0x02L;
371            break;
372          case 'D':
373          case 'd':
374            longValue |= 0x03L;
375            break;
376          case 'E':
377          case 'e':
378            longValue |= 0x04L;
379            break;
380          case 'F':
381          case 'f':
382            longValue |= 0x05L;
383            break;
384          case 'G':
385          case 'g':
386            longValue |= 0x06L;
387            break;
388          case 'H':
389          case 'h':
390            longValue |= 0x07L;
391            break;
392          case 'I':
393          case 'i':
394            longValue |= 0x08L;
395            break;
396          case 'J':
397          case 'j':
398            longValue |= 0x09L;
399            break;
400          case 'K':
401          case 'k':
402            longValue |= 0x0AL;
403            break;
404          case 'L':
405          case 'l':
406            longValue |= 0x0BL;
407            break;
408          case 'M':
409          case 'm':
410            longValue |= 0x0CL;
411            break;
412          case 'N':
413          case 'n':
414            longValue |= 0x0DL;
415            break;
416          case 'O':
417          case 'o':
418            longValue |= 0x0EL;
419            break;
420          case 'P':
421          case 'p':
422            longValue |= 0x0FL;
423            break;
424          case 'Q':
425          case 'q':
426            longValue |= 0x10L;
427            break;
428          case 'R':
429          case 'r':
430            longValue |= 0x11L;
431            break;
432          case 'S':
433          case 's':
434            longValue |= 0x12L;
435            break;
436          case 'T':
437          case 't':
438            longValue |= 0x13L;
439            break;
440          case 'U':
441          case 'u':
442            longValue |= 0x14L;
443            break;
444          case 'V':
445          case 'v':
446            longValue |= 0x15L;
447            break;
448          case 'W':
449          case 'w':
450            longValue |= 0x16L;
451            break;
452          case 'X':
453          case 'x':
454            longValue |= 0x17L;
455            break;
456          case 'Y':
457          case 'y':
458            longValue |= 0x18L;
459            break;
460          case 'Z':
461          case 'z':
462            longValue |= 0x19L;
463            break;
464          case '2':
465            longValue |= 0x1AL;
466            break;
467          case '3':
468            longValue |= 0x1BL;
469            break;
470          case '4':
471            longValue |= 0x1CL;
472            break;
473          case '5':
474            longValue |= 0x1DL;
475            break;
476          case '6':
477            longValue |= 0x1EL;
478            break;
479          case '7':
480            longValue |= 0x1FL;
481            break;
482
483          case '=':
484            switch (length - stringPos)
485            {
486              case 0:
487                // The string ended with a single equal sign, so there are
488                // four bytes left.
489                buffer.append((byte) ((longValue >> 32) & 0xFFL));
490                buffer.append((byte) ((longValue >> 24) & 0xFFL));
491                buffer.append((byte) ((longValue >> 16) & 0xFFL));
492                buffer.append((byte) ((longValue >> 8) & 0xFFL));
493                return buffer.toByteArray();
494
495              case 2:
496                // The string ended with three equal signs, so there are three
497                // bytes left.
498                longValue <<= 10;
499                buffer.append((byte) ((longValue >> 32) & 0xFFL));
500                buffer.append((byte) ((longValue >> 24) & 0xFFL));
501                buffer.append((byte) ((longValue >> 16) & 0xFFL));
502                return buffer.toByteArray();
503
504              case 3:
505                // The string ended with four equal signs, so there are two
506                // bytes left.
507                longValue <<= 15;
508                buffer.append((byte) ((longValue >> 32) & 0xFFL));
509                buffer.append((byte) ((longValue >> 24) & 0xFFL));
510                return buffer.toByteArray();
511
512              case 5:
513                // The string ended with six equal signs, so there is one byte
514                // left.
515                longValue <<= 25;
516                buffer.append((byte) ((longValue >> 32) & 0xFFL));
517                return buffer.toByteArray();
518
519              default:
520                throw new ParseException(
521                     ERR_BASE32_DECODE_UNEXPECTED_EQUAL.get((stringPos-1)),
522                     (stringPos-1));
523            }
524
525          default:
526            throw new ParseException(
527                 ERR_BASE32_DECODE_UNEXPECTED_CHAR.get(
528                      data.charAt(stringPos-1)),
529                 (stringPos-1));
530        }
531      }
532
533      buffer.append((byte) ((longValue >> 32) & 0xFFL));
534      buffer.append((byte) ((longValue >> 24) & 0xFFL));
535      buffer.append((byte) ((longValue >> 16) & 0xFFL));
536      buffer.append((byte) ((longValue >> 8) & 0xFFL));
537      buffer.append((byte) (longValue & 0xFFL));
538    }
539
540    return buffer.toByteArray();
541  }
542
543
544
545  /**
546   * Decodes the contents of the provided base32-encoded string to a string
547   * containing the raw data using the UTF-8 encoding.
548   *
549   * @param  data  The base32-encoded string to decode.  It must not be
550   *               {@code null}.
551   *
552   * @return  A string containing the decoded data.
553   *
554   * @throws  ParseException  If the contents of the provided string cannot be
555   *                          parsed as base32-encoded data using the UTF-8
556   *                          encoding.
557   */
558  public static String decodeToString(final String data)
559         throws ParseException
560  {
561    ensureNotNull(data);
562
563    final byte[] decodedBytes = decode(data);
564    return StaticUtils.toUTF8String(decodedBytes);
565  }
566}