001/*
002 * Copyright 2015-2017 UnboundID Corp.
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2015-2017 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.util.json;
022
023
024
025import com.unboundid.util.ByteStringBuffer;
026import com.unboundid.util.NotMutable;
027import com.unboundid.util.StaticUtils;
028import com.unboundid.util.ThreadSafety;
029import com.unboundid.util.ThreadSafetyLevel;
030
031
032
033/**
034 * This class provides an implementation of a JSON value that represents a
035 * string of Unicode characters.  The string representation of a JSON string
036 * must start and end with the double quotation mark character, and a Unicode
037 * (preferably UTF-8) representation of the string between the quotes.  The
038 * following special characters must be escaped:
039 * <UL>
040 *   <LI>
041 *     The double quotation mark (Unicode character U+0022) must be escaped as
042 *     either {@code \"} or {@code \}{@code u0022}.
043 *   </LI>
044 *   <LI>
045 *     The backslash (Unicode character U+005C) must be escaped as either
046 *     {@code \\} or {@code \}{@code u005C}.
047 *   </LI>
048 *   <LI>
049 *     All ASCII control characters (Unicode characters U+0000 through U+001F)
050 *     must be escaped.  They can all be escaped by prefixing the
051 *     four-hexadecimal-digit Unicode character code with {@code \}{@code u},
052 *     like {@code \}{@code u0000} to represent the ASCII null character U+0000.
053 *     For certain characters, a more user-friendly escape sequence is also
054 *     defined:
055 *     <UL>
056 *       <LI>
057 *         The horizontal tab character can be escaped as either {@code \t} or
058 *         {@code \}{@code u0009}.
059 *       </LI>
060 *       <LI>
061 *         The newline character can be escaped as either {@code \n} or
062 *         {@code \}{@code u000A}.
063 *       </LI>
064 *       <LI>
065 *         The formfeed character can be escaped as either {@code \f} or
066 *         {@code \}{@code u000C}.
067 *       </LI>
068 *       <LI>
069 *         The carriage return character can be escaped as either {@code \r} or
070 *         {@code \}{@code u000D}.
071 *       </LI>
072 *     </UL>
073 *   </LI>
074 * </UL>
075 * In addition, any other character may optionally be escaped by placing the
076 * {@code \}{@code u} prefix in front of each four-hexadecimal digit sequence in
077 * the UTF-16 representation of that character.  For example, the "LATIN SMALL
078 * LETTER N WITH TILDE" character U+00F1 may be escaped as
079 * {@code \}{@code u00F1}, while the "MUSICAL SYMBOL G CLEF" character U+1D11E
080 * may be escaped as {@code \}{@code uD834}{@code \}{@code uDD1E}.  And while
081 * the forward slash character is not required to be escaped in JSON strings, it
082 * can be escaped using {@code \/} as a more human-readable alternative to
083 * {@code \}{@code u002F}.
084 * <BR><BR>
085 * The string provided to the {@link #JSONString(String)} constructor should not
086 * have any escaping performed, and the string returned by the
087 * {@link #stringValue()} method will not have any escaping performed.  These
088 * methods work with the Java string that is represented by the JSON string.
089 * <BR><BR>
090 * If this JSON string was parsed from the string representation of a JSON
091 * object, then the value returned by the {@link #toString()} method (or
092 * appended to the buffer provided to the {@link #toString(StringBuilder)}
093 * method) will be the string representation used in the JSON object that was
094 * parsed.  Otherwise, this class will generate an appropriate string
095 * representation, which will be surrounded by quotation marks and will have the
096 * minimal required encoding applied.
097 * <BR><BR>
098 * The string returned by the {@link #toNormalizedString()} method (or appended
099 * to the buffer provided to the {@link #toNormalizedString(StringBuilder)}
100 * method) will be generated by converting it to lowercase, surrounding it with
101 * quotation marks, and using the {@code \}{@code u}-style escaping for all
102 * characters other than the following (as contained in the LDAP printable
103 * character set defined in <A HREF="http://www.ietf.org/rfc/rfc4517.txt">RFC
104 * 4517</A> section 3.2, and indicated by the
105 * {@link StaticUtils#isPrintable(char)} method):
106 * <UL>
107 *   <LI>All uppercase ASCII alphabetic letters (U+0041 through U+005A).</LI>
108 *   <LI>All lowercase ASCII alphabetic letters (U+0061 through U+007A).</LI>
109 *   <LI>All ASCII numeric digits (U+0030 through U+0039).</LI>
110 *   <LI>The ASCII space character U+0020.</LI>
111 *   <LI>The ASCII single quote (aka apostrophe) character U+0027.</LI>
112 *   <LI>The ASCII left parenthesis character U+0028.</LI>
113 *   <LI>The ASCII right parenthesis character U+0029.</LI>
114 *   <LI>The ASCII plus sign character U+002B.</LI>
115 *   <LI>The ASCII comma character U+002C.</LI>
116 *   <LI>The ASCII minus sign (aka hyphen) character U+002D.</LI>
117 *   <LI>The ASCII period character U+002E.</LI>
118 *   <LI>The ASCII forward slash character U+002F.</LI>
119 *   <LI>The ASCII colon character U+003A.</LI>
120 *   <LI>The ASCII equals sign character U+003D.</LI>
121 *   <LI>The ASCII question mark character U+003F.</LI>
122 * </UL>
123 */
124@NotMutable()
125@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
126public final class JSONString
127       extends JSONValue
128{
129  /**
130   * The serial version UID for this serializable class.
131   */
132  private static final long serialVersionUID = -4677194657299153890L;
133
134
135
136  // The JSON-formatted string representation for this JSON string.  It will be
137  // surrounded by quotation marks and any necessary escaping will have been
138  // performed.
139  private String jsonStringRepresentation;
140
141  // The string value for this object.
142  private final String value;
143
144
145
146  /**
147   * Creates a new JSON string.
148   *
149   * @param  value  The string to represent in this JSON value.  It must not be
150   *                {@code null}.
151   */
152  public JSONString(final String value)
153  {
154    this.value = value;
155    jsonStringRepresentation = null;
156  }
157
158
159
160  /**
161   * Creates a new JSON string.  This method should be used for strings parsed
162   * from the string representation of a JSON object.
163   *
164   * @param  javaString  The Java string to represent.
165   * @param  jsonString  The JSON string representation to use for the Java
166   *                     string.
167   */
168  JSONString(final String javaString, final String jsonString)
169  {
170    value = javaString;
171    jsonStringRepresentation = jsonString;
172  }
173
174
175
176  /**
177   * Retrieves the string value for this object.  This will be the interpreted
178   * value, without the surrounding quotation marks or escaping.
179   *
180   * @return  The string value for this object.
181   */
182  public String stringValue()
183  {
184    return value;
185  }
186
187
188
189  /**
190   * {@inheritDoc}
191   */
192  @Override()
193  public int hashCode()
194  {
195    return stringValue().hashCode();
196  }
197
198
199
200  /**
201   * {@inheritDoc}
202   */
203  @Override()
204  public boolean equals(final Object o)
205  {
206    if (o == this)
207    {
208      return true;
209    }
210
211    if (o instanceof JSONString)
212    {
213      final JSONString s = (JSONString) o;
214      return value.equals(s.value);
215    }
216
217    return false;
218  }
219
220
221
222  /**
223   * Indicates whether the value of this JSON string matches that of the
224   * provided string, optionally ignoring differences in capitalization.
225   *
226   * @param  s           The JSON string to compare against this JSON string.
227   *                     It must not be {@code null}.
228   * @param  ignoreCase  Indicates whether to ignore differences in
229   *                     capitalization.
230   *
231   * @return  {@code true} if the value of this JSON string matches the value of
232   *          the provided string (optionally ignoring differences in
233   *          capitalization), or {@code false} if not.
234   */
235  public boolean equals(final JSONString s, final boolean ignoreCase)
236  {
237    if (ignoreCase)
238    {
239      return value.equalsIgnoreCase(s.value);
240    }
241    else
242    {
243      return value.equals(s.value);
244    }
245  }
246
247
248
249  /**
250   * {@inheritDoc}
251   */
252  @Override()
253  public boolean equals(final JSONValue v, final boolean ignoreFieldNameCase,
254                        final boolean ignoreValueCase,
255                        final boolean ignoreArrayOrder)
256  {
257    return ((v instanceof JSONString) &&
258         equals((JSONString) v, ignoreValueCase));
259  }
260
261
262
263  /**
264   * Retrieves a string representation of this JSON string as it should appear
265   * in a JSON object, including the surrounding quotation marks and any
266   * appropriate escaping  To obtain the string to which this value refers
267   * without the surrounding quotation marks or escaping, use the
268   * {@link #stringValue()} method.
269   * <BR><BR>
270   * If the object containing this string was decoded from a string, then this
271   * method will use the same string representation as in that original object.
272   * Otherwise, the string representation will be constructed.
273   *
274   * @return  A string representation of this value as it should appear in a
275   *          JSON object.
276   */
277  @Override()
278  public String toString()
279  {
280    if (jsonStringRepresentation == null)
281    {
282      final StringBuilder buffer = new StringBuilder();
283      toString(buffer);
284      jsonStringRepresentation = buffer.toString();
285    }
286
287    return jsonStringRepresentation;
288  }
289
290
291
292  /**
293   * Appends a string representation of this JSON string as it should appear
294   * in a JSON object, including the surrounding quotation marks and any
295   * appropriate escaping, to the provided buffer.  To obtain the string to
296   * which this value refers without the surrounding quotation marks or
297   * escaping, use the {@link #stringValue()} method.
298   * <BR><BR>
299   * If the object containing this string was decoded from a string, then this
300   * method will use the same string representation as in that original object.
301   * Otherwise, the string representation will be constructed.
302   *
303   * @param  buffer  The buffer to which the information should be appended.
304   */
305  @Override()
306  public void toString(final StringBuilder buffer)
307  {
308    if (jsonStringRepresentation != null)
309    {
310      buffer.append(jsonStringRepresentation);
311    }
312    else
313    {
314      final boolean emptyBufferProvided = (buffer.length() == 0);
315      encodeString(value, buffer);
316
317      if (emptyBufferProvided)
318      {
319        jsonStringRepresentation = buffer.toString();
320      }
321    }
322  }
323
324
325
326  /**
327   * Retrieves a single-line representation of this JSON string as it should
328   * appear in a JSON object, including the surrounding quotation marks and any
329   * appropriate escaping.  To obtain the string to which this value refers
330   * without the surrounding quotation marks or escaping, use the
331   * {@link #stringValue()} method.
332   *
333   * @return  A single-line representation of this value as it should appear in
334   *          a JSON object.
335   */
336  @Override()
337  public String toSingleLineString()
338  {
339    return toString();
340  }
341
342
343
344  /**
345   * Appends a single-line string representation of this JSON string as it
346   * should appear in a JSON object, including the surrounding quotation marks
347   * and any appropriate escaping, to the provided buffer.  To obtain the string
348   * to which this value refers without the surrounding quotation marks or
349   * escaping, use the {@link #stringValue()} method.
350   *
351   * @param  buffer  The buffer to which the information should be appended.
352   */
353  @Override()
354  public void toSingleLineString(final StringBuilder buffer)
355  {
356    toString(buffer);
357  }
358
359
360
361  /**
362   * Appends a minimally-escaped JSON representation of the provided string to
363   * the given buffer.  When escaping is required, the most user-friendly form
364   * of escaping will be used.
365   *
366   * @param  s       The string to be encoded.
367   * @param  buffer  The buffer to which the encoded representation should be
368   *                 appended.
369   */
370  static void encodeString(final String s, final StringBuilder buffer)
371  {
372    buffer.append('"');
373
374    for (final char c : s.toCharArray())
375    {
376      switch (c)
377      {
378        case '"':
379          buffer.append("\\\"");
380          break;
381        case '\\':
382          buffer.append("\\\\");
383          break;
384        case '\b': // backspace
385          buffer.append("\\b");
386          break;
387        case '\f': // formfeed
388          buffer.append("\\f");
389          break;
390        case '\n': // newline
391          buffer.append("\\n");
392          break;
393        case '\r': // carriage return
394          buffer.append("\\r");
395          break;
396        case '\t': // horizontal tab
397          buffer.append("\\t");
398          break;
399        default:
400          if (c <= '\u001F')
401          {
402            buffer.append("\\u");
403            buffer.append(String.format("%04X", (int) c));
404          }
405          else
406          {
407            buffer.append(c);
408          }
409          break;
410      }
411    }
412
413    buffer.append('"');
414  }
415
416
417
418  /**
419   * Appends a minimally-escaped JSON representation of the provided string to
420   * the given buffer.  When escaping is required, the most user-friendly form
421   * of escaping will be used.
422   *
423   * @param  s       The string to be encoded.
424   * @param  buffer  The buffer to which the encoded representation should be
425   *                 appended.
426   */
427  static void encodeString(final String s, final ByteStringBuffer buffer)
428  {
429    buffer.append('"');
430
431    for (final char c : s.toCharArray())
432    {
433      switch (c)
434      {
435        case '"':
436          buffer.append("\\\"");
437          break;
438        case '\\':
439          buffer.append("\\\\");
440          break;
441        case '\b': // backspace
442          buffer.append("\\b");
443          break;
444        case '\f': // formfeed
445          buffer.append("\\f");
446          break;
447        case '\n': // newline
448          buffer.append("\\n");
449          break;
450        case '\r': // carriage return
451          buffer.append("\\r");
452          break;
453        case '\t': // horizontal tab
454          buffer.append("\\t");
455          break;
456        default:
457          if (c <= '\u001F')
458          {
459            buffer.append("\\u");
460            buffer.append(String.format("%04X", (int) c));
461          }
462          else
463          {
464            buffer.append(c);
465          }
466          break;
467      }
468    }
469
470    buffer.append('"');
471  }
472
473
474
475  /**
476   * Retrieves a normalized representation of this JSON string as it should
477   * appear in a JSON object, including the surrounding quotes and any
478   * appropriate escaping.  The normalized representation will use the unescaped
479   * ASCII representation of all of the following characters:
480   * <UL>
481   *   <LI>The letters a through z (ASCII character codes 0x61 through
482   *       0x7A).</LI>
483   *   <LI>The digits 0 through 9 (ASCII character codes 0x30 through
484   *       0x39).</LI>
485   *   <LI>The space (ASCII character code 0x20).</LI>
486   *   <LI>The single quote (ASCII character code 0x27).</LI>
487   *   <LI>The left parenthesis (ASCII character code 0x28).</LI>
488   *   <LI>The right parenthesis (ASCII character code 0x29).</LI>
489   *   <LI>The plus sign (ASCII character code 0x2B).</LI>
490   *   <LI>The comma (ASCII character code 0x2C).</LI>
491   *   <LI>The hyphen (ASCII character code 0x2D).</LI>
492   *   <LI>The period (ASCII character code 0x2E).</LI>
493   *   <LI>The forward slash (ASCII character code 0x2F).</LI>
494   *   <LI>The colon (ASCII character code 0x3A).</LI>
495   *   <LI>The equal sign (ASCII character code 0x3D).</LI>
496   *   <LI>The question mark (ASCII character code 0x3F).</LI>
497   * </UL>
498   * All characters except those listed above will be escaped using their
499   * Unicode representation.
500   *
501   * @return  A normalized representation of this JSON string as it should
502   *          appear in a JSON object, including
503   */
504  @Override()
505  public String toNormalizedString()
506  {
507    final StringBuilder buffer = new StringBuilder();
508    toNormalizedString(buffer);
509    return buffer.toString();
510  }
511
512
513
514  /**
515   * Appends a normalized representation of this JSON string as it should
516   * appear in a JSON object, including the surrounding quotes and any
517   * appropriate escaping, to the provided buffer.  The normalized
518   * representation will use the unescaped ASCII representation of all of the
519   * following characters:
520   * <UL>
521   *   <LI>The letters a through z (ASCII character codes 0x61 through
522   *       0x7A).</LI>
523   *   <LI>The digits 0 through 9 (ASCII character codes 0x30 through
524   *       0x39).</LI>
525   *   <LI>The space (ASCII character code 0x20).</LI>
526   *   <LI>The single quote (ASCII character code 0x27).</LI>
527   *   <LI>The left parenthesis (ASCII character code 0x28).</LI>
528   *   <LI>The right parenthesis (ASCII character code 0x29).</LI>
529   *   <LI>The plus sign (ASCII character code 0x2B).</LI>
530   *   <LI>The comma (ASCII character code 0x2C).</LI>
531   *   <LI>The hyphen (ASCII character code 0x2D).</LI>
532   *   <LI>The period (ASCII character code 0x2E).</LI>
533   *   <LI>The forward slash (ASCII character code 0x2F).</LI>
534   *   <LI>The colon (ASCII character code 0x3A).</LI>
535   *   <LI>The equal sign (ASCII character code 0x3D).</LI>
536   *   <LI>The question mark (ASCII character code 0x3F).</LI>
537   * </UL>
538   * All characters except those listed above will be escaped using their
539   * Unicode representation.
540   *
541   * @param  buffer  The buffer to which the information should be appended.
542   */
543  @Override()
544  public void toNormalizedString(final StringBuilder buffer)
545  {
546    buffer.append('"');
547
548    for (final char c : value.toLowerCase().toCharArray())
549    {
550      if (StaticUtils.isPrintable(c))
551      {
552        buffer.append(c);
553      }
554      else
555      {
556        buffer.append("\\u");
557        buffer.append(String.format("%04X", (int) c));
558      }
559    }
560
561    buffer.append('"');
562  }
563
564
565
566  /**
567   * {@inheritDoc}
568   */
569  @Override()
570  public void appendToJSONBuffer(final JSONBuffer buffer)
571  {
572    buffer.appendString(value);
573  }
574
575
576
577  /**
578   * {@inheritDoc}
579   */
580  @Override()
581  public void appendToJSONBuffer(final String fieldName,
582                                 final JSONBuffer buffer)
583  {
584    buffer.appendString(fieldName, value);
585  }
586}