/*
 * Copyright 2015-2017 UnboundID Corp.
 * All Rights Reserved.
 */
/*
 * Copyright (C) 2015-2017 UnboundID Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (GPLv2 only)
 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses>.
 */
package com.unboundid.util.json;



import java.util.Locale;

import com.unboundid.util.ByteStringBuffer;
import com.unboundid.util.NotMutable;
import com.unboundid.util.StaticUtils;
import com.unboundid.util.ThreadSafety;
import com.unboundid.util.ThreadSafetyLevel;



/**
 * This class provides an implementation of a JSON value that represents a
 * string of Unicode characters.  The string representation of a JSON string
 * must start and end with the double quotation mark character, and a Unicode
 * (preferably UTF-8) representation of the string between the quotes.  The
 * following special characters must be escaped:
 * <UL>
 *   <LI>
 *     The double quotation mark (Unicode character U+0022) must be escaped as
 *     either {@code \"} or {@code \}{@code u0022}.
 *   </LI>
 *   <LI>
 *     The backslash (Unicode character U+005C) must be escaped as either
 *     {@code \\} or {@code \}{@code u005C}.
 *   </LI>
 *   <LI>
 *     All ASCII control characters (Unicode characters U+0000 through U+001F)
 *     must be escaped.  They can all be escaped by prefixing the
 *     four-hexadecimal-digit Unicode character code with {@code \}{@code u},
 *     like {@code \}{@code u0000} to represent the ASCII null character U+0000.
 *     For certain characters, a more user-friendly escape sequence is also
 *     defined:
 *     <UL>
 *       <LI>
 *         The backspace character can be escaped as either {@code \b} or
 *         {@code \}{@code u0008}.
 *       </LI>
 *       <LI>
 *         The horizontal tab character can be escaped as either {@code \t} or
 *         {@code \}{@code u0009}.
 *       </LI>
 *       <LI>
 *         The newline character can be escaped as either {@code \n} or
 *         {@code \}{@code u000A}.
 *       </LI>
 *       <LI>
 *         The formfeed character can be escaped as either {@code \f} or
 *         {@code \}{@code u000C}.
 *       </LI>
 *       <LI>
 *         The carriage return character can be escaped as either {@code \r} or
 *         {@code \}{@code u000D}.
 *       </LI>
 *     </UL>
 *   </LI>
 * </UL>
 * In addition, any other character may optionally be escaped by placing the
 * {@code \}{@code u} prefix in front of each four-hexadecimal digit sequence in
 * the UTF-16 representation of that character.  For example, the "LATIN SMALL
 * LETTER N WITH TILDE" character U+00F1 may be escaped as
 * {@code \}{@code u00F1}, while the "MUSICAL SYMBOL G CLEF" character U+1D11E
 * may be escaped as {@code \}{@code uD834}{@code \}{@code uDD1E}.  And while
 * the forward slash character is not required to be escaped in JSON strings, it
 * can be escaped using {@code \/} as a more human-readable alternative to
 * {@code \}{@code u002F}.
 * <BR><BR>
 * The string provided to the {@link #JSONString(String)} constructor should not
 * have any escaping performed, and the string returned by the
 * {@link #stringValue()} method will not have any escaping performed.  These
 * methods work with the Java string that is represented by the JSON string.
 * <BR><BR>
 * If this JSON string was parsed from the string representation of a JSON
 * object, then the value returned by the {@link #toString()} method (or
 * appended to the buffer provided to the {@link #toString(StringBuilder)}
 * method) will be the string representation used in the JSON object that was
 * parsed.  Otherwise, this class will generate an appropriate string
 * representation, which will be surrounded by quotation marks and will have the
 * minimal required encoding applied.
 * <BR><BR>
 * The string returned by the {@link #toNormalizedString()} method (or appended
 * to the buffer provided to the {@link #toNormalizedString(StringBuilder)}
 * method) will be generated by converting it to lowercase, surrounding it with
 * quotation marks, and using the {@code \}{@code u}-style escaping for all
 * characters other than the following (as contained in the LDAP printable
 * character set defined in <A HREF="http://www.ietf.org/rfc/rfc4517.txt">RFC
 * 4517</A> section 3.2, and indicated by the
 * {@link StaticUtils#isPrintable(char)} method):
 * <UL>
 *   <LI>All uppercase ASCII alphabetic letters (U+0041 through U+005A).</LI>
 *   <LI>All lowercase ASCII alphabetic letters (U+0061 through U+007A).</LI>
 *   <LI>All ASCII numeric digits (U+0030 through U+0039).</LI>
 *   <LI>The ASCII space character U+0020.</LI>
 *   <LI>The ASCII single quote (aka apostrophe) character U+0027.</LI>
 *   <LI>The ASCII left parenthesis character U+0028.</LI>
 *   <LI>The ASCII right parenthesis character U+0029.</LI>
 *   <LI>The ASCII plus sign character U+002B.</LI>
 *   <LI>The ASCII comma character U+002C.</LI>
 *   <LI>The ASCII minus sign (aka hyphen) character U+002D.</LI>
 *   <LI>The ASCII period character U+002E.</LI>
 *   <LI>The ASCII forward slash character U+002F.</LI>
 *   <LI>The ASCII colon character U+003A.</LI>
 *   <LI>The ASCII equals sign character U+003D.</LI>
 *   <LI>The ASCII question mark character U+003F.</LI>
 * </UL>
 */
@NotMutable()
@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
public final class JSONString
       extends JSONValue
{
  /**
   * The serial version UID for this serializable class.
   */
  private static final long serialVersionUID = -4677194657299153890L;



  /**
   * The uppercase hexadecimal digits used when generating Unicode escape
   * sequences.
   */
  private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();



  // The JSON-formatted string representation for this JSON string.  It will be
  // surrounded by quotation marks and any necessary escaping will have been
  // performed.  It is not final because it may be computed lazily and cached
  // the first time a string representation is requested.
  private String jsonStringRepresentation;

  // The string value for this object.
  private final String value;



  /**
   * Creates a new JSON string.
   *
   * @param  value  The string to represent in this JSON value.  It must not be
   *                {@code null}.
   */
  public JSONString(final String value)
  {
    this.value = value;
    jsonStringRepresentation = null;
  }



  /**
   * Creates a new JSON string.  This method should be used for strings parsed
   * from the string representation of a JSON object.
   *
   * @param  javaString  The Java string to represent.
   * @param  jsonString  The JSON string representation to use for the Java
   *                     string.
   */
  JSONString(final String javaString, final String jsonString)
  {
    value = javaString;
    jsonStringRepresentation = jsonString;
  }



  /**
   * Retrieves the string value for this object.  This will be the interpreted
   * value, without the surrounding quotation marks or escaping.
   *
   * @return  The string value for this object.
   */
  public String stringValue()
  {
    return value;
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public int hashCode()
  {
    return stringValue().hashCode();
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public boolean equals(final Object o)
  {
    if (o == this)
    {
      return true;
    }

    if (o instanceof JSONString)
    {
      final JSONString s = (JSONString) o;
      return value.equals(s.value);
    }

    return false;
  }



  /**
   * Indicates whether the value of this JSON string matches that of the
   * provided string, optionally ignoring differences in capitalization.
   *
   * @param  s           The JSON string to compare against this JSON string.
   *                     It must not be {@code null}.
   * @param  ignoreCase  Indicates whether to ignore differences in
   *                     capitalization.
   *
   * @return  {@code true} if the value of this JSON string matches the value of
   *          the provided string (optionally ignoring differences in
   *          capitalization), or {@code false} if not.
   */
  public boolean equals(final JSONString s, final boolean ignoreCase)
  {
    if (ignoreCase)
    {
      return value.equalsIgnoreCase(s.value);
    }
    else
    {
      return value.equals(s.value);
    }
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public boolean equals(final JSONValue v, final boolean ignoreFieldNameCase,
                        final boolean ignoreValueCase,
                        final boolean ignoreArrayOrder)
  {
    // Only the value-case flag is relevant for a string; the field name and
    // array order flags are not consulted here.
    return ((v instanceof JSONString) &&
         equals((JSONString) v, ignoreValueCase));
  }



  /**
   * Retrieves a string representation of this JSON string as it should appear
   * in a JSON object, including the surrounding quotation marks and any
   * appropriate escaping.  To obtain the string to which this value refers
   * without the surrounding quotation marks or escaping, use the
   * {@link #stringValue()} method.
   * <BR><BR>
   * If the object containing this string was decoded from a string, then this
   * method will use the same string representation as in that original object.
   * Otherwise, the string representation will be constructed.
   *
   * @return  A string representation of this value as it should appear in a
   *          JSON object.
   */
  @Override()
  public String toString()
  {
    if (jsonStringRepresentation == null)
    {
      final StringBuilder buffer = new StringBuilder();
      toString(buffer);
      jsonStringRepresentation = buffer.toString();
    }

    return jsonStringRepresentation;
  }



  /**
   * Appends a string representation of this JSON string as it should appear
   * in a JSON object, including the surrounding quotation marks and any
   * appropriate escaping, to the provided buffer.  To obtain the string to
   * which this value refers without the surrounding quotation marks or
   * escaping, use the {@link #stringValue()} method.
   * <BR><BR>
   * If the object containing this string was decoded from a string, then this
   * method will use the same string representation as in that original object.
   * Otherwise, the string representation will be constructed.
   *
   * @param  buffer  The buffer to which the information should be appended.
   */
  @Override()
  public void toString(final StringBuilder buffer)
  {
    if (jsonStringRepresentation != null)
    {
      buffer.append(jsonStringRepresentation);
    }
    else
    {
      // If the caller handed us an empty buffer, then after encoding it holds
      // exactly this string's representation, so it can be cached for reuse.
      final boolean emptyBufferProvided = (buffer.length() == 0);
      encodeString(value, buffer);

      if (emptyBufferProvided)
      {
        jsonStringRepresentation = buffer.toString();
      }
    }
  }



  /**
   * Retrieves a single-line representation of this JSON string as it should
   * appear in a JSON object, including the surrounding quotation marks and any
   * appropriate escaping.  To obtain the string to which this value refers
   * without the surrounding quotation marks or escaping, use the
   * {@link #stringValue()} method.
   *
   * @return  A single-line representation of this value as it should appear in
   *          a JSON object.
   */
  @Override()
  public String toSingleLineString()
  {
    return toString();
  }



  /**
   * Appends a single-line string representation of this JSON string as it
   * should appear in a JSON object, including the surrounding quotation marks
   * and any appropriate escaping, to the provided buffer.  To obtain the string
   * to which this value refers without the surrounding quotation marks or
   * escaping, use the {@link #stringValue()} method.
   *
   * @param  buffer  The buffer to which the information should be appended.
   */
  @Override()
  public void toSingleLineString(final StringBuilder buffer)
  {
    toString(buffer);
  }



  /**
   * Appends a minimally-escaped JSON representation of the provided string to
   * the given buffer.  When escaping is required, the most user-friendly form
   * of escaping will be used.
   *
   * @param  s       The string to be encoded.
   * @param  buffer  The buffer to which the encoded representation should be
   *                 appended.
   */
  static void encodeString(final String s, final StringBuilder buffer)
  {
    buffer.append('"');

    for (final char c : s.toCharArray())
    {
      switch (c)
      {
        case '"':
          buffer.append("\\\"");
          break;
        case '\\':
          buffer.append("\\\\");
          break;
        case '\b': // backspace
          buffer.append("\\b");
          break;
        case '\f': // formfeed
          buffer.append("\\f");
          break;
        case '\n': // newline
          buffer.append("\\n");
          break;
        case '\r': // carriage return
          buffer.append("\\r");
          break;
        case '\t': // horizontal tab
          buffer.append("\\t");
          break;
        default:
          // Any remaining control character has no short escape and must use
          // the four-hexadecimal-digit form.  Everything else is appended
          // as-is.
          if (c <= '\u001F')
          {
            buffer.append(unicodeEscape(c));
          }
          else
          {
            buffer.append(c);
          }
          break;
      }
    }

    buffer.append('"');
  }



  /**
   * Appends a minimally-escaped JSON representation of the provided string to
   * the given buffer.  When escaping is required, the most user-friendly form
   * of escaping will be used.
   *
   * @param  s       The string to be encoded.
   * @param  buffer  The buffer to which the encoded representation should be
   *                 appended.
   */
  static void encodeString(final String s, final ByteStringBuffer buffer)
  {
    buffer.append('"');

    for (final char c : s.toCharArray())
    {
      switch (c)
      {
        case '"':
          buffer.append("\\\"");
          break;
        case '\\':
          buffer.append("\\\\");
          break;
        case '\b': // backspace
          buffer.append("\\b");
          break;
        case '\f': // formfeed
          buffer.append("\\f");
          break;
        case '\n': // newline
          buffer.append("\\n");
          break;
        case '\r': // carriage return
          buffer.append("\\r");
          break;
        case '\t': // horizontal tab
          buffer.append("\\t");
          break;
        default:
          // Any remaining control character has no short escape and must use
          // the four-hexadecimal-digit form.  Everything else is appended
          // as-is.
          if (c <= '\u001F')
          {
            buffer.append(unicodeEscape(c));
          }
          else
          {
            buffer.append(c);
          }
          break;
      }
    }

    buffer.append('"');
  }



  /**
   * Retrieves the six-character JSON escape sequence for the provided
   * character, consisting of a backslash, a lowercase letter u, and the four
   * uppercase hexadecimal digits of the character's UTF-16 code unit.
   *
   * @param  c  The character for which to generate the escape sequence.
   *
   * @return  The escape sequence for the provided character.
   */
  private static String unicodeEscape(final char c)
  {
    final char[] escaped =
    {
      '\\',
      'u',
      HEX_DIGITS[(c >> 12) & 0xF],
      HEX_DIGITS[(c >> 8) & 0xF],
      HEX_DIGITS[(c >> 4) & 0xF],
      HEX_DIGITS[c & 0xF]
    };

    return new String(escaped);
  }



  /**
   * Retrieves a normalized representation of this JSON string as it should
   * appear in a JSON object, including the surrounding quotes and any
   * appropriate escaping.  The normalized representation will use the unescaped
   * ASCII representation of all of the following characters:
   * <UL>
   *   <LI>The letters a through z (ASCII character codes 0x61 through
   *       0x7A).</LI>
   *   <LI>The digits 0 through 9 (ASCII character codes 0x30 through
   *       0x39).</LI>
   *   <LI>The space (ASCII character code 0x20).</LI>
   *   <LI>The single quote (ASCII character code 0x27).</LI>
   *   <LI>The left parenthesis (ASCII character code 0x28).</LI>
   *   <LI>The right parenthesis (ASCII character code 0x29).</LI>
   *   <LI>The plus sign (ASCII character code 0x2B).</LI>
   *   <LI>The comma (ASCII character code 0x2C).</LI>
   *   <LI>The hyphen (ASCII character code 0x2D).</LI>
   *   <LI>The period (ASCII character code 0x2E).</LI>
   *   <LI>The forward slash (ASCII character code 0x2F).</LI>
   *   <LI>The colon (ASCII character code 0x3A).</LI>
   *   <LI>The equal sign (ASCII character code 0x3D).</LI>
   *   <LI>The question mark (ASCII character code 0x3F).</LI>
   * </UL>
   * All characters except those listed above will be escaped using their
   * Unicode representation.
   *
   * @return  A normalized representation of this JSON string as it should
   *          appear in a JSON object, including the surrounding quotes and any
   *          appropriate escaping.
   */
  @Override()
  public String toNormalizedString()
  {
    final StringBuilder buffer = new StringBuilder();
    toNormalizedString(buffer);
    return buffer.toString();
  }



  /**
   * Appends a normalized representation of this JSON string as it should
   * appear in a JSON object, including the surrounding quotes and any
   * appropriate escaping, to the provided buffer.  The normalized
   * representation will use the unescaped ASCII representation of all of the
   * following characters:
   * <UL>
   *   <LI>The letters a through z (ASCII character codes 0x61 through
   *       0x7A).</LI>
   *   <LI>The digits 0 through 9 (ASCII character codes 0x30 through
   *       0x39).</LI>
   *   <LI>The space (ASCII character code 0x20).</LI>
   *   <LI>The single quote (ASCII character code 0x27).</LI>
   *   <LI>The left parenthesis (ASCII character code 0x28).</LI>
   *   <LI>The right parenthesis (ASCII character code 0x29).</LI>
   *   <LI>The plus sign (ASCII character code 0x2B).</LI>
   *   <LI>The comma (ASCII character code 0x2C).</LI>
   *   <LI>The hyphen (ASCII character code 0x2D).</LI>
   *   <LI>The period (ASCII character code 0x2E).</LI>
   *   <LI>The forward slash (ASCII character code 0x2F).</LI>
   *   <LI>The colon (ASCII character code 0x3A).</LI>
   *   <LI>The equal sign (ASCII character code 0x3D).</LI>
   *   <LI>The question mark (ASCII character code 0x3F).</LI>
   * </UL>
   * All characters except those listed above will be escaped using their
   * Unicode representation.
   *
   * @param  buffer  The buffer to which the information should be appended.
   */
  @Override()
  public void toNormalizedString(final StringBuilder buffer)
  {
    buffer.append('"');

    // Lowercase with a fixed locale so that the normalized form does not vary
    // with the JVM's default locale (for example, a Turkish default locale
    // would otherwise map 'I' to a dotless i rather than to 'i').
    for (final char c : value.toLowerCase(Locale.ROOT).toCharArray())
    {
      if (StaticUtils.isPrintable(c))
      {
        buffer.append(c);
      }
      else
      {
        buffer.append(unicodeEscape(c));
      }
    }

    buffer.append('"');
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public void appendToJSONBuffer(final JSONBuffer buffer)
  {
    buffer.appendString(value);
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public void appendToJSONBuffer(final String fieldName,
                                 final JSONBuffer buffer)
  {
    buffer.appendString(fieldName, value);
  }
}