001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.jexl2.parser; 018 019 /** 020 * Common constant strings utilities. 021 * <p> 022 * This package methods read JEXL string literals and handle escaping through the 023 * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single 024 * and double quotes) and read Unicode hexadecimal encoded characters. 025 * </p> 026 * <p> 027 * The only escapable characters are the single and double quotes - ''' and '"' -, 028 * a Unicode sequence starting with 'u' followed by 4 hexadecimals and 029 * the backslash character - '\' - itself. 030 * </p> 031 * <p> 032 * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the 033 * sequence output being the same as the input. 034 * </p> 035 */ 036 public class StringParser { 037 /** Default constructor. */ 038 public StringParser() {} 039 040 /** 041 * Builds a string, handles escaping through '\' syntax. 042 * @param str the string to build from 043 * @param eatsep whether the separator, the first character, should be considered 044 * @return the built string 045 */ 046 public static String buildString(CharSequence str, boolean eatsep) { 047 StringBuilder strb = new StringBuilder(str.length()); 048 char sep = eatsep ? str.charAt(0) : 0; 049 int end = str.length() - (eatsep ? 1 : 0); 050 int begin = (eatsep ? 1 : 0); 051 read(strb, str, begin, end, sep); 052 return strb.toString(); 053 } 054 055 /** 056 * Read the remainder of a string till a given separator, 057 * handles escaping through '\' syntax. 058 * @param strb the destination buffer to copy characters into 059 * @param str the origin 060 * @param index the offset into the origin 061 * @param sep the separator, single or double quote, marking end of string 062 * @return the offset in origin 063 */ 064 public static int readString(StringBuilder strb, CharSequence str, int index, char sep) { 065 return read(strb, str, index, str.length(), sep); 066 } 067 068 /** The length of an escaped unicode sequence. */ 069 private static final int UCHAR_LEN = 4; 070 071 /** 072 * Read the remainder of a string till a given separator, 073 * handles escaping through '\' syntax. 074 * @param strb the destination buffer to copy characters into 075 * @param str the origin 076 * @param begin the relative offset in str to begin reading 077 * @param end the relative offset in str to end reading 078 * @param sep the separator, single or double quote, marking end of string 079 * @return the last character offset handled in origin 080 */ 081 private static int read(StringBuilder strb, CharSequence str, int begin, int end, char sep) { 082 boolean escape = false; 083 int index = begin; 084 for (; index < end; ++index) { 085 char c = str.charAt(index); 086 if (escape) { 087 if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) { 088 index += UCHAR_LEN; 089 } else { 090 // if c is not an escapable character, re-emmit the backslash before it 091 boolean notSeparator = sep == 0? c != '\'' && c != '"' : c != sep; 092 if (notSeparator && c != '\\') { 093 strb.append('\\'); 094 } 095 strb.append(c); 096 } 097 escape = false; 098 continue; 099 } 100 if (c == '\\') { 101 escape = true; 102 continue; 103 } 104 strb.append(c); 105 if (c == sep) { 106 break; 107 } 108 } 109 return index; 110 } 111 112 /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */ 113 private static final int SHIFT = 12; 114 /** The base 10 offset used to convert hexa characters to decimal. */ 115 private static final int BASE10 = 10; 116 /** 117 * Reads a Unicode escape character. 118 * @param strb the builder to write the character to 119 * @param str the sequence 120 * @param begin the begin offset in sequence (after the '\\u') 121 * @return 0 if char could not be read, 4 otherwise 122 */ 123 private static int readUnicodeChar(StringBuilder strb, CharSequence str, int begin) { 124 char xc = 0; 125 int bits = SHIFT; 126 int value = 0; 127 for(int offset = 0; offset < UCHAR_LEN; ++offset) { 128 char c = str.charAt(begin + offset); 129 if (c >= '0' && c <= '9') { 130 value = (c - '0'); 131 } else if (c >= 'a' && c <= 'h') { 132 value = (c - 'a' + BASE10); 133 } else if (c >= 'A' && c <= 'H') { 134 value = (c - 'A' + BASE10); 135 } else { 136 return 0; 137 } 138 xc |= value << bits; 139 bits -= UCHAR_LEN; 140 } 141 strb.append(xc); 142 return UCHAR_LEN; 143 } 144 145 /** The last 7bits ascii character. */ 146 private static final char LAST_ASCII = 127; 147 148 /** 149 * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence. 150 * @param str the string to escape 151 * @return the escaped representation 152 */ 153 public static String escapeString(String str) { 154 if (str == null) { 155 return null; 156 } 157 final int length = str.length(); 158 StringBuilder strb = new StringBuilder(length + 2); 159 strb.append('\''); 160 for (int i = 0; i < length; ++i) { 161 char c = str.charAt(i); 162 if (c < LAST_ASCII) { 163 if (c == '\'') { 164 // escape quote 165 strb.append('\\'); 166 strb.append('\''); 167 } else if (c == '\\') { 168 // escape backslash 169 strb.append('\\'); 170 strb.append('\\'); 171 } else { 172 strb.append(c); 173 } 174 } else { 175 // convert to Unicode escape sequence 176 strb.append('\\'); 177 strb.append('u'); 178 String hex = Integer.toHexString(c); 179 for (int h = hex.length(); h < UCHAR_LEN; ++h) { 180 strb.append('0'); 181 } 182 strb.append(hex); 183 } 184 } 185 strb.append('\''); 186 return strb.toString(); 187 } 188 }