cprover
string_constraint_generator_format.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for the Java format function
4 
5 Author: Romain Brenguier
6 
7 Date: May 2017
8 
9 \*******************************************************************/
10 
13 
14 #include <iomanip>
15 #include <string>
16 #include <regex>
17 #include <vector>
18 
19 #include <util/std_expr.h>
20 #include <util/unicode.h>
21 
23 
24 // Format specifier describes how a value should be printed.
26 {
27 public:
28  // Constants describing the meaning of characters in format specifiers.
29  static const char DECIMAL_INTEGER ='d';
30  static const char OCTAL_INTEGER ='o';
31  static const char HEXADECIMAL_INTEGER ='x';
32  static const char HEXADECIMAL_INTEGER_UPPER='X';
33  static const char SCIENTIFIC ='e';
34  static const char SCIENTIFIC_UPPER ='E';
35  static const char GENERAL ='g';
36  static const char GENERAL_UPPER ='G';
37  static const char DECIMAL_FLOAT ='f';
38  static const char HEXADECIMAL_FLOAT ='a';
39  static const char HEXADECIMAL_FLOAT_UPPER ='A';
40  static const char CHARACTER ='c';
41  static const char CHARACTER_UPPER ='C';
42  static const char DATE_TIME ='t';
43  static const char DATE_TIME_UPPER ='T';
44  static const char BOOLEAN ='b';
45  static const char BOOLEAN_UPPER ='B';
46  static const char STRING ='s';
47  static const char STRING_UPPER ='S';
48  static const char HASHCODE ='h';
49  static const char HASHCODE_UPPER ='H';
50  static const char LINE_SEPARATOR ='n';
51  static const char PERCENT_SIGN ='%';
52 
53  int index=-1;
54  std::string flag;
55  int width;
56  int precision;
57  bool dt=false;
58  char conversion;
59 
61  int _index,
62  std::string _flag,
63  int _width,
64  int _precision,
65  bool _dt,
66  char c):
67  index(_index),
68  flag(_flag),
69  width(_width),
70  precision(_precision),
71  dt(_dt),
72  conversion(c)
73  { }
74 };
75 
76 // Format text represent a constant part of a format string.
78 {
79 public:
80  explicit format_textt(std::string _content): content(_content) { }
81 
83 
84  std::string get_content() const
85  {
86  return content;
87  }
88 
89 private:
90  std::string content;
91 };
92 
93 // A format element is either a specifier or text.
95 {
96 public:
97  typedef enum {SPECIFIER, TEXT} format_typet;
98 
99  explicit format_elementt(format_typet _type): type(_type), fstring("")
100  {
101  }
102 
103  explicit format_elementt(std::string s): type(TEXT), fstring(s)
104  {
105  }
106 
108  type(SPECIFIER),
109  fstring("")
110  {
111  fspec.push_back(fs);
112  }
113 
114  bool is_format_specifier() const
115  {
116  return type==SPECIFIER;
117  }
118 
119  bool is_format_text() const
120  {
121  return type==TEXT;
122  }
123 
125  {
127  return fspec.back();
128  }
129 
131  {
133  return fstring;
134  }
135 
137  {
139  return fstring;
140  }
141 
142 private:
145  std::vector<string_constraint_generatort::format_specifiert> fspec;
146 };
147 
148 #if 0
149 // This code is deactivated as it is not used for now, but ultimalety this
150 // should be used to throw an exception when the format string is not correct
155 static bool check_format_string(std::string s)
156 {
157  std::string format_specifier=
158  "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";
159  std::regex regex(format_specifier);
160  std::smatch match;
161 
162  while(std::regex_search(s, match, regex))
163  {
164  if(match.position()!= 0)
165  for(const auto &c : match.str())
166  if(c=='%')
167  return false;
168  s=match.suffix();
169  }
170 
171  for(const auto &c : s)
172  if(c=='%')
173  return false;
174 
175  return true;
176 }
177 #endif
178 
188 {
189  int index=m[1].str().empty()?-1:std::stoi(m[1].str());
190  std::string flag=m[2].str().empty()?"":m[2].str();
191  int width=m[3].str().empty()?-1:std::stoi(m[3].str());
192  int precision=m[4].str().empty()?-1:std::stoi(m[4].str());
193  std::string tT=m[5].str();
194 
195  bool dt=(tT!="");
196  if(tT=="T")
197  flag.push_back(
199 
200  INVARIANT(
201  m[6].str().length()==1, "format conversion should be one character");
202  char conversion=m[6].str()[0];
203 
205  index, flag, width, precision, dt, conversion);
206 }
207 
213 static std::vector<format_elementt> parse_format_string(std::string s)
214 {
215  std::string format_specifier=
216  "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";
217  std::regex regex(format_specifier);
218  std::vector<format_elementt> al;
219  std::smatch match;
220 
221  while(std::regex_search(s, match, regex))
222  {
223  if(match.position()!=0)
224  {
225  std::string pre_match=s.substr(0, match.position());
226  al.emplace_back(pre_match);
227  }
228 
229  al.emplace_back(format_specifier_of_match(match));
230  s=match.suffix();
231  }
232 
233  al.emplace_back(s);
234  return al;
235 }
236 
242  const struct_exprt &expr, irep_idt component_name)
243 {
244  const struct_typet &type=to_struct_type(expr.type());
245  std::size_t number=type.component_number(component_name);
246  return expr.operands()[number];
247 }
248 
261  const format_specifiert &fs,
262  const struct_exprt &arg,
263  const typet &index_type,
264  const typet &char_type)
265 {
267  exprt return_code;
268  switch(fs.conversion)
269  {
271  return_code =
273  return res;
275  return_code =
277  return res;
280  res, get_component_in_struct(arg, ID_float));
281  return res;
284  return res;
286  return_code =
287  add_axioms_from_char(res, get_component_in_struct(arg, ID_char));
288  return res;
290  return_code =
291  add_axioms_from_bool(res, get_component_in_struct(arg, ID_boolean));
292  return res;
294  return get_string_expr(get_component_in_struct(arg, "string_expr"));
296  return_code = add_axioms_for_string_of_int(
297  res, get_component_in_struct(arg, "hashcode"));
298  return res;
300  // TODO: the constant should depend on the system: System.lineSeparator()
301  return_code = add_axioms_for_constant(res, "\n");
302  return res;
304  return_code = add_axioms_for_constant(res, "%");
305  return res;
314  {
316  fs_lower.conversion=tolower(fs.conversion);
317  const array_string_exprt lower_case =
319  add_axioms_for_to_upper_case(res, lower_case);
320  return res;
321  }
330  // For all these unimplemented cases we return a non-deterministic string
331  message.warning() << "unimplemented format specifier: " << fs.conversion
332  << message.eom;
334  default:
335  message.error() << "invalid format specifier: " << fs.conversion
336  << message.eom;
337  INVARIANT(
338  false, "format specifier must belong to [bBhHsScCdoxXeEfgGaAtT%n]");
339  throw 0;
340  }
341 }
342 
350  const array_string_exprt &res,
351  const std::string &s,
352  const exprt::operandst &args)
353 {
354  const std::vector<format_elementt> format_strings=parse_format_string(s);
355  std::vector<array_string_exprt> intermediary_strings;
356  std::size_t arg_count=0;
357  const typet &char_type = res.content().type().subtype();
358  const typet &index_type = res.length().type();
359 
360  for(const format_elementt &fe : format_strings)
361  {
362  if(fe.is_format_specifier())
363  {
364  const format_specifiert &fs=fe.get_format_specifier();
365  struct_exprt arg;
368  {
369  if(fs.index==-1)
370  {
371  INVARIANT(
372  arg_count<args.size(), "number of format must match specifiers");
373  arg=to_struct_expr(args[arg_count++]);
374  }
375  else
376  {
377  INVARIANT(fs.index > 0, "index in format should be positive");
378  INVARIANT(
379  static_cast<std::size_t>(fs.index)<=args.size(),
380  "number of format must match specifiers");
381 
382  // first argument `args[0]` corresponds to index 1
383  arg=to_struct_expr(args[fs.index-1]);
384  }
385  }
386  intermediary_strings.push_back(
388  }
389  else
390  {
392  const exprt return_code =
393  add_axioms_for_constant(str, fe.get_format_text().get_content());
394  intermediary_strings.push_back(str);
395  }
396  }
397 
398  exprt return_code = from_integer(0, get_return_code_type());
399 
400  if(intermediary_strings.empty())
401  {
402  lemmas.push_back(equal_exprt(res.length(), from_integer(0, index_type)));
403  return return_code;
404  }
405 
406  array_string_exprt str = intermediary_strings[0];
407 
408  if(intermediary_strings.size() == 1)
409  {
410  // Copy the first string
412  res, str, from_integer(0, index_type), str.length());
413  }
414 
415  // start after the first string and stop before the last
416  for(std::size_t i = 1; i < intermediary_strings.size() - 1; ++i)
417  {
418  const array_string_exprt &intermediary = intermediary_strings[i];
420  return_code =
421  bitor_exprt(return_code, add_axioms_for_concat(fresh, str, intermediary));
422  str = fresh;
423  }
424 
425  return bitor_exprt(
426  return_code, add_axioms_for_concat(res, str, intermediary_strings.back()));
427 }
428 
434 std::string
435 utf16_constant_array_to_java(const array_exprt &arr, std::size_t length)
436 {
437  std::wstring out(length, '?');
438  unsigned int c;
439  for(std::size_t i=0; i<arr.operands().size() && i<length; i++)
440  {
441  PRECONDITION(arr.operands()[i].id()==ID_constant);
442  bool conversion_failed=to_unsigned_integer(
443  to_constant_expr(arr.operands()[i]), c);
444  INVARIANT(!conversion_failed, "constant should be convertible to unsigned");
445  out[i]=c;
446  }
447  return utf16_native_endian_to_java(out);
448 }
449 
462 {
463  PRECONDITION(f.arguments().size() >= 3);
464  const array_string_exprt res =
465  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
467  unsigned int length;
468 
469  if(s1.length().id()==ID_constant &&
470  s1.content().id()==ID_array &&
471  !to_unsigned_integer(to_constant_expr(s1.length()), length))
472  {
473  std::string s=utf16_constant_array_to_java(
474  to_array_expr(s1.content()), length);
475  // List of arguments after s
476  std::vector<exprt> args(f.arguments().begin() + 3, f.arguments().end());
477  return add_axioms_for_format(res, s, args);
478  }
479  else
480  {
481  message.warning()
482  << "ignoring format function with non constant first argument"
483  << message.eom;
484  return from_integer(1, f.type());
485  }
486 }
The type of an expression.
Definition: type.h:22
Generates string constraints to link results from string functions with their arguments.
application of (mathematical) function
Definition: std_expr.h:4529
std::string utf16_constant_array_to_java(const array_exprt &arr, std::size_t length)
Construct a string from a constant array.
static std::vector< format_elementt > parse_format_string(std::string s)
Parse the given string into format specifiers and text.
const array_exprt & to_array_expr(const exprt &expr)
Cast a generic exprt to an array_exprt.
Definition: std_expr.h:1640
argumentst & arguments()
Definition: std_expr.h:4562
typet & type()
Definition: expr.h:56
exprt add_axioms_for_to_upper_case(const array_string_exprt &res, const array_string_exprt &expr)
Add axioms ensuring res corresponds to str in which lowercase characters of Basic Latin and Latin-1 s...
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
Structure type.
Definition: std_types.h:297
format_textt(std::string _content)
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:204
mstreamt & warning() const
Definition: message.h:307
equality
Definition: std_expr.h:1354
bool to_unsigned_integer(const constant_exprt &expr, unsigned &uint_value)
convert a positive integer expression to an unsigned int
Definition: arith_tools.cpp:94
string_constraint_generatort::format_specifiert get_format_specifier() const
exprt add_axioms_from_int_hex(const array_string_exprt &res, const exprt &i)
exprt add_axioms_from_float_scientific_notation(const array_string_exprt &res, const exprt &f)
Add axioms to write the float in scientific notation.
Bit-wise OR.
Definition: std_expr.h:2583
static void utf16_native_endian_to_java(const wchar_t ch, std::ostringstream &result, const std::locale &loc)
Definition: unicode.cpp:253
exprt add_axioms_for_constant(const array_string_exprt &res, irep_idt sval, const exprt &guard=true_exprt())
Add axioms ensuring that the provided string expression and constant are equal.
format_textt(const format_textt &fs)
API to expression classes.
mstreamt & error() const
Definition: message.h:302
exprt add_axioms_for_string_of_int(const array_string_exprt &res, const exprt &input_int, size_t max_size=0)
Add axioms enforcing that the string corresponds to the result of String.valueOf(I) or String...
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
bitvector_typet index_type()
Definition: c_types.cpp:16
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:318
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
const struct_exprt & to_struct_expr(const exprt &expr)
Cast a generic exprt to a struct_exprt.
Definition: std_expr.h:1838
const constant_exprt & to_constant_expr(const exprt &expr)
Cast a generic exprt to a constant_exprt.
Definition: std_expr.h:4463
static exprt get_component_in_struct(const struct_exprt &expr, irep_idt component_name)
Helper for add_axioms_for_format_specifier.
std::vector< exprt > operandst
Definition: expr.h:45
static string_constraint_generatort::format_specifiert format_specifier_of_match(std::smatch &m)
Helper function for parsing format strings.
exprt add_axioms_for_substring(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms ensuring that res corresponds to the substring of str between indexes ‘start’ = max(st...
Base class for all expressions.
Definition: expr.h:42
std::size_t component_number(const irep_idt &component_name) const
Definition: std_types.cpp:29
exprt add_axioms_from_bool(const function_application_exprt &f)
Add axioms corresponding to the String.valueOf(Z) java function.
exprt add_axioms_for_string_of_float(const function_application_exprt &f)
String representation of a float value.
const format_textt & get_format_text() const
exprt add_axioms_for_format(const function_application_exprt &f)
Formatted string using a format string and list of arguments.
format_specifiert(int _index, std::string _flag, int _width, int _precision, bool _dt, char c)
int8_t s1
Definition: bytecode_info.h:59
format_elementt(string_constraint_generatort::format_specifiert fs)
const array_string_exprt & char_array_of_pointer(const exprt &pointer, const exprt &length)
Adds creates a new array if it does not already exists.
const typet & subtype() const
Definition: type.h:33
std::vector< string_constraint_generatort::format_specifiert > fspec
array_string_exprt fresh_string(const typet &index_type, const typet &char_type)
construct a string expression whose length and content are new variables
operandst & operands()
Definition: expr.h:66
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
exprt add_axioms_from_char(const function_application_exprt &f)
Conversion from char to string.
struct constructor from list of elements
Definition: std_expr.h:1815
bitvector_typet char_type()
Definition: c_types.cpp:114
exprt add_axioms_for_concat(const array_string_exprt &res, const array_string_exprt &s1, const array_string_exprt &s2)
Add axioms enforcing that res is equal to the concatenation of s1 and s2.
array constructor from list of elements
Definition: std_expr.h:1617
array_string_exprt get_string_expr(const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
array_string_exprt add_axioms_for_format_specifier(const format_specifiert &fs, const struct_exprt &arg, const typet &index_type, const typet &char_type)
Parse s and add axioms ensuring the output corresponds to the output of String.format.