cprover
string_constraint_generator_comparison.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for function comparing strings,
4  such as: equals, equalsIgnoreCase, compareTo, hashCode, intern
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
13 
15 #include <util/deprecate.h>
16 
31 {
32  PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
33  PRECONDITION(f.arguments().size() == 2);
34 
37  symbol_exprt eq=fresh_boolean("equal");
38  typecast_exprt tc_eq(eq, f.type());
39 
40  typet index_type=s1.length().type();
41 
42  // Axiom 1.
43  lemmas.push_back(implies_exprt(eq, equal_exprt(s1.length(), s2.length())));
44 
45  // Axiom 2.
46  constraints.push_back([&] {
47  const symbol_exprt qvar = fresh_univ_index("QA_equal", index_type);
48  return string_constraintt(
49  qvar,
50  zero_if_negative(s1.length()),
51  implies_exprt(eq, equal_exprt(s1[qvar], s2[qvar])));
52  }());
53 
54  // Axiom 3.
55  lemmas.push_back([&] {
56  const symbol_exprt witness =
57  fresh_exist_index("witness_unequal", index_type);
58  const exprt zero = from_integer(0, index_type);
59  const and_exprt bound_witness(
60  binary_relation_exprt(witness, ID_lt, s1.length()),
61  binary_relation_exprt(witness, ID_ge, zero));
62  const and_exprt witnessing(
63  bound_witness, notequal_exprt(s1[witness], s2[witness]));
64  const and_exprt diff_length(
65  notequal_exprt(s1.length(), s2.length()),
67  return implies_exprt(not_exprt(eq), or_exprt(diff_length, witnessing));
68  }());
69 
70  return tc_eq;
71 }
72 
83  exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z)
84 {
85  and_exprt is_upper_case_1(
86  binary_relation_exprt(char_A, ID_le, char1),
87  binary_relation_exprt(char1, ID_le, char_Z));
88  and_exprt is_upper_case_2(
89  binary_relation_exprt(char_A, ID_le, char2),
90  binary_relation_exprt(char2, ID_le, char_Z));
91 
92  // Three possibilities:
93  // p1 : char1=char2
94  // p2 : (is_up1&&'a'-'A'+char1=char2)
95  // p3 : (is_up2&&'a'-'A'+char2=char1)
96  equal_exprt p1(char1, char2);
97  minus_exprt diff(char_a, char_A);
98 
99  // Overflow is not a problem here because is_upper_case conditions
100  // ensure that we are within a safe range.
101  and_exprt p2(is_upper_case_1,
102  equal_exprt(plus_exprt(diff, char1), char2));
103  and_exprt p3(is_upper_case_2,
104  equal_exprt(plus_exprt(diff, char2), char1));
105  return or_exprt(or_exprt(p1, p2), p3);
106 }
107 
123 {
124  PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
125  PRECONDITION(f.arguments().size() == 2);
126  const symbol_exprt eq = fresh_boolean("equal_ignore_case");
129  const typet char_type = s1.content().type().subtype();
130  const exprt char_a = constant_char('a', char_type);
131  const exprt char_A = constant_char('A', char_type);
132  const exprt char_Z = constant_char('Z', char_type);
133  const typet index_type = s1.length().type();
134 
135  const implies_exprt a1(eq, equal_exprt(s1.length(), s2.length()));
136  lemmas.push_back(a1);
137 
138  const symbol_exprt qvar =
139  fresh_univ_index("QA_equal_ignore_case", index_type);
140  const exprt constr2 =
141  character_equals_ignore_case(s1[qvar], s2[qvar], char_a, char_A, char_Z);
142  const string_constraintt a2(
143  qvar, zero_if_negative(s1.length()), implies_exprt(eq, constr2));
144  constraints.push_back(a2);
145 
146  const symbol_exprt witness =
147  fresh_exist_index("witness_unequal_ignore_case", index_type);
148  const exprt zero = from_integer(0, witness.type());
149  const and_exprt bound_witness(
150  binary_relation_exprt(witness, ID_lt, s1.length()),
151  binary_relation_exprt(witness, ID_ge, zero));
152  const exprt witness_eq = character_equals_ignore_case(
153  s1[witness], s2[witness], char_a, char_A, char_Z);
154  const not_exprt witness_diff(witness_eq);
155  const implies_exprt a3(
156  not_exprt(eq),
157  or_exprt(
158  notequal_exprt(s1.length(), s2.length()),
159  and_exprt(bound_witness, witness_diff)));
160  lemmas.push_back(a3);
161 
162  return typecast_exprt(eq, f.type());
163 }
164 
176 {
177  PRECONDITION(f.arguments().size() == 1);
178  const array_string_exprt str = get_string_expr(f.arguments()[0]);
179  const typet &return_type = f.type();
180  const typet &index_type = str.length().type();
181 
182  auto pair=hash_code_of_string.insert(
183  std::make_pair(str, fresh_symbol("hash", return_type)));
184  const exprt hash = pair.first->second;
185 
186  for(auto it : hash_code_of_string)
187  {
188  const symbol_exprt i = fresh_exist_index("index_hash", index_type);
189  const equal_exprt c1(it.second, hash);
190  const notequal_exprt c2(it.first.length(), str.length());
191  const and_exprt c3(
192  equal_exprt(it.first.length(), str.length()),
193  and_exprt(
194  notequal_exprt(str[i], it.first[i]),
195  and_exprt(str.axiom_for_length_gt(i), axiom_for_is_positive_index(i))));
196  lemmas.push_back(or_exprt(c1, or_exprt(c2, c3)));
197  }
198  return hash;
199 }
200 
222 {
223  PRECONDITION(f.arguments().size() == 2);
224  const typet &return_type=f.type();
225  PRECONDITION(return_type.id() == ID_signedbv);
228  const symbol_exprt res = fresh_symbol("compare_to", return_type);
229  const typet &index_type = s1.length().type();
230 
231  const equal_exprt res_null(res, from_integer(0, return_type));
232  const implies_exprt a1(res_null, equal_exprt(s1.length(), s2.length()));
233  lemmas.push_back(a1);
234 
235  const symbol_exprt i = fresh_univ_index("QA_compare_to", index_type);
236  const string_constraintt a2(
237  i,
238  zero_if_negative(s1.length()),
239  implies_exprt(res_null, equal_exprt(s1[i], s2[i])));
240  constraints.push_back(a2);
241 
242  const symbol_exprt x = fresh_exist_index("index_compare_to", index_type);
243  const equal_exprt ret_char_diff(
244  res,
245  minus_exprt(
246  typecast_exprt(s1[x], return_type), typecast_exprt(s2[x], return_type)));
247  const equal_exprt ret_length_diff(
248  res,
249  minus_exprt(
250  typecast_exprt(s1.length(), return_type),
251  typecast_exprt(s2.length(), return_type)));
252  const or_exprt guard1(
253  and_exprt(s1.axiom_for_length_le(s2.length()), s1.axiom_for_length_gt(x)),
254  and_exprt(s1.axiom_for_length_ge(s2.length()), s2.axiom_for_length_gt(x)));
255  const and_exprt cond1(ret_char_diff, guard1);
256  const or_exprt guard2(
257  and_exprt(s2.axiom_for_length_gt(s1.length()), s1.axiom_for_has_length(x)),
258  and_exprt(s1.axiom_for_length_gt(s2.length()), s2.axiom_for_has_length(x)));
259  const and_exprt cond2(ret_length_diff, guard2);
260 
261  const implies_exprt a3(
262  not_exprt(res_null),
263  and_exprt(
264  binary_relation_exprt(x, ID_ge, from_integer(0, return_type)),
265  or_exprt(cond1, cond2)));
266  lemmas.push_back(a3);
267 
268  const symbol_exprt i2 = fresh_univ_index("QA_compare_to", index_type);
269  const string_constraintt a4(
270  i2,
271  zero_if_negative(x),
272  implies_exprt(not_exprt(res_null), equal_exprt(s1[i2], s2[i2])));
273  constraints.push_back(a4);
274 
275  return res;
276 }
277 
283 DEPRECATED("never tested")
286 {
287  PRECONDITION(f.arguments().size() == 1);
288  const array_string_exprt str = get_string_expr(f.arguments()[0]);
289  // For now we only enforce content equality and not pointer equality
290  const typet &return_type=f.type();
291  const typet index_type = str.length().type();
292 
293  auto pair=intern_of_string.insert(
294  std::make_pair(str, fresh_symbol("pool", return_type)));
295  const symbol_exprt intern = pair.first->second;
296 
297  // intern(str)=s_0 || s_1 || ...
298  // for each string s.
299  // intern(str)=intern(s) || |str|!=|s|
300  // || (|str|==|s| &&exists i<|s|. s[i]!=str[i])
301 
302  exprt::operandst disj;
303  for(auto it : intern_of_string)
304  disj.push_back(equal_exprt(intern, it.second));
305  lemmas.push_back(disjunction(disj));
306 
307  // WARNING: the specification may be incomplete or incorrect
308  for(auto it : intern_of_string)
309  if(it.second!=str)
310  {
311  symbol_exprt i=fresh_exist_index("index_intern", index_type);
312  lemmas.push_back(
313  or_exprt(
314  equal_exprt(it.second, intern),
315  or_exprt(
316  notequal_exprt(str.length(), it.first.length()),
317  and_exprt(
318  equal_exprt(str.length(), it.first.length()),
319  and_exprt(
320  notequal_exprt(str[i], it.first[i]),
321  and_exprt(
322  str.axiom_for_length_gt(i),
323  axiom_for_is_positive_index(i)))))));
324  }
325 
326  return intern;
327 }
exprt add_axioms_for_compare_to(const function_application_exprt &f)
Lexicographic comparison of two strings.
The type of an expression.
Definition: type.h:22
Boolean negation.
Definition: std_expr.h:3228
semantic type conversion
Definition: std_expr.h:2111
Generates string constraints to link results from string functions with their arguments.
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:752
application of (mathematical) function
Definition: std_expr.h:4529
boolean OR
Definition: std_expr.h:2391
exprt add_axioms_for_equals(const function_application_exprt &f)
Equality of the content of two strings.
std::map< array_string_exprt, exprt > hash_code_of_string
argumentst & arguments()
Definition: std_expr.h:4562
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type)
generate an index symbol to be used as an universaly quantified variable
typet & type()
Definition: expr.h:56
The proper Booleans.
Definition: std_types.h:34
boolean implication
Definition: std_expr.h:2339
equality
Definition: std_expr.h:1354
symbol_exprt fresh_boolean(const irep_idt &prefix)
generate a Boolean symbol which is existentially quantified
const irep_idt & id() const
Definition: irep.h:259
exprt add_axioms_for_hash_code(const function_application_exprt &f)
Value that is identical for strings with the same content.
boolean AND
Definition: std_expr.h:2255
inequality
Definition: std_expr.h:1406
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f)
Equality of the content ignoring case of characters.
The plus expression.
Definition: std_expr.h:893
bitvector_typet index_type()
Definition: c_types.cpp:16
exprt disjunction(const exprt::operandst &op)
Definition: std_expr.cpp:24
std::vector< exprt > operandst
Definition: expr.h:45
#define DEPRECATED(msg)
Definition: deprecate.h:23
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type)
generate an index symbol which is existentially quantified
Base class for all expressions.
Definition: expr.h:42
Universally quantified string constraint
static exprt character_equals_ignore_case(exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z)
Returns an expression which is true when the two given characters are equal when ignoring case for AS...
binary minus
Definition: std_expr.h:959
Expression to hold a symbol (variable)
Definition: std_expr.h:90
int8_t s1
Definition: bytecode_info.h:59
int16_t s2
Definition: bytecode_info.h:60
const typet & subtype() const
Definition: type.h:33
std::vector< string_constraintt > constraints
std::map< string_not_contains_constraintt, symbol_exprt > witness
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bitvector_typet char_type()
Definition: c_types.cpp:114
array_string_exprt get_string_expr(const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
exprt axiom_for_is_positive_index(const exprt &x)
expression true exactly when the index is positive