cprover
taint_analysis.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Taint Analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "taint_analysis.h"
13 
14 #include <iostream>
15 #include <fstream>
16 
17 #include <util/json.h>
18 #include <util/prefix.h>
19 #include <util/simplify_expr.h>
20 #include <util/string_constant.h>
21 
23 
25 
26 #include "taint_parser.h"
27 
29 {
30 public:
32  {
33  }
34 
35  bool operator()(
36  const std::string &taint_file_name,
37  const symbol_tablet &,
39  bool show_full,
40  const std::string &json_file_name);
41 
42 protected:
45 
46  void instrument(const namespacet &, goto_functionst &);
48 };
49 
51  const namespacet &ns,
52  goto_functionst &goto_functions)
53 {
54  for(auto &function : goto_functions.function_map)
55  instrument(ns, function.second);
56 }
57 
59  const namespacet &ns,
60  goto_functionst::goto_functiont &goto_function)
61 {
62  for(goto_programt::instructionst::iterator
63  it=goto_function.body.instructions.begin();
64  it!=goto_function.body.instructions.end();
65  it++)
66  {
67  const goto_programt::instructiont &instruction=*it;
68 
69  goto_programt insert_before, insert_after;
70 
71  switch(instruction.type)
72  {
73  case FUNCTION_CALL:
74  {
75  const code_function_callt &function_call=
76  to_code_function_call(instruction.code);
77  const exprt &function=function_call.function();
78 
79  if(function.id()==ID_symbol)
80  {
81  const irep_idt &identifier=
82  to_symbol_expr(function).get_identifier();
83 
84  std::set<irep_idt> identifiers;
85 
86  identifiers.insert(identifier);
87 
88  irep_idt class_id=function.get(ID_C_class);
89  if(class_id.empty())
90  {
91  }
92  else
93  {
94  std::string suffix=
95  std::string(
96  id2string(identifier), class_id.size(), std::string::npos);
97 
98  class_hierarchyt::idst parents=
100  for(const auto &p : parents)
101  identifiers.insert(id2string(p)+suffix);
102  }
103 
104  for(const auto &rule : taint.rules)
105  {
106  bool match=false;
107  for(const auto &i : identifiers)
108  if(i==rule.function_identifier ||
109  has_prefix(
110  id2string(i),
111  "java::"+id2string(rule.function_identifier)+":"))
112  {
113  match=true;
114  break;
115  }
116 
117  if(match)
118  {
119  debug() << "MATCH " << rule.id << " on " << identifier << eom;
120 
121  exprt where=nil_exprt();
122 
123  const code_typet &code_type=to_code_type(function.type());
124 
125  bool have_this=
126  !code_type.parameters().empty() &&
127  code_type.parameters().front().get_bool(ID_C_this);
128 
129  switch(rule.where)
130  {
132  {
133  const symbolt &return_value_symbol=
134  ns.lookup(id2string(identifier)+"#return_value");
135  where=return_value_symbol.symbol_expr();
136  }
137  break;
138 
140  {
141  unsigned nr=
142  have_this?rule.parameter_number:rule.parameter_number-1;
143  if(function_call.arguments().size()>nr)
144  where=function_call.arguments()[nr];
145  }
146  break;
147 
149  if(have_this)
150  {
151  assert(!function_call.arguments().empty());
152  where=function_call.arguments()[0];
153  }
154  break;
155  }
156 
157  switch(rule.kind)
158  {
160  {
161  codet code_set_may("set_may");
162  code_set_may.operands().resize(2);
163  code_set_may.op0()=where;
164  code_set_may.op1()=
165  address_of_exprt(string_constantt(rule.taint));
166  goto_programt::targett t=insert_after.add_instruction();
167  t->make_other(code_set_may);
168  t->source_location=instruction.source_location;
169  }
170  break;
171 
173  {
174  goto_programt::targett t=insert_before.add_instruction();
175  binary_predicate_exprt get_may("get_may");
176  get_may.op0()=where;
177  get_may.op1()=address_of_exprt(string_constantt(rule.taint));
178  t->make_assertion(not_exprt(get_may));
179  t->source_location=instruction.source_location;
180  t->source_location.set_property_class(
181  "taint rule "+id2string(rule.id));
182  t->source_location.set_comment(rule.message);
183  }
184  break;
185 
187  {
188  codet code_clear_may("clear_may");
189  code_clear_may.operands().resize(2);
190  code_clear_may.op0()=where;
191  code_clear_may.op1()=
192  address_of_exprt(string_constantt(rule.taint));
193  goto_programt::targett t=insert_after.add_instruction();
194  t->make_other(code_clear_may);
195  t->source_location=instruction.source_location;
196  }
197  break;
198  }
199  }
200  }
201  }
202  }
203  break;
204 
205  default:
206  {
207  }
208  }
209 
210  if(!insert_before.empty())
211  {
212  goto_function.body.insert_before_swap(it, insert_before);
213  // advance until we get back to the call
214  while(!it->is_function_call()) ++it;
215  }
216 
217  if(!insert_after.empty())
218  {
219  goto_function.body.destructive_insert(
220  std::next(it), insert_after);
221  }
222  }
223 }
224 
226  const std::string &taint_file_name,
227  const symbol_tablet &symbol_table,
228  goto_functionst &goto_functions,
229  bool show_full,
230  const std::string &json_file_name)
231 {
232  try
233  {
234  json_arrayt json_result;
235  bool use_json=!json_file_name.empty();
236 
237  status() << "Reading taint file `" << taint_file_name
238  << "'" << eom;
239 
240  if(taint_parser(taint_file_name, taint, get_message_handler()))
241  {
242  error() << "Failed to read taint definition file" << eom;
243  return true;
244  }
245 
246  status() << "Got " << taint.rules.size()
247  << " taint definitions" << eom;
248 
249  taint.output(debug());
250  debug() << eom;
251 
252  status() << "Instrumenting taint" << eom;
253 
254  class_hierarchy(symbol_table);
255 
256  const namespacet ns(symbol_table);
257  instrument(ns, goto_functions);
258  goto_functions.update();
259 
260  bool have_entry_point=
261  goto_functions.function_map.find(goto_functionst::entry_point())!=
262  goto_functions.function_map.end();
263 
264  // do we have an entry point?
265  if(have_entry_point)
266  {
267  status() << "Working from entry point" << eom;
268  }
269  else
270  {
271  status() << "No entry point found; "
272  << "we will consider the heads of all functions as reachable"
273  << eom;
274 
275  goto_programt end, gotos, calls;
276 
278 
279  forall_goto_functions(f_it, goto_functions)
280  if(f_it->second.body_available() &&
281  f_it->first!=goto_functionst::entry_point())
282  {
284  code_function_callt call;
285  call.function()=ns.lookup(f_it->first).symbol_expr();
286  t->make_function_call(call);
287  calls.add_instruction()->make_goto(end.instructions.begin());
289  g->make_goto(t, side_effect_expr_nondett(bool_typet()));
290  }
291 
293  goto_functions.function_map[goto_functionst::entry_point()];
294 
295  goto_programt &body=entry.body;
296 
297  body.destructive_append(gotos);
298  body.destructive_append(calls);
299  body.destructive_append(end);
300 
301  goto_functions.update();
302  }
303 
304  status() << "Data-flow analysis" << eom;
305 
306  custom_bitvector_analysist custom_bitvector_analysis;
307  custom_bitvector_analysis(goto_functions, ns);
308 
309  if(show_full)
310  {
311  custom_bitvector_analysis.output(ns, goto_functions, std::cout);
312  return false;
313  }
314 
315  forall_goto_functions(f_it, goto_functions)
316  {
317  if(!f_it->second.body.has_assertion())
318  continue;
319 
320  const symbolt &symbol=ns.lookup(f_it->first);
321 
322  if(f_it->first=="__actual_thread_spawn")
323  continue;
324 
325  bool first=true;
326 
327  forall_goto_program_instructions(i_it, f_it->second.body)
328  {
329  if(!i_it->is_assert())
330  continue;
332  continue;
333 
334  if(custom_bitvector_analysis[i_it].has_values.is_false())
335  continue;
336 
337  exprt result=custom_bitvector_analysis.eval(i_it->guard, i_it);
338  exprt result2=simplify_expr(result, ns);
339 
340  if(result2.is_true())
341  continue;
342 
343  if(first)
344  {
345  first=false;
346  if(!use_json)
347  std::cout << "\n"
348  << "******** Function " << symbol.display_name() << '\n';
349  }
350 
351  if(use_json)
352  {
354  json["bugClass"] =
355  json_stringt(i_it->source_location.get_property_class());
356  json["file"] = json_stringt(i_it->source_location.get_file());
357  json["line"]=
358  json_numbert(id2string(i_it->source_location.get_line()));
359  json_result.array.push_back(json);
360  }
361  else
362  {
363  std::cout << i_it->source_location;
364  if(!i_it->source_location.get_comment().empty())
365  std::cout << ": " << i_it->source_location.get_comment();
366 
367  if(!i_it->source_location.get_property_class().empty())
368  std::cout << " ("
369  << i_it->source_location.get_property_class() << ")";
370 
371  std::cout << '\n';
372  }
373  }
374  }
375 
376  if(use_json)
377  {
378  std::ofstream json_out(json_file_name);
379 
380  if(!json_out)
381  {
382  error() << "Failed to open json output `"
383  << json_file_name << "'" << eom;
384  return true;
385  }
386 
387  status() << "Analysis result is written to `"
388  << json_file_name << "'" << eom;
389 
390  json_out << json_result << '\n';
391  }
392 
393  return false;
394  }
395  catch(const char *error_msg)
396  {
397  error() << error_msg << eom;
398  return true;
399  }
400  catch(const std::string &error_msg)
401  {
402  error() << error_msg << eom;
403  return true;
404  }
405  catch(...)
406  {
407  error() << "Caught unexpected error in taint_analysist::operator()" << eom;
408  return true;
409  }
410 }
411 
413  goto_modelt &goto_model,
414  const std::string &taint_file_name,
416  bool show_full,
417  const std::string &json_file_name)
418 {
420  taint_analysis.set_message_handler(message_handler);
421  return taint_analysis(
422  taint_file_name,
423  goto_model.symbol_table,
424  goto_model.goto_functions,
425  show_full,
426  json_file_name);
427 }
Boolean negation.
Definition: std_expr.h:3228
Field-insensitive, location-sensitive bitvector analysis.
Base type of functions.
Definition: std_types.h:764
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
exprt & op0()
Definition: expr.h:72
void set_property_class(const irep_idt &property_class)
Non-graph-based representation of the class hierarchy.
exprt simplify_expr(const exprt &src, const namespacet &ns)
goto_program_instruction_typet type
What kind of instruction?
Definition: goto_program.h:185
void output(std::ostream &) const
const code_typet & to_code_type(const typet &type)
Cast a generic typet to a code_typet.
Definition: std_types.h:993
const irep_idt & get_identifier() const
Definition: std_expr.h:128
void destructive_append(goto_programt &p)
Appends the given program, which is destroyed.
Definition: goto_program.h:486
Taint Parser.
bool is_true() const
Definition: expr.cpp:124
function_mapt function_map
The proper Booleans.
Definition: std_types.h:34
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:29
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:30
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:173
Class Hierarchy.
virtual void output(const namespacet &ns, const goto_functionst &goto_functions, std::ostream &out) const
Definition: ai.cpp:24
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const std::string &json_file_name)
class_hierarchyt class_hierarchy
class symbol_exprt symbol_expr() const
produces a symbol_exprt for a symbol
Definition: symbol.cpp:111
argumentst & arguments()
Definition: std_code.h:890
void instrument(const namespacet &, goto_functionst &)
instructionst::iterator targett
Definition: goto_program.h:397
The NIL expression.
Definition: std_expr.h:4508
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:403
static bool has_get_must_or_may(const exprt &)
exprt & op1()
Definition: expr.h:75
The symbol table.
Definition: symbol_table.h:19
mstreamt & error() const
Definition: message.h:302
TO_BE_DOCUMENTED.
Definition: namespace.h:74
::goto_functiont goto_functiont
A side effect that returns a non-deterministically chosen value.
Definition: std_code.h:1340
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
A function call.
Definition: std_code.h:858
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:210
size_t size() const
Definition: dstring.h:89
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
Operator to return the address of an object.
Definition: std_expr.h:3168
std::vector< irep_idt > idst
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:70
const irep_idt & display_name() const
Definition: symbol.h:57
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
message_handlert & get_message_handler()
Definition: message.h:153
static irep_idt entry_point()
taint_parse_treet taint
mstreamt & result() const
Definition: message.h:312
mstreamt & status() const
Definition: message.h:317
exprt & function()
Definition: std_code.h:878
source_locationt source_location
The location of the instruction in the source file.
Definition: goto_program.h:182
targett add_instruction()
Adds an instruction at the end.
Definition: goto_program.h:505
Base class for all expressions.
Definition: expr.h:42
const parameterst & parameters() const
Definition: std_types.h:905
bool empty() const
Is the program empty?
Definition: goto_program.h:590
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const std::string &json_file_name)
exprt eval(const exprt &src, locationt loc)
Taint Analysis.
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
mstreamt & debug() const
Definition: message.h:332
A statement in a programming language.
Definition: std_code.h:21
A generic base class for expressions that are predicates, i.e., boolean-typed, and that take exactly ...
Definition: std_expr.h:730
arrayt array
Definition: json.h:129
#define forall_goto_functions(it, functions)
operandst & operands()
Definition: expr.h:66
#define forall_goto_program_instructions(it, program)
Definition: goto_program.h:735
bool empty() const
Definition: dstring.h:73
idst get_parents_trans(const irep_idt &id) const
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:32
json_objectt json(const source_locationt &location)
Definition: json_expr.cpp:83
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See namespace_baset::lookup().
Definition: namespace.cpp:136
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:909