cprover
ci_lazy_methods.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3  Module: Java Bytecode
4 
5  Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
9 #include "ci_lazy_methods.h"
10 #include "java_entry_point.h"
11 #include "java_class_loader.h"
12 #include "java_utils.h"
14 #include "remove_exceptions.h"
15 
16 #include <util/expr_iterator.h>
17 #include <util/suffix.h>
18 
20 
34  const symbol_tablet &symbol_table,
35  const irep_idt &main_class,
36  const std::vector<irep_idt> &main_jar_classes,
37  const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
38  java_class_loadert &java_class_loader,
39  const std::vector<irep_idt> &extra_instantiated_classes,
40  const select_pointer_typet &pointer_type_selector,
42  const synthetic_methods_mapt &synthetic_methods)
44  main_class(main_class),
45  main_jar_classes(main_jar_classes),
46  lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
47  java_class_loader(java_class_loader),
48  extra_instantiated_classes(extra_instantiated_classes),
49  pointer_type_selector(pointer_type_selector),
50  synthetic_methods(synthetic_methods)
51 {
52  // build the class hierarchy
53  class_hierarchy(symbol_table);
54 }
55 
62 static bool references_class_model(const exprt &expr)
63 {
64  static const symbol_typet class_type("java::java.lang.Class");
65 
66  for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
67  {
69  it->type() == class_type &&
70  has_suffix(
73  {
74  return true;
75  }
76  }
77 
78  return false;
79 }
80 
97  symbol_tablet &symbol_table,
98  method_bytecodet &method_bytecode,
99  const method_convertert &method_converter)
100 {
101  std::unordered_set<irep_idt> methods_to_convert_later =
102  entry_point_methods(symbol_table);
103 
104  // Add any extra entry points specified; we should elaborate these in the
105  // same way as the main function.
106  std::vector<irep_idt> extra_entry_points;
107  for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
108  {
109  const auto &extra_methods = extra_function_generator(symbol_table);
110  extra_entry_points.insert(
111  extra_entry_points.end(), extra_methods.begin(), extra_methods.end());
112  }
113  methods_to_convert_later.insert(
114  extra_entry_points.begin(), extra_entry_points.end());
115 
116  std::unordered_set<irep_idt> instantiated_classes;
117 
118  {
119  std::unordered_set<irep_idt> initial_callable_methods;
120  ci_lazy_methods_neededt initial_lazy_methods(
121  initial_callable_methods,
122  instantiated_classes,
123  symbol_table,
126  methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
127  methods_to_convert_later.insert(
128  initial_callable_methods.begin(), initial_callable_methods.end());
129  }
130 
131  std::unordered_set<irep_idt> methods_already_populated;
132  std::unordered_set<exprt, irep_hash> virtual_function_calls;
133  bool class_initializer_seen = false;
134 
135  bool any_new_classes = true;
136  while(any_new_classes)
137  {
138  bool any_new_methods = true;
139  while(any_new_methods)
140  {
141  any_new_methods = false;
142  while(!methods_to_convert_later.empty())
143  {
144  std::unordered_set<irep_idt> methods_to_convert;
145  std::swap(methods_to_convert, methods_to_convert_later);
146  for(const auto &mname : methods_to_convert)
147  {
148  const auto conversion_result = convert_and_analyze_method(
149  method_converter,
150  methods_already_populated,
151  class_initializer_seen,
152  mname,
153  symbol_table,
154  methods_to_convert_later,
155  instantiated_classes,
156  virtual_function_calls);
157  any_new_methods |= conversion_result.new_method_seen;
158  class_initializer_seen |= conversion_result.class_initializer_seen;
159  }
160  }
161 
162  // Given the object types we now know may be created, populate more
163  // possible virtual function call targets:
164 
165  debug() << "CI lazy methods: add virtual method targets ("
166  << virtual_function_calls.size() << " callsites)" << eom;
167 
168  for(const exprt &function : virtual_function_calls)
169  {
171  function,
172  instantiated_classes,
173  methods_to_convert_later,
174  symbol_table);
175  }
176  }
177 
178  any_new_classes = handle_virtual_methods_with_no_callees(
179  methods_to_convert_later,
180  instantiated_classes,
181  virtual_function_calls,
182  symbol_table);
183  }
184 
185  // Remove symbols for methods that were declared but never used:
186  symbol_tablet keep_symbols;
187  // Manually keep @inflight_exception, as it is unused at this stage
188  // but will become used when the `remove_exceptions` pass is run:
189  keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
190 
191  for(const auto &sym : symbol_table.symbols)
192  {
193  // Don't keep global variables (unless they're gathered below from a
194  // function that references them)
195  if(sym.second.is_static_lifetime)
196  continue;
197  if(sym.second.type.id()==ID_code)
198  {
199  // Don't keep functions that belong to this language that we haven't
200  // converted above
201  if(
202  (method_bytecode.contains_method(sym.first) ||
203  synthetic_methods.count(sym.first)) &&
204  !methods_already_populated.count(sym.first))
205  {
206  continue;
207  }
208  // If this is a function then add all the things used in it
209  gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
210  }
211  keep_symbols.add(sym.second);
212  }
213 
214  debug() << "CI lazy methods: removed "
215  << symbol_table.symbols.size() - keep_symbols.symbols.size()
216  << " unreachable methods and globals" << eom;
217 
218  symbol_table.swap(keep_symbols);
219 
220  return false;
221 }
222 
231  std::unordered_set<irep_idt> &methods_to_convert_later,
232  std::unordered_set<irep_idt> &instantiated_classes,
233  const std::unordered_set<exprt, irep_hash> &virtual_function_calls,
234  symbol_tablet &symbol_table)
235 {
236  bool any_new_classes = false;
237  for(const exprt &virtual_function_call : virtual_function_calls)
238  {
239  std::unordered_set<irep_idt> candidate_target_methods;
241  virtual_function_call,
242  instantiated_classes,
243  candidate_target_methods,
244  symbol_table);
245 
246  if(!candidate_target_methods.empty())
247  continue;
248 
249  // Add the call class to instantiated_classes and assert that it
250  // didn't already exist
251  const irep_idt &call_class = virtual_function_call.get(ID_C_class);
252  auto ret_class = instantiated_classes.insert(call_class);
253  CHECK_RETURN(ret_class.second);
254  any_new_classes = true;
255 
256  // Check that `get_virtual_method_target` returns a method now
257  const irep_idt &call_basename =
258  virtual_function_call.get(ID_component_name);
259  const irep_idt method_name = get_virtual_method_target(
260  instantiated_classes, call_basename, call_class, symbol_table);
261  CHECK_RETURN(!method_name.empty());
262 
263  // Add what it returns to methods_to_convert_later
264  methods_to_convert_later.insert(method_name);
265  }
266  return any_new_classes;
267 }
268 
280  const method_convertert &method_converter,
281  std::unordered_set<irep_idt> &methods_already_populated,
282  const bool class_initializer_already_seen,
283  const irep_idt &method_name,
284  symbol_tablet &symbol_table,
285  std::unordered_set<irep_idt> &methods_to_convert_later,
286  std::unordered_set<irep_idt> &instantiated_classes,
287  std::unordered_set<exprt, irep_hash> &virtual_function_calls)
288 {
290  if(!methods_already_populated.insert(method_name).second)
291  return result;
292 
293  debug() << "CI lazy methods: elaborate " << method_name << eom;
294 
295  // Note this wraps *references* to methods_to_convert_later &
296  // instantiated_classes
297  ci_lazy_methods_neededt needed_methods(
298  methods_to_convert_later,
299  instantiated_classes,
300  symbol_table,
302 
303  const bool could_not_convert_function =
304  method_converter(method_name, needed_methods);
305  if(could_not_convert_function)
306  return result;
307 
308  const exprt &method_body = symbol_table.lookup_ref(method_name).value;
309  gather_virtual_callsites(method_body, virtual_function_calls);
310 
311  if(!class_initializer_already_seen && references_class_model(method_body))
312  {
313  result.class_initializer_seen = true;
314  const irep_idt initializer_signature =
316  if(symbol_table.has_symbol(initializer_signature))
317  methods_to_convert_later.insert(initializer_signature);
318  }
319  result.new_method_seen = true;
320  return result;
321 }
322 
328 std::unordered_set<irep_idt>
330 {
331  std::unordered_set<irep_idt> methods_to_convert_later;
332 
333  const main_function_resultt main_function = get_main_symbol(
334  symbol_table, this->main_class, this->get_message_handler());
335  if(!main_function.is_success())
336  {
337  // Failed, mark all functions in the given main class(es)
338  // reachable.
339  std::vector<irep_idt> reachable_classes;
340  if(!this->main_class.empty())
341  reachable_classes.push_back(this->main_class);
342  else
343  reachable_classes = this->main_jar_classes;
344  for(const irep_idt &class_name : reachable_classes)
345  {
346  const auto &methods =
347  this->java_class_loader.get_original_class(class_name)
349  for(const auto &method : methods)
350  {
351  const irep_idt methodid = "java::" + id2string(class_name) + "." +
352  id2string(method.name) + ":" +
353  id2string(method.descriptor);
354  methods_to_convert_later.insert(methodid);
355  }
356  }
357  }
358  else
359  methods_to_convert_later.insert(main_function.main_function.name);
360  return methods_to_convert_later;
361 }
362 
372  const std::unordered_set<irep_idt> &entry_points,
373  const namespacet &ns,
374  ci_lazy_methods_neededt &needed_lazy_methods)
375 {
376  for(const auto &mname : entry_points)
377  {
378  const auto &symbol=ns.lookup(mname);
379  const auto &mtype = to_java_method_type(symbol.type);
380  for(const auto &param : mtype.parameters())
381  {
382  if(param.type().id()==ID_pointer)
383  {
384  const pointer_typet &original_pointer=to_pointer_type(param.type());
385  needed_lazy_methods.add_all_needed_classes(original_pointer);
386  }
387  }
388  }
389 
390  // Also add classes whose instances are magically
391  // created by the JVM and so won't be spotted by
392  // looking for constructors and calls as usual:
393  needed_lazy_methods.add_needed_class("java::java.lang.String");
394  needed_lazy_methods.add_needed_class("java::java.lang.Class");
395  needed_lazy_methods.add_needed_class("java::java.lang.Object");
396 
397  // As in class_loader, ensure these classes stay available
398  for(const auto &id : extra_instantiated_classes)
399  needed_lazy_methods.add_needed_class("java::" + id2string(id));
400 }
401 
402 
408  const exprt &e,
409  std::unordered_set<exprt, irep_hash> &result)
410 {
411  if(e.id()!=ID_code)
412  return;
413  const codet &c=to_code(e);
414  if(c.get_statement()==ID_function_call &&
415  to_code_function_call(c).function().id()==ID_virtual_function)
416  {
417  result.insert(to_code_function_call(c).function());
418  }
419  else
420  {
421  for(const exprt &op : e.operands())
423  }
424 }
425 
437  const exprt &called_function,
438  const std::unordered_set<irep_idt> &instantiated_classes,
439  std::unordered_set<irep_idt> &callable_methods,
440  symbol_tablet &symbol_table)
441 {
442  PRECONDITION(called_function.id()==ID_virtual_function);
443 
444  const auto &call_class=called_function.get(ID_C_class);
445  INVARIANT(
446  !call_class.empty(), "All virtual calls should be aimed at a class");
447  const auto &call_basename=called_function.get(ID_component_name);
448  INVARIANT(
449  !call_basename.empty(),
450  "Virtual function must have a reasonable name after removing class");
451 
452  class_hierarchyt::idst self_and_child_classes =
454  self_and_child_classes.push_back(call_class);
455 
456  for(const irep_idt &class_name : self_and_child_classes)
457  {
458  const irep_idt method_name = get_virtual_method_target(
459  instantiated_classes, call_basename, class_name, symbol_table);
460  if(!method_name.empty())
461  callable_methods.insert(method_name);
462  }
463 }
464 
471  const exprt &e,
472  const symbol_tablet &symbol_table,
473  symbol_tablet &needed)
474 {
475  if(e.id()==ID_symbol)
476  {
477  // If the symbol isn't in the symbol table at all, then it is defined
478  // on an opaque type (i.e. we don't have the class definition at this point)
479  // and will be created during the typecheck phase.
480  // We don't mark it as 'needed' as it doesn't exist yet to keep.
481  const auto findit=
482  symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
483  if(findit!=symbol_table.symbols.end() &&
484  findit->second.is_static_lifetime)
485  {
486  needed.add(findit->second);
487  // Gather any globals referenced in the initialiser:
488  gather_needed_globals(findit->second.value, symbol_table, needed);
489  }
490  }
491  else
492  forall_operands(opit, e)
493  gather_needed_globals(*opit, symbol_table, needed);
494 }
495 
509  const std::unordered_set<irep_idt> &instantiated_classes,
510  const irep_idt &call_basename,
511  const irep_idt &classname,
512  const symbol_tablet &symbol_table)
513 {
514  // Program-wide, is this class ever instantiated?
515  if(!instantiated_classes.count(classname))
516  return irep_idt();
517 
518  resolve_inherited_componentt call_resolver(symbol_table, class_hierarchy);
520  call_resolver(classname, call_basename, false);
521 
522  if(resolved_call.is_valid())
523  return resolved_call.get_full_component_identifier();
524  else
525  return irep_idt();
526 }
const irep_idt & get_statement() const
Definition: std_code.h:40
irep_idt name
The unique identifier.
Definition: symbol.h:43
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_tablet &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< exprt, irep_hash > &virtual_function_calls)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< exprt, irep_hash > &virtual_function_calls, symbol_tablet &symbol_table)
Look for virtual callsites with no candidate targets.
const select_pointer_typet & pointer_type_selector
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
Remove function exceptional returns.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
const std::vector< irep_idt > & extra_instantiated_classes
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
const irep_idt & get_identifier() const
Definition: std_expr.h:128
bool is_valid() const
Use to check if this inherited_componentt has been fully constructed.
void gather_virtual_callsites(const exprt &e, std::unordered_set< exprt, irep_hash > &result)
Get places where virtual functions are called.
exprt value
Initial value of symbol.
Definition: symbol.h:37
depth_iteratort depth_begin()
Definition: expr.cpp:299
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:266
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:204
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point...
const irep_idt & id() const
Definition: irep.h:259
A reference into the symbol table.
Definition: std_types.h:110
The pointer type.
Definition: std_types.h:1435
void gather_needed_globals(const exprt &e, const symbol_tablet &symbol_table, symbol_tablet &needed)
See output.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class&#39; initializer method.
java_class_loadert & java_class_loader
The symbol table.
Definition: symbol_table.h:19
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
Collect methods needed to be loaded using the lazy method.
TO_BE_DOCUMENTED.
Definition: namespace.h:74
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
Given a class and a component (either field or method), find the closest parent that defines that com...
#define forall_operands(it, expr)
Definition: expr.h:17
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:210
const synthetic_methods_mapt & synthetic_methods
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
const symbolst & symbols
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g.
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
std::vector< irep_idt > idst
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition: std_expr.h:227
message_handlert & get_message_handler()
Definition: message.h:153
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:289
bool contains_method(const irep_idt &method_id) const
mstreamt & result() const
Definition: message.h:312
exprt & function()
Definition: std_code.h:878
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
Base class for all expressions.
Definition: expr.h:42
std::unordered_set< irep_idt > entry_point_methods(const symbol_tablet &symbol_table)
Entry point methods are either:
#define JAVA_CLASS_MODEL_SUFFIX
std::vector< irep_idt > main_jar_classes
const codet & to_code(const exprt &expr)
Definition: std_code.h:75
depth_iteratort depth_end()
Definition: expr.cpp:301
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced...
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const pointer_typet & to_pointer_type(const typet &type)
Cast a generic typet to a pointer_typet.
Definition: std_types.h:1459
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
dstringt irep_idt
Definition: irep.h:32
void swap(symbol_tablet &other)
Definition: symbol_table.h:71
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
mstreamt & debug() const
Definition: message.h:332
A statement in a programming language.
Definition: std_code.h:21
void get_virtual_method_targets(const exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_tablet &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
idst get_children_trans(const irep_idt &id) const
operandst & operands()
Definition: expr.h:66
bool operator()(symbol_tablet &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter)
Uses a simple context-insensitive (&#39;ci&#39;) analysis to determine which methods may be reachable from th...
bool empty() const
Definition: dstring.h:73
irep_idt get_full_component_identifier() const
Get the full name of this function.
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See namespace_baset::lookup().
Definition: namespace.cpp:136
class_hierarchyt class_hierarchy
ci_lazy_methodst(const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, message_handlert &message_handler, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:909
Produce code for simple implementation of String Java libraries.