cprover
Loading...
Searching...
No Matches
java_bytecode_convert_method.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: JAVA Bytecode Language Conversion
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
12#ifdef DEBUG
13#include <iostream>
14#endif
15
16#include "bytecode_info.h"
19#include "java_expr.h"
23#include "java_types.h"
24#include "java_utils.h"
25#include "lambda_synthesis.h"
26#include "pattern.h"
27
28#include <util/arith_tools.h>
29#include <util/bitvector_expr.h>
30#include <util/c_types.h>
32#include <util/floatbv_expr.h>
33#include <util/ieee_float.h>
34#include <util/invariant.h>
35#include <util/namespace.h>
36#include <util/prefix.h>
37#include <util/prefix_filter.h>
38#include <util/std_expr.h>
39#include <util/threeval.h>
40
42
44
45#include <algorithm>
46#include <limits>
47
61 java_method_typet &ftype,
62 const irep_idt &name_prefix,
63 symbol_table_baset &symbol_table)
64{
65 java_method_typet::parameterst &parameters = ftype.parameters();
66
67 // Mostly borrowed from java_bytecode_convert.cpp; maybe factor this out.
68 // assign names to parameters
69 for(std::size_t i=0; i<parameters.size(); ++i)
70 {
71 irep_idt base_name, identifier;
72
73 if(i==0 && parameters[i].get_this())
74 base_name = ID_this;
75 else
76 base_name="stub_ignored_arg"+std::to_string(i);
77
78 identifier=id2string(name_prefix)+"::"+id2string(base_name);
79 parameters[i].set_base_name(base_name);
80 parameters[i].set_identifier(identifier);
81
82 // add to symbol table
83 parameter_symbolt parameter_symbol;
84 parameter_symbol.base_name=base_name;
85 parameter_symbol.mode=ID_java;
86 parameter_symbol.name=identifier;
87 parameter_symbol.type=parameters[i].type();
88 symbol_table.add(parameter_symbol);
89 }
90}
91
93 const irep_idt &identifier,
94 const irep_idt &base_name,
95 const irep_idt &pretty_name,
96 const typet &type,
98 symbol_table_baset &symbol_table,
99 message_handlert &message_handler)
100{
101 messaget log(message_handler);
102
103 symbolt symbol;
104 symbol.name = identifier;
105 symbol.base_name = base_name;
106 symbol.pretty_name = pretty_name;
107 symbol.type = type;
108 symbol.type.set(ID_access, ID_private);
110 symbol.value.make_nil();
111 symbol.mode = ID_java;
113 to_java_method_type(symbol.type), symbol.name, symbol_table);
115
116 log.debug() << "Generating codet: new opaque symbol: method '" << symbol.name
117 << "'" << messaget::eom;
118 symbol_table.add(symbol);
119}
120
121static bool is_constructor(const irep_idt &method_name)
122{
123 return id2string(method_name).find("<init>") != std::string::npos;
124}
125
127{
128 if(stack.size()<n)
129 {
130 log.error() << "malformed bytecode (pop too high)" << messaget::eom;
131 throw 0;
132 }
133
134 exprt::operandst operands;
135 operands.resize(n);
136 for(std::size_t i=0; i<n; i++)
137 operands[i]=stack[stack.size()-n+i];
138
139 stack.resize(stack.size()-n);
140 return operands;
141}
142
145{
146 std::size_t residue_size=std::min(n, stack.size());
147
148 stack.resize(stack.size()-residue_size);
149}
150
152{
153 stack.resize(stack.size()+o.size());
154
155 for(std::size_t i=0; i<o.size(); i++)
156 stack[stack.size()-o.size()+i]=o[i];
157}
158
159// JVM program locations
161{
162 return "pc"+id2string(address);
163}
164
166 const std::string &prefix,
167 const typet &type)
168{
169 irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size());
170 irep_idt identifier=id2string(current_method)+"::"+id2string(base_name);
171
172 auxiliary_symbolt tmp_symbol;
173 tmp_symbol.base_name=base_name;
174 tmp_symbol.is_static_lifetime=false;
175 tmp_symbol.mode=ID_java;
176 tmp_symbol.name=identifier;
177 tmp_symbol.type=type;
178 symbol_table.add(tmp_symbol);
179
180 symbol_exprt result(identifier, type);
181 result.set(ID_C_base_name, base_name);
182 tmp_vars.push_back(result);
183
184 return result;
185}
186
199 const exprt &arg,
200 char type_char,
201 size_t address)
202{
203 const std::size_t number_int =
204 numeric_cast_v<std::size_t>(to_constant_expr(arg));
205 variablest &var_list=variables[number_int];
206
207 // search variable in list for correct frame / address if necessary
208 const variablet &var=
209 find_variable_for_slot(address, var_list);
210
211 if(!var.symbol_expr.get_identifier().empty())
212 return var.symbol_expr;
213
214 // an unnamed local variable
215 irep_idt base_name = "anonlocal::" + std::to_string(number_int) + type_char;
216 irep_idt identifier = id2string(current_method) + "::" + id2string(base_name);
217
218 symbol_exprt result(identifier, java_type_from_char(type_char));
219 result.set(ID_C_base_name, base_name);
220 used_local_names.insert(result);
221 return std::move(result);
222}
223
232 const std::string &descriptor,
233 const optionalt<std::string> &signature,
234 const std::string &class_name,
235 const std::string &method_name,
236 message_handlert &message_handler)
237{
238 // In order to construct the method type, we can either use signature or
239 // descriptor. Since only signature contains the generics info, we want to
240 // use signature whenever possible. We revert to using descriptor if (1) the
241 // signature does not exist, (2) an unsupported generics are present or
242 // (3) in the special case when the number of parameters in the descriptor
243 // does not match the number of parameters in the signature - e.g. for
244 // certain types of inner classes and enums (see unit tests for examples).
245
246 messaget message(message_handler);
247
248 auto member_type_from_descriptor = java_type_from_string(descriptor);
249 INVARIANT(
250 member_type_from_descriptor.has_value() &&
251 member_type_from_descriptor->id() == ID_code,
252 "Must be code type");
253 if(signature.has_value())
254 {
255 try
256 {
257 auto member_type_from_signature =
258 java_type_from_string(signature.value(), class_name);
259 INVARIANT(
260 member_type_from_signature.has_value() &&
261 member_type_from_signature->id() == ID_code,
262 "Must be code type");
263 if(
264 to_java_method_type(*member_type_from_signature).parameters().size() ==
265 to_java_method_type(*member_type_from_descriptor).parameters().size())
266 {
267 return to_java_method_type(*member_type_from_signature);
268 }
269 else
270 {
271 message.debug() << "Method: " << class_name << "." << method_name
272 << "\n signature: " << signature.value()
273 << "\n descriptor: " << descriptor
274 << "\n different number of parameters, reverting to "
275 "descriptor"
276 << message.eom;
277 }
278 }
280 {
281 message.debug() << "Method: " << class_name << "." << method_name
282 << "\n could not parse signature: " << signature.value()
283 << "\n " << e.what() << "\n"
284 << " reverting to descriptor: " << descriptor
285 << message.eom;
286 }
287 }
288 return to_java_method_type(*member_type_from_descriptor);
289}
290
302 symbolt &class_symbol,
303 const irep_idt &method_identifier,
305 symbol_tablet &symbol_table,
306 message_handlert &message_handler)
307{
308 symbolt method_symbol;
309
311 m.descriptor,
312 m.signature,
313 id2string(class_symbol.name),
315 message_handler);
316
317 method_symbol.name=method_identifier;
318 method_symbol.base_name=m.base_name;
319 method_symbol.mode=ID_java;
320 method_symbol.location=m.source_location;
321 method_symbol.location.set_function(method_identifier);
322
323 if(m.is_public)
324 member_type.set_access(ID_public);
325 else if(m.is_protected)
326 member_type.set_access(ID_protected);
327 else if(m.is_private)
328 member_type.set_access(ID_private);
329 else
330 member_type.set_access(ID_default);
331
332 if(m.is_synchronized)
333 member_type.set(ID_is_synchronized, true);
334 if(m.is_static)
335 member_type.set(ID_is_static, true);
336 member_type.set_native(m.is_native);
337 member_type.set_is_varargs(m.is_varargs);
338 member_type.set_is_synthetic(m.is_synthetic);
339
340 if(m.is_bridge)
341 member_type.set(ID_is_bridge_method, m.is_bridge);
342
343 // do we need to add 'this' as a parameter?
344 if(!m.is_static)
345 {
346 java_method_typet::parameterst &parameters = member_type.parameters();
347 const reference_typet object_ref_type =
349 java_method_typet::parametert this_p(object_ref_type);
350 this_p.set_this();
351 parameters.insert(parameters.begin(), this_p);
352 }
353
354 const std::string signature_string = pretty_signature(member_type);
355
356 if(is_constructor(method_symbol.base_name))
357 {
358 // we use full.class_name(...) as pretty name
359 // for constructors -- the idea is that they have
360 // an empty declarator.
361 method_symbol.pretty_name=
362 id2string(class_symbol.pretty_name) + signature_string;
363
364 member_type.set_is_constructor();
365 }
366 else
367 {
368 method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
369 id2string(m.base_name) + signature_string;
370 }
371
372 // Load annotations
373 if(!m.annotations.empty())
374 {
376 m.annotations,
377 type_checked_cast<annotated_typet>(static_cast<typet &>(member_type))
378 .get_annotations());
379 }
380
381 method_symbol.type=member_type;
382 // Not used in jbmc at present, but other codebases that use jbmc as a library
383 // use this information.
384 method_symbol.type.set(ID_C_abstract, m.is_abstract);
385 set_declaring_class(method_symbol, class_symbol.name);
386
388 m.annotations, "java::org.cprover.MustNotThrow"))
389 {
390 method_symbol.type.set(ID_C_must_not_throw, true);
391 }
392
393 // Assign names to parameters in the method symbol
394 java_method_typet &method_type = to_java_method_type(method_symbol.type);
395 method_type.set_is_final(m.is_final);
396 java_method_typet::parameterst &parameters = method_type.parameters();
398 java_method_parameter_slots(method_type);
400 m, method_identifier, parameters, slots_for_parameters);
401
402 symbol_table.add(method_symbol);
403
404 if(!m.is_static)
405 {
406 java_class_typet::methodt new_method{method_symbol.name, method_type};
407 new_method.set_base_name(method_symbol.base_name);
408 new_method.set_pretty_name(method_symbol.pretty_name);
409 new_method.set_access(member_type.get_access());
410 new_method.set_descriptor(m.descriptor);
411
412 to_java_class_type(class_symbol.type)
413 .methods()
414 .emplace_back(std::move(new_method));
415 }
416}
417
419 const irep_idt &class_identifier,
421{
422 return
423 id2string(class_identifier) + "." + id2string(method.name) + ":" +
424 method.descriptor;
425}
426
429 const irep_idt &method_identifier,
431 const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
432{
433 auto variables = variablest{};
434 // Find parameter names in the local variable table
435 // and store the result in the external variables vector
436 for(const auto &v : m.local_variable_table)
437 {
438 // Skip this variable if it is not a method parameter
439 if(v.index >= slots_for_parameters)
440 continue;
441
442 std::ostringstream id_oss;
443 id_oss << method_identifier << "::" << v.name;
444 irep_idt identifier(id_oss.str());
445 symbol_exprt result = symbol_exprt::typeless(identifier);
446 result.set(ID_C_base_name, v.name);
447
448 // Create a new variablet in the variables vector; in fact this entry will
449 // be rewritten below in the loop that iterates through the method
450 // parameters; the only field that seem to be useful to write here is the
451 // symbol_expr, others will be rewritten
452 variables[v.index].emplace_back(result, v.start_pc, v.length);
453 }
454
455 // The variables is a expanding_vectort, and the loop above may have expanded
456 // the vector introducing gaps where the entries are empty vectors. We now
457 // make sure that the vector of each LV slot contains exactly one variablet,
458 // which we will add below
459 std::size_t param_index = 0;
460 for(const auto &param : parameters)
461 {
462 INVARIANT(
463 variables[param_index].size() <= 1,
464 "should have at most one entry per index");
465 param_index += java_local_variable_slots(param.type());
466 }
467 INVARIANT(
468 param_index == slots_for_parameters,
469 "java_parameter_count and local computation must agree");
470 param_index = 0;
471 for(auto &param : parameters)
472 {
473 irep_idt base_name, identifier;
474
475 // Construct a sensible base name (base_name) and a fully qualified name
476 // (identifier) for every parameter of the method under translation,
477 // regardless of whether we have an LVT or not; and assign it to the
478 // parameter object (which is stored in the type of the symbol, not in the
479 // symbol table)
480 if(param_index == 0 && param.get_this())
481 {
482 // my.package.ClassName.myMethodName:(II)I::this
483 base_name = ID_this;
484 identifier = id2string(method_identifier) + "::" + id2string(base_name);
485 }
486 else if(!variables[param_index].empty())
487 {
488 // if already present in the LVT ...
489 base_name = variables[param_index][0].symbol_expr.get(ID_C_base_name);
490 identifier = variables[param_index][0].symbol_expr.get(ID_identifier);
491 }
492 else
493 {
494 // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
495 // variable slot where the parameter is stored and T is a character
496 // indicating the type
497 char suffix = java_char_from_type(param.type());
498 base_name = "arg" + std::to_string(param_index) + suffix;
499 identifier = id2string(method_identifier) + "::" + id2string(base_name);
500 }
501 param.set_base_name(base_name);
502 param.set_identifier(identifier);
503 param_index += java_local_variable_slots(param.type());
504 }
505 // The parameter slots detected in this function should agree with what
506 // java_parameter_count() thinks about this method
507 INVARIANT(
508 param_index == slots_for_parameters,
509 "java_parameter_count and local computation must agree");
510}
511
513 const java_method_typet::parameterst &parameters,
514 expanding_vectort<std::vector<java_bytecode_convert_methodt::variablet>>
515 &variables,
516 symbol_table_baset &symbol_table)
517{
518 std::size_t param_index = 0;
519 for(const auto &param : parameters)
520 {
521 parameter_symbolt parameter_symbol;
522 parameter_symbol.base_name = param.get_base_name();
523 parameter_symbol.mode = ID_java;
524 parameter_symbol.name = param.get_identifier();
525 parameter_symbol.type = param.type();
526 symbol_table.add(parameter_symbol);
527
528 // Add as a JVM local variable
529 variables[param_index].clear();
530 variables[param_index].emplace_back(
531 parameter_symbol.symbol_expr(),
532 0,
533 std::numeric_limits<size_t>::max(),
534 true);
535 param_index += java_local_variable_slots(param.type());
536 }
537}
538
540 const symbolt &class_symbol,
541 const methodt &m,
542 const optionalt<prefix_filtert> &method_context)
543{
544 // Construct the fully qualified method name
545 // (e.g. "my.package.ClassName.myMethodName:(II)I") and query the symbol table
546 // to retrieve the symbol (constructed by java_bytecode_convert_method_lazy)
547 // associated to the method
548 const irep_idt method_identifier =
549 get_method_identifier(class_symbol.name, m);
550
551 method_id = method_identifier;
553 symbol_table.get_writeable_ref(method_identifier), class_symbol.name);
554
555 // Obtain a std::vector of java_method_typet::parametert objects from the
556 // (function) type of the symbol
557 // Don't try to hang on to this reference into the symbol table, as we're
558 // about to create symbols for the method's parameters, which would invalidate
559 // the reference. Instead, copy the type here, set it up, then assign it back
560 // to the symbol later.
561 java_method_typet method_type =
563 method_type.set_is_final(m.is_final);
564 method_return_type = method_type.return_type();
565 java_method_typet::parameterst &parameters = method_type.parameters();
566
567 // Determine the number of local variable slots used by the JVM to maintain
568 // the formal parameters
570
571 log.debug() << "Generating codet: class " << class_symbol.name << ", method "
572 << m.name << messaget::eom;
573
574 // Add parameter symbols to the symbol table
576
577 symbolt &method_symbol = symbol_table.get_writeable_ref(method_identifier);
578
579 // Check the fields that can't change are valid
580 INVARIANT(
581 method_symbol.name == method_identifier,
582 "Name of method symbol shouldn't change");
583 INVARIANT(
584 method_symbol.base_name == m.base_name,
585 "Base name of method symbol shouldn't change");
586 INVARIANT(
587 method_symbol.module.empty(),
588 "Method symbol shouldn't have module");
589 // Update the symbol for the method
590 method_symbol.mode=ID_java;
591 method_symbol.location=m.source_location;
592 method_symbol.location.set_function(method_identifier);
593
594 for(const auto &exception_name : m.throws_exception_table)
595 method_type.add_throws_exception(exception_name);
596
597 const std::string signature_string = pretty_signature(method_type);
598
599 // Set up the pretty name for the method entry in the symbol table.
600 // The pretty name of a constructor includes the base name of the class
601 // instead of the internal method name "<init>". For regular methods, it's
602 // just the base name of the method.
603 if(is_constructor(method_symbol.base_name))
604 {
605 // we use full.class_name(...) as pretty name
606 // for constructors -- the idea is that they have
607 // an empty declarator.
608 method_symbol.pretty_name =
609 id2string(class_symbol.pretty_name) + signature_string;
610 INVARIANT(
611 method_type.get_is_constructor(),
612 "Member type should have already been marked as a constructor");
613 }
614 else
615 {
616 method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
617 id2string(m.base_name) + signature_string;
618 }
619
620 method_symbol.type = method_type;
621 current_method = method_symbol.name;
622 method_has_this = method_type.has_this();
623 if((!m.is_abstract) && (!m.is_native))
624 {
625 // Do not convert if method is not in context
626 if(!method_context || (*method_context)(id2string(method_identifier)))
627 {
628 code_blockt code{convert_parameter_annotations(m, method_type)};
630 method_symbol.value = std::move(code);
631 }
632 }
633}
634
635static irep_idt get_if_cmp_operator(const u1 bytecode)
636{
637 if(bytecode == patternt("if_?cmplt"))
638 return ID_lt;
639 if(bytecode == patternt("if_?cmple"))
640 return ID_le;
641 if(bytecode == patternt("if_?cmpgt"))
642 return ID_gt;
643 if(bytecode == patternt("if_?cmpge"))
644 return ID_ge;
645 if(bytecode == patternt("if_?cmpeq"))
646 return ID_equal;
647 if(bytecode == patternt("if_?cmpne"))
648 return ID_notequal;
649
650 throw "unhandled java comparison instruction";
651}
652
662 const exprt &pointer,
663 const fieldref_exprt &field_reference,
664 const namespacet &ns)
665{
666 struct_tag_typet class_type(field_reference.class_name());
667
668 const exprt typed_pointer =
670
671 const irep_idt &component_name = field_reference.component_name();
672
673 exprt accessed_object = checked_dereference(typed_pointer);
674 const auto type_of = [&ns](const exprt &object) {
675 return to_struct_type(ns.follow(object.type()));
676 };
677
678 // The field access is described as being against a particular type, but it
679 // may in fact belong to any ancestor type.
680 while(type_of(accessed_object).get_component(component_name).is_nil())
681 {
682 const auto components = type_of(accessed_object).components();
683 INVARIANT(
684 components.size() != 0,
685 "infer_opaque_type_fields should guarantee that a member access has a "
686 "corresponding field");
687
688 // Base class access is always done through the first component
689 accessed_object = member_exprt(accessed_object, components.front());
690 }
691 return member_exprt(
692 accessed_object, type_of(accessed_object).get_component(component_name));
693}
694
701 codet &repl,
702 const irep_idt &old_label,
703 const irep_idt &new_label)
704{
705 const auto &stmt=repl.get_statement();
706 if(stmt==ID_goto)
707 {
708 auto &g=to_code_goto(repl);
709 if(g.get_destination()==old_label)
710 g.set_destination(new_label);
711 }
712 else
713 {
714 for(auto &op : repl.operands())
715 if(op.id()==ID_code)
716 replace_goto_target(to_code(op), old_label, new_label);
717 }
718}
719
735 block_tree_nodet &tree,
736 code_blockt &this_block,
737 method_offsett address_start,
738 method_offsett address_limit,
739 method_offsett next_block_start_address)
740{
741 address_mapt dummy;
743 tree,
744 this_block,
745 address_start,
746 address_limit,
747 next_block_start_address,
748 dummy,
749 false);
750}
751
772 block_tree_nodet &tree,
773 code_blockt &this_block,
774 method_offsett address_start,
775 method_offsett address_limit,
776 method_offsett next_block_start_address,
777 const address_mapt &amap,
778 bool allow_merge)
779{
780 // Check the tree shape invariant:
781 assert(tree.branch.size()==tree.branch_addresses.size());
782
783 // If there are no child blocks, return this.
784 if(tree.leaf)
785 return this_block;
786 assert(!tree.branch.empty());
787
788 // Find child block starting > address_start:
789 const auto afterstart=
790 std::upper_bound(
791 tree.branch_addresses.begin(),
792 tree.branch_addresses.end(),
793 address_start);
794 assert(afterstart!=tree.branch_addresses.begin());
795 auto findstart=afterstart;
796 --findstart;
797 auto child_offset=
798 std::distance(tree.branch_addresses.begin(), findstart);
799
800 // Find child block starting >= address_limit:
801 auto findlim=
802 std::lower_bound(
803 tree.branch_addresses.begin(),
804 tree.branch_addresses.end(),
805 address_limit);
806 const auto findlim_block_start_address =
807 findlim == tree.branch_addresses.end() ? next_block_start_address
808 : (*findlim);
809
810 // If all children are in scope, return this.
811 if(findstart==tree.branch_addresses.begin() &&
812 findlim==tree.branch_addresses.end())
813 return this_block;
814
815 // Find the child code_blockt where the queried range begins:
816 auto child_iter = this_block.statements().begin();
817 // Skip any top-of-block declarations;
818 // all other children are labelled subblocks.
819 while(child_iter != this_block.statements().end() &&
820 child_iter->get_statement() == ID_decl)
821 ++child_iter;
822 assert(child_iter != this_block.statements().end());
823 std::advance(child_iter, child_offset);
824 assert(child_iter != this_block.statements().end());
825 auto &child_label=to_code_label(*child_iter);
826 auto &child_block=to_code_block(child_label.code());
827
828 bool single_child(afterstart==findlim);
829 if(single_child)
830 {
831 // Range wholly contained within a child block
833 tree.branch[child_offset],
834 child_block,
835 address_start,
836 address_limit,
837 findlim_block_start_address,
838 amap,
839 allow_merge);
840 }
841
842 // Otherwise we're being asked for a range of subblocks, but not all of them.
843 // If it's legal to draw a new lexical scope around the requested subset,
844 // do so; otherwise just return this block.
845
846 // This can be a new lexical scope if all incoming edges target the
847 // new block header, or come from within the suggested new block.
848
849 // If modifying the block tree is forbidden, give up and return this:
850 if(!allow_merge)
851 return this_block;
852
853 // Check for incoming control-flow edges targeting non-header
854 // blocks of the new proposed block range:
855 auto checkit=amap.find(*findstart);
856 assert(checkit!=amap.end());
857 ++checkit; // Skip the header, which can have incoming edges from outside.
858 for(;
859 checkit!=amap.end() && (checkit->first)<(findlim_block_start_address);
860 ++checkit)
861 {
862 for(auto p : checkit->second.predecessors)
863 {
864 if(p<(*findstart) || p>=findlim_block_start_address)
865 {
866 log.debug() << "Generating codet: "
867 << "warning: refusing to create lexical block spanning "
868 << (*findstart) << "-" << findlim_block_start_address
869 << " due to incoming edge " << p << " -> " << checkit->first
870 << messaget::eom;
871 return this_block;
872 }
873 }
874 }
875
876 // All incoming edges are acceptable! Create a new block wrapping
877 // the relevant children. Borrow the header block's label, and redirect
878 // any block-internal edges to target the inner header block.
879
880 const irep_idt child_label_name=child_label.get_label();
881 std::string new_label_str = id2string(child_label_name);
882 new_label_str+='$';
883 irep_idt new_label_irep(new_label_str);
884
885 code_labelt newlabel(child_label_name, code_blockt());
886 code_blockt &newblock=to_code_block(newlabel.code());
887 auto nblocks=std::distance(findstart, findlim);
888 assert(nblocks>=2);
889 log.debug() << "Generating codet: combining "
890 << std::distance(findstart, findlim) << " blocks for addresses "
891 << (*findstart) << "-" << findlim_block_start_address
892 << messaget::eom;
893
894 // Make a new block containing every child of interest:
895 auto &this_block_children = this_block.statements();
896 assert(tree.branch.size()==this_block_children.size());
897 for(auto blockidx=child_offset, blocklim=child_offset+nblocks;
898 blockidx!=blocklim;
899 ++blockidx)
900 newblock.add(this_block_children[blockidx]);
901
902 // Relabel the inner header:
903 to_code_label(newblock.statements()[0]).set_label(new_label_irep);
904 // Relabel internal gotos:
905 replace_goto_target(newblock, child_label_name, new_label_irep);
906
907 // Remove the now-empty sibling blocks:
908 auto delfirst=this_block_children.begin();
909 std::advance(delfirst, child_offset+1);
910 auto dellim=delfirst;
911 std::advance(dellim, nblocks-1);
912 this_block_children.erase(delfirst, dellim);
913 this_block_children[child_offset].swap(newlabel);
914
915 // Perform the same transformation on the index tree:
916 block_tree_nodet newnode;
917 auto branchstart=tree.branch.begin();
918 std::advance(branchstart, child_offset);
919 auto branchlim=branchstart;
920 std::advance(branchlim, nblocks);
921 for(auto branchiter=branchstart; branchiter!=branchlim; ++branchiter)
922 newnode.branch.push_back(std::move(*branchiter));
923 ++branchstart;
924 tree.branch.erase(branchstart, branchlim);
925
926 assert(tree.branch.size()==this_block_children.size());
927
928 auto branchaddriter=tree.branch_addresses.begin();
929 std::advance(branchaddriter, child_offset);
930 auto branchaddrlim=branchaddriter;
931 std::advance(branchaddrlim, nblocks);
932 newnode.branch_addresses.insert(
933 newnode.branch_addresses.begin(),
934 branchaddriter,
935 branchaddrlim);
936
937 ++branchaddriter;
938 tree.branch_addresses.erase(branchaddriter, branchaddrlim);
939
940 tree.branch[child_offset]=std::move(newnode);
941
942 assert(tree.branch.size()==tree.branch_addresses.size());
943
944 return
947 this_block_children[child_offset]).code());
948}
949
952 const exprt &e,
953 std::map<irep_idt, java_bytecode_convert_methodt::variablet> &result)
954{
955 if(e.id()==ID_symbol)
956 {
957 const auto &symexpr=to_symbol_expr(e);
958 auto findit = result.emplace(
959 std::piecewise_construct,
960 std::forward_as_tuple(symexpr.get_identifier()),
961 std::forward_as_tuple(symexpr, pc, 1));
962 if(!findit.second)
963 {
964 auto &var = findit.first->second;
965
966 if(pc<var.start_pc)
967 {
968 var.length+=(var.start_pc-pc);
969 var.start_pc=pc;
970 }
971 else
972 {
973 var.length=std::max(var.length, (pc-var.start_pc)+1);
974 }
975 }
976 }
977 else
978 {
979 forall_operands(it, e)
980 gather_symbol_live_ranges(pc, *it, result);
981 }
982}
983
990 const irep_idt &classname)
991{
992 auto findit = symbol_table.symbols.find(clinit_wrapper_name(classname));
993 if(findit == symbol_table.symbols.end())
994 return code_skipt();
995 else
996 {
997 const code_function_callt ret(findit->second.symbol_expr());
999 needed_lazy_methods->add_needed_method(findit->second.name);
1000 return ret;
1001 }
1002}
1003
1004static std::size_t get_bytecode_type_width(const typet &ty)
1005{
1006 if(ty.id()==ID_pointer)
1007 return 32;
1008 return to_bitvector_type(ty).get_width();
1009}
1010
1012 const methodt &method,
1013 const java_method_typet &method_type)
1014{
1015 code_blockt code;
1016
1017 // Consider parameter annotations
1018 const java_method_typet::parameterst &parameters(method_type.parameters());
1019 std::size_t param_index = method_type.has_this() ? 1 : 0;
1021 parameters.size() >= method.parameter_annotations.size() + param_index,
1022 "parameters and parameter annotations mismatch");
1023 for(const auto &param_annotations : method.parameter_annotations)
1024 {
1025 // NotNull annotations are not standardized. We support these:
1026 if(
1028 param_annotations, "java::javax.validation.constraints.NotNull") ||
1030 param_annotations, "java::org.jetbrains.annotations.NotNull") ||
1032 param_annotations, "org.eclipse.jdt.annotation.NonNull") ||
1034 param_annotations, "java::edu.umd.cs.findbugs.annotations.NonNull"))
1035 {
1036 const irep_idt &param_identifier =
1037 parameters[param_index].get_identifier();
1038 const symbolt &param_symbol = symbol_table.lookup_ref(param_identifier);
1039 const auto param_type =
1040 type_try_dynamic_cast<pointer_typet>(param_symbol.type);
1041 if(param_type)
1042 {
1043 code_assertt code_assert(notequal_exprt(
1044 param_symbol.symbol_expr(), null_pointer_exprt(*param_type)));
1045 source_locationt check_loc = method.source_location;
1046 check_loc.set_comment("Not null annotation check");
1047 check_loc.set_property_class("not-null-annotation-check");
1048 code_assert.add_source_location() = check_loc;
1049
1050 code.add(std::move(code_assert));
1051 }
1052 }
1053 ++param_index;
1054 }
1055
1056 return code;
1057}
1058
1061{
1062 const instructionst &instructions=method.instructions;
1063
1064 // Run a worklist algorithm, assuming that the bytecode has not
1065 // been tampered with. See "Leroy, X. (2003). Java bytecode
1066 // verification: algorithms and formalizations. Journal of Automated
1067 // Reasoning, 30(3-4), 235-269." for a more complete treatment.
1068
1069 // first pass: get targets and map addresses to instructions
1070
1071 address_mapt address_map;
1072 std::set<method_offsett> targets;
1073
1074 std::vector<method_offsett> jsr_ret_targets;
1075 std::vector<instructionst::const_iterator> ret_instructions;
1076
1077 for(auto i_it = instructions.begin(); i_it != instructions.end(); i_it++)
1078 {
1080 std::pair<address_mapt::iterator, bool> a_entry=
1081 address_map.insert(std::make_pair(i_it->address, ins));
1082 assert(a_entry.second);
1083 // addresses are strictly increasing, hence we must have inserted
1084 // a new maximal key
1085 assert(a_entry.first==--address_map.end());
1086
1087 const auto bytecode = i_it->bytecode;
1088 const std::string statement = bytecode_info[i_it->bytecode].mnemonic;
1089
1090 // clang-format off
1091 if(bytecode != BC_goto &&
1092 bytecode != BC_return &&
1093 bytecode != patternt("?return") &&
1094 bytecode != BC_athrow &&
1095 bytecode != BC_jsr &&
1096 bytecode != BC_jsr_w &&
1097 bytecode != BC_ret)
1098 {
1099 // clang-format on
1100 instructionst::const_iterator next=i_it;
1101 if(++next!=instructions.end())
1102 a_entry.first->second.successors.push_back(next->address);
1103 }
1104
1105 // clang-format off
1106 if(bytecode == BC_athrow ||
1107 bytecode == BC_putfield ||
1108 bytecode == BC_getfield ||
1109 bytecode == BC_checkcast ||
1110 bytecode == BC_newarray ||
1111 bytecode == BC_anewarray ||
1112 bytecode == BC_idiv ||
1113 bytecode == BC_ldiv ||
1114 bytecode == BC_irem ||
1115 bytecode == BC_lrem ||
1116 bytecode == patternt("?astore") ||
1117 bytecode == patternt("?aload") ||
1118 bytecode == BC_invokestatic ||
1119 bytecode == BC_invokevirtual ||
1120 bytecode == BC_invokespecial ||
1121 bytecode == BC_invokeinterface ||
1123 (bytecode == BC_monitorenter || bytecode == BC_monitorexit)))
1124 {
1125 // clang-format on
1126 const std::vector<method_offsett> handler =
1127 try_catch_handler(i_it->address, method.exception_table);
1128 std::list<method_offsett> &successors = a_entry.first->second.successors;
1129 successors.insert(successors.end(), handler.begin(), handler.end());
1130 targets.insert(handler.begin(), handler.end());
1131 }
1132
1133 // clang-format off
1134 if(bytecode == BC_goto ||
1135 bytecode == patternt("if_?cmp??") ||
1136 bytecode == patternt("if??") ||
1137 bytecode == BC_ifnonnull ||
1138 bytecode == BC_ifnull ||
1139 bytecode == BC_jsr ||
1140 bytecode == BC_jsr_w)
1141 {
1142 // clang-format on
1143 PRECONDITION(!i_it->args.empty());
1144
1145 auto target = numeric_cast_v<unsigned>(to_constant_expr(i_it->args[0]));
1146 targets.insert(target);
1147
1148 a_entry.first->second.successors.push_back(target);
1149
1150 if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1151 {
1152 auto next = std::next(i_it);
1154 next != instructions.end(), "jsr should have valid return address");
1155 targets.insert(next->address);
1156 jsr_ret_targets.push_back(next->address);
1157 }
1158 }
1159 else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1160 {
1161 bool is_label=true;
1162 for(const auto &arg : i_it->args)
1163 {
1164 if(is_label)
1165 {
1166 auto target = numeric_cast_v<unsigned>(to_constant_expr(arg));
1167 targets.insert(target);
1168 a_entry.first->second.successors.push_back(target);
1169 }
1170 is_label=!is_label;
1171 }
1172 }
1173 else if(bytecode == BC_ret)
1174 {
1175 // Finish these later, once we've seen all jsr instructions.
1176 ret_instructions.push_back(i_it);
1177 }
1178 }
1179 draw_edges_from_ret_to_jsr(address_map, jsr_ret_targets, ret_instructions);
1180
1181 for(const auto &address : address_map)
1182 {
1183 for(auto s : address.second.successors)
1184 {
1185 const auto a_it = address_map.find(s);
1186 CHECK_RETURN(a_it != address_map.end());
1187 a_it->second.predecessors.insert(address.first);
1188 }
1189 }
1190
1191 // Clean the list of temporary variables created by a call to `tmp_variable`.
1192 // These are local variables in the goto function used to represent temporary
1193 // values of the JVM operand stack, newly allocated objects before the
1194 // constructor is called, ...
1195 tmp_vars.clear();
1196
1197 // Now that the control flow graph is built, set up our local variables
1198 // (these require the graph to determine live ranges)
1199 setup_local_variables(method, address_map);
1200
1201 std::set<method_offsett> working_set;
1202
1203 if(!instructions.empty())
1204 working_set.insert(instructions.front().address);
1205
1206 while(!working_set.empty())
1207 {
1208 auto cur = working_set.begin();
1209 auto address_it = address_map.find(*cur);
1210 CHECK_RETURN(address_it != address_map.end());
1211 auto &instruction = address_it->second;
1212 const method_offsett cur_pc = *cur;
1213 working_set.erase(cur);
1214
1215 if(instruction.done)
1216 continue;
1217 working_set.insert(
1218 instruction.successors.begin(), instruction.successors.end());
1219
1220 instructionst::const_iterator i_it = instruction.source;
1221 stack.swap(instruction.stack);
1222 instruction.stack.clear();
1223 codet &c = instruction.code;
1224
1225 assert(
1226 stack.empty() || instruction.predecessors.size() <= 1 ||
1227 has_prefix(stack.front().get_string(ID_C_base_name), "$stack"));
1228
1229 exprt arg0=i_it->args.size()>=1?i_it->args[0]:nil_exprt();
1230 exprt arg1=i_it->args.size()>=2?i_it->args[1]:nil_exprt();
1231
1232 const auto bytecode = i_it->bytecode;
1233 const bytecode_infot &stmt_bytecode_info = bytecode_info[i_it->bytecode];
1234 const std::string statement = stmt_bytecode_info.mnemonic;
1235
1236 // deal with _idx suffixes
1237 if(statement.size()>=2 &&
1238 statement[statement.size()-2]=='_' &&
1239 isdigit(statement[statement.size()-1]))
1240 {
1241 arg0=
1244 std::string(id2string(statement), statement.size()-1, 1)),
1245 java_int_type());
1246 }
1247
1248 typet catch_type;
1249
1250 // Find catch blocks that begin here. For now we assume if more than
1251 // one catch targets the same bytecode then we must be indifferent to
1252 // its type and just call it a Throwable.
1253 auto it=method.exception_table.begin();
1254 for(; it!=method.exception_table.end(); ++it)
1255 {
1256 if(cur_pc==it->handler_pc)
1257 {
1258 if(
1259 catch_type != typet() ||
1260 it->catch_type == struct_tag_typet(irep_idt()))
1261 {
1262 catch_type = struct_tag_typet("java::java.lang.Throwable");
1263 break;
1264 }
1265 else
1266 catch_type=it->catch_type;
1267 }
1268 }
1269
1270 optionalt<codet> catch_instruction;
1271
1272 if(catch_type!=typet())
1273 {
1274 // at the beginning of a handler, clear the stack and
1275 // push the corresponding exceptional return variable
1276 // We also create a catch exception instruction that
1277 // precedes the catch block, and which remove_exceptionst
1278 // will transform into something like:
1279 // catch_var = GLOBAL_THROWN_EXCEPTION;
1280 // GLOBAL_THROWN_EXCEPTION = null;
1281 stack.clear();
1282 symbol_exprt catch_var=
1284 "caught_exception",
1285 java_reference_type(catch_type));
1286 stack.push_back(catch_var);
1287 catch_instruction = code_landingpadt(catch_var);
1288 }
1289
1290 exprt::operandst op = pop(stmt_bytecode_info.pop);
1291 exprt::operandst results;
1292 results.resize(stmt_bytecode_info.push, nil_exprt());
1293
1294 if(bytecode == BC_aconst_null)
1295 {
1296 assert(results.size()==1);
1298 }
1299 else if(bytecode == BC_athrow)
1300 {
1301 PRECONDITION(op.size() == 1 && results.size() == 1);
1302 convert_athrow(i_it->source_location, op, c, results);
1303 }
1304 else if(bytecode == BC_checkcast)
1305 {
1306 // checkcast throws an exception in case a cast of object
1307 // on stack to given type fails.
1308 // The stack isn't modified.
1309 PRECONDITION(op.size() == 1 && results.size() == 1);
1310 convert_checkcast(arg0, op, c, results);
1311 }
1312 else if(bytecode == BC_invokedynamic)
1313 {
1314 if(
1315 const auto res =
1316 convert_invoke_dynamic(i_it->source_location, i_it->address, arg0, c))
1317 {
1318 results.resize(1);
1319 results[0] = *res;
1320 }
1321 }
1322 else if(
1323 bytecode == BC_invokestatic && id2string(arg0.get(ID_identifier)) ==
1324 "java::org.cprover.CProver.assume:(Z)V")
1325 {
1326 const java_method_typet &method_type = to_java_method_type(arg0.type());
1327 INVARIANT(
1328 method_type.parameters().size() == 1,
1329 "function expected to have exactly one parameter");
1330 c = replace_call_to_cprover_assume(i_it->source_location, c);
1331 }
1332 // replace calls to CProver.atomicBegin
1333 else if(
1334 bytecode == BC_invokestatic &&
1335 arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicBegin:()V")
1336 {
1337 c = codet(ID_atomic_begin);
1338 }
1339 // replace calls to CProver.atomicEnd
1340 else if(
1341 bytecode == BC_invokestatic &&
1342 arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicEnd:()V")
1343 {
1344 c = codet(ID_atomic_end);
1345 }
1346 else if(
1347 bytecode == BC_invokeinterface || bytecode == BC_invokespecial ||
1348 bytecode == BC_invokevirtual || bytecode == BC_invokestatic)
1349 {
1350 class_method_descriptor_exprt *class_method_descriptor =
1351 expr_try_dynamic_cast<class_method_descriptor_exprt>(arg0);
1352
1353 INVARIANT(
1354 class_method_descriptor,
1355 "invokeinterface, invokespecial, invokevirtual and invokestatic should "
1356 "be called with a class method descriptor expression as arg0");
1357
1359 i_it->source_location, statement, *class_method_descriptor, c, results);
1360 }
1361 else if(bytecode == BC_return)
1362 {
1363 PRECONDITION(op.empty() && results.empty());
1365 }
1366 else if(bytecode == patternt("?return"))
1367 {
1368 // Return types are promoted in java, so this might need
1369 // conversion.
1370 PRECONDITION(op.size() == 1 && results.empty());
1371 const exprt r =
1374 }
1375 else if(bytecode == patternt("?astore"))
1376 {
1377 PRECONDITION(results.empty());
1378 c = convert_astore(statement, op, i_it->source_location);
1379 }
1380 else if(bytecode == patternt("?store") || bytecode == patternt("?store_?"))
1381 {
1382 // store value into some local variable
1383 PRECONDITION(op.size() == 1 && results.empty());
1384 c = convert_store(
1385 statement, arg0, op, i_it->address, i_it->source_location);
1386 }
1387 else if(bytecode == patternt("?aload"))
1388 {
1389 PRECONDITION(results.size() == 1);
1390 results[0] = convert_aload(statement, op);
1391 }
1392 else if(bytecode == patternt("?load") || bytecode == patternt("?load_?"))
1393 {
1394 // load a value from a local variable
1395 results[0] = convert_load(arg0, statement[0], i_it->address);
1396 }
1397 else if(bytecode == BC_ldc || bytecode == BC_ldc_w || bytecode == BC_ldc2_w)
1398 {
1399 PRECONDITION(op.empty() && results.size() == 1);
1400
1401 INVARIANT(
1402 !can_cast_expr<java_string_literal_exprt>(arg0) && arg0.id() != ID_type,
1403 "String and Class literals should have been lowered in "
1404 "generate_constant_global_variables");
1405
1406 results[0] = arg0;
1407 }
1408 else if(bytecode == BC_goto || bytecode == BC_goto_w)
1409 {
1410 PRECONDITION(op.empty() && results.empty());
1411 const mp_integer number =
1412 numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1413 code_gotot code_goto(label(integer2string(number)));
1414 c=code_goto;
1415 }
1416 else if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1417 {
1418 // As 'goto', except we must also push the subroutine return address:
1419 PRECONDITION(op.empty() && results.size() == 1);
1420 const mp_integer number =
1421 numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1422 code_gotot code_goto(label(integer2string(number)));
1423 c=code_goto;
1424 results[0]=
1426 std::next(i_it)->address,
1427 unsignedbv_typet(64));
1428 results[0].type() = pointer_type(java_void_type());
1429 }
1430 else if(bytecode == BC_ret)
1431 {
1432 // Since we have a bounded target set, make life easier on our analyses
1433 // and write something like:
1434 // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ...
1435 PRECONDITION(op.empty() && results.empty());
1436 assert(!jsr_ret_targets.empty());
1437 c = convert_ret(
1438 jsr_ret_targets, arg0, i_it->source_location, i_it->address);
1439 }
1440 else if(bytecode == BC_iconst_m1)
1441 {
1442 assert(results.size()==1);
1443 results[0]=from_integer(-1, java_int_type());
1444 }
1445 else if(bytecode == patternt("?const_?"))
1446 {
1447 assert(results.size()==1);
1448 results = convert_const(statement, to_constant_expr(arg0), results);
1449 }
1450 else if(bytecode == patternt("?ipush"))
1451 {
1452 PRECONDITION(results.size()==1);
1454 arg0.id()==ID_constant,
1455 "ipush argument expected to be constant");
1456 results[0] = typecast_exprt::conditional_cast(arg0, java_int_type());
1457 }
1458 else if(bytecode == patternt("if_?cmp??"))
1459 {
1460 PRECONDITION(op.size() == 2 && results.empty());
1461 const mp_integer number =
1462 numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1463 c = convert_if_cmp(
1464 address_map, bytecode, op, number, i_it->source_location);
1465 }
1466 else if(bytecode == patternt("if??"))
1467 {
1468 // clang-format off
1469 const irep_idt id=
1470 bytecode == BC_ifeq ? ID_equal :
1471 bytecode == BC_ifne ? ID_notequal :
1472 bytecode == BC_iflt ? ID_lt :
1473 bytecode == BC_ifge ? ID_ge :
1474 bytecode == BC_ifgt ? ID_gt :
1475 bytecode == BC_ifle ? ID_le :
1476 irep_idt();
1477 // clang-format on
1478
1479 INVARIANT(!id.empty(), "unexpected bytecode-if");
1480 PRECONDITION(op.size() == 1 && results.empty());
1481 const mp_integer number =
1482 numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1483 c = convert_if(address_map, op, id, number, i_it->source_location);
1484 }
1485 else if(bytecode == patternt("ifnonnull"))
1486 {
1487 PRECONDITION(op.size() == 1 && results.empty());
1488 const mp_integer number =
1489 numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1490 c = convert_ifnonull(address_map, op, number, i_it->source_location);
1491 }
1492 else if(bytecode == patternt("ifnull"))
1493 {
1494 PRECONDITION(op.size() == 1 && results.empty());
1495 const mp_integer number =
1496 numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1497 c = convert_ifnull(address_map, op, number, i_it->source_location);
1498 }
1499 else if(bytecode == BC_iinc)
1500 {
1501 c = convert_iinc(arg0, arg1, i_it->source_location, i_it->address);
1502 }
1503 else if(bytecode == patternt("?xor"))
1504 {
1505 PRECONDITION(op.size() == 2 && results.size() == 1);
1506 results[0]=bitxor_exprt(op[0], op[1]);
1507 }
1508 else if(bytecode == patternt("?or"))
1509 {
1510 PRECONDITION(op.size() == 2 && results.size() == 1);
1511 results[0]=bitor_exprt(op[0], op[1]);
1512 }
1513 else if(bytecode == patternt("?and"))
1514 {
1515 PRECONDITION(op.size() == 2 && results.size() == 1);
1516 results[0]=bitand_exprt(op[0], op[1]);
1517 }
1518 else if(bytecode == patternt("?shl"))
1519 {
1520 PRECONDITION(op.size() == 2 && results.size() == 1);
1521 results = convert_shl(statement, op, results);
1522 }
1523 else if(bytecode == patternt("?shr"))
1524 {
1525 PRECONDITION(op.size() == 2 && results.size() == 1);
1526 results[0]=ashr_exprt(op[0], op[1]);
1527 }
1528 else if(bytecode == patternt("?ushr"))
1529 {
1530 PRECONDITION(op.size() == 2 && results.size() == 1);
1531 results = convert_ushr(statement, op, results);
1532 }
1533 else if(bytecode == patternt("?add"))
1534 {
1535 PRECONDITION(op.size() == 2 && results.size() == 1);
1536 results[0]=plus_exprt(op[0], op[1]);
1537 }
1538 else if(bytecode == patternt("?sub"))
1539 {
1540 PRECONDITION(op.size() == 2 && results.size() == 1);
1541 results[0]=minus_exprt(op[0], op[1]);
1542 }
1543 else if(bytecode == patternt("?div"))
1544 {
1545 PRECONDITION(op.size() == 2 && results.size() == 1);
1546 results[0]=div_exprt(op[0], op[1]);
1547 }
1548 else if(bytecode == patternt("?mul"))
1549 {
1550 PRECONDITION(op.size() == 2 && results.size() == 1);
1551 results[0]=mult_exprt(op[0], op[1]);
1552 }
1553 else if(bytecode == patternt("?neg"))
1554 {
1555 PRECONDITION(op.size() == 1 && results.size() == 1);
1556 results[0]=unary_minus_exprt(op[0], op[0].type());
1557 }
1558 else if(bytecode == patternt("?rem"))
1559 {
1560 PRECONDITION(op.size() == 2 && results.size() == 1);
1561 // This is _not_ the remainder operation defined by IEEE 754,
1562 // but similar to the fmod C library function.
1563 if(bytecode == BC_frem || bytecode == BC_drem)
1564 results[0] = binary_exprt(op[0], ID_floatbv_mod, op[1]);
1565 else
1566 results[0]=mod_exprt(op[0], op[1]);
1567 }
1568 else if(bytecode == patternt("?cmp"))
1569 {
1570 PRECONDITION(op.size() == 2 && results.size() == 1);
1571 results = convert_cmp(op, results);
1572 }
1573 else if(bytecode == patternt("?cmp?"))
1574 {
1575 PRECONDITION(op.size() == 2 && results.size() == 1);
1576 results = convert_cmp2(statement, op, results);
1577 }
1578 else if(bytecode == patternt("?cmpl"))
1579 {
1580 PRECONDITION(op.size() == 2 && results.size() == 1);
1581 results[0]=binary_relation_exprt(op[0], ID_lt, op[1]);
1582 }
1583 else if(bytecode == BC_dup)
1584 {
1585 PRECONDITION(op.size() == 1 && results.size() == 2);
1586 results[0]=results[1]=op[0];
1587 }
1588 else if(bytecode == BC_dup_x1)
1589 {
1590 PRECONDITION(op.size() == 2 && results.size() == 3);
1591 results[0]=op[1];
1592 results[1]=op[0];
1593 results[2]=op[1];
1594 }
1595 else if(bytecode == BC_dup_x2)
1596 {
1597 PRECONDITION(op.size() == 3 && results.size() == 4);
1598 results[0]=op[2];
1599 results[1]=op[0];
1600 results[2]=op[1];
1601 results[3]=op[2];
1602 }
1603 // dup2* behaviour depends on the size of the operands on the
1604 // stack
1605 else if(bytecode == BC_dup2)
1606 {
1607 PRECONDITION(!stack.empty() && results.empty());
1608 convert_dup2(op, results);
1609 }
1610 else if(bytecode == BC_dup2_x1)
1611 {
1612 PRECONDITION(!stack.empty() && results.empty());
1613 convert_dup2_x1(op, results);
1614 }
1615 else if(bytecode == BC_dup2_x2)
1616 {
1617 PRECONDITION(!stack.empty() && results.empty());
1618 convert_dup2_x2(op, results);
1619 }
1620 else if(bytecode == BC_getfield)
1621 {
1622 PRECONDITION(op.size() == 1 && results.size() == 1);
1623 results[0] = java_bytecode_promotion(
1624 to_member(op[0], expr_dynamic_cast<fieldref_exprt>(arg0), ns));
1625 }
1626 else if(bytecode == BC_getstatic)
1627 {
1628 PRECONDITION(op.empty() && results.size() == 1);
1629 const auto &field_name=arg0.get_string(ID_component_name);
1630 const bool is_assertions_disabled_field=
1631 field_name.find("$assertionsDisabled")!=std::string::npos;
1632
1633 const irep_idt field_id(
1634 get_static_field(arg0.get_string(ID_class), field_name));
1635
1636 // Symbol should have been populated by java_bytecode_convert_class:
1637 const symbol_exprt symbol_expr(
1638 symbol_table.lookup_ref(field_id).symbol_expr());
1639
1641 i_it->source_location,
1642 arg0,
1643 symbol_expr,
1644 is_assertions_disabled_field,
1645 c,
1646 results);
1647 }
1648 else if(bytecode == BC_putfield)
1649 {
1650 PRECONDITION(op.size() == 2 && results.empty());
1651 c = convert_putfield(expr_dynamic_cast<fieldref_exprt>(arg0), op);
1652 }
1653 else if(bytecode == BC_putstatic)
1654 {
1655 PRECONDITION(op.size() == 1 && results.empty());
1656 const auto &field_name=arg0.get_string(ID_component_name);
1657
1658 const irep_idt field_id(
1659 get_static_field(arg0.get_string(ID_class), field_name));
1660
1661 // Symbol should have been populated by java_bytecode_convert_class:
1662 const symbol_exprt symbol_expr(
1663 symbol_table.lookup_ref(field_id).symbol_expr());
1664
1665 c = convert_putstatic(i_it->source_location, arg0, op, symbol_expr);
1666 }
1667 else if(
1668 bytecode == BC_f2i || bytecode == BC_f2l || bytecode == BC_d2i ||
1669 bytecode == BC_d2l)
1670 {
1671 PRECONDITION(op.size() == 1 && results.size() == 1);
1672 typet src_type = java_type_from_char(statement[0]);
1673 typet dest_type = java_type_from_char(statement[2]);
1674
1675 // See JLS 5.1.3. Narrowing Primitive Conversion
1676 // +-NaN is converted to 0
1677 // +-Inf resp. values beyond the int/long range
1678 // are mapped to max/min of int/long.
1679 // Other values are rounded towards zero
1680
1681 // for int: 2147483647, for long: 9223372036854775807L
1682 exprt largest_as_dest =
1684
1685 // 2147483647 is not exactly representable in float;
1686 // it will be rounded up to 2147483648, which is fine.
1687 // 9223372036854775807L is not exactly representable
1688 // neither in float nor in double; it is rounded up to
1689 // 9223372036854775808.0, which is fine.
1690 exprt largest_as_src =
1691 from_integer(to_integer_bitvector_type(dest_type).largest(), src_type);
1692
1693 // for int: -2147483648, for long: -9223372036854775808L
1694 exprt smallest_as_dest =
1696
1697 // -2147483648 and -9223372036854775808L are exactly
1698 // representable in float and double.
1699 exprt smallest_as_src =
1700 from_integer(to_integer_bitvector_type(dest_type).smallest(), src_type);
1701
1702 results[0] = if_exprt(
1703 binary_relation_exprt(op[0], ID_le, smallest_as_src),
1704 smallest_as_dest,
1705 if_exprt(
1706 binary_relation_exprt(op[0], ID_ge, largest_as_src),
1707 largest_as_dest,
1708 typecast_exprt::conditional_cast(op[0], dest_type)));
1709 }
1710 else if(bytecode == patternt("?2?")) // i2c etc.
1711 {
1712 PRECONDITION(op.size() == 1 && results.size() == 1);
1713 typet type=java_type_from_char(statement[2]);
1714 results[0] = typecast_exprt::conditional_cast(op[0], type);
1715
1716 // These types get converted/truncated then immediately turned back into
1717 // ints again, so we just double-cast here.
1718 if(
1719 type == java_char_type() || type == java_byte_type() ||
1720 type == java_short_type())
1721 {
1722 results[0] = typecast_exprt(results[0], java_int_type());
1723 }
1724 }
1725 else if(bytecode == BC_new)
1726 {
1727 // use temporary since the stack symbol might get duplicated
1728 PRECONDITION(op.empty() && results.size() == 1);
1729 convert_new(i_it->source_location, arg0, c, results);
1730 }
1731 else if(bytecode == BC_newarray || bytecode == BC_anewarray)
1732 {
1733 // the op is the array size
1734 PRECONDITION(op.size() == 1 && results.size() == 1);
1735 c = convert_newarray(i_it->source_location, statement, arg0, op, results);
1736 }
1737 else if(bytecode == BC_multianewarray)
1738 {
1739 // The first argument is the type, the second argument is the number of
1740 // dimensions. The size of each dimension is on the stack.
1741 const std::size_t dimension =
1742 numeric_cast_v<std::size_t>(to_constant_expr(arg1));
1743
1744 op=pop(dimension);
1745 assert(results.size()==1);
1746 c = convert_multianewarray(i_it->source_location, arg0, op, results);
1747 }
1748 else if(bytecode == BC_arraylength)
1749 {
1750 PRECONDITION(op.size() == 1 && results.size() == 1);
1751
1752 // any array type is fine here, so we go for a reference array
1754 PRECONDITION(array.type().id() == ID_struct_tag);
1755 array.set(ID_java_member_access, true);
1756
1757 results[0] = member_exprt{std::move(array), "length", java_int_type()};
1758 }
1759 else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1760 {
1761 PRECONDITION(op.size() == 1 && results.empty());
1762 c = convert_switch(op, i_it->args, i_it->source_location);
1763 }
1764 else if(bytecode == BC_pop || bytecode == BC_pop2)
1765 {
1766 c = convert_pop(statement, op);
1767 }
1768 else if(bytecode == BC_instanceof)
1769 {
1770 PRECONDITION(op.size() == 1 && results.size() == 1);
1771
1772 results[0] =
1774 }
1775 else if(bytecode == BC_monitorenter || bytecode == BC_monitorexit)
1776 {
1777 c = convert_monitorenterexit(statement, op, i_it->source_location);
1778 }
1779 else if(bytecode == BC_swap)
1780 {
1781 PRECONDITION(op.size() == 2 && results.size() == 2);
1782 results[1]=op[0];
1783 results[0]=op[1];
1784 }
1785 else if(bytecode == BC_nop)
1786 {
1787 c=code_skipt();
1788 }
1789 else
1790 {
1791 c=codet(statement);
1792 c.operands()=op;
1793 }
1794
1795 c = do_exception_handling(method, working_set, cur_pc, c);
1796
1797 // Finally if this is the beginning of a catch block (already determined
1798 // before the big bytecode switch), insert the exception 'landing pad'
1799 // instruction before the actual instruction:
1800 if(catch_instruction.has_value())
1801 {
1802 if(c.get_statement() != ID_block)
1803 c = code_blockt{{c}};
1804 c.operands().insert(c.operands().begin(), *catch_instruction);
1805 }
1806
1807 if(!i_it->source_location.get_line().empty())
1809
1810 push(results);
1811
1812 instruction.done = true;
1813 for(const auto address : instruction.successors)
1814 {
1815 address_mapt::iterator a_it2=address_map.find(address);
1816 CHECK_RETURN(a_it2 != address_map.end());
1817
1818 // clear the stack if this is an exception handler
1819 for(const auto &exception_row : method.exception_table)
1820 {
1821 if(address==exception_row.handler_pc)
1822 {
1823 stack.clear();
1824 break;
1825 }
1826 }
1827
1828 if(!stack.empty() && a_it2->second.predecessors.size()>1)
1829 {
1830 // copy into temporaries
1831 code_blockt more_code;
1832
1833 // introduce temporaries when successor is seen for the first
1834 // time
1835 if(a_it2->second.stack.empty())
1836 {
1837 for(stackt::iterator s_it=stack.begin();
1838 s_it!=stack.end();
1839 ++s_it)
1840 {
1841 symbol_exprt lhs=tmp_variable("$stack", s_it->type());
1842 code_assignt a(lhs, *s_it);
1843 more_code.add(a);
1844
1845 s_it->swap(lhs);
1846 }
1847 }
1848 else
1849 {
1850 INVARIANT(
1851 a_it2->second.stack.size() == stack.size(),
1852 "Stack sizes should be the same.");
1853 stackt::const_iterator os_it=a_it2->second.stack.begin();
1854 for(auto &expr : stack)
1855 {
1856 assert(has_prefix(os_it->get_string(ID_C_base_name), "$stack"));
1857 symbol_exprt lhs=to_symbol_expr(*os_it);
1858 code_assignt a(lhs, expr);
1859 more_code.add(a);
1860
1861 expr.swap(lhs);
1862 ++os_it;
1863 }
1864 }
1865
1866 if(results.empty())
1867 {
1868 more_code.add(c);
1869 c.swap(more_code);
1870 }
1871 else
1872 {
1873 if(c.get_statement() != ID_block)
1874 c = code_blockt{{c}};
1875
1876 auto &last_statement=to_code_block(c).find_last_statement();
1877 if(last_statement.get_statement()==ID_goto)
1878 {
1879 // Insert stack twiddling before branch:
1880 if(last_statement.get_statement() != ID_block)
1881 last_statement = code_blockt{{last_statement}};
1882
1883 last_statement.operands().insert(
1884 last_statement.operands().begin(),
1885 more_code.statements().begin(),
1886 more_code.statements().end());
1887 }
1888 else
1889 to_code_block(c).append(more_code);
1890 }
1891 }
1892 a_it2->second.stack=stack;
1893 }
1894 }
1895
1896 code_blockt code;
1897
1898 // Add anonymous locals to the symtab:
1899 for(const auto &var : used_local_names)
1900 {
1901 symbolt new_symbol;
1902 new_symbol.name=var.get_identifier();
1903 new_symbol.type=var.type();
1904 new_symbol.base_name=var.get(ID_C_base_name);
1905 new_symbol.pretty_name=strip_java_namespace_prefix(var.get_identifier());
1906 new_symbol.mode=ID_java;
1907 new_symbol.is_type=false;
1908 new_symbol.is_file_local=true;
1909 new_symbol.is_thread_local=true;
1910 new_symbol.is_lvalue=true;
1911 symbol_table.add(new_symbol);
1912 }
1913
1914 // Try to recover block structure as indicated in the local variable table:
1915
1916 // The block tree node mirrors the block structure of root_block,
1917 // indexing the Java PCs where each subblock starts and ends.
1918 block_tree_nodet root;
1919 code_blockt root_block;
1920
1921 // First create a simple flat list of basic blocks. We'll add lexical nesting
1922 // constructs as variable live-ranges require next.
1923 bool start_new_block=true;
1924 bool has_seen_previous_address=false;
1925 method_offsett previous_address = 0;
1926 for(const auto &address_pair : address_map)
1927 {
1928 const method_offsett address = address_pair.first;
1929 assert(address_pair.first==address_pair.second.source->address);
1930 const codet &c=address_pair.second.code;
1931
1932 // Start a new lexical block if this is a branch target:
1933 if(!start_new_block)
1934 start_new_block=targets.find(address)!=targets.end();
1935 // Start a new lexical block if this is a control flow join
1936 // (e.g. due to exceptional control flow)
1937 if(!start_new_block)
1938 start_new_block=address_pair.second.predecessors.size()>1;
1939 // Start a new lexical block if we've just entered a block in which
1940 // exceptions are handled. This is usually the start of a try block, but a
1941 // single try can be represented as multiple non-contiguous blocks in the
1942 // exception table.
1943 if(!start_new_block && has_seen_previous_address)
1944 {
1945 for(const auto &exception_row : method.exception_table)
1946 if(exception_row.start_pc==previous_address)
1947 {
1948 start_new_block=true;
1949 break;
1950 }
1951 }
1952
1953 if(start_new_block)
1954 {
1955 root_block.add(
1956 code_labelt{label(std::to_string(address)), code_blockt{}});
1957 root.branch.push_back(block_tree_nodet::get_leaf());
1958 assert((root.branch_addresses.empty() ||
1959 root.branch_addresses.back()<address) &&
1960 "Block addresses should be unique and increasing");
1961 root.branch_addresses.push_back(address);
1962 }
1963
1964 if(c.get_statement()!=ID_skip)
1965 {
1966 auto &lastlabel = to_code_label(root_block.statements().back());
1967 auto &add_to_block=to_code_block(lastlabel.code());
1968 add_to_block.add(c);
1969 }
1970 start_new_block=address_pair.second.successors.size()>1;
1971
1972 previous_address=address;
1973 has_seen_previous_address=true;
1974 }
1975
1976 // Find out where temporaries are used:
1977 std::map<irep_idt, variablet> temporary_variable_live_ranges;
1978 for(const auto &aentry : address_map)
1980 aentry.first,
1981 aentry.second.code,
1982 temporary_variable_live_ranges);
1983
1984 std::vector<const variablet*> vars_to_process;
1985 for(const auto &vlist : variables)
1986 for(const auto &v : vlist)
1987 vars_to_process.push_back(&v);
1988
1989 for(const auto &v : tmp_vars)
1990 vars_to_process.push_back(
1991 &temporary_variable_live_ranges.at(v.get_identifier()));
1992
1993 for(const auto &v : used_local_names)
1994 vars_to_process.push_back(
1995 &temporary_variable_live_ranges.at(v.get_identifier()));
1996
1997 for(const auto vp : vars_to_process)
1998 {
1999 const auto &v=*vp;
2000 if(v.is_parameter)
2001 continue;
2002 // Merge lexical scopes as far as possible to allow us to
2003 // declare these variable scopes faithfully.
2004 // Don't insert yet, as for the time being the blocks' only
2005 // operands must be other blocks.
2006 // The declarations will be inserted in the next pass instead.
2008 root,
2009 root_block,
2010 v.start_pc,
2011 v.start_pc + v.length,
2012 std::numeric_limits<method_offsett>::max(),
2013 address_map);
2014 }
2015 for(const auto vp : vars_to_process)
2016 {
2017 const auto &v=*vp;
2018 if(v.is_parameter)
2019 continue;
2020 // Skip anonymous variables:
2021 if(v.symbol_expr.get_identifier().empty())
2022 continue;
2023 auto &block = get_block_for_pcrange(
2024 root,
2025 root_block,
2026 v.start_pc,
2027 v.start_pc + v.length,
2028 std::numeric_limits<method_offsett>::max());
2029 code_declt d(v.symbol_expr);
2030 block.statements().insert(block.statements().begin(), d);
2031 }
2032
2033 for(auto &block : root_block.statements())
2034 code.add(block);
2035
2036 return code;
2037}
2038
2040 const irep_idt &statement,
2041 const exprt::operandst &op)
2042{
2043 // these are skips
2044 codet c = code_skipt();
2045
2046 // pop2 removes two single-word items from the stack (e.g. two
2047 // integers, or an integer and an object reference) or one
2048 // two-word item (i.e. a double or a long).
2049 // http://cs.au.dk/~mis/dOvs/jvmspec/ref-pop2.html
2050 if(statement == "pop2" && get_bytecode_type_width(op[0].type()) == 32)
2051 pop(1);
2052 return c;
2053}
2054
2056 const exprt::operandst &op,
2058 const source_locationt &location)
2059{
2060 // we turn into switch-case
2061 code_blockt code_block;
2062 code_block.add_source_location() = location;
2063
2064 bool is_label = true;
2065 for(auto a_it = args.begin(); a_it != args.end();
2066 a_it++, is_label = !is_label)
2067 {
2068 if(is_label)
2069 {
2070 const mp_integer number =
2071 numeric_cast_v<mp_integer>(to_constant_expr(*a_it));
2072 // The switch case does not contain any code, it just branches via a GOTO
2073 // to the jump target of the tableswitch/lookupswitch case at
2074 // hand. Therefore we consider this code to belong to the source bytecode
2075 // instruction and not the target instruction.
2076 const method_offsett label_number =
2077 numeric_cast_v<method_offsett>(number);
2078 code_gotot code(label(std::to_string(label_number)));
2079 code.add_source_location() = location;
2080
2081 if(a_it == args.begin())
2082 {
2083 code_switch_caset code_case(nil_exprt(), std::move(code));
2084 code_case.set_default();
2085
2086 code_block.add(std::move(code_case), location);
2087 }
2088 else
2089 {
2090 exprt case_op =
2091 typecast_exprt::conditional_cast(*std::prev(a_it), op[0].type());
2092 case_op.add_source_location() = location;
2093
2094 code_switch_caset code_case(std::move(case_op), std::move(code));
2095 code_block.add(std::move(code_case), location);
2096 }
2097 }
2098 }
2099
2100 code_switcht code_switch(op[0], std::move(code_block));
2101 code_switch.add_source_location() = location;
2102 return code_switch;
2103}
2104
2106 const irep_idt &statement,
2107 const exprt::operandst &op,
2108 const source_locationt &source_location)
2109{
2110 const irep_idt descriptor = (statement == "monitorenter") ?
2111 "java::java.lang.Object.monitorenter:(Ljava/lang/Object;)V" :
2112 "java::java.lang.Object.monitorexit:(Ljava/lang/Object;)V";
2113
2114 if(!threading_support || !symbol_table.has_symbol(descriptor))
2115 return code_skipt();
2116
2117 // becomes a function call
2118 java_method_typet type(
2120 java_void_type());
2121 code_function_callt call(symbol_exprt(descriptor, type), {op[0]});
2122 call.add_source_location() = source_location;
2124 needed_lazy_methods->add_needed_method(descriptor);
2125 return std::move(call);
2126}
2127
2129 exprt::operandst &op,
2130 exprt::operandst &results)
2131{
2132 if(get_bytecode_type_width(stack.back().type()) == 32)
2133 op = pop(2);
2134 else
2135 op = pop(1);
2136
2137 results.insert(results.end(), op.begin(), op.end());
2138 results.insert(results.end(), op.begin(), op.end());
2139}
2140
2142 exprt::operandst &op,
2143 exprt::operandst &results)
2144{
2145 if(get_bytecode_type_width(stack.back().type()) == 32)
2146 op = pop(3);
2147 else
2148 op = pop(2);
2149
2150 results.insert(results.end(), op.begin() + 1, op.end());
2151 results.insert(results.end(), op.begin(), op.end());
2152}
2153
2155 exprt::operandst &op,
2156 exprt::operandst &results)
2157{
2158 if(get_bytecode_type_width(stack.back().type()) == 32)
2159 op = pop(2);
2160 else
2161 op = pop(1);
2162
2163 exprt::operandst op2;
2164
2165 if(get_bytecode_type_width(stack.back().type()) == 32)
2166 op2 = pop(2);
2167 else
2168 op2 = pop(1);
2169
2170 results.insert(results.end(), op.begin(), op.end());
2171 results.insert(results.end(), op2.begin(), op2.end());
2172 results.insert(results.end(), op.begin(), op.end());
2173}
2174
2176 const irep_idt &statement,
2177 const constant_exprt &arg0,
2178 exprt::operandst &results) const
2179{
2180 const char type_char = statement[0];
2181 const bool is_double('d' == type_char);
2182 const bool is_float('f' == type_char);
2183
2184 if(is_double || is_float)
2185 {
2186 const ieee_float_spect spec(
2189
2190 ieee_floatt value(spec);
2191 if(arg0.type().id() != ID_floatbv)
2192 {
2193 const mp_integer number = numeric_cast_v<mp_integer>(arg0);
2194 value.from_integer(number);
2195 }
2196 else
2197 value.from_expr(arg0);
2198
2199 results[0] = value.to_expr();
2200 }
2201 else
2202 {
2203 const mp_integer value = numeric_cast_v<mp_integer>(arg0);
2204 const typet type = java_type_from_char(statement[0]);
2205 results[0] = from_integer(value, type);
2206 }
2207 return results;
2208}
2209
2211 const java_method_typet::parameterst &parameters,
2213{
2214 // do some type adjustment for the arguments,
2215 // as Java promotes arguments
2216 // Also cast pointers since intermediate locals
2217 // can be void*.
2218 INVARIANT(
2219 parameters.size() == arguments.size(),
2220 "for each parameter there must be exactly one argument");
2221 for(std::size_t i = 0; i < parameters.size(); i++)
2222 {
2223 const typet &type = parameters[i].type();
2224 if(
2225 type == java_boolean_type() || type == java_char_type() ||
2226 type == java_byte_type() || type == java_short_type() ||
2227 type.id() == ID_pointer)
2228 {
2229 arguments[i] = typecast_exprt::conditional_cast(arguments[i], type);
2230 }
2231 }
2232}
2233
2235 source_locationt location,
2236 const irep_idt &statement,
2237 class_method_descriptor_exprt &class_method_descriptor,
2238 codet &c,
2239 exprt::operandst &results)
2240{
2241 const bool use_this(statement != "invokestatic");
2242 const bool is_virtual(
2243 statement == "invokevirtual" || statement == "invokeinterface");
2244
2245 const irep_idt &invoked_method_id = class_method_descriptor.get_identifier();
2246 INVARIANT(
2247 !invoked_method_id.empty(),
2248 "invoke statement arg0 must have an identifier");
2249
2250 auto method_symbol = symbol_table.symbols.find(invoked_method_id);
2251
2252 // Use the most precise type available: the actual symbol has generic info,
2253 // whereas the type given by the invoke instruction doesn't and is therefore
2254 // less accurate.
2255 if(method_symbol != symbol_table.symbols.end())
2256 {
2257 // Note the number of parameters might change here due to constructors using
2258 // invokespecial will have zero arguments (the `this` is added below)
2259 // but the symbol for the <init> will have the this parameter.
2260 INVARIANT(
2261 to_java_method_type(class_method_descriptor.type()).return_type().id() ==
2262 to_code_type(method_symbol->second.type).return_type().id(),
2263 "Function return type must not change in kind");
2264 class_method_descriptor.type() = method_symbol->second.type;
2265 }
2266
2267 // Note arg0 and arg0.type() are subject to many side-effects in this method,
2268 // then finally used to populate the call instruction.
2269 java_method_typet &method_type =
2270 to_java_method_type(class_method_descriptor.type());
2271
2272 java_method_typet::parameterst &parameters(method_type.parameters());
2273
2274 if(use_this)
2275 {
2276 const irep_idt class_id = class_method_descriptor.class_id();
2277
2278 if(parameters.empty() || !parameters[0].get_this())
2279 {
2280 typet thistype = struct_tag_typet(class_id);
2281 reference_typet object_ref_type = java_reference_type(thistype);
2282 java_method_typet::parametert this_p(object_ref_type);
2283 this_p.set_this();
2284 this_p.set_base_name(ID_this);
2285 parameters.insert(parameters.begin(), this_p);
2286 }
2287
2288 // Note invokespecial is used for super-method calls as well as
2289 // constructors.
2290 if(statement == "invokespecial")
2291 {
2292 if(is_constructor(invoked_method_id))
2293 {
2295 needed_lazy_methods->add_needed_class(class_id);
2296 method_type.set_is_constructor();
2297 }
2298 else
2299 method_type.set(ID_java_super_method_call, true);
2300 }
2301 }
2302
2303 location.set_function(method_id);
2304
2305 code_function_callt::argumentst arguments = pop(parameters.size());
2306
2307 // double-check a bit
2308 INVARIANT(
2309 !use_this || arguments.front().type().id() == ID_pointer,
2310 "first argument must be a pointer");
2311
2312 adjust_invoke_argument_types(parameters, arguments);
2313
2314 // do some type adjustment for return values
2315 exprt lhs = nil_exprt();
2316 const typet &return_type = method_type.return_type();
2317
2318 if(return_type.id() != ID_empty)
2319 {
2320 // return types are promoted in Java
2321 lhs = tmp_variable("return", return_type);
2322 exprt promoted = java_bytecode_promotion(lhs);
2323 results.resize(1);
2324 results[0] = promoted;
2325 }
2326
2327 // If we don't have a definition for the called symbol, and we won't
2328 // inherit a definition from a super-class, we create a new symbol and
2329 // insert it in the symbol table. The name and type of the method are
2330 // derived from the information we have in the call.
2331 // We fix the access attribute to ID_private, because of the following
2332 // reasons:
2333 // - We don't know the original access attribute and since the .class file is
2334 // unavailable, we have no way to know.
2335 // - The translated method could be an inherited protected method, hence
2336 // accessible from the original caller, but not from the generated test.
2337 // Therefore we must assume that the method is not accessible.
2338 // We set opaque methods as final to avoid assuming they can be overridden.
2339 if(
2340 method_symbol == symbol_table.symbols.end() &&
2341 !(is_virtual && is_method_inherited(
2342 class_method_descriptor.class_id(),
2343 class_method_descriptor.mangled_method_name())))
2344 {
2346 invoked_method_id,
2347 class_method_descriptor.base_method_name(),
2348 id2string(class_method_descriptor.class_id()).substr(6) + "." +
2349 id2string(class_method_descriptor.base_method_name()) + "()",
2350 method_type,
2351 class_method_descriptor.class_id(),
2354 }
2355
2356 exprt function;
2357 if(is_virtual)
2358 {
2359 // dynamic binding
2360 PRECONDITION(use_this);
2361 PRECONDITION(!arguments.empty());
2362 function = class_method_descriptor;
2363 // Populate needed methods later,
2364 // once we know what object types can exist.
2365 }
2366 else
2367 {
2368 // static binding
2369 function = symbol_exprt(invoked_method_id, method_type);
2371 {
2372 needed_lazy_methods->add_needed_method(invoked_method_id);
2373 // Calling a static method causes static initialization:
2374 needed_lazy_methods->add_needed_class(class_method_descriptor.class_id());
2375 }
2376 }
2377
2379 std::move(lhs), std::move(function), std::move(arguments));
2380 call.add_source_location() = location;
2381 call.function().add_source_location() = location;
2382
2383 // Replacing call if it is a function of the Character library,
2384 // returning the same call otherwise
2386
2387 if(!use_this)
2388 {
2389 codet clinit_call = get_clinit_call(class_method_descriptor.class_id());
2390 if(clinit_call.get_statement() != ID_skip)
2391 c = code_blockt({clinit_call, c});
2392 }
2393}
2394
2396 source_locationt location,
2397 codet &c)
2398{
2399 exprt operand = pop(1)[0];
2400
2401 // we may need to adjust the type of the argument
2402 operand = typecast_exprt::conditional_cast(operand, bool_typet());
2403
2404 c = code_assumet(operand);
2405 location.set_function(method_id);
2406 c.add_source_location() = location;
2407 return c;
2408}
2409
2411 const exprt &arg0,
2412 const exprt::operandst &op,
2413 codet &c,
2414 exprt::operandst &results) const
2415{
2416 java_instanceof_exprt check(op[0], to_struct_tag_type(arg0.type()));
2417 code_assertt assert_class(check);
2418 assert_class.add_source_location().set_comment("Dynamic cast check");
2419 assert_class.add_source_location().set_property_class("bad-dynamic-cast");
2420 // we add this assert such that we can recognise it
2421 // during the instrumentation phase
2422 c = std::move(assert_class);
2423 results[0] = op[0];
2424}
2425
2427 const source_locationt &location,
2428 const exprt::operandst &op,
2429 codet &c,
2430 exprt::operandst &results) const
2431{
2432 if(
2435 to_reference_type(op[0].type())) == "java::java.lang.AssertionError") &&
2437 {
2438 // we translate athrow into
2439 // ASSERT false;
2440 // ASSUME false:
2441 code_assertt assert_code(false_exprt{});
2442 source_locationt assert_location = location; // copy
2443 assert_location.set_comment("assertion at " + location.as_string());
2444 assert_location.set("user-provided", true);
2445 assert_location.set_property_class(ID_assertion);
2446 assert_code.add_source_location() = assert_location;
2447
2448 code_assumet assume_code(false_exprt{});
2449 source_locationt assume_location = location; // copy
2450 assume_location.set("user-provided", true);
2451 assume_code.add_source_location() = assume_location;
2452
2453 c = code_blockt({assert_code, assume_code});
2454 }
2455 else
2456 {
2457 side_effect_expr_throwt throw_expr(irept(), typet(), location);
2458 throw_expr.copy_to_operands(op[0]);
2459 c = code_expressiont(throw_expr);
2460 }
2461 results[0] = op[0];
2462}
2463
2466 const std::set<method_offsett> &working_set,
2467 method_offsett cur_pc,
2468 codet &code)
2469{
2470 // For each exception handler range that starts here add a CATCH-PUSH marker
2471 // Each CATCH-PUSH records a list of all the exception id and handler label
2472 // pairs handled for its exact block
2473
2474 // Gather all try-catch blocks that have cur_pc as the starting pc
2475 typedef std::vector<std::reference_wrapper<
2477 std::map<std::size_t, exceptionst> exceptions_by_end;
2479 : method.exception_table)
2480 {
2481 if(exception.start_pc == cur_pc)
2482 exceptions_by_end[exception.end_pc].push_back(exception);
2483 }
2484 for(const auto &exceptions : exceptions_by_end)
2485 {
2486 // For each block with a distinct end position create one CATCH-PUSH
2487 code_push_catcht catch_push;
2488 // Fill in its exception_list
2489 code_push_catcht::exception_listt &exception_list =
2490 catch_push.exception_list();
2492 : exceptions.second)
2493 {
2494 exception_list.emplace_back(
2495 exception.catch_type.get_identifier(),
2496 // Record the exception handler in the CATCH-PUSH instruction by
2497 // generating a label corresponding to the handler's pc
2498 label(std::to_string(exception.handler_pc)));
2499 }
2500 // Prepend the CATCH-PUSH instruction
2501 code = code_blockt({ std::move(catch_push), code });
2502 }
2503
2504 // Next add the CATCH-POP instructions
2505 // exception_row.end_pc is exclusive so append a CATCH-POP instruction if
2506 // this is the instruction before it.
2507 // To do this, attempt to find all exception handlers that end at the
2508 // earliest known instruction after this one.
2509
2510 // Dangerously, we assume that the next opcode in the bytecode after the end
2511 // of the exception handler block (whose address matches the exclusive end
2512 // position of the block) will be a successor of some code investigated
2513 // before the instruction at the end of that handler and therefore in the
2514 // working set.
2515 // As an example of where this may fail, for non-obfuscated bytecode
2516 // generated by most compilers the next opcode after the block ending at the
2517 // end of the try block is the lexically next bit of code after the try
2518 // block, i.e. the catch block. When there aren't any throwing statements in
2519 // the try block this block will not be the successor of any instruction.
2520
2521 auto next_opcode_it = std::find_if(
2522 working_set.begin(),
2523 working_set.end(),
2524 [cur_pc](method_offsett offset) { return offset > cur_pc; });
2525 if(next_opcode_it != working_set.end())
2526 {
2527 // Count the distinct start positions of handlers that end at this location
2528 std::set<std::size_t> start_positions; // Use a set to deduplicate
2529 for(const auto &exception_row : method.exception_table)
2530 {
2531 // Check if the next instruction found is the (exclusive) end of a block
2532 if(*next_opcode_it == exception_row.end_pc)
2533 start_positions.insert(exception_row.start_pc);
2534 }
2535 for(std::size_t handler = 0; handler < start_positions.size(); ++handler)
2536 {
2537 // Append a CATCH-POP instruction before the end of the block
2538 code = code_blockt({ code, code_pop_catcht() });
2539 }
2540 }
2541
2542 return code;
2543}
2544
2546 const source_locationt &location,
2547 const exprt &arg0,
2548 const exprt::operandst &op,
2549 exprt::operandst &results)
2550{
2551 const reference_typet ref_type = java_reference_type(arg0.type());
2552 side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2553 java_new_array.operands() = op;
2554
2555 code_blockt create;
2556
2557 if(max_array_length != 0)
2558 {
2560 binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2561 code_assumet assume_le_max_size(le_max_size);
2562 create.add(assume_le_max_size);
2563 }
2564
2565 const exprt tmp = tmp_variable("newarray", ref_type);
2566 create.add(code_assignt(tmp, java_new_array));
2567 results[0] = tmp;
2568
2569 return create;
2570}
2571
2573 const source_locationt &location,
2574 const irep_idt &statement,
2575 const exprt &arg0,
2576 const exprt::operandst &op,
2577 exprt::operandst &results)
2578{
2579 java_reference_typet ref_type = [&]() {
2580 if(statement == "newarray")
2581 {
2582 irep_idt id = arg0.type().id();
2583
2584 char element_type;
2585 if(id == ID_bool)
2586 element_type = 'z';
2587 else if(id == ID_char)
2588 element_type = 'c';
2589 else if(id == ID_float)
2590 element_type = 'f';
2591 else if(id == ID_double)
2592 element_type = 'd';
2593 else if(id == ID_byte)
2594 element_type = 'b';
2595 else if(id == ID_short)
2596 element_type = 's';
2597 else if(id == ID_int)
2598 element_type = 'i';
2599 else if(id == ID_long)
2600 element_type = 'j';
2601 else
2602 element_type = '?';
2603 return java_array_type(element_type);
2604 }
2605 else
2606 {
2608 }
2609 }();
2610
2611 side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2612 java_new_array.copy_to_operands(op[0]);
2613
2614 code_blockt block;
2615
2616 if(max_array_length != 0)
2617 {
2619 binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2620 code_assumet assume_le_max_size(le_max_size);
2621 block.add(std::move(assume_le_max_size));
2622 }
2623 const exprt tmp = tmp_variable("newarray", ref_type);
2624 block.add(code_assignt(tmp, java_new_array));
2625 results[0] = tmp;
2626
2627 return block;
2628}
2629
2631 const source_locationt &location,
2632 const exprt &arg0,
2633 codet &c,
2634 exprt::operandst &results)
2635{
2636 const reference_typet ref_type = java_reference_type(arg0.type());
2637 side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
2638
2639 if(!location.get_line().empty())
2640 java_new_expr.add_source_location() = location;
2641
2642 const exprt tmp = tmp_variable("new", ref_type);
2643 c = code_assignt(tmp, java_new_expr);
2644 c.add_source_location() = location;
2645 codet clinit_call =
2647 if(clinit_call.get_statement() != ID_skip)
2648 {
2649 c = code_blockt({clinit_call, c});
2650 }
2651 results[0] = tmp;
2652}
2653
2655 const source_locationt &location,
2656 const exprt &arg0,
2657 const exprt::operandst &op,
2658 const symbol_exprt &symbol_expr)
2659{
2660 if(needed_lazy_methods && arg0.type().id() == ID_struct_tag)
2661 {
2662 needed_lazy_methods->add_needed_class(
2664 }
2665
2666 code_blockt block;
2667 block.add_source_location() = location;
2668
2669 // Note this initializer call deliberately inits the class used to make
2670 // the reference, which may be a child of the class that actually defines
2671 // the field.
2672 codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2673 if(clinit_call.get_statement() != ID_skip)
2674 block.add(clinit_call);
2675
2677 "stack_static_field",
2678 block,
2680 symbol_expr.get_identifier());
2681 block.add(code_assignt(symbol_expr, op[0]));
2682 return block;
2683}
2684
2686 const fieldref_exprt &arg0,
2687 const exprt::operandst &op)
2688{
2689 code_blockt block;
2691 "stack_field", block, bytecode_write_typet::FIELD, arg0.component_name());
2692 block.add(code_assignt(to_member(op[0], arg0, ns), op[1]));
2693 return block;
2694}
2695
2697 const source_locationt &source_location,
2698 const exprt &arg0,
2699 const symbol_exprt &symbol_expr,
2700 const bool is_assertions_disabled_field,
2701 codet &c,
2702 exprt::operandst &results)
2703{
2705 {
2706 if(arg0.type().id() == ID_struct_tag)
2707 {
2708 needed_lazy_methods->add_needed_class(
2710 }
2711 else if(arg0.type().id() == ID_pointer)
2712 {
2713 const auto &pointer_type = to_pointer_type(arg0.type());
2714 if(pointer_type.base_type().id() == ID_struct_tag)
2715 {
2716 needed_lazy_methods->add_needed_class(
2718 }
2719 }
2720 else if(is_assertions_disabled_field)
2721 {
2722 needed_lazy_methods->add_needed_class(arg0.get_string(ID_class));
2723 }
2724 }
2725 symbol_exprt symbol_with_location = symbol_expr;
2726 symbol_with_location.add_source_location() = source_location;
2727 results[0] = java_bytecode_promotion(symbol_with_location);
2728
2729 // Note this initializer call deliberately inits the class used to make
2730 // the reference, which may be a child of the class that actually defines
2731 // the field.
2732 codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2733 if(clinit_call.get_statement() != ID_skip)
2734 c = clinit_call;
2735 else if(is_assertions_disabled_field)
2736 {
2737 // set $assertionDisabled to false
2738 c = code_assignt(symbol_expr, false_exprt());
2739 }
2740}
2741
2743 const irep_idt &statement,
2744 const exprt::operandst &op,
2745 exprt::operandst &results) const
2746{
2747 const int nan_value(statement[4] == 'l' ? -1 : 1);
2748 const typet result_type(java_int_type());
2749 const exprt nan_result(from_integer(nan_value, result_type));
2750
2751 // (value1 == NaN || value2 == NaN) ?
2752 // nan_value : value1 < value2 ? -1 : value2 < value1 1 ? 1 : 0;
2753 // (value1 == NaN || value2 == NaN) ?
2754 // nan_value : value1 == value2 ? 0 : value1 < value2 -1 ? 1 : 0;
2755
2756 isnan_exprt nan_op0(op[0]);
2757 isnan_exprt nan_op1(op[1]);
2758 exprt one = from_integer(1, result_type);
2759 exprt minus_one = from_integer(-1, result_type);
2760 results[0] = if_exprt(
2761 or_exprt(nan_op0, nan_op1),
2762 nan_result,
2763 if_exprt(
2764 ieee_float_equal_exprt(op[0], op[1]),
2765 from_integer(0, result_type),
2766 if_exprt(binary_relation_exprt(op[0], ID_lt, op[1]), minus_one, one)));
2767 return results;
2768}
2769
2771 const exprt::operandst &op,
2772 exprt::operandst &results) const
2773{ // The integer result on the stack is:
2774 // 0 if op[0] equals op[1]
2775 // -1 if op[0] is less than op[1]
2776 // 1 if op[0] is greater than op[1]
2777
2778 const typet t = java_int_type();
2779 exprt one = from_integer(1, t);
2780 exprt minus_one = from_integer(-1, t);
2781
2782 if_exprt greater =
2783 if_exprt(binary_relation_exprt(op[0], ID_gt, op[1]), one, minus_one);
2784
2785 results[0] = if_exprt(
2786 binary_relation_exprt(op[0], ID_equal, op[1]), from_integer(0, t), greater);
2787 return results;
2788}
2789
2791 const irep_idt &statement,
2792 const exprt::operandst &op,
2793 exprt::operandst &results) const
2794{
2795 const typet type = java_type_from_char(statement[0]);
2796
2797 const std::size_t width = get_bytecode_type_width(type);
2798
2799 // According to JLS 15.19 Shift Operators
2800 // a mask 0b11111 is applied for int and 0b111111 for long.
2801 exprt mask = from_integer(width - 1, op[1].type());
2802
2803 results[0] = shl_exprt(op[0], bitand_exprt(op[1], mask));
2804 return results;
2805}
2806
2808 const irep_idt &statement,
2809 const exprt::operandst &op,
2810 exprt::operandst &results) const
2811{
2812 const typet type = java_type_from_char(statement[0]);
2813
2814 const std::size_t width = get_bytecode_type_width(type);
2815 typet target = unsignedbv_typet(width);
2816
2817 exprt lhs = typecast_exprt::conditional_cast(op[0], target);
2818 exprt rhs = typecast_exprt::conditional_cast(op[1], target);
2819
2820 results[0] =
2821 typecast_exprt::conditional_cast(lshr_exprt(lhs, rhs), op[0].type());
2822
2823 return results;
2824}
2825
2827 const exprt &arg0,
2828 const exprt &arg1,
2829 const source_locationt &location,
2830 const method_offsett address)
2831{
2832 code_blockt block;
2833 block.add_source_location() = location;
2834 // search variable on stack
2835 const exprt &locvar = variable(arg0, 'i', address);
2837 "stack_iinc",
2838 block,
2840 to_symbol_expr(locvar).get_identifier());
2841
2842 const exprt arg1_int_type =
2844 code_assignt code_assign(
2845 variable(arg0, 'i', address),
2846 plus_exprt(
2848 variable(arg0, 'i', address), java_int_type()),
2849 arg1_int_type));
2850 block.add(std::move(code_assign));
2851
2852 return block;
2853}
2854
2857 const exprt::operandst &op,
2858 const mp_integer &number,
2859 const source_locationt &location) const
2860{
2862 const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2863
2864 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2865
2866 code_ifthenelset code_branch(
2867 binary_relation_exprt(lhs, ID_equal, rhs),
2868 code_gotot(label(std::to_string(label_number))));
2869
2870 code_branch.then_case().add_source_location() =
2871 address_map.at(label_number).source->source_location;
2872 code_branch.add_source_location() = location;
2873
2874 return code_branch;
2875}
2876
2879 const exprt::operandst &op,
2880 const mp_integer &number,
2881 const source_locationt &location) const
2882{
2884 const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2885
2886 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2887
2888 code_ifthenelset code_branch(
2889 binary_relation_exprt(lhs, ID_notequal, rhs),
2890 code_gotot(label(std::to_string(label_number))));
2891
2892 code_branch.then_case().add_source_location() =
2893 address_map.at(label_number).source->source_location;
2894 code_branch.add_source_location() = location;
2895
2896 return code_branch;
2897}
2898
2901 const exprt::operandst &op,
2902 const irep_idt &id,
2903 const mp_integer &number,
2904 const source_locationt &location) const
2905{
2906 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2907
2908 code_ifthenelset code_branch(
2909 binary_relation_exprt(op[0], id, from_integer(0, op[0].type())),
2910 code_gotot(label(std::to_string(label_number))));
2911
2912 code_branch.cond().add_source_location() = location;
2914 code_branch.then_case().add_source_location() =
2915 address_map.at(label_number).source->source_location;
2917 code_branch.add_source_location() = location;
2919
2920 return code_branch;
2921}
2922
2925 const u1 bytecode,
2926 const exprt::operandst &op,
2927 const mp_integer &number,
2928 const source_locationt &location) const
2929{
2930 const irep_idt cmp_op = get_if_cmp_operator(bytecode);
2931 binary_relation_exprt condition(
2932 op[0], cmp_op, typecast_exprt::conditional_cast(op[1], op[0].type()));
2933 condition.add_source_location() = location;
2934
2935 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2936
2937 code_ifthenelset code_branch(
2938 std::move(condition), code_gotot(label(std::to_string(label_number))));
2939
2940 code_branch.then_case().add_source_location() =
2941 address_map.at(label_number).source->source_location;
2942 code_branch.add_source_location() = location;
2943
2944 return code_branch;
2945}
2946
2948 const std::vector<method_offsett> &jsr_ret_targets,
2949 const exprt &arg0,
2950 const source_locationt &location,
2951 const method_offsett address)
2952{
2953 code_blockt c;
2954 auto retvar = variable(arg0, 'a', address);
2955 for(size_t idx = 0, idxlim = jsr_ret_targets.size(); idx != idxlim; ++idx)
2956 {
2957 irep_idt number = std::to_string(jsr_ret_targets[idx]);
2958 code_gotot g(label(number));
2959 g.add_source_location() = location;
2960 if(idx == idxlim - 1)
2961 c.add(g);
2962 else
2963 {
2964 auto address_ptr =
2965 from_integer(jsr_ret_targets[idx], unsignedbv_typet(64));
2966 address_ptr.type() = pointer_type(java_void_type());
2967
2968 code_ifthenelset branch(equal_exprt(retvar, address_ptr), std::move(g));
2969
2970 branch.cond().add_source_location() = location;
2971 branch.add_source_location() = location;
2972
2973 c.add(std::move(branch));
2974 }
2975 }
2976 return c;
2977}
2978
2984static exprt conditional_array_cast(const exprt &expr, char type_char)
2985{
2986 const auto ref_type =
2987 type_try_dynamic_cast<java_reference_typet>(expr.type());
2988 const bool typecast_not_needed =
2989 ref_type && ((type_char == 'b' && ref_type->subtype().get_identifier() ==
2990 "java::array[boolean]") ||
2991 *ref_type == java_array_type(type_char));
2992 return typecast_not_needed ? expr
2993 : typecast_exprt(expr, java_array_type(type_char));
2994}
2995
2997 const irep_idt &statement,
2998 const exprt::operandst &op)
2999{
3000 PRECONDITION(op.size() == 2);
3001 const char type_char = statement[0];
3002 const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
3003 dereference_exprt deref{op_with_right_type};
3004 deref.set(ID_java_member_access, true);
3005
3006 auto java_array_type = type_try_dynamic_cast<struct_tag_typet>(deref.type());
3007 INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3008 member_exprt data_ptr{
3010 plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3011 // tag it so it's easy to identify during instrumentation
3012 data_plus_offset.set(ID_java_array_access, true);
3013 return java_bytecode_promotion(dereference_exprt{data_plus_offset});
3014}
3015
3017 const exprt &index,
3018 char type_char,
3019 size_t address)
3020{
3021 const exprt var = variable(index, type_char, address);
3022 if(type_char == 'i')
3023 {
3024 INVARIANT(
3026 type_try_dynamic_cast<bitvector_typet>(var.type())->get_width() <= 32,
3027 "iload can be used for boolean, byte, short, int and char");
3029 }
3030 INVARIANT(
3031 (type_char == 'a' && can_cast_type<reference_typet>(var.type())) ||
3032 var.type() == java_type_from_char(type_char),
3033 "Variable type must match [adflv]load return type");
3034 return var;
3035}
3036
3038 const irep_idt &statement,
3039 const exprt &arg0,
3040 const exprt::operandst &op,
3041 const method_offsett address,
3042 const source_locationt &location)
3043{
3044 const exprt var = variable(arg0, statement[0], address);
3045 const irep_idt &var_name = to_symbol_expr(var).get_identifier();
3046
3047 code_blockt block;
3048 block.add_source_location() = location;
3049
3051 "stack_store",
3052 block,
3054 var_name);
3055
3056 block.add(
3058 location);
3059 return block;
3060}
3061
3063 const irep_idt &statement,
3064 const exprt::operandst &op,
3065 const source_locationt &location)
3066{
3067 PRECONDITION(op.size() == 3);
3068 const char type_char = statement[0];
3069 const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
3070 dereference_exprt deref{op_with_right_type};
3071 deref.set(ID_java_member_access, true);
3072
3073 auto java_array_type = type_try_dynamic_cast<struct_tag_typet>(deref.type());
3074 INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3075 member_exprt data_ptr{
3077 plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3078 // tag it so it's easy to identify during instrumentation
3079 data_plus_offset.set(ID_java_array_access, true);
3080
3081 code_blockt block;
3082 block.add_source_location() = location;
3083
3085 "stack_astore", block, bytecode_write_typet::ARRAY_REF, "");
3086
3087 code_assignt array_put{dereference_exprt{data_plus_offset}, op[2]};
3088 block.add(std::move(array_put), location);
3089 return block;
3090}
3091
3093 const source_locationt &location,
3094 std::size_t instruction_address,
3095 const exprt &arg0,
3096 codet &result_code)
3097{
3098 const java_method_typet &method_type = to_java_method_type(arg0.type());
3099 const java_method_typet::parameterst &parameters(method_type.parameters());
3100 const typet &return_type = method_type.return_type();
3101
3102 // Note these must be popped regardless of whether we understand the lambda
3103 // method or not
3104 code_function_callt::argumentst arguments = pop(parameters.size());
3105
3106 irep_idt synthetic_class_name =
3107 lambda_synthetic_class_name(method_id, instruction_address);
3108
3109 if(!symbol_table.has_symbol(synthetic_class_name))
3110 {
3111 // We failed to parse the invokedynamic handle as a Java 8+ lambda;
3112 // give up and return null.
3113 const auto value = zero_initializer(return_type, location, ns);
3114 if(!value.has_value())
3115 {
3116 log.error().source_location = location;
3117 log.error() << "failed to zero-initialize return type" << messaget::eom;
3118 throw 0;
3119 }
3120 return value;
3121 }
3122
3123 // Construct an instance of the synthetic class created for this invokedynamic
3124 // site:
3125
3126 irep_idt constructor_name = id2string(synthetic_class_name) + ".<init>";
3127
3128 const symbolt &constructor_symbol = ns.lookup(constructor_name);
3129
3130 code_blockt result;
3131
3132 // SyntheticType lambda_new = new SyntheticType;
3133 const reference_typet ref_type =
3134 java_reference_type(struct_tag_typet(synthetic_class_name));
3135 side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
3136 const exprt new_instance = tmp_variable("lambda_new", ref_type);
3137 result.add(code_assignt(new_instance, java_new_expr, location));
3138
3139 // lambda_new.<init>(capture_1, capture_2, ...);
3140 // Add the implicit 'this' parameter:
3141 arguments.insert(arguments.begin(), new_instance);
3144
3145 code_function_callt constructor_call(
3146 constructor_symbol.symbol_expr(), arguments);
3147 constructor_call.add_source_location() = location;
3148 result.add(constructor_call);
3150 {
3151 needed_lazy_methods->add_needed_method(constructor_symbol.name);
3152 needed_lazy_methods->add_needed_class(synthetic_class_name);
3153 }
3154
3155 result_code = std::move(result);
3156
3157 if(return_type.id() == ID_empty)
3158 return {};
3159 else
3160 return new_instance;
3161}
3162
3165 const std::vector<method_offsett> &jsr_ret_targets,
3166 const std::vector<
3167 std::vector<java_bytecode_parse_treet::instructiont>::const_iterator>
3168 &ret_instructions) const
3169{ // Draw edges from every `ret` to every `jsr` successor. Could do better with
3170 // flow analysis to distinguish multiple subroutines within the same function.
3171 for(const auto &retinst : ret_instructions)
3172 {
3173 auto &a_entry = address_map.at(retinst->address);
3174 a_entry.successors.insert(
3175 a_entry.successors.end(), jsr_ret_targets.begin(), jsr_ret_targets.end());
3176 }
3177}
3178
3179std::vector<java_bytecode_convert_methodt::method_offsett>
3181 const method_offsett address,
3183 const
3184{
3185 std::vector<method_offsett> result;
3186 for(const auto &exception_row : exception_table)
3187 {
3188 if(address >= exception_row.start_pc && address < exception_row.end_pc)
3189 result.push_back(exception_row.handler_pc);
3190 }
3191 return result;
3192}
3193
3207 symbolt &method_symbol,
3209 &local_variable_table,
3210 symbol_table_baset &symbol_table)
3211{
3212 // Obtain a std::vector of java_method_typet::parametert objects from the
3213 // (function) type of the symbol
3214 java_method_typet &method_type = to_java_method_type(method_symbol.type);
3215 java_method_typet::parameterst &parameters = method_type.parameters();
3216
3217 // Find number of parameters
3218 unsigned slots_for_parameters = java_method_parameter_slots(method_type);
3219
3220 // Find parameter names in the local variable table:
3221 typedef std::pair<irep_idt, irep_idt> base_name_and_identifiert;
3222 std::map<std::size_t, base_name_and_identifiert> param_names;
3223 for(const auto &v : local_variable_table)
3224 {
3225 if(v.index < slots_for_parameters)
3226 param_names.emplace(
3227 v.index,
3228 make_pair(
3229 v.name, id2string(method_symbol.name) + "::" + id2string(v.name)));
3230 }
3231
3232 // Assign names to parameters
3233 std::size_t param_index = 0;
3234 for(auto &param : parameters)
3235 {
3236 irep_idt base_name, identifier;
3237
3238 // Construct a sensible base name (base_name) and a fully qualified name
3239 // (identifier) for every parameter of the method under translation,
3240 // regardless of whether we have an LVT or not; and assign it to the
3241 // parameter object (which is stored in the type of the symbol, not in the
3242 // symbol table)
3243 if(param_index == 0 && param.get_this())
3244 {
3245 // my.package.ClassName.myMethodName:(II)I::this
3246 base_name = ID_this;
3247 identifier = id2string(method_symbol.name) + "::" + id2string(base_name);
3248 }
3249 else
3250 {
3251 auto param_name = param_names.find(param_index);
3252 if(param_name != param_names.end())
3253 {
3254 base_name = param_name->second.first;
3255 identifier = param_name->second.second;
3256 }
3257 else
3258 {
3259 // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
3260 // variable slot where the parameter is stored and T is a character
3261 // indicating the type
3262 const typet &type = param.type();
3263 char suffix = java_char_from_type(type);
3264 base_name = "arg" + std::to_string(param_index) + suffix;
3265 identifier =
3266 id2string(method_symbol.name) + "::" + id2string(base_name);
3267 }
3268 }
3269 param.set_base_name(base_name);
3270 param.set_identifier(identifier);
3271
3272 // Build a new symbol for the parameter and add it to the symbol table
3273 parameter_symbolt parameter_symbol;
3274 parameter_symbol.base_name = base_name;
3275 parameter_symbol.mode = ID_java;
3276 parameter_symbol.name = identifier;
3277 parameter_symbol.type = param.type();
3278 symbol_table.insert(parameter_symbol);
3279
3280 param_index += java_local_variable_slots(param.type());
3281 }
3282}
3283
3285 const symbolt &class_symbol,
3287 symbol_table_baset &symbol_table,
3288 message_handlert &message_handler,
3289 size_t max_array_length,
3290 bool throw_assertion_error,
3291 optionalt<ci_lazy_methods_neededt> needed_lazy_methods,
3292 java_string_library_preprocesst &string_preprocess,
3293 const class_hierarchyt &class_hierarchy,
3294 bool threading_support,
3295 const optionalt<prefix_filtert> &method_context,
3296 bool assert_no_exceptions_thrown)
3297
3298{
3300 symbol_table,
3301 message_handler,
3302 max_array_length,
3303 throw_assertion_error,
3304 needed_lazy_methods,
3305 string_preprocess,
3306 class_hierarchy,
3307 threading_support,
3308 assert_no_exceptions_thrown);
3309
3310 java_bytecode_convert_method(class_symbol, method, method_context);
3311}
3312
3319 const irep_idt &classname,
3320 const irep_idt &mangled_method_name) const
3321{
3322 const auto inherited_method = get_inherited_method_implementation(
3323 mangled_method_name, classname, symbol_table);
3324
3325 return inherited_method.has_value();
3326}
3327
3334 const irep_idt &class_identifier,
3335 const irep_idt &component_name) const
3336{
3337 const auto inherited_method = get_inherited_component(
3338 class_identifier, component_name, symbol_table, true);
3339
3340 INVARIANT(
3341 inherited_method.has_value(), "static field should be in symbol table");
3342
3343 return inherited_method->get_full_component_identifier();
3344}
3345
3353 const std::string &tmp_var_prefix,
3354 code_blockt &block,
3355 const bytecode_write_typet write_type,
3356 const irep_idt &identifier)
3357{
3358 const std::function<bool(
3359 const std::function<tvt(const exprt &expr)>, const exprt &expr)>
3360 entry_matches = [&entry_matches](
3361 const std::function<tvt(const exprt &expr)> predicate,
3362 const exprt &expr) {
3363 const tvt &tvres = predicate(expr);
3364 if(tvres.is_unknown())
3365 {
3366 return std::any_of(
3367 expr.operands().begin(),
3368 expr.operands().end(),
3369 [&predicate, &entry_matches](const exprt &expr) {
3370 return entry_matches(predicate, expr);
3371 });
3372 }
3373 else
3374 {
3375 return tvres.is_true();
3376 }
3377 };
3378
3379 // Function that checks whether the expression accesses a member with the
3380 // given identifier name. These accesses are created in the case of `iinc`, or
3381 // non-array `?store` instructions.
3382 const std::function<tvt(const exprt &expr)> has_member_entry = [&identifier](
3383 const exprt &expr) {
3384 const auto member_expr = expr_try_dynamic_cast<member_exprt>(expr);
3385 return !member_expr ? tvt::unknown()
3386 : tvt(member_expr->get_component_name() == identifier);
3387 };
3388
3389 // Function that checks whether the expression is a symbol with the given
3390 // identifier name. These accesses are created in the case of `putstatic` or
3391 // `putfield` instructions.
3392 const std::function<tvt(const exprt &expr)> is_symbol_entry =
3393 [&identifier](const exprt &expr) {
3394 const auto symbol_expr = expr_try_dynamic_cast<symbol_exprt>(expr);
3395 return !symbol_expr ? tvt::unknown()
3396 : tvt(symbol_expr->get_identifier() == identifier);
3397 };
3398
3399 // Function that checks whether the expression is a dereference
3400 // expression. These accesses are created in `?astore` array write
3401 // instructions.
3402 const std::function<tvt(const exprt &expr)> is_dereference_entry =
3403 [](const exprt &expr) {
3404 const auto dereference_expr =
3405 expr_try_dynamic_cast<dereference_exprt>(expr);
3406 return !dereference_expr ? tvt::unknown() : tvt(true);
3407 };
3408
3409 for(auto &stack_entry : stack)
3410 {
3411 bool replace = false;
3412 switch(write_type)
3413 {
3416 replace = entry_matches(is_symbol_entry, stack_entry);
3417 break;
3419 replace = entry_matches(is_dereference_entry, stack_entry);
3420 break;
3422 replace = entry_matches(has_member_entry, stack_entry);
3423 break;
3424 }
3425 if(replace)
3426 {
3428 tmp_var_prefix, stack_entry.type(), block, stack_entry);
3429 }
3430 }
3431}
3432
3435 const std::string &tmp_var_prefix,
3436 const typet &tmp_var_type,
3437 code_blockt &block,
3438 exprt &stack_entry)
3439{
3440 const exprt tmp_var=
3441 tmp_variable(tmp_var_prefix, tmp_var_type);
3442 block.add(code_assignt(tmp_var, stack_entry));
3443 stack_entry=tmp_var;
3444}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
API to expression classes for bitvectors.
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
void branch(goto_modelt &goto_model, const irep_idt &id)
Definition: branch.cpp:22
struct bytecode_infot const bytecode_info[]
#define BC_ifnull
#define BC_jsr
#define BC_invokestatic
#define BC_iinc
#define BC_tableswitch
#define BC_drem
#define BC_d2l
#define BC_dup_x1
#define BC_invokespecial
#define BC_getfield
#define BC_nop
Definition: bytecode_info.h:65
#define BC_pop2
#define BC_instanceof
#define BC_goto
#define BC_arraylength
#define BC_dup_x2
#define BC_ldc
Definition: bytecode_info.h:83
#define BC_ldc2_w
Definition: bytecode_info.h:85
#define BC_ldiv
#define BC_iconst_m1
Definition: bytecode_info.h:67
#define BC_monitorexit
#define BC_new
#define BC_lookupswitch
#define BC_ifge
#define BC_newarray
#define BC_getstatic
#define BC_jsr_w
#define BC_anewarray
#define BC_lrem
#define BC_dup2
#define BC_aconst_null
Definition: bytecode_info.h:66
#define BC_pop
#define BC_return
#define BC_d2i
#define BC_dup
#define BC_athrow
#define BC_ifgt
#define BC_putfield
#define BC_f2i
#define BC_idiv
#define BC_ifeq
#define BC_monitorenter
#define BC_ret
#define BC_goto_w
#define BC_swap
#define BC_iflt
#define BC_checkcast
#define BC_putstatic
#define BC_ldc_w
Definition: bytecode_info.h:84
#define BC_multianewarray
#define BC_dup2_x2
uint8_t u1
Definition: bytecode_info.h:55
#define BC_dup2_x1
#define BC_invokedynamic
#define BC_irem
#define BC_invokeinterface
#define BC_frem
#define BC_ifnonnull
#define BC_invokevirtual
#define BC_ifle
#define BC_f2l
#define BC_ifne
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
Arithmetic right shift.
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition: symbol.h:147
A base class for binary expressions.
Definition: std_expr.h:550
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
Bit-wise AND.
Bit-wise OR.
std::size_t get_width() const
Definition: std_types.h:864
Bit-wise XOR.
The Boolean type.
Definition: std_types.h:36
Non-graph-based representation of the class hierarchy.
An expression describing a method on a class.
Definition: std_expr.h:3272
const irep_idt & base_method_name() const
The name of the method to which this expression is applied as would be seen in the source code.
Definition: std_expr.h:3324
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition: std_expr.h:3309
const irep_idt & get_identifier() const
A unique identifier of the combination of class and method overload to which this expression refers.
Definition: std_expr.h:3332
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition: std_expr.h:3317
A non-fatal assertion, which checks a condition then permits execution to continue.
Definition: std_code.h:270
A codet representing an assignment in the program.
An assumption, which must hold in subsequent code.
Definition: std_code.h:217
A codet representing sequential composition of program statements.
Definition: std_code.h:130
code_operandst & statements()
Definition: std_code.h:138
void append(const code_blockt &extra_block)
Add all the codets from extra_block to the current code_blockt.
Definition: std_code.cpp:89
void add(const codet &code)
Definition: std_code.h:168
codet & find_last_statement()
Definition: std_code.cpp:99
A codet representing the declaration of a local variable.
codet representation of an expression statement.
Definition: std_code.h:1394
codet representation of a "return from a function" statement.
Definition: std_code.h:893
codet representation of a function call statement.
exprt::operandst argumentst
codet representation of a goto statement.
Definition: std_code.h:841
codet representation of an if-then-else statement.
Definition: std_code.h:460
const exprt & cond() const
Definition: std_code.h:478
const codet & then_case() const
Definition: std_code.h:488
codet representation of a label for branch targets.
Definition: std_code.h:959
codet & code()
Definition: std_code.h:977
void set_label(const irep_idt &label)
Definition: std_code.h:972
A statement that catches an exception, assigning the exception in flight to an expression (e....
Definition: std_code.h:1936
Pops an exception handler from the stack of active handlers (i.e.
Definition: std_code.h:1899
Pushes an exception handler, of the form: exception_tag1 -> label1 exception_tag2 -> label2 ....
Definition: std_code.h:1805
exception_listt & exception_list()
Definition: std_code.h:1860
std::vector< exception_list_entryt > exception_listt
Definition: std_code.h:1849
A codet representing a skip statement.
Definition: std_code.h:322
codet representation of a switch-case, i.e. a case statement within a switch.
Definition: std_code.h:1023
void set_default()
Definition: std_code.h:1035
codet representing a switch statement.
Definition: std_code.h:548
void set_base_name(const irep_idt &name)
Definition: std_types.h:585
const irep_idt & get_access() const
Definition: std_types.h:675
void set_is_constructor()
Definition: std_types.h:690
bool has_this() const
Definition: std_types.h:616
const parameterst & parameters() const
Definition: std_types.h:655
const typet & return_type() const
Definition: std_types.h:645
void set_access(const irep_idt &access)
Definition: std_types.h:680
bool get_is_constructor() const
Definition: std_types.h:685
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
const irep_idt & get_statement() const
Definition: std_code_base.h:65
A constant literal expression.
Definition: std_expr.h:2807
Operator to dereference a pointer.
Definition: pointer_expr.h:648
Division.
Definition: std_expr.h:1064
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
bool empty() const
Definition: dstring.h:88
Equality.
Definition: std_expr.h:1225
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:129
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
const source_locationt & source_location() const
Definition: expr.h:230
source_locationt & add_source_location()
Definition: expr.h:235
The Boolean constant false.
Definition: std_expr.h:2865
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
irep_idt class_name() const
irep_idt component_name() const
IEEE-floating-point equality.
Definition: floatbv_expr.h:264
static ieee_float_spect single_precision()
Definition: ieee_float.h:71
static ieee_float_spect double_precision()
Definition: ieee_float.h:77
constant_exprt to_expr() const
Definition: ieee_float.cpp:703
void from_expr(const constant_exprt &expr)
void from_integer(const mp_integer &i)
Definition: ieee_float.cpp:516
The trinary if-then-else operator.
Definition: std_expr.h:2226
constant_exprt largest_expr() const
Return an expression representing the largest value of this type.
constant_exprt smallest_expr() const
Return an expression representing the smallest value of this type.
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:372
const irept & find(const irep_idt &name) const
Definition: irep.cpp:106
const irep_idt & get(const irep_idt &name) const
Definition: irep.cpp:45
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:420
void make_nil()
Definition: irep.h:454
void swap(irept &irep)
Definition: irep.h:442
const irep_idt & id() const
Definition: irep.h:396
const std::string & get_string(const irep_idt &name) const
Definition: irep.h:409
Evaluates to true if the operand is NaN.
Definition: floatbv_expr.h:88
void convert_dup2_x2(exprt::operandst &op, exprt::operandst &results)
optionalt< ci_lazy_methods_neededt > needed_lazy_methods
method_offsett slots_for_parameters
Number of local variable slots used by the JVM to pass parameters upon invocation of the method under...
irep_idt get_static_field(const irep_idt &class_identifier, const irep_idt &component_name) const
Get static field identifier referred to by class_identifier.component_name Note this may be inherited...
void draw_edges_from_ret_to_jsr(address_mapt &address_map, const std::vector< method_offsett > &jsr_ret_targets, const std::vector< std::vector< java_bytecode_parse_treet::instructiont >::const_iterator > &ret_instructions) const
code_ifthenelset convert_if_cmp(const java_bytecode_convert_methodt::address_mapt &address_map, const u1 bytecode, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void create_stack_tmp_var(const std::string &, const typet &, code_blockt &, exprt &)
actually create a temporary variable to hold the value of a stack entry
codet & do_exception_handling(const methodt &method, const std::set< method_offsett > &working_set, method_offsett cur_pc, codet &c)
exprt::operandst & convert_ushr(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt convert_load(const exprt &index, char type_char, size_t address)
Load reference from local variable.
void setup_local_variables(const methodt &m, const address_mapt &amap)
See find_initializers_for_slot above for more detail.
const variablet & find_variable_for_slot(size_t address, variablest &var_list)
See above.
void push(const exprt::operandst &o)
code_blockt convert_store(const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, const method_offsett address, const source_locationt &location)
code_blockt convert_astore(const irep_idt &statement, const exprt::operandst &op, const source_locationt &location)
static irep_idt label(const irep_idt &address)
std::vector< method_offsett > try_catch_handler(method_offsett address, const java_bytecode_parse_treet::methodt::exception_tablet &exception_table) const
code_blockt convert_instructions(const methodt &)
codet & replace_call_to_cprover_assume(source_locationt location, codet &c)
java_string_library_preprocesst & string_preprocess
code_blockt convert_ret(const std::vector< method_offsett > &jsr_ret_targets, const exprt &arg0, const source_locationt &location, const method_offsett address)
codet get_clinit_call(const irep_idt &classname)
Each static access to classname should be prefixed with a check for necessary static init; this retur...
code_blockt convert_putstatic(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, const symbol_exprt &symbol_expr)
code_ifthenelset convert_if(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const irep_idt &id, const mp_integer &number, const source_locationt &location) const
code_ifthenelset convert_ifnull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
code_blockt & get_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address)
'tree' describes a tree of code_blockt objects; this_block is the corresponding block (thus they are ...
codet convert_pop(const irep_idt &statement, const exprt::operandst &op)
exprt::operandst pop(std::size_t n)
exprt::operandst & convert_const(const irep_idt &statement, const constant_exprt &arg0, exprt::operandst &results) const
void convert_checkcast(const exprt &arg0, const exprt::operandst &op, codet &c, exprt::operandst &results) const
void convert_dup2_x1(exprt::operandst &op, exprt::operandst &results)
code_blockt convert_putfield(const fieldref_exprt &arg0, const exprt::operandst &op)
code_blockt convert_multianewarray(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
code_ifthenelset convert_ifnonull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void convert(const symbolt &class_symbol, const methodt &, const optionalt< prefix_filtert > &method_context)
code_blockt convert_parameter_annotations(const methodt &method, const java_method_typet &method_type)
code_switcht convert_switch(const exprt::operandst &op, const java_bytecode_parse_treet::instructiont::argst &args, const source_locationt &location)
void save_stack_entries(const std::string &, code_blockt &, const bytecode_write_typet, const irep_idt &)
Create temporary variables if a write instruction can have undesired side- effects.
void convert_new(const source_locationt &location, const exprt &arg0, codet &c, exprt::operandst &results)
exprt variable(const exprt &arg, char type_char, size_t address)
Returns an expression indicating a local variable suitable to load/store from a bytecode at address a...
std::map< method_offsett, converted_instructiont > address_mapt
static void replace_goto_target(codet &repl, const irep_idt &old_label, const irep_idt &new_label)
Find all goto statements in 'repl' that target 'old_label' and redirect them to 'new_label'.
code_blockt convert_iinc(const exprt &arg0, const exprt &arg1, const source_locationt &location, method_offsett address)
exprt::operandst & convert_cmp2(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
void convert_dup2(exprt::operandst &op, exprt::operandst &results)
optionalt< exprt > convert_invoke_dynamic(const source_locationt &location, std::size_t instruction_address, const exprt &arg0, codet &result_code)
bool is_method_inherited(const irep_idt &classname, const irep_idt &mangled_method_name) const
Returns true iff method methodid from class classname is a method inherited from a class or interface...
void convert_athrow(const source_locationt &location, const exprt::operandst &op, codet &c, exprt::operandst &results) const
typet method_return_type
Return type of the method under conversion.
void convert_getstatic(const source_locationt &source_location, const exprt &arg0, const symbol_exprt &symbol_expr, bool is_assertions_disabled_field, codet &c, exprt::operandst &results)
irep_idt method_id
Fully qualified name of the method under translation.
code_blockt convert_newarray(const source_locationt &location, const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
exprt::operandst & convert_shl(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt::operandst & convert_cmp(const exprt::operandst &op, exprt::operandst &results) const
void pop_residue(std::size_t n)
removes minimum(n, stack.size()) elements from the stack
void convert_invoke(source_locationt location, const irep_idt &statement, class_method_descriptor_exprt &class_method_descriptor, codet &c, exprt::operandst &results)
code_blockt & get_or_create_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address, const address_mapt &amap, bool allow_merge=true)
As above, but this version can additionally create a new branch in the block_tree-node and code_block...
codet convert_monitorenterexit(const irep_idt &statement, const exprt::operandst &op, const source_locationt &source_location)
irep_idt current_method
A copy of method_id :/.
static exprt convert_aload(const irep_idt &statement, const exprt::operandst &op)
symbol_exprt tmp_variable(const std::string &prefix, const typet &type)
const methodst & methods() const
Definition: java_types.h:300
void add_throws_exception(irep_idt exception)
Definition: java_types.h:132
std::vector< parametert > parameterst
Definition: std_types.h:542
void set_is_varargs(bool is_varargs)
Definition: java_types.h:162
void set_is_synthetic(bool is_synthetic)
Definition: java_types.h:172
void set_is_final(bool is_final)
Definition: java_types.h:142
void set_native(bool is_native)
Definition: java_types.h:152
This is a specialization of reference_typet.
Definition: java_types.h:603
codet replace_character_call(code_function_callt call)
Logical right shift.
Extract member of struct or union.
Definition: std_expr.h:2667
source_locationt source_location
Definition: message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & error() const
Definition: message.h:399
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
static eomt eom
Definition: message.h:297
Binary minus.
Definition: std_expr.h:973
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Definition: std_expr.h:1135
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1019
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:138
The NIL expression.
Definition: std_expr.h:2874
Disequality.
Definition: std_expr.h:1283
The null pointer constant.
Definition: pointer_expr.h:723
Boolean OR.
Definition: std_expr.h:2082
Symbol table entry of function parameterThis is a symbol generated as part of type checking.
Definition: symbol.h:171
Given a string of the format '?blah?', will return true when compared against a string that matches a...
Definition: pattern.h:21
The plus expression Associativity is not specified.
Definition: std_expr.h:914
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
The reference type.
Definition: pointer_expr.h:107
Left shift.
A side_effect_exprt representation of a side effect that throws an exception.
Definition: std_code.h:1757
An expression containing a side effect.
Definition: std_code.h:1450
void set_comment(const irep_idt &comment)
void set_property_class(const irep_idt &property_class)
const irep_idt & get_line() const
std::string as_string() const
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:449
Expression to hold a symbol (variable)
Definition: std_expr.h:80
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition: std_expr.h:99
const irep_idt & get_identifier() const
Definition: std_expr.h:109
The symbol table base class interface.
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
virtual std::pair< symbolt &, bool > insert(symbolt symbol)=0
Move or copy a new symbol to the symbol table.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
The symbol table.
Definition: symbol_table.h:14
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
bool is_file_local
Definition: symbol.h:66
irep_idt module
Name of module the symbol belongs to.
Definition: symbol.h:43
bool is_static_lifetime
Definition: symbol.h:65
bool is_thread_local
Definition: symbol.h:65
bool is_type
Definition: symbol.h:61
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
const irep_idt & get_identifier() const
Definition: std_types.h:410
Definition: threeval.h:20
bool is_unknown() const
Definition: threeval.h:27
bool is_true() const
Definition: threeval.h:25
static tvt unknown()
Definition: threeval.h:33
Semantic type conversion.
Definition: std_expr.h:1920
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1928
The type of an expression, extends irept.
Definition: type.h:29
The unary minus expression.
Definition: std_expr.h:390
static irep_idt get_exception_type(const pointer_typet &)
Returns the compile type of an exception.
Fixed-width bit-vector with unsigned binary interpretation.
An exception that is raised for unsupported class signature.
Definition: java_types.h:1132
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
#define forall_operands(it, expr)
Definition: expr.h:18
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
API to expression classes for floating-point arithmetic.
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
static int8_t r
Definition: irep_hash.h:60
void convert_annotations(const java_bytecode_parse_treet::annotationst &parsed_annotations, std::vector< java_annotationt > &java_annotations)
Convert parsed annotations into the symbol table.
void java_bytecode_convert_method_lazy(symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, symbol_tablet &symbol_table, message_handlert &message_handler)
This creates a method symbol in the symtab, but doesn't actually perform method conversion just yet.
void create_parameter_symbols(const java_method_typet::parameterst &parameters, expanding_vectort< std::vector< java_bytecode_convert_methodt::variablet > > &variables, symbol_table_baset &symbol_table)
static void gather_symbol_live_ranges(java_bytecode_convert_methodt::method_offsett pc, const exprt &e, std::map< irep_idt, java_bytecode_convert_methodt::variablet > &result)
static bool is_constructor(const irep_idt &method_name)
static irep_idt get_method_identifier(const irep_idt &class_identifier, const java_bytecode_parse_treet::methodt &method)
void java_bytecode_initialize_parameter_names(symbolt &method_symbol, const java_bytecode_parse_treet::methodt::local_variable_tablet &local_variable_table, symbol_table_baset &symbol_table)
This uses a cut-down version of the logic in java_bytecode_convert_methodt::convert to initialize sym...
static member_exprt to_member(const exprt &pointer, const fieldref_exprt &field_reference, const namespacet &ns)
Build a member exprt for accessing a specific field that may come from a base class.
static exprt conditional_array_cast(const exprt &expr, char type_char)
Add typecast if necessary to expr to make it compatible with array type corresponding to type_char (s...
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, bool throw_assertion_error, optionalt< ci_lazy_methods_neededt > needed_lazy_methods, java_string_library_preprocesst &string_preprocess, const class_hierarchyt &class_hierarchy, bool threading_support, const optionalt< prefix_filtert > &method_context, bool assert_no_exceptions_thrown)
java_method_typet member_type_lazy(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name, const std::string &method_name, message_handlert &message_handler)
Returns the member type for a method, based on signature or descriptor.
void create_parameter_names(const java_bytecode_parse_treet::methodt &m, const irep_idt &method_identifier, java_method_typet::parameterst &parameters, const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
Extracts the names of parameters from the local variable table in the method, and uses it to construc...
void create_method_stub_symbol(const irep_idt &identifier, const irep_idt &base_name, const irep_idt &pretty_name, const typet &type, const irep_idt &declaring_class, symbol_table_baset &symbol_table, message_handlert &message_handler)
static std::size_t get_bytecode_type_width(const typet &ty)
static void adjust_invoke_argument_types(const java_method_typet::parameterst &parameters, code_function_callt::argumentst &arguments)
static void assign_parameter_names(java_method_typet &ftype, const irep_idt &name_prefix, symbol_table_baset &symbol_table)
Iterates through the parameters of the function type ftype, finds a new new name for each parameter a...
static irep_idt get_if_cmp_operator(const u1 bytecode)
JAVA Bytecode Language Conversion.
JAVA Bytecode Language Conversion.
Java-specific exprt subclasses.
irep_idt clinit_wrapper_name(const irep_idt &class_name)
Get the Java static initializer wrapper name for a given class (the wrapper checks if static initiali...
Produce code for simple implementation of String Java libraries.
Representation of a constant Java string.
bool can_cast_expr< java_string_literal_exprt >(const exprt &base)
signedbv_typet java_int_type()
Definition: java_types.cpp:31
typet java_type_from_char(char t)
Constructs a type indicated by the given character:
Definition: java_types.cpp:246
java_reference_typet java_reference_array_type(const struct_tag_typet &subtype)
Definition: java_types.cpp:539
empty_typet java_void_type()
Definition: java_types.cpp:37
char java_char_from_type(const typet &type)
Definition: java_types.cpp:705
optionalt< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
Definition: java_types.cpp:559
signedbv_typet java_byte_type()
Definition: java_types.cpp:55
std::string pretty_signature(const java_method_typet &method_type)
typet java_bytecode_promotion(const typet &type)
Java does not support byte/short return types. These are always promoted.
Definition: java_types.cpp:267
java_reference_typet java_array_type(const char subtype)
Construct an array pointer type.
Definition: java_types.cpp:109
signedbv_typet java_short_type()
Definition: java_types.cpp:49
reference_typet java_reference_type(const typet &subtype)
Definition: java_types.cpp:88
c_bool_typet java_boolean_type()
Definition: java_types.cpp:79
unsignedbv_typet java_char_type()
Definition: java_types.cpp:61
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
Definition: java_types.cpp:144
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:184
const java_class_typet & to_java_class_type(const typet &type)
Definition: java_types.h:582
void set_declaring_class(symbolt &symbol, const irep_idt &declaring_class)
Sets the identifier of the class which declared a given symbol to declaring_class.
Definition: java_utils.cpp:577
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_component(const irep_idt &component_class_id, const irep_idt &component_name, const symbol_tablet &symbol_table, bool include_interfaces)
Finds an inherited component (method or field), taking component visibility into account.
Definition: java_utils.cpp:451
unsigned java_method_parameter_slots(const java_method_typet &t)
Returns the the number of JVM local variables (slots) used by the JVM to pass, upon call,...
Definition: java_utils.cpp:146
optionalt< irep_idt > declaring_class(const symbolt &symbol)
Gets the identifier of the class which declared a given symbol.
Definition: java_utils.cpp:571
irep_idt strip_java_namespace_prefix(const irep_idt &to_strip)
Strip java:: prefix from given identifier.
Definition: java_utils.cpp:410
dereference_exprt checked_dereference(const exprt &expr)
Dereference an expression and flag it for a null-pointer check.
Definition: java_utils.cpp:284
unsigned java_local_variable_slots(const typet &t)
Returns the number of JVM local variables (slots) taken by a local variable that, when translated to ...
Definition: java_utils.cpp:127
void merge_source_location_rec(exprt &expr, const source_locationt &source_location)
Attaches a source location to an expression and all of its subexpressions.
Definition: java_utils.cpp:200
irep_idt lambda_synthetic_class_name(const irep_idt &method_identifier, std::size_t instruction_address)
static symbolt constructor_symbol(synthetic_methods_mapt &synthetic_methods, const irep_idt &synthetic_class_name, java_method_typet constructor_type)
Java lambda code synthesis.
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103
nonstd::optional< T > optionalt
Definition: optional.h:35
Pattern matching for bytecode instructions.
bool can_cast_type< reference_typet >(const typet &type)
Check whether a reference to a typet is a reference_typet.
Definition: pointer_expr.h:135
const reference_typet & to_reference_type(const typet &type)
Cast a typet to a reference_typet.
Definition: pointer_expr.h:148
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:79
Prefix Filtering.
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
static optionalt< smt_termt > get_identifier(const exprt &expr, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_handle_identifiers, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_identifiers)
BigInt mp_integer
Definition: smt_terms.h:12
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:510
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
const code_gotot & to_code_goto(const codet &code)
Definition: std_code.h:875
const code_labelt & to_code_label(const codet &code)
Definition: std_code.h:1004
const code_blockt & to_code_block(const codet &code)
Definition: std_code.h:203
const codet & to_code(const exprt &expr)
API to expression classes.
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:2840
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:744
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:474
bool can_cast_type< bitvector_typet >(const typet &type)
Check whether a reference to a typet is a bitvector_typet.
Definition: std_types.h:887
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
const char * mnemonic
Definition: bytecode_info.h:46
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.
std::vector< local_variablet > local_variable_tablet
static optionalt< annotationt > find_annotation(const annotationst &annotations, const irep_idt &annotation_type_name)
Find an annotation given its name.
Over-approximative uncaught exceptions analysis.