cprover
java_bytecode_convert_method.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: JAVA Bytecode Language Conversion
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #ifdef DEBUG
13 #include <iostream>
14 #endif
15 
16 #include "bytecode_info.h"
19 #include "java_expr.h"
23 #include "java_types.h"
24 #include "java_utils.h"
25 #include "lambda_synthesis.h"
26 #include "pattern.h"
27 #include "remove_exceptions.h"
28 
29 #include <util/arith_tools.h>
30 #include <util/c_types.h>
31 #include <util/expr_initializer.h>
32 #include <util/floatbv_expr.h>
33 #include <util/ieee_float.h>
34 #include <util/invariant.h>
35 #include <util/namespace.h>
36 #include <util/prefix.h>
37 #include <util/simplify_expr.h>
38 #include <util/std_expr.h>
39 #include <util/string2int.h>
40 #include <util/threeval.h>
41 
42 #include <goto-programs/cfg.h>
46 
49 
50 #include <algorithm>
51 #include <functional>
52 #include <limits>
53 #include <regex>
54 #include <unordered_set>
55 
69  java_method_typet &ftype,
70  const irep_idt &name_prefix,
71  symbol_table_baset &symbol_table)
72 {
73  java_method_typet::parameterst &parameters = ftype.parameters();
74 
75  // Mostly borrowed from java_bytecode_convert.cpp; maybe factor this out.
76  // assign names to parameters
77  for(std::size_t i=0; i<parameters.size(); ++i)
78  {
79  irep_idt base_name, identifier;
80 
81  if(i==0 && parameters[i].get_this())
82  base_name = ID_this;
83  else
84  base_name="stub_ignored_arg"+std::to_string(i);
85 
86  identifier=id2string(name_prefix)+"::"+id2string(base_name);
87  parameters[i].set_base_name(base_name);
88  parameters[i].set_identifier(identifier);
89 
90  // add to symbol table
91  parameter_symbolt parameter_symbol;
92  parameter_symbol.base_name=base_name;
93  parameter_symbol.mode=ID_java;
94  parameter_symbol.name=identifier;
95  parameter_symbol.type=parameters[i].type();
96  symbol_table.add(parameter_symbol);
97  }
98 }
99 
101  const irep_idt &identifier,
102  const irep_idt &base_name,
103  const irep_idt &pretty_name,
104  const typet &type,
105  const irep_idt &declaring_class,
106  symbol_table_baset &symbol_table,
107  message_handlert &message_handler)
108 {
109  messaget log(message_handler);
110 
111  symbolt symbol;
112  symbol.name = identifier;
113  symbol.base_name = base_name;
114  symbol.pretty_name = pretty_name;
115  symbol.type = type;
116  symbol.type.set(ID_access, ID_private);
117  to_java_method_type(symbol.type).set_is_final(true);
118  symbol.value.make_nil();
119  symbol.mode = ID_java;
121  to_java_method_type(symbol.type), symbol.name, symbol_table);
123 
124  log.debug() << "Generating codet: new opaque symbol: method '" << symbol.name
125  << "'" << messaget::eom;
126  symbol_table.add(symbol);
127 }
128 
129 static bool is_constructor(const irep_idt &method_name)
130 {
131  return id2string(method_name).find("<init>") != std::string::npos;
132 }
133 
135 {
136  if(stack.size()<n)
137  {
138  log.error() << "malformed bytecode (pop too high)" << messaget::eom;
139  throw 0;
140  }
141 
142  exprt::operandst operands;
143  operands.resize(n);
144  for(std::size_t i=0; i<n; i++)
145  operands[i]=stack[stack.size()-n+i];
146 
147  stack.resize(stack.size()-n);
148  return operands;
149 }
150 
153 {
154  std::size_t residue_size=std::min(n, stack.size());
155 
156  stack.resize(stack.size()-residue_size);
157 }
158 
160 {
161  stack.resize(stack.size()+o.size());
162 
163  for(std::size_t i=0; i<o.size(); i++)
164  stack[stack.size()-o.size()+i]=o[i];
165 }
166 
167 // JVM program locations
169 {
170  return "pc"+id2string(address);
171 }
172 
174  const std::string &prefix,
175  const typet &type)
176 {
177  irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size());
178  irep_idt identifier=id2string(current_method)+"::"+id2string(base_name);
179 
180  auxiliary_symbolt tmp_symbol;
181  tmp_symbol.base_name=base_name;
182  tmp_symbol.is_static_lifetime=false;
183  tmp_symbol.mode=ID_java;
184  tmp_symbol.name=identifier;
185  tmp_symbol.type=type;
186  symbol_table.add(tmp_symbol);
187 
188  symbol_exprt result(identifier, type);
189  result.set(ID_C_base_name, base_name);
190  tmp_vars.push_back(result);
191 
192  return result;
193 }
194 
207  const exprt &arg,
208  char type_char,
209  size_t address)
210 {
211  const std::size_t number_int =
212  numeric_cast_v<std::size_t>(to_constant_expr(arg));
213  variablest &var_list=variables[number_int];
214 
215  // search variable in list for correct frame / address if necessary
216  const variablet &var=
217  find_variable_for_slot(address, var_list);
218 
219  if(!var.symbol_expr.get_identifier().empty())
220  return var.symbol_expr;
221 
222  // an unnamed local variable
223  irep_idt base_name = "anonlocal::" + std::to_string(number_int) + type_char;
224  irep_idt identifier = id2string(current_method) + "::" + id2string(base_name);
225 
226  symbol_exprt result(identifier, java_type_from_char(type_char));
227  result.set(ID_C_base_name, base_name);
228  used_local_names.insert(result);
229  return std::move(result);
230 }
231 
240  const std::string &descriptor,
241  const optionalt<std::string> &signature,
242  const std::string &class_name,
243  const std::string &method_name,
244  message_handlert &message_handler)
245 {
246  // In order to construct the method type, we can either use signature or
247  // descriptor. Since only signature contains the generics info, we want to
248  // use signature whenever possible. We revert to using descriptor if (1) the
249  // signature does not exist, (2) an unsupported generics are present or
250  // (3) in the special case when the number of parameters in the descriptor
251  // does not match the number of parameters in the signature - e.g. for
252  // certain types of inner classes and enums (see unit tests for examples).
253 
254  messaget message(message_handler);
255 
256  auto member_type_from_descriptor = java_type_from_string(descriptor);
257  INVARIANT(
258  member_type_from_descriptor.has_value() &&
259  member_type_from_descriptor->id() == ID_code,
260  "Must be code type");
261  if(signature.has_value())
262  {
263  try
264  {
265  auto member_type_from_signature =
266  java_type_from_string(signature.value(), class_name);
267  INVARIANT(
268  member_type_from_signature.has_value() &&
269  member_type_from_signature->id() == ID_code,
270  "Must be code type");
271  if(
272  to_java_method_type(*member_type_from_signature).parameters().size() ==
273  to_java_method_type(*member_type_from_descriptor).parameters().size())
274  {
275  return to_java_method_type(*member_type_from_signature);
276  }
277  else
278  {
279  message.debug() << "Method: " << class_name << "." << method_name
280  << "\n signature: " << signature.value()
281  << "\n descriptor: " << descriptor
282  << "\n different number of parameters, reverting to "
283  "descriptor"
284  << message.eom;
285  }
286  }
288  {
289  message.debug() << "Method: " << class_name << "." << method_name
290  << "\n could not parse signature: " << signature.value()
291  << "\n " << e.what() << "\n"
292  << " reverting to descriptor: " << descriptor
293  << message.eom;
294  }
295  }
296  return to_java_method_type(*member_type_from_descriptor);
297 }
298 
310  symbolt &class_symbol,
311  const irep_idt &method_identifier,
313  symbol_tablet &symbol_table,
314  message_handlert &message_handler)
315 {
316  symbolt method_symbol;
317 
318  java_method_typet member_type = member_type_lazy(
319  m.descriptor,
320  m.signature,
321  id2string(class_symbol.name),
322  id2string(m.base_name),
323  message_handler);
324 
325  method_symbol.name=method_identifier;
326  method_symbol.base_name=m.base_name;
327  method_symbol.mode=ID_java;
328  method_symbol.location=m.source_location;
329  method_symbol.location.set_function(method_identifier);
330 
331  if(m.is_public)
332  member_type.set_access(ID_public);
333  else if(m.is_protected)
334  member_type.set_access(ID_protected);
335  else if(m.is_private)
336  member_type.set_access(ID_private);
337  else
338  member_type.set_access(ID_default);
339 
340  if(m.is_synchronized)
341  member_type.set(ID_is_synchronized, true);
342  if(m.is_static)
343  member_type.set(ID_is_static, true);
344  member_type.set_native(m.is_native);
345  member_type.set_is_varargs(m.is_varargs);
346  member_type.set_is_synthetic(m.is_synthetic);
347 
348  if(m.is_bridge)
349  member_type.set(ID_is_bridge_method, m.is_bridge);
350 
351  // do we need to add 'this' as a parameter?
352  if(!m.is_static)
353  {
354  java_method_typet::parameterst &parameters = member_type.parameters();
355  const reference_typet object_ref_type =
357  java_method_typet::parametert this_p(object_ref_type);
358  this_p.set_this();
359  parameters.insert(parameters.begin(), this_p);
360  }
361 
362  const std::string signature_string = pretty_signature(member_type);
363 
364  if(is_constructor(method_symbol.base_name))
365  {
366  // we use full.class_name(...) as pretty name
367  // for constructors -- the idea is that they have
368  // an empty declarator.
369  method_symbol.pretty_name=
370  id2string(class_symbol.pretty_name) + signature_string;
371 
372  member_type.set_is_constructor();
373  }
374  else
375  {
376  method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
377  id2string(m.base_name) + signature_string;
378  }
379 
380  // Load annotations
381  if(!m.annotations.empty())
382  {
384  m.annotations,
385  type_checked_cast<annotated_typet>(static_cast<typet &>(member_type))
386  .get_annotations());
387  }
388 
389  method_symbol.type=member_type;
390  // Not used in jbmc at present, but other codebases that use jbmc as a library
391  // use this information.
392  method_symbol.type.set(ID_C_abstract, m.is_abstract);
393  set_declaring_class(method_symbol, class_symbol.name);
394 
396  m.annotations, "java::org.cprover.MustNotThrow"))
397  {
398  method_symbol.type.set(ID_C_must_not_throw, true);
399  }
400 
401  // Assign names to parameters in the method symbol
402  java_method_typet &method_type = to_java_method_type(method_symbol.type);
403  method_type.set_is_final(m.is_final);
404  java_method_typet::parameterst &parameters = method_type.parameters();
405  java_bytecode_convert_methodt::method_offsett slots_for_parameters =
406  java_method_parameter_slots(method_type);
408  m, method_identifier, parameters, slots_for_parameters);
409 
410  symbol_table.add(method_symbol);
411 
412  if(!m.is_static)
413  {
414  java_class_typet::methodt new_method{method_symbol.name, method_type};
415  new_method.set_base_name(method_symbol.base_name);
416  new_method.set_pretty_name(method_symbol.pretty_name);
417  new_method.set_access(member_type.get_access());
418  new_method.set_descriptor(m.descriptor);
419 
420  to_java_class_type(class_symbol.type)
421  .methods()
422  .emplace_back(std::move(new_method));
423  }
424 }
425 
427  const irep_idt &class_identifier,
429 {
430  return
431  id2string(class_identifier) + "." + id2string(method.name) + ":" +
432  method.descriptor;
433 }
434 
437  const irep_idt &method_identifier,
438  java_method_typet::parameterst &parameters,
439  const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
440 {
441  auto variables = variablest{};
442  // Find parameter names in the local variable table
443  // and store the result in the external variables vector
444  for(const auto &v : m.local_variable_table)
445  {
446  // Skip this variable if it is not a method parameter
447  if(v.index >= slots_for_parameters)
448  continue;
449 
450  std::ostringstream id_oss;
451  id_oss << method_identifier << "::" << v.name;
452  irep_idt identifier(id_oss.str());
453  symbol_exprt result = symbol_exprt::typeless(identifier);
454  result.set(ID_C_base_name, v.name);
455 
456  // Create a new variablet in the variables vector; in fact this entry will
457  // be rewritten below in the loop that iterates through the method
458  // parameters; the only field that seem to be useful to write here is the
459  // symbol_expr, others will be rewritten
460  variables[v.index].emplace_back(result, v.start_pc, v.length);
461  }
462 
463  // The variables is a expanding_vectort, and the loop above may have expanded
464  // the vector introducing gaps where the entries are empty vectors. We now
465  // make sure that the vector of each LV slot contains exactly one variablet,
466  // which we will add below
467  std::size_t param_index = 0;
468  for(const auto &param : parameters)
469  {
470  INVARIANT(
471  variables[param_index].size() <= 1,
472  "should have at most one entry per index");
473  param_index += java_local_variable_slots(param.type());
474  }
475  INVARIANT(
476  param_index == slots_for_parameters,
477  "java_parameter_count and local computation must agree");
478  param_index = 0;
479  for(auto &param : parameters)
480  {
481  irep_idt base_name, identifier;
482 
483  // Construct a sensible base name (base_name) and a fully qualified name
484  // (identifier) for every parameter of the method under translation,
485  // regardless of whether we have an LVT or not; and assign it to the
486  // parameter object (which is stored in the type of the symbol, not in the
487  // symbol table)
488  if(param_index == 0 && param.get_this())
489  {
490  // my.package.ClassName.myMethodName:(II)I::this
491  base_name = ID_this;
492  identifier = id2string(method_identifier) + "::" + id2string(base_name);
493  }
494  else if(!variables[param_index].empty())
495  {
496  // if already present in the LVT ...
497  base_name = variables[param_index][0].symbol_expr.get(ID_C_base_name);
498  identifier = variables[param_index][0].symbol_expr.get(ID_identifier);
499  }
500  else
501  {
502  // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
503  // variable slot where the parameter is stored and T is a character
504  // indicating the type
505  char suffix = java_char_from_type(param.type());
506  base_name = "arg" + std::to_string(param_index) + suffix;
507  identifier = id2string(method_identifier) + "::" + id2string(base_name);
508  }
509  param.set_base_name(base_name);
510  param.set_identifier(identifier);
511  param_index += java_local_variable_slots(param.type());
512  }
513  // The parameter slots detected in this function should agree with what
514  // java_parameter_count() thinks about this method
515  INVARIANT(
516  param_index == slots_for_parameters,
517  "java_parameter_count and local computation must agree");
518 }
519 
521  const java_method_typet::parameterst &parameters,
522  expanding_vectort<std::vector<java_bytecode_convert_methodt::variablet>>
523  &variables,
524  symbol_table_baset &symbol_table)
525 {
526  std::size_t param_index = 0;
527  for(const auto &param : parameters)
528  {
529  parameter_symbolt parameter_symbol;
530  parameter_symbol.base_name = param.get_base_name();
531  parameter_symbol.mode = ID_java;
532  parameter_symbol.name = param.get_identifier();
533  parameter_symbol.type = param.type();
534  symbol_table.add(parameter_symbol);
535 
536  // Add as a JVM local variable
537  variables[param_index].clear();
538  variables[param_index].emplace_back(
539  parameter_symbol.symbol_expr(),
540  0,
541  std::numeric_limits<size_t>::max(),
542  true);
543  param_index += java_local_variable_slots(param.type());
544  }
545 }
546 
548  const symbolt &class_symbol,
549  const methodt &m,
550  const optionalt<prefix_filtert> &method_context)
551 {
552  // Construct the fully qualified method name
553  // (e.g. "my.package.ClassName.myMethodName:(II)I") and query the symbol table
554  // to retrieve the symbol (constructed by java_bytecode_convert_method_lazy)
555  // associated to the method
556  const irep_idt method_identifier =
557  get_method_identifier(class_symbol.name, m);
558 
559  method_id = method_identifier;
561  symbol_table.get_writeable_ref(method_identifier), class_symbol.name);
562 
563  // Obtain a std::vector of java_method_typet::parametert objects from the
564  // (function) type of the symbol
565  // Don't try to hang on to this reference into the symbol table, as we're
566  // about to create symbols for the method's parameters, which would invalidate
567  // the reference. Instead, copy the type here, set it up, then assign it back
568  // to the symbol later.
569  java_method_typet method_type =
570  to_java_method_type(symbol_table.lookup_ref(method_identifier).type);
571  method_type.set_is_final(m.is_final);
572  method_return_type = method_type.return_type();
573  java_method_typet::parameterst &parameters = method_type.parameters();
574 
575  // Determine the number of local variable slots used by the JVM to maintain
576  // the formal parameters
578 
579  log.debug() << "Generating codet: class " << class_symbol.name << ", method "
580  << m.name << messaget::eom;
581 
582  // Add parameter symbols to the symbol table
584 
585  symbolt &method_symbol = symbol_table.get_writeable_ref(method_identifier);
586 
587  // Check the fields that can't change are valid
588  INVARIANT(
589  method_symbol.name == method_identifier,
590  "Name of method symbol shouldn't change");
591  INVARIANT(
592  method_symbol.base_name == m.base_name,
593  "Base name of method symbol shouldn't change");
594  INVARIANT(
595  method_symbol.module.empty(),
596  "Method symbol shouldn't have module");
597  // Update the symbol for the method
598  method_symbol.mode=ID_java;
599  method_symbol.location=m.source_location;
600  method_symbol.location.set_function(method_identifier);
601 
602  for(const auto &exception_name : m.throws_exception_table)
603  method_type.add_throws_exception(exception_name);
604 
605  const std::string signature_string = pretty_signature(method_type);
606 
607  // Set up the pretty name for the method entry in the symbol table.
608  // The pretty name of a constructor includes the base name of the class
609  // instead of the internal method name "<init>". For regular methods, it's
610  // just the base name of the method.
611  if(is_constructor(method_symbol.base_name))
612  {
613  // we use full.class_name(...) as pretty name
614  // for constructors -- the idea is that they have
615  // an empty declarator.
616  method_symbol.pretty_name =
617  id2string(class_symbol.pretty_name) + signature_string;
618  INVARIANT(
619  method_type.get_is_constructor(),
620  "Member type should have already been marked as a constructor");
621  }
622  else
623  {
624  method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
625  id2string(m.base_name) + signature_string;
626  }
627 
628  method_symbol.type = method_type;
629  current_method = method_symbol.name;
630  method_has_this = method_type.has_this();
631  if((!m.is_abstract) && (!m.is_native))
632  {
633  // Do not convert if method is not in context
634  if(!method_context || (*method_context)(id2string(method_identifier)))
635  {
636  code_blockt code{convert_parameter_annotations(m, method_type)};
637  code.append(convert_instructions(m));
638  method_symbol.value = std::move(code);
639  }
640  }
641 }
642 
643 static irep_idt get_if_cmp_operator(const u1 bytecode)
644 {
645  if(bytecode == patternt("if_?cmplt"))
646  return ID_lt;
647  if(bytecode == patternt("if_?cmple"))
648  return ID_le;
649  if(bytecode == patternt("if_?cmpgt"))
650  return ID_gt;
651  if(bytecode == patternt("if_?cmpge"))
652  return ID_ge;
653  if(bytecode == patternt("if_?cmpeq"))
654  return ID_equal;
655  if(bytecode == patternt("if_?cmpne"))
656  return ID_notequal;
657 
658  throw "unhandled java comparison instruction";
659 }
660 
670  const exprt &pointer,
671  const fieldref_exprt &field_reference,
672  const namespacet &ns)
673 {
674  struct_tag_typet class_type(field_reference.class_name());
675 
676  const exprt typed_pointer =
678 
679  const irep_idt &component_name = field_reference.component_name();
680 
681  exprt accessed_object = checked_dereference(typed_pointer);
682  const auto type_of = [&ns](const exprt &object) {
683  return to_struct_type(ns.follow(object.type()));
684  };
685 
686  // The field access is described as being against a particular type, but it
687  // may in fact belong to any ancestor type.
688  while(type_of(accessed_object).get_component(component_name).is_nil())
689  {
690  const auto components = type_of(accessed_object).components();
691  INVARIANT(
692  components.size() != 0,
693  "infer_opaque_type_fields should guarantee that a member access has a "
694  "corresponding field");
695 
696  // Base class access is always done through the first component
697  accessed_object = member_exprt(accessed_object, components.front());
698  }
699  return member_exprt(
700  accessed_object, type_of(accessed_object).get_component(component_name));
701 }
702 
709  codet &repl,
710  const irep_idt &old_label,
711  const irep_idt &new_label)
712 {
713  const auto &stmt=repl.get_statement();
714  if(stmt==ID_goto)
715  {
716  auto &g=to_code_goto(repl);
717  if(g.get_destination()==old_label)
718  g.set_destination(new_label);
719  }
720  else
721  {
722  for(auto &op : repl.operands())
723  if(op.id()==ID_code)
724  replace_goto_target(to_code(op), old_label, new_label);
725  }
726 }
727 
743  block_tree_nodet &tree,
744  code_blockt &this_block,
745  method_offsett address_start,
746  method_offsett address_limit,
747  method_offsett next_block_start_address)
748 {
749  address_mapt dummy;
751  tree,
752  this_block,
753  address_start,
754  address_limit,
755  next_block_start_address,
756  dummy,
757  false);
758 }
759 
780  block_tree_nodet &tree,
781  code_blockt &this_block,
782  method_offsett address_start,
783  method_offsett address_limit,
784  method_offsett next_block_start_address,
785  const address_mapt &amap,
786  bool allow_merge)
787 {
788  // Check the tree shape invariant:
789  assert(tree.branch.size()==tree.branch_addresses.size());
790 
791  // If there are no child blocks, return this.
792  if(tree.leaf)
793  return this_block;
794  assert(!tree.branch.empty());
795 
796  // Find child block starting > address_start:
797  const auto afterstart=
798  std::upper_bound(
799  tree.branch_addresses.begin(),
800  tree.branch_addresses.end(),
801  address_start);
802  assert(afterstart!=tree.branch_addresses.begin());
803  auto findstart=afterstart;
804  --findstart;
805  auto child_offset=
806  std::distance(tree.branch_addresses.begin(), findstart);
807 
808  // Find child block starting >= address_limit:
809  auto findlim=
810  std::lower_bound(
811  tree.branch_addresses.begin(),
812  tree.branch_addresses.end(),
813  address_limit);
814  const auto findlim_block_start_address =
815  findlim == tree.branch_addresses.end() ? next_block_start_address
816  : (*findlim);
817 
818  // If all children are in scope, return this.
819  if(findstart==tree.branch_addresses.begin() &&
820  findlim==tree.branch_addresses.end())
821  return this_block;
822 
823  // Find the child code_blockt where the queried range begins:
824  auto child_iter = this_block.statements().begin();
825  // Skip any top-of-block declarations;
826  // all other children are labelled subblocks.
827  while(child_iter != this_block.statements().end() &&
828  child_iter->get_statement() == ID_decl)
829  ++child_iter;
830  assert(child_iter != this_block.statements().end());
831  std::advance(child_iter, child_offset);
832  assert(child_iter != this_block.statements().end());
833  auto &child_label=to_code_label(*child_iter);
834  auto &child_block=to_code_block(child_label.code());
835 
836  bool single_child(afterstart==findlim);
837  if(single_child)
838  {
839  // Range wholly contained within a child block
841  tree.branch[child_offset],
842  child_block,
843  address_start,
844  address_limit,
845  findlim_block_start_address,
846  amap,
847  allow_merge);
848  }
849 
850  // Otherwise we're being asked for a range of subblocks, but not all of them.
851  // If it's legal to draw a new lexical scope around the requested subset,
852  // do so; otherwise just return this block.
853 
854  // This can be a new lexical scope if all incoming edges target the
855  // new block header, or come from within the suggested new block.
856 
857  // If modifying the block tree is forbidden, give up and return this:
858  if(!allow_merge)
859  return this_block;
860 
861  // Check for incoming control-flow edges targeting non-header
862  // blocks of the new proposed block range:
863  auto checkit=amap.find(*findstart);
864  assert(checkit!=amap.end());
865  ++checkit; // Skip the header, which can have incoming edges from outside.
866  for(;
867  checkit!=amap.end() && (checkit->first)<(findlim_block_start_address);
868  ++checkit)
869  {
870  for(auto p : checkit->second.predecessors)
871  {
872  if(p<(*findstart) || p>=findlim_block_start_address)
873  {
874  log.debug() << "Generating codet: "
875  << "warning: refusing to create lexical block spanning "
876  << (*findstart) << "-" << findlim_block_start_address
877  << " due to incoming edge " << p << " -> " << checkit->first
878  << messaget::eom;
879  return this_block;
880  }
881  }
882  }
883 
884  // All incoming edges are acceptable! Create a new block wrapping
885  // the relevant children. Borrow the header block's label, and redirect
886  // any block-internal edges to target the inner header block.
887 
888  const irep_idt child_label_name=child_label.get_label();
889  std::string new_label_str = id2string(child_label_name);
890  new_label_str+='$';
891  irep_idt new_label_irep(new_label_str);
892 
893  code_labelt newlabel(child_label_name, code_blockt());
894  code_blockt &newblock=to_code_block(newlabel.code());
895  auto nblocks=std::distance(findstart, findlim);
896  assert(nblocks>=2);
897  log.debug() << "Generating codet: combining "
898  << std::distance(findstart, findlim) << " blocks for addresses "
899  << (*findstart) << "-" << findlim_block_start_address
900  << messaget::eom;
901 
902  // Make a new block containing every child of interest:
903  auto &this_block_children = this_block.statements();
904  assert(tree.branch.size()==this_block_children.size());
905  for(auto blockidx=child_offset, blocklim=child_offset+nblocks;
906  blockidx!=blocklim;
907  ++blockidx)
908  newblock.add(this_block_children[blockidx]);
909 
910  // Relabel the inner header:
911  to_code_label(newblock.statements()[0]).set_label(new_label_irep);
912  // Relabel internal gotos:
913  replace_goto_target(newblock, child_label_name, new_label_irep);
914 
915  // Remove the now-empty sibling blocks:
916  auto delfirst=this_block_children.begin();
917  std::advance(delfirst, child_offset+1);
918  auto dellim=delfirst;
919  std::advance(dellim, nblocks-1);
920  this_block_children.erase(delfirst, dellim);
921  this_block_children[child_offset].swap(newlabel);
922 
923  // Perform the same transformation on the index tree:
924  block_tree_nodet newnode;
925  auto branchstart=tree.branch.begin();
926  std::advance(branchstart, child_offset);
927  auto branchlim=branchstart;
928  std::advance(branchlim, nblocks);
929  for(auto branchiter=branchstart; branchiter!=branchlim; ++branchiter)
930  newnode.branch.push_back(std::move(*branchiter));
931  ++branchstart;
932  tree.branch.erase(branchstart, branchlim);
933 
934  assert(tree.branch.size()==this_block_children.size());
935 
936  auto branchaddriter=tree.branch_addresses.begin();
937  std::advance(branchaddriter, child_offset);
938  auto branchaddrlim=branchaddriter;
939  std::advance(branchaddrlim, nblocks);
940  newnode.branch_addresses.insert(
941  newnode.branch_addresses.begin(),
942  branchaddriter,
943  branchaddrlim);
944 
945  ++branchaddriter;
946  tree.branch_addresses.erase(branchaddriter, branchaddrlim);
947 
948  tree.branch[child_offset]=std::move(newnode);
949 
950  assert(tree.branch.size()==tree.branch_addresses.size());
951 
952  return
955  this_block_children[child_offset]).code());
956 }
957 
960  const exprt &e,
961  std::map<irep_idt, java_bytecode_convert_methodt::variablet> &result)
962 {
963  if(e.id()==ID_symbol)
964  {
965  const auto &symexpr=to_symbol_expr(e);
966  auto findit = result.emplace(
967  std::piecewise_construct,
968  std::forward_as_tuple(symexpr.get_identifier()),
969  std::forward_as_tuple(symexpr, pc, 1));
970  if(!findit.second)
971  {
972  auto &var = findit.first->second;
973 
974  if(pc<var.start_pc)
975  {
976  var.length+=(var.start_pc-pc);
977  var.start_pc=pc;
978  }
979  else
980  {
981  var.length=std::max(var.length, (pc-var.start_pc)+1);
982  }
983  }
984  }
985  else
986  {
987  forall_operands(it, e)
988  gather_symbol_live_ranges(pc, *it, result);
989  }
990 }
991 
998  const irep_idt &classname)
999 {
1000  auto findit = symbol_table.symbols.find(clinit_wrapper_name(classname));
1001  if(findit == symbol_table.symbols.end())
1002  return code_skipt();
1003  else
1004  {
1005  const code_function_callt ret(findit->second.symbol_expr());
1007  needed_lazy_methods->add_needed_method(findit->second.name);
1008  return ret;
1009  }
1010 }
1011 
1012 static std::size_t get_bytecode_type_width(const typet &ty)
1013 {
1014  if(ty.id()==ID_pointer)
1015  return 32;
1016  return to_bitvector_type(ty).get_width();
1017 }
1018 
1020  const methodt &method,
1021  const java_method_typet &method_type)
1022 {
1023  code_blockt code;
1024 
1025  // Consider parameter annotations
1026  const java_method_typet::parameterst &parameters(method_type.parameters());
1027  std::size_t param_index = method_type.has_this() ? 1 : 0;
1029  parameters.size() >= method.parameter_annotations.size() + param_index,
1030  "parameters and parameter annotations mismatch");
1031  for(const auto &param_annotations : method.parameter_annotations)
1032  {
1033  // NotNull annotations are not standardized. We support these:
1034  if(
1036  param_annotations, "java::javax.validation.constraints.NotNull") ||
1038  param_annotations, "java::org.jetbrains.annotations.NotNull") ||
1040  param_annotations, "org.eclipse.jdt.annotation.NonNull") ||
1042  param_annotations, "java::edu.umd.cs.findbugs.annotations.NonNull"))
1043  {
1044  const irep_idt &param_identifier =
1045  parameters[param_index].get_identifier();
1046  const symbolt &param_symbol = symbol_table.lookup_ref(param_identifier);
1047  const auto param_type =
1048  type_try_dynamic_cast<pointer_typet>(param_symbol.type);
1049  if(param_type)
1050  {
1051  code_assertt code_assert(notequal_exprt(
1052  param_symbol.symbol_expr(), null_pointer_exprt(*param_type)));
1053  source_locationt check_loc = method.source_location;
1054  check_loc.set_comment("Not null annotation check");
1055  check_loc.set_property_class("not-null-annotation-check");
1056  code_assert.add_source_location() = check_loc;
1057 
1058  code.add(std::move(code_assert));
1059  }
1060  }
1061  ++param_index;
1062  }
1063 
1064  return code;
1065 }
1066 
1069 {
1070  const instructionst &instructions=method.instructions;
1071 
1072  // Run a worklist algorithm, assuming that the bytecode has not
1073  // been tampered with. See "Leroy, X. (2003). Java bytecode
1074  // verification: algorithms and formalizations. Journal of Automated
1075  // Reasoning, 30(3-4), 235-269." for a more complete treatment.
1076 
1077  // first pass: get targets and map addresses to instructions
1078 
1079  address_mapt address_map;
1080  std::set<method_offsett> targets;
1081 
1082  std::vector<method_offsett> jsr_ret_targets;
1083  std::vector<instructionst::const_iterator> ret_instructions;
1084 
1085  for(auto i_it = instructions.begin(); i_it != instructions.end(); i_it++)
1086  {
1088  std::pair<address_mapt::iterator, bool> a_entry=
1089  address_map.insert(std::make_pair(i_it->address, ins));
1090  assert(a_entry.second);
1091  // addresses are strictly increasing, hence we must have inserted
1092  // a new maximal key
1093  assert(a_entry.first==--address_map.end());
1094 
1095  const auto bytecode = i_it->bytecode;
1096  const std::string statement = bytecode_info[i_it->bytecode].mnemonic;
1097 
1098  // clang-format off
1099  if(bytecode != BC_goto &&
1100  bytecode != BC_return &&
1101  bytecode != patternt("?return") &&
1102  bytecode != BC_athrow &&
1103  bytecode != BC_jsr &&
1104  bytecode != BC_jsr_w &&
1105  bytecode != BC_ret)
1106  {
1107  // clang-format on
1108  instructionst::const_iterator next=i_it;
1109  if(++next!=instructions.end())
1110  a_entry.first->second.successors.push_back(next->address);
1111  }
1112 
1113  // clang-format off
1114  if(bytecode == BC_athrow ||
1115  bytecode == BC_putfield ||
1116  bytecode == BC_getfield ||
1117  bytecode == BC_checkcast ||
1118  bytecode == BC_newarray ||
1119  bytecode == BC_anewarray ||
1120  bytecode == BC_idiv ||
1121  bytecode == BC_ldiv ||
1122  bytecode == BC_irem ||
1123  bytecode == BC_lrem ||
1124  bytecode == patternt("?astore") ||
1125  bytecode == patternt("?aload") ||
1126  bytecode == BC_invokestatic ||
1127  bytecode == BC_invokevirtual ||
1128  bytecode == BC_invokespecial ||
1129  bytecode == BC_invokeinterface ||
1130  (threading_support &&
1131  (bytecode == BC_monitorenter || bytecode == BC_monitorexit)))
1132  {
1133  // clang-format on
1134  const std::vector<method_offsett> handler =
1135  try_catch_handler(i_it->address, method.exception_table);
1136  std::list<method_offsett> &successors = a_entry.first->second.successors;
1137  successors.insert(successors.end(), handler.begin(), handler.end());
1138  targets.insert(handler.begin(), handler.end());
1139  }
1140 
1141  // clang-format off
1142  if(bytecode == BC_goto ||
1143  bytecode == patternt("if_?cmp??") ||
1144  bytecode == patternt("if??") ||
1145  bytecode == BC_ifnonnull ||
1146  bytecode == BC_ifnull ||
1147  bytecode == BC_jsr ||
1148  bytecode == BC_jsr_w)
1149  {
1150  // clang-format on
1151  PRECONDITION(!i_it->args.empty());
1152 
1153  auto target = numeric_cast_v<unsigned>(to_constant_expr(i_it->args[0]));
1154  targets.insert(target);
1155 
1156  a_entry.first->second.successors.push_back(target);
1157 
1158  if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1159  {
1160  auto next = std::next(i_it);
1162  next != instructions.end(), "jsr should have valid return address");
1163  targets.insert(next->address);
1164  jsr_ret_targets.push_back(next->address);
1165  }
1166  }
1167  else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1168  {
1169  bool is_label=true;
1170  for(const auto &arg : i_it->args)
1171  {
1172  if(is_label)
1173  {
1174  auto target = numeric_cast_v<unsigned>(to_constant_expr(arg));
1175  targets.insert(target);
1176  a_entry.first->second.successors.push_back(target);
1177  }
1178  is_label=!is_label;
1179  }
1180  }
1181  else if(bytecode == BC_ret)
1182  {
1183  // Finish these later, once we've seen all jsr instructions.
1184  ret_instructions.push_back(i_it);
1185  }
1186  }
1187  draw_edges_from_ret_to_jsr(address_map, jsr_ret_targets, ret_instructions);
1188 
1189  for(const auto &address : address_map)
1190  {
1191  for(auto s : address.second.successors)
1192  {
1193  const auto a_it = address_map.find(s);
1194  CHECK_RETURN(a_it != address_map.end());
1195  a_it->second.predecessors.insert(address.first);
1196  }
1197  }
1198 
1199  // Clean the list of temporary variables created by a call to `tmp_variable`.
1200  // These are local variables in the goto function used to represent temporary
1201  // values of the JVM operand stack, newly allocated objects before the
1202  // constructor is called, ...
1203  tmp_vars.clear();
1204 
1205  // Now that the control flow graph is built, set up our local variables
1206  // (these require the graph to determine live ranges)
1207  setup_local_variables(method, address_map);
1208 
1209  std::set<method_offsett> working_set;
1210 
1211  if(!instructions.empty())
1212  working_set.insert(instructions.front().address);
1213 
1214  while(!working_set.empty())
1215  {
1216  auto cur = working_set.begin();
1217  auto address_it = address_map.find(*cur);
1218  CHECK_RETURN(address_it != address_map.end());
1219  auto &instruction = address_it->second;
1220  const method_offsett cur_pc = *cur;
1221  working_set.erase(cur);
1222 
1223  if(instruction.done)
1224  continue;
1225  working_set.insert(
1226  instruction.successors.begin(), instruction.successors.end());
1227 
1228  instructionst::const_iterator i_it = instruction.source;
1229  stack.swap(instruction.stack);
1230  instruction.stack.clear();
1231  codet &c = instruction.code;
1232 
1233  assert(
1234  stack.empty() || instruction.predecessors.size() <= 1 ||
1235  has_prefix(stack.front().get_string(ID_C_base_name), "$stack"));
1236 
1237  exprt arg0=i_it->args.size()>=1?i_it->args[0]:nil_exprt();
1238  exprt arg1=i_it->args.size()>=2?i_it->args[1]:nil_exprt();
1239 
1240  const auto bytecode = i_it->bytecode;
1241  const bytecode_infot &stmt_bytecode_info = bytecode_info[i_it->bytecode];
1242  const std::string statement = stmt_bytecode_info.mnemonic;
1243 
1244  // deal with _idx suffixes
1245  if(statement.size()>=2 &&
1246  statement[statement.size()-2]=='_' &&
1247  isdigit(statement[statement.size()-1]))
1248  {
1249  arg0=
1250  from_integer(
1252  std::string(id2string(statement), statement.size()-1, 1)),
1253  java_int_type());
1254  }
1255 
1256  typet catch_type;
1257 
1258  // Find catch blocks that begin here. For now we assume if more than
1259  // one catch targets the same bytecode then we must be indifferent to
1260  // its type and just call it a Throwable.
1261  auto it=method.exception_table.begin();
1262  for(; it!=method.exception_table.end(); ++it)
1263  {
1264  if(cur_pc==it->handler_pc)
1265  {
1266  if(
1267  catch_type != typet() ||
1268  it->catch_type == struct_tag_typet(irep_idt()))
1269  {
1270  catch_type = struct_tag_typet("java::java.lang.Throwable");
1271  break;
1272  }
1273  else
1274  catch_type=it->catch_type;
1275  }
1276  }
1277 
1278  optionalt<codet> catch_instruction;
1279 
1280  if(catch_type!=typet())
1281  {
1282  // at the beginning of a handler, clear the stack and
1283  // push the corresponding exceptional return variable
1284  // We also create a catch exception instruction that
1285  // precedes the catch block, and which remove_exceptionst
1286  // will transform into something like:
1287  // catch_var = GLOBAL_THROWN_EXCEPTION;
1288  // GLOBAL_THROWN_EXCEPTION = null;
1289  stack.clear();
1290  symbol_exprt catch_var=
1291  tmp_variable(
1292  "caught_exception",
1293  java_reference_type(catch_type));
1294  stack.push_back(catch_var);
1295  catch_instruction = code_landingpadt(catch_var);
1296  }
1297 
1298  exprt::operandst op = pop(stmt_bytecode_info.pop);
1299  exprt::operandst results;
1300  results.resize(stmt_bytecode_info.push, nil_exprt());
1301 
1302  if(bytecode == BC_aconst_null)
1303  {
1304  assert(results.size()==1);
1306  }
1307  else if(bytecode == BC_athrow)
1308  {
1309  PRECONDITION(op.size() == 1 && results.size() == 1);
1310  convert_athrow(i_it->source_location, op, c, results);
1311  }
1312  else if(bytecode == BC_checkcast)
1313  {
1314  // checkcast throws an exception in case a cast of object
1315  // on stack to given type fails.
1316  // The stack isn't modified.
1317  PRECONDITION(op.size() == 1 && results.size() == 1);
1318  convert_checkcast(arg0, op, c, results);
1319  }
1320  else if(bytecode == BC_invokedynamic)
1321  {
1322  if(
1323  const auto res =
1324  convert_invoke_dynamic(i_it->source_location, i_it->address, arg0, c))
1325  {
1326  results.resize(1);
1327  results[0] = *res;
1328  }
1329  }
1330  else if(
1331  bytecode == BC_invokestatic && id2string(arg0.get(ID_identifier)) ==
1332  "java::org.cprover.CProver.assume:(Z)V")
1333  {
1334  const java_method_typet &method_type = to_java_method_type(arg0.type());
1335  INVARIANT(
1336  method_type.parameters().size() == 1,
1337  "function expected to have exactly one parameter");
1338  c = replace_call_to_cprover_assume(i_it->source_location, c);
1339  }
1340  // replace calls to CProver.atomicBegin
1341  else if(
1342  bytecode == BC_invokestatic &&
1343  arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicBegin:()V")
1344  {
1345  c = codet(ID_atomic_begin);
1346  }
1347  // replace calls to CProver.atomicEnd
1348  else if(
1349  bytecode == BC_invokestatic &&
1350  arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicEnd:()V")
1351  {
1352  c = codet(ID_atomic_end);
1353  }
1354  else if(
1355  bytecode == BC_invokeinterface || bytecode == BC_invokespecial ||
1356  bytecode == BC_invokevirtual || bytecode == BC_invokestatic)
1357  {
1358  class_method_descriptor_exprt *class_method_descriptor =
1359  expr_try_dynamic_cast<class_method_descriptor_exprt>(arg0);
1360 
1361  INVARIANT(
1362  class_method_descriptor,
1363  "invokeinterface, invokespecial, invokevirtual and invokestatic should "
1364  "be called with a class method descriptor expression as arg0");
1365 
1367  i_it->source_location, statement, *class_method_descriptor, c, results);
1368  }
1369  else if(bytecode == BC_return)
1370  {
1371  PRECONDITION(op.empty() && results.empty());
1372  c=code_returnt();
1373  }
1374  else if(bytecode == patternt("?return"))
1375  {
1376  // Return types are promoted in java, so this might need
1377  // conversion.
1378  PRECONDITION(op.size() == 1 && results.empty());
1379  const exprt r =
1381  c=code_returnt(r);
1382  }
1383  else if(bytecode == patternt("?astore"))
1384  {
1385  PRECONDITION(results.empty());
1386  c = convert_astore(statement, op, i_it->source_location);
1387  }
1388  else if(bytecode == patternt("?store") || bytecode == patternt("?store_?"))
1389  {
1390  // store value into some local variable
1391  PRECONDITION(op.size() == 1 && results.empty());
1392  c = convert_store(
1393  statement, arg0, op, i_it->address, i_it->source_location);
1394  }
1395  else if(bytecode == patternt("?aload"))
1396  {
1397  PRECONDITION(results.size() == 1);
1398  results[0] = convert_aload(statement, op);
1399  }
1400  else if(bytecode == patternt("?load") || bytecode == patternt("?load_?"))
1401  {
1402  // load a value from a local variable
1403  results[0] = convert_load(arg0, statement[0], i_it->address);
1404  }
1405  else if(bytecode == BC_ldc || bytecode == BC_ldc_w || bytecode == BC_ldc2_w)
1406  {
1407  PRECONDITION(op.empty() && results.size() == 1);
1408 
1409  INVARIANT(
1410  !can_cast_expr<java_string_literal_exprt>(arg0) && arg0.id() != ID_type,
1411  "String and Class literals should have been lowered in "
1412  "generate_constant_global_variables");
1413 
1414  results[0] = arg0;
1415  }
1416  else if(bytecode == BC_goto || bytecode == BC_goto_w)
1417  {
1418  PRECONDITION(op.empty() && results.empty());
1419  const mp_integer number =
1420  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1421  code_gotot code_goto(label(integer2string(number)));
1422  c=code_goto;
1423  }
1424  else if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1425  {
1426  // As 'goto', except we must also push the subroutine return address:
1427  PRECONDITION(op.empty() && results.size() == 1);
1428  const mp_integer number =
1429  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1430  code_gotot code_goto(label(integer2string(number)));
1431  c=code_goto;
1432  results[0]=
1433  from_integer(
1434  std::next(i_it)->address,
1435  unsignedbv_typet(64));
1436  results[0].type() = pointer_type(java_void_type());
1437  }
1438  else if(bytecode == BC_ret)
1439  {
1440  // Since we have a bounded target set, make life easier on our analyses
1441  // and write something like:
1442  // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ...
1443  PRECONDITION(op.empty() && results.empty());
1444  assert(!jsr_ret_targets.empty());
1445  c = convert_ret(
1446  jsr_ret_targets, arg0, i_it->source_location, i_it->address);
1447  }
1448  else if(bytecode == BC_iconst_m1)
1449  {
1450  assert(results.size()==1);
1451  results[0]=from_integer(-1, java_int_type());
1452  }
1453  else if(bytecode == patternt("?const_?"))
1454  {
1455  assert(results.size()==1);
1456  results = convert_const(statement, to_constant_expr(arg0), results);
1457  }
1458  else if(bytecode == patternt("?ipush"))
1459  {
1460  PRECONDITION(results.size()==1);
1462  arg0.id()==ID_constant,
1463  "ipush argument expected to be constant");
1464  results[0] = typecast_exprt::conditional_cast(arg0, java_int_type());
1465  }
1466  else if(bytecode == patternt("if_?cmp??"))
1467  {
1468  PRECONDITION(op.size() == 2 && results.empty());
1469  const mp_integer number =
1470  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1471  c = convert_if_cmp(
1472  address_map, bytecode, op, number, i_it->source_location);
1473  }
1474  else if(bytecode == patternt("if??"))
1475  {
1476  // clang-format off
1477  const irep_idt id=
1478  bytecode == BC_ifeq ? ID_equal :
1479  bytecode == BC_ifne ? ID_notequal :
1480  bytecode == BC_iflt ? ID_lt :
1481  bytecode == BC_ifge ? ID_ge :
1482  bytecode == BC_ifgt ? ID_gt :
1483  bytecode == BC_ifle ? ID_le :
1484  irep_idt();
1485  // clang-format on
1486 
1487  INVARIANT(!id.empty(), "unexpected bytecode-if");
1488  PRECONDITION(op.size() == 1 && results.empty());
1489  const mp_integer number =
1490  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1491  c = convert_if(address_map, op, id, number, i_it->source_location);
1492  }
1493  else if(bytecode == patternt("ifnonnull"))
1494  {
1495  PRECONDITION(op.size() == 1 && results.empty());
1496  const mp_integer number =
1497  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1498  c = convert_ifnonull(address_map, op, number, i_it->source_location);
1499  }
1500  else if(bytecode == patternt("ifnull"))
1501  {
1502  PRECONDITION(op.size() == 1 && results.empty());
1503  const mp_integer number =
1504  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1505  c = convert_ifnull(address_map, op, number, i_it->source_location);
1506  }
1507  else if(bytecode == BC_iinc)
1508  {
1509  c = convert_iinc(arg0, arg1, i_it->source_location, i_it->address);
1510  }
1511  else if(bytecode == patternt("?xor"))
1512  {
1513  PRECONDITION(op.size() == 2 && results.size() == 1);
1514  results[0]=bitxor_exprt(op[0], op[1]);
1515  }
1516  else if(bytecode == patternt("?or"))
1517  {
1518  PRECONDITION(op.size() == 2 && results.size() == 1);
1519  results[0]=bitor_exprt(op[0], op[1]);
1520  }
1521  else if(bytecode == patternt("?and"))
1522  {
1523  PRECONDITION(op.size() == 2 && results.size() == 1);
1524  results[0]=bitand_exprt(op[0], op[1]);
1525  }
1526  else if(bytecode == patternt("?shl"))
1527  {
1528  PRECONDITION(op.size() == 2 && results.size() == 1);
1529  results = convert_shl(statement, op, results);
1530  }
1531  else if(bytecode == patternt("?shr"))
1532  {
1533  PRECONDITION(op.size() == 2 && results.size() == 1);
1534  results[0]=ashr_exprt(op[0], op[1]);
1535  }
1536  else if(bytecode == patternt("?ushr"))
1537  {
1538  PRECONDITION(op.size() == 2 && results.size() == 1);
1539  results = convert_ushr(statement, op, results);
1540  }
1541  else if(bytecode == patternt("?add"))
1542  {
1543  PRECONDITION(op.size() == 2 && results.size() == 1);
1544  results[0]=plus_exprt(op[0], op[1]);
1545  }
1546  else if(bytecode == patternt("?sub"))
1547  {
1548  PRECONDITION(op.size() == 2 && results.size() == 1);
1549  results[0]=minus_exprt(op[0], op[1]);
1550  }
1551  else if(bytecode == patternt("?div"))
1552  {
1553  PRECONDITION(op.size() == 2 && results.size() == 1);
1554  results[0]=div_exprt(op[0], op[1]);
1555  }
1556  else if(bytecode == patternt("?mul"))
1557  {
1558  PRECONDITION(op.size() == 2 && results.size() == 1);
1559  results[0]=mult_exprt(op[0], op[1]);
1560  }
1561  else if(bytecode == patternt("?neg"))
1562  {
1563  PRECONDITION(op.size() == 1 && results.size() == 1);
1564  results[0]=unary_minus_exprt(op[0], op[0].type());
1565  }
1566  else if(bytecode == patternt("?rem"))
1567  {
1568  PRECONDITION(op.size() == 2 && results.size() == 1);
1569  if(bytecode == BC_frem || bytecode == BC_drem)
1570  results[0]=rem_exprt(op[0], op[1]);
1571  else
1572  results[0]=mod_exprt(op[0], op[1]);
1573  }
1574  else if(bytecode == patternt("?cmp"))
1575  {
1576  PRECONDITION(op.size() == 2 && results.size() == 1);
1577  results = convert_cmp(op, results);
1578  }
1579  else if(bytecode == patternt("?cmp?"))
1580  {
1581  PRECONDITION(op.size() == 2 && results.size() == 1);
1582  results = convert_cmp2(statement, op, results);
1583  }
1584  else if(bytecode == patternt("?cmpl"))
1585  {
1586  PRECONDITION(op.size() == 2 && results.size() == 1);
1587  results[0]=binary_relation_exprt(op[0], ID_lt, op[1]);
1588  }
1589  else if(bytecode == BC_dup)
1590  {
1591  PRECONDITION(op.size() == 1 && results.size() == 2);
1592  results[0]=results[1]=op[0];
1593  }
1594  else if(bytecode == BC_dup_x1)
1595  {
1596  PRECONDITION(op.size() == 2 && results.size() == 3);
1597  results[0]=op[1];
1598  results[1]=op[0];
1599  results[2]=op[1];
1600  }
1601  else if(bytecode == BC_dup_x2)
1602  {
1603  PRECONDITION(op.size() == 3 && results.size() == 4);
1604  results[0]=op[2];
1605  results[1]=op[0];
1606  results[2]=op[1];
1607  results[3]=op[2];
1608  }
1609  // dup2* behaviour depends on the size of the operands on the
1610  // stack
1611  else if(bytecode == BC_dup2)
1612  {
1613  PRECONDITION(!stack.empty() && results.empty());
1614  convert_dup2(op, results);
1615  }
1616  else if(bytecode == BC_dup2_x1)
1617  {
1618  PRECONDITION(!stack.empty() && results.empty());
1619  convert_dup2_x1(op, results);
1620  }
1621  else if(bytecode == BC_dup2_x2)
1622  {
1623  PRECONDITION(!stack.empty() && results.empty());
1624  convert_dup2_x2(op, results);
1625  }
1626  else if(bytecode == BC_getfield)
1627  {
1628  PRECONDITION(op.size() == 1 && results.size() == 1);
1629  results[0] = java_bytecode_promotion(
1630  to_member(op[0], expr_dynamic_cast<fieldref_exprt>(arg0), ns));
1631  }
1632  else if(bytecode == BC_getstatic)
1633  {
1634  PRECONDITION(op.empty() && results.size() == 1);
1635  const auto &field_name=arg0.get_string(ID_component_name);
1636  const bool is_assertions_disabled_field=
1637  field_name.find("$assertionsDisabled")!=std::string::npos;
1638 
1639  const irep_idt field_id(
1640  get_static_field(arg0.get_string(ID_class), field_name));
1641 
1642  // Symbol should have been populated by java_bytecode_convert_class:
1643  const symbol_exprt symbol_expr(
1644  symbol_table.lookup_ref(field_id).symbol_expr());
1645 
1647  i_it->source_location,
1648  arg0,
1649  symbol_expr,
1650  is_assertions_disabled_field,
1651  c,
1652  results);
1653  }
1654  else if(bytecode == BC_putfield)
1655  {
1656  PRECONDITION(op.size() == 2 && results.empty());
1657  c = convert_putfield(expr_dynamic_cast<fieldref_exprt>(arg0), op);
1658  }
1659  else if(bytecode == BC_putstatic)
1660  {
1661  PRECONDITION(op.size() == 1 && results.empty());
1662  const auto &field_name=arg0.get_string(ID_component_name);
1663 
1664  const irep_idt field_id(
1665  get_static_field(arg0.get_string(ID_class), field_name));
1666 
1667  // Symbol should have been populated by java_bytecode_convert_class:
1668  const symbol_exprt symbol_expr(
1669  symbol_table.lookup_ref(field_id).symbol_expr());
1670 
1671  c = convert_putstatic(i_it->source_location, arg0, op, symbol_expr);
1672  }
1673  else if(
1674  bytecode == BC_f2i || bytecode == BC_f2l || bytecode == BC_d2i ||
1675  bytecode == BC_d2l)
1676  {
1677  PRECONDITION(op.size() == 1 && results.size() == 1);
1678  typet src_type = java_type_from_char(statement[0]);
1679  typet dest_type = java_type_from_char(statement[2]);
1680 
1681  // See JLS 5.1.3. Narrowing Primitive Conversion
1682  // +-NaN is converted to 0
1683  // +-Inf resp. values beyond the int/long range
1684  // are mapped to max/min of int/long.
1685  // Other values are rounded towards zero
1686 
1687  // for int: 2147483647, for long: 9223372036854775807L
1688  exprt largest_as_dest =
1690 
1691  // 2147483647 is not exactly representable in float;
1692  // it will be rounded up to 2147483648, which is fine.
1693  // 9223372036854775807L is not exactly representable
1694  // neither in float nor in double; it is rounded up to
1695  // 9223372036854775808.0, which is fine.
1696  exprt largest_as_src =
1697  from_integer(to_integer_bitvector_type(dest_type).largest(), src_type);
1698 
1699  // for int: -2147483648, for long: -9223372036854775808L
1700  exprt smallest_as_dest =
1702 
1703  // -2147483648 and -9223372036854775808L are exactly
1704  // representable in float and double.
1705  exprt smallest_as_src =
1706  from_integer(to_integer_bitvector_type(dest_type).smallest(), src_type);
1707 
1708  results[0] = if_exprt(
1709  binary_relation_exprt(op[0], ID_le, smallest_as_src),
1710  smallest_as_dest,
1711  if_exprt(
1712  binary_relation_exprt(op[0], ID_ge, largest_as_src),
1713  largest_as_dest,
1714  typecast_exprt::conditional_cast(op[0], dest_type)));
1715  }
1716  else if(bytecode == patternt("?2?")) // i2c etc.
1717  {
1718  PRECONDITION(op.size() == 1 && results.size() == 1);
1719  typet type=java_type_from_char(statement[2]);
1720  results[0] = typecast_exprt::conditional_cast(op[0], type);
1721 
1722  // These types get converted/truncated then immediately turned back into
1723  // ints again, so we just double-cast here.
1724  if(
1725  type == java_char_type() || type == java_byte_type() ||
1726  type == java_short_type())
1727  {
1728  results[0] = typecast_exprt(results[0], java_int_type());
1729  }
1730  }
1731  else if(bytecode == BC_new)
1732  {
1733  // use temporary since the stack symbol might get duplicated
1734  PRECONDITION(op.empty() && results.size() == 1);
1735  convert_new(i_it->source_location, arg0, c, results);
1736  }
1737  else if(bytecode == BC_newarray || bytecode == BC_anewarray)
1738  {
1739  // the op is the array size
1740  PRECONDITION(op.size() == 1 && results.size() == 1);
1741  c = convert_newarray(i_it->source_location, statement, arg0, op, results);
1742  }
1743  else if(bytecode == BC_multianewarray)
1744  {
1745  // The first argument is the type, the second argument is the number of
1746  // dimensions. The size of each dimension is on the stack.
1747  const std::size_t dimension =
1748  numeric_cast_v<std::size_t>(to_constant_expr(arg1));
1749 
1750  op=pop(dimension);
1751  assert(results.size()==1);
1752  c = convert_multianewarray(i_it->source_location, arg0, op, results);
1753  }
1754  else if(bytecode == BC_arraylength)
1755  {
1756  PRECONDITION(op.size() == 1 && results.size() == 1);
1757 
1758  // any array type is fine here, so we go for a reference array
1759  dereference_exprt array{typecast_exprt{op[0], java_array_type('a')}};
1760  PRECONDITION(array.type().id() == ID_struct_tag);
1761  array.set(ID_java_member_access, true);
1762 
1763  results[0] = member_exprt{std::move(array), "length", java_int_type()};
1764  }
1765  else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1766  {
1767  PRECONDITION(op.size() == 1 && results.empty());
1768  c = convert_switch(op, i_it->args, i_it->source_location);
1769  }
1770  else if(bytecode == BC_pop || bytecode == BC_pop2)
1771  {
1772  c = convert_pop(statement, op);
1773  }
1774  else if(bytecode == BC_instanceof)
1775  {
1776  PRECONDITION(op.size() == 1 && results.size() == 1);
1777 
1778  results[0] =
1780  }
1781  else if(bytecode == BC_monitorenter || bytecode == BC_monitorexit)
1782  {
1783  c = convert_monitorenterexit(statement, op, i_it->source_location);
1784  }
1785  else if(bytecode == BC_swap)
1786  {
1787  PRECONDITION(op.size() == 2 && results.size() == 2);
1788  results[1]=op[0];
1789  results[0]=op[1];
1790  }
1791  else if(bytecode == BC_nop)
1792  {
1793  c=code_skipt();
1794  }
1795  else
1796  {
1797  c=codet(statement);
1798  c.operands()=op;
1799  }
1800 
1801  c = do_exception_handling(method, working_set, cur_pc, c);
1802 
1803  // Finally if this is the beginning of a catch block (already determined
1804  // before the big bytecode switch), insert the exception 'landing pad'
1805  // instruction before the actual instruction:
1806  if(catch_instruction.has_value())
1807  {
1808  if(c.get_statement() != ID_block)
1809  c = code_blockt{{c}};
1810  c.operands().insert(c.operands().begin(), *catch_instruction);
1811  }
1812 
1813  if(!i_it->source_location.get_line().empty())
1815 
1816  push(results);
1817 
1818  instruction.done = true;
1819  for(const auto address : instruction.successors)
1820  {
1821  address_mapt::iterator a_it2=address_map.find(address);
1822  CHECK_RETURN(a_it2 != address_map.end());
1823 
1824  // clear the stack if this is an exception handler
1825  for(const auto &exception_row : method.exception_table)
1826  {
1827  if(address==exception_row.handler_pc)
1828  {
1829  stack.clear();
1830  break;
1831  }
1832  }
1833 
1834  if(!stack.empty() && a_it2->second.predecessors.size()>1)
1835  {
1836  // copy into temporaries
1837  code_blockt more_code;
1838 
1839  // introduce temporaries when successor is seen for the first
1840  // time
1841  if(a_it2->second.stack.empty())
1842  {
1843  for(stackt::iterator s_it=stack.begin();
1844  s_it!=stack.end();
1845  ++s_it)
1846  {
1847  symbol_exprt lhs=tmp_variable("$stack", s_it->type());
1848  code_assignt a(lhs, *s_it);
1849  more_code.add(a);
1850 
1851  s_it->swap(lhs);
1852  }
1853  }
1854  else
1855  {
1856  INVARIANT(
1857  a_it2->second.stack.size() == stack.size(),
1858  "Stack sizes should be the same.");
1859  stackt::const_iterator os_it=a_it2->second.stack.begin();
1860  for(auto &expr : stack)
1861  {
1862  assert(has_prefix(os_it->get_string(ID_C_base_name), "$stack"));
1863  symbol_exprt lhs=to_symbol_expr(*os_it);
1864  code_assignt a(lhs, expr);
1865  more_code.add(a);
1866 
1867  expr.swap(lhs);
1868  ++os_it;
1869  }
1870  }
1871 
1872  if(results.empty())
1873  {
1874  more_code.add(c);
1875  c.swap(more_code);
1876  }
1877  else
1878  {
1879  if(c.get_statement() != ID_block)
1880  c = code_blockt{{c}};
1881 
1882  auto &last_statement=to_code_block(c).find_last_statement();
1883  if(last_statement.get_statement()==ID_goto)
1884  {
1885  // Insert stack twiddling before branch:
1886  if(last_statement.get_statement() != ID_block)
1887  last_statement = code_blockt{{last_statement}};
1888 
1889  last_statement.operands().insert(
1890  last_statement.operands().begin(),
1891  more_code.statements().begin(),
1892  more_code.statements().end());
1893  }
1894  else
1895  to_code_block(c).append(more_code);
1896  }
1897  }
1898  a_it2->second.stack=stack;
1899  }
1900  }
1901 
1902  code_blockt code;
1903 
1904  // Add anonymous locals to the symtab:
1905  for(const auto &var : used_local_names)
1906  {
1907  symbolt new_symbol;
1908  new_symbol.name=var.get_identifier();
1909  new_symbol.type=var.type();
1910  new_symbol.base_name=var.get(ID_C_base_name);
1911  new_symbol.pretty_name=strip_java_namespace_prefix(var.get_identifier());
1912  new_symbol.mode=ID_java;
1913  new_symbol.is_type=false;
1914  new_symbol.is_file_local=true;
1915  new_symbol.is_thread_local=true;
1916  new_symbol.is_lvalue=true;
1917  symbol_table.add(new_symbol);
1918  }
1919 
1920  // Try to recover block structure as indicated in the local variable table:
1921 
1922  // The block tree node mirrors the block structure of root_block,
1923  // indexing the Java PCs where each subblock starts and ends.
1924  block_tree_nodet root;
1925  code_blockt root_block;
1926 
1927  // First create a simple flat list of basic blocks. We'll add lexical nesting
1928  // constructs as variable live-ranges require next.
1929  bool start_new_block=true;
1930  bool has_seen_previous_address=false;
1931  method_offsett previous_address = 0;
1932  for(const auto &address_pair : address_map)
1933  {
1934  const method_offsett address = address_pair.first;
1935  assert(address_pair.first==address_pair.second.source->address);
1936  const codet &c=address_pair.second.code;
1937 
1938  // Start a new lexical block if this is a branch target:
1939  if(!start_new_block)
1940  start_new_block=targets.find(address)!=targets.end();
1941  // Start a new lexical block if this is a control flow join
1942  // (e.g. due to exceptional control flow)
1943  if(!start_new_block)
1944  start_new_block=address_pair.second.predecessors.size()>1;
1945  // Start a new lexical block if we've just entered a block in which
1946  // exceptions are handled. This is usually the start of a try block, but a
1947  // single try can be represented as multiple non-contiguous blocks in the
1948  // exception table.
1949  if(!start_new_block && has_seen_previous_address)
1950  {
1951  for(const auto &exception_row : method.exception_table)
1952  if(exception_row.start_pc==previous_address)
1953  {
1954  start_new_block=true;
1955  break;
1956  }
1957  }
1958 
1959  if(start_new_block)
1960  {
1961  root_block.add(
1963  root.branch.push_back(block_tree_nodet::get_leaf());
1964  assert((root.branch_addresses.empty() ||
1965  root.branch_addresses.back()<address) &&
1966  "Block addresses should be unique and increasing");
1967  root.branch_addresses.push_back(address);
1968  }
1969 
1970  if(c.get_statement()!=ID_skip)
1971  {
1972  auto &lastlabel = to_code_label(root_block.statements().back());
1973  auto &add_to_block=to_code_block(lastlabel.code());
1974  add_to_block.add(c);
1975  }
1976  start_new_block=address_pair.second.successors.size()>1;
1977 
1978  previous_address=address;
1979  has_seen_previous_address=true;
1980  }
1981 
1982  // Find out where temporaries are used:
1983  std::map<irep_idt, variablet> temporary_variable_live_ranges;
1984  for(const auto &aentry : address_map)
1986  aentry.first,
1987  aentry.second.code,
1988  temporary_variable_live_ranges);
1989 
1990  std::vector<const variablet*> vars_to_process;
1991  for(const auto &vlist : variables)
1992  for(const auto &v : vlist)
1993  vars_to_process.push_back(&v);
1994 
1995  for(const auto &v : tmp_vars)
1996  vars_to_process.push_back(
1997  &temporary_variable_live_ranges.at(v.get_identifier()));
1998 
1999  for(const auto &v : used_local_names)
2000  vars_to_process.push_back(
2001  &temporary_variable_live_ranges.at(v.get_identifier()));
2002 
2003  for(const auto vp : vars_to_process)
2004  {
2005  const auto &v=*vp;
2006  if(v.is_parameter)
2007  continue;
2008  // Merge lexical scopes as far as possible to allow us to
2009  // declare these variable scopes faithfully.
2010  // Don't insert yet, as for the time being the blocks' only
2011  // operands must be other blocks.
2012  // The declarations will be inserted in the next pass instead.
2014  root,
2015  root_block,
2016  v.start_pc,
2017  v.start_pc + v.length,
2018  std::numeric_limits<method_offsett>::max(),
2019  address_map);
2020  }
2021  for(const auto vp : vars_to_process)
2022  {
2023  const auto &v=*vp;
2024  if(v.is_parameter)
2025  continue;
2026  // Skip anonymous variables:
2027  if(v.symbol_expr.get_identifier().empty())
2028  continue;
2029  auto &block = get_block_for_pcrange(
2030  root,
2031  root_block,
2032  v.start_pc,
2033  v.start_pc + v.length,
2034  std::numeric_limits<method_offsett>::max());
2035  code_declt d(v.symbol_expr);
2036  block.statements().insert(block.statements().begin(), d);
2037  }
2038 
2039  for(auto &block : root_block.statements())
2040  code.add(block);
2041 
2042  return code;
2043 }
2044 
2046  const irep_idt &statement,
2047  const exprt::operandst &op)
2048 {
2049  // these are skips
2050  codet c = code_skipt();
2051 
2052  // pop2 removes two single-word items from the stack (e.g. two
2053  // integers, or an integer and an object reference) or one
2054  // two-word item (i.e. a double or a long).
2055  // http://cs.au.dk/~mis/dOvs/jvmspec/ref-pop2.html
2056  if(statement == "pop2" && get_bytecode_type_width(op[0].type()) == 32)
2057  pop(1);
2058  return c;
2059 }
2060 
2062  const exprt::operandst &op,
2064  const source_locationt &location)
2065 {
2066  // we turn into switch-case
2067  code_blockt code_block;
2068  code_block.add_source_location() = location;
2069 
2070  bool is_label = true;
2071  for(auto a_it = args.begin(); a_it != args.end();
2072  a_it++, is_label = !is_label)
2073  {
2074  if(is_label)
2075  {
2076  const mp_integer number =
2077  numeric_cast_v<mp_integer>(to_constant_expr(*a_it));
2078  // The switch case does not contain any code, it just branches via a GOTO
2079  // to the jump target of the tableswitch/lookupswitch case at
2080  // hand. Therefore we consider this code to belong to the source bytecode
2081  // instruction and not the target instruction.
2082  const method_offsett label_number =
2083  numeric_cast_v<method_offsett>(number);
2084  code_gotot code(label(std::to_string(label_number)));
2085  code.add_source_location() = location;
2086 
2087  if(a_it == args.begin())
2088  {
2089  code_switch_caset code_case(nil_exprt(), std::move(code));
2090  code_case.set_default();
2091 
2092  code_block.add(std::move(code_case), location);
2093  }
2094  else
2095  {
2096  exprt case_op =
2097  typecast_exprt::conditional_cast(*std::prev(a_it), op[0].type());
2098  case_op.add_source_location() = location;
2099 
2100  code_switch_caset code_case(std::move(case_op), std::move(code));
2101  code_block.add(std::move(code_case), location);
2102  }
2103  }
2104  }
2105 
2106  code_switcht code_switch(op[0], std::move(code_block));
2107  code_switch.add_source_location() = location;
2108  return code_switch;
2109 }
2110 
2112  const irep_idt &statement,
2113  const exprt::operandst &op,
2114  const source_locationt &source_location)
2115 {
2116  const irep_idt descriptor = (statement == "monitorenter") ?
2117  "java::java.lang.Object.monitorenter:(Ljava/lang/Object;)V" :
2118  "java::java.lang.Object.monitorexit:(Ljava/lang/Object;)V";
2119 
2120  if(!threading_support || !symbol_table.has_symbol(descriptor))
2121  return code_skipt();
2122 
2123  // becomes a function call
2124  java_method_typet type(
2126  java_void_type());
2127  code_function_callt call(symbol_exprt(descriptor, type), {op[0]});
2128  call.add_source_location() = source_location;
2129  if(needed_lazy_methods && symbol_table.has_symbol(descriptor))
2130  needed_lazy_methods->add_needed_method(descriptor);
2131  return std::move(call);
2132 }
2133 
2135  exprt::operandst &op,
2136  exprt::operandst &results)
2137 {
2138  if(get_bytecode_type_width(stack.back().type()) == 32)
2139  op = pop(2);
2140  else
2141  op = pop(1);
2142 
2143  results.insert(results.end(), op.begin(), op.end());
2144  results.insert(results.end(), op.begin(), op.end());
2145 }
2146 
2148  exprt::operandst &op,
2149  exprt::operandst &results)
2150 {
2151  if(get_bytecode_type_width(stack.back().type()) == 32)
2152  op = pop(3);
2153  else
2154  op = pop(2);
2155 
2156  results.insert(results.end(), op.begin() + 1, op.end());
2157  results.insert(results.end(), op.begin(), op.end());
2158 }
2159 
2161  exprt::operandst &op,
2162  exprt::operandst &results)
2163 {
2164  if(get_bytecode_type_width(stack.back().type()) == 32)
2165  op = pop(2);
2166  else
2167  op = pop(1);
2168 
2169  exprt::operandst op2;
2170 
2171  if(get_bytecode_type_width(stack.back().type()) == 32)
2172  op2 = pop(2);
2173  else
2174  op2 = pop(1);
2175 
2176  results.insert(results.end(), op.begin(), op.end());
2177  results.insert(results.end(), op2.begin(), op2.end());
2178  results.insert(results.end(), op.begin(), op.end());
2179 }
2180 
2182  const irep_idt &statement,
2183  const constant_exprt &arg0,
2184  exprt::operandst &results) const
2185 {
2186  const char type_char = statement[0];
2187  const bool is_double('d' == type_char);
2188  const bool is_float('f' == type_char);
2189 
2190  if(is_double || is_float)
2191  {
2192  const ieee_float_spect spec(
2195 
2196  ieee_floatt value(spec);
2197  if(arg0.type().id() != ID_floatbv)
2198  {
2199  const mp_integer number = numeric_cast_v<mp_integer>(arg0);
2200  value.from_integer(number);
2201  }
2202  else
2203  value.from_expr(arg0);
2204 
2205  results[0] = value.to_expr();
2206  }
2207  else
2208  {
2209  const mp_integer value = numeric_cast_v<mp_integer>(arg0);
2210  const typet type = java_type_from_char(statement[0]);
2211  results[0] = from_integer(value, type);
2212  }
2213  return results;
2214 }
2215 
2217  const java_method_typet::parameterst &parameters,
2219 {
2220  // do some type adjustment for the arguments,
2221  // as Java promotes arguments
2222  // Also cast pointers since intermediate locals
2223  // can be void*.
2224  INVARIANT(
2225  parameters.size() == arguments.size(),
2226  "for each parameter there must be exactly one argument");
2227  for(std::size_t i = 0; i < parameters.size(); i++)
2228  {
2229  const typet &type = parameters[i].type();
2230  if(
2231  type == java_boolean_type() || type == java_char_type() ||
2232  type == java_byte_type() || type == java_short_type() ||
2233  type.id() == ID_pointer)
2234  {
2235  arguments[i] = typecast_exprt::conditional_cast(arguments[i], type);
2236  }
2237  }
2238 }
2239 
2241  source_locationt location,
2242  const irep_idt &statement,
2243  class_method_descriptor_exprt &class_method_descriptor,
2244  codet &c,
2245  exprt::operandst &results)
2246 {
2247  const bool use_this(statement != "invokestatic");
2248  const bool is_virtual(
2249  statement == "invokevirtual" || statement == "invokeinterface");
2250 
2251  const irep_idt &invoked_method_id = class_method_descriptor.get_identifier();
2252  INVARIANT(
2253  !invoked_method_id.empty(),
2254  "invoke statement arg0 must have an identifier");
2255 
2256  auto method_symbol = symbol_table.symbols.find(invoked_method_id);
2257 
2258  // Use the most precise type available: the actual symbol has generic info,
2259  // whereas the type given by the invoke instruction doesn't and is therefore
2260  // less accurate.
2261  if(method_symbol != symbol_table.symbols.end())
2262  {
2263  // Note the number of parameters might change here due to constructors using
2264  // invokespecial will have zero arguments (the `this` is added below)
2265  // but the symbol for the <init> will have the this parameter.
2266  INVARIANT(
2267  to_java_method_type(class_method_descriptor.type()).return_type().id() ==
2268  to_code_type(method_symbol->second.type).return_type().id(),
2269  "Function return type must not change in kind");
2270  class_method_descriptor.type() = method_symbol->second.type;
2271  }
2272 
2273  // Note arg0 and arg0.type() are subject to many side-effects in this method,
2274  // then finally used to populate the call instruction.
2275  java_method_typet &method_type =
2276  to_java_method_type(class_method_descriptor.type());
2277 
2278  java_method_typet::parameterst &parameters(method_type.parameters());
2279 
2280  if(use_this)
2281  {
2282  const irep_idt class_id = class_method_descriptor.class_id();
2283 
2284  if(parameters.empty() || !parameters[0].get_this())
2285  {
2286  typet thistype = struct_tag_typet(class_id);
2287  reference_typet object_ref_type = java_reference_type(thistype);
2288  java_method_typet::parametert this_p(object_ref_type);
2289  this_p.set_this();
2290  this_p.set_base_name(ID_this);
2291  parameters.insert(parameters.begin(), this_p);
2292  }
2293 
2294  // Note invokespecial is used for super-method calls as well as
2295  // constructors.
2296  if(statement == "invokespecial")
2297  {
2298  if(is_constructor(invoked_method_id))
2299  {
2301  needed_lazy_methods->add_needed_class(class_id);
2302  method_type.set_is_constructor();
2303  }
2304  else
2305  method_type.set(ID_java_super_method_call, true);
2306  }
2307  }
2308 
2309  location.set_function(method_id);
2310 
2311  code_function_callt::argumentst arguments = pop(parameters.size());
2312 
2313  // double-check a bit
2314  INVARIANT(
2315  !use_this || arguments.front().type().id() == ID_pointer,
2316  "first argument must be a pointer");
2317 
2318  adjust_invoke_argument_types(parameters, arguments);
2319 
2320  // do some type adjustment for return values
2321  exprt lhs = nil_exprt();
2322  const typet &return_type = method_type.return_type();
2323 
2324  if(return_type.id() != ID_empty)
2325  {
2326  // return types are promoted in Java
2327  lhs = tmp_variable("return", return_type);
2328  exprt promoted = java_bytecode_promotion(lhs);
2329  results.resize(1);
2330  results[0] = promoted;
2331  }
2332 
2333  // If we don't have a definition for the called symbol, and we won't
2334  // inherit a definition from a super-class, we create a new symbol and
2335  // insert it in the symbol table. The name and type of the method are
2336  // derived from the information we have in the call.
2337  // We fix the access attribute to ID_private, because of the following
2338  // reasons:
2339  // - We don't know the original access attribute and since the .class file is
2340  // unavailable, we have no way to know.
2341  // - The translated method could be an inherited protected method, hence
2342  // accessible from the original caller, but not from the generated test.
2343  // Therefore we must assume that the method is not accessible.
2344  // We set opaque methods as final to avoid assuming they can be overridden.
2345  if(
2346  method_symbol == symbol_table.symbols.end() &&
2347  !(is_virtual && is_method_inherited(
2348  class_method_descriptor.class_id(),
2349  class_method_descriptor.mangled_method_name())))
2350  {
2352  invoked_method_id,
2353  class_method_descriptor.base_method_name(),
2354  id2string(class_method_descriptor.class_id()).substr(6) + "." +
2355  id2string(class_method_descriptor.base_method_name()) + "()",
2356  method_type,
2357  class_method_descriptor.class_id(),
2358  symbol_table,
2360  }
2361 
2362  exprt function;
2363  if(is_virtual)
2364  {
2365  // dynamic binding
2366  PRECONDITION(use_this);
2367  PRECONDITION(!arguments.empty());
2368  function = class_method_descriptor;
2369  // Populate needed methods later,
2370  // once we know what object types can exist.
2371  }
2372  else
2373  {
2374  // static binding
2375  function = symbol_exprt(invoked_method_id, method_type);
2377  {
2378  needed_lazy_methods->add_needed_method(invoked_method_id);
2379  // Calling a static method causes static initialization:
2380  needed_lazy_methods->add_needed_class(class_method_descriptor.class_id());
2381  }
2382  }
2383 
2384  code_function_callt call(
2385  std::move(lhs), std::move(function), std::move(arguments));
2386  call.add_source_location() = location;
2387  call.function().add_source_location() = location;
2388 
2389  // Replacing call if it is a function of the Character library,
2390  // returning the same call otherwise
2392 
2393  if(!use_this)
2394  {
2395  codet clinit_call = get_clinit_call(class_method_descriptor.class_id());
2396  if(clinit_call.get_statement() != ID_skip)
2397  c = code_blockt({clinit_call, c});
2398  }
2399 }
2400 
2402  source_locationt location,
2403  codet &c)
2404 {
2405  exprt operand = pop(1)[0];
2406 
2407  // we may need to adjust the type of the argument
2408  operand = typecast_exprt::conditional_cast(operand, bool_typet());
2409 
2410  c = code_assumet(operand);
2411  location.set_function(method_id);
2412  c.add_source_location() = location;
2413  return c;
2414 }
2415 
2417  const exprt &arg0,
2418  const exprt::operandst &op,
2419  codet &c,
2420  exprt::operandst &results) const
2421 {
2422  java_instanceof_exprt check(op[0], to_struct_tag_type(arg0.type()));
2423  code_assertt assert_class(check);
2424  assert_class.add_source_location().set_comment("Dynamic cast check");
2425  assert_class.add_source_location().set_property_class("bad-dynamic-cast");
2426  // we add this assert such that we can recognise it
2427  // during the instrumentation phase
2428  c = std::move(assert_class);
2429  results[0] = op[0];
2430 }
2431 
2433  const source_locationt &location,
2434  const exprt::operandst &op,
2435  codet &c,
2436  exprt::operandst &results) const
2437 {
2438  if(
2441  "java::java.lang.AssertionError") &&
2443  {
2444  // we translate athrow into
2445  // ASSERT false;
2446  // ASSUME false:
2447  code_assertt assert_code(false_exprt{});
2448  source_locationt assert_location = location; // copy
2449  assert_location.set_comment("assertion at " + location.as_string());
2450  assert_location.set("user-provided", true);
2451  assert_location.set_property_class(ID_assertion);
2452  assert_code.add_source_location() = assert_location;
2453 
2454  code_assumet assume_code(false_exprt{});
2455  source_locationt assume_location = location; // copy
2456  assume_location.set("user-provided", true);
2457  assume_code.add_source_location() = assume_location;
2458 
2459  c = code_blockt({assert_code, assume_code});
2460  }
2461  else
2462  {
2463  side_effect_expr_throwt throw_expr(irept(), typet(), location);
2464  throw_expr.copy_to_operands(op[0]);
2465  c = code_expressiont(throw_expr);
2466  }
2467  results[0] = op[0];
2468 }
2469 
2472  const std::set<method_offsett> &working_set,
2473  method_offsett cur_pc,
2474  codet &code)
2475 {
2476  // For each exception handler range that starts here add a CATCH-PUSH marker
2477  // Each CATCH-PUSH records a list of all the exception id and handler label
2478  // pairs handled for its exact block
2479 
2480  // Gather all try-catch blocks that have cur_pc as the starting pc
2481  typedef std::vector<std::reference_wrapper<
2483  std::map<std::size_t, exceptionst> exceptions_by_end;
2485  : method.exception_table)
2486  {
2487  if(exception.start_pc == cur_pc)
2488  exceptions_by_end[exception.end_pc].push_back(exception);
2489  }
2490  for(const auto &exceptions : exceptions_by_end)
2491  {
2492  // For each block with a distinct end position create one CATCH-PUSH
2493  code_push_catcht catch_push;
2494  // Fill in its exception_list
2495  code_push_catcht::exception_listt &exception_list =
2496  catch_push.exception_list();
2498  : exceptions.second)
2499  {
2500  exception_list.emplace_back(
2501  exception.catch_type.get_identifier(),
2502  // Record the exception handler in the CATCH-PUSH instruction by
2503  // generating a label corresponding to the handler's pc
2504  label(std::to_string(exception.handler_pc)));
2505  }
2506  // Prepend the CATCH-PUSH instruction
2507  code = code_blockt({ std::move(catch_push), code });
2508  }
2509 
2510  // Next add the CATCH-POP instructions
2511  // exception_row.end_pc is exclusive so append a CATCH-POP instruction if
2512  // this is the instruction before it.
2513  // To do this, attempt to find all exception handlers that end at the
2514  // earliest known instruction after this one.
2515 
2516  // Dangerously, we assume that the next opcode in the bytecode after the end
2517  // of the exception handler block (whose address matches the exclusive end
2518  // position of the block) will be a successor of some code investigated
2519  // before the instruction at the end of that handler and therefore in the
2520  // working set.
2521  // As an example of where this may fail, for non-obfuscated bytecode
2522  // generated by most compilers the next opcode after the block ending at the
2523  // end of the try block is the lexically next bit of code after the try
2524  // block, i.e. the catch block. When there aren't any throwing statements in
2525  // the try block this block will not be the successor of any instruction.
2526 
2527  auto next_opcode_it = std::find_if(
2528  working_set.begin(),
2529  working_set.end(),
2530  [cur_pc](method_offsett offset) { return offset > cur_pc; });
2531  if(next_opcode_it != working_set.end())
2532  {
2533  // Count the distinct start positions of handlers that end at this location
2534  std::set<std::size_t> start_positions; // Use a set to deduplicate
2535  for(const auto &exception_row : method.exception_table)
2536  {
2537  // Check if the next instruction found is the (exclusive) end of a block
2538  if(*next_opcode_it == exception_row.end_pc)
2539  start_positions.insert(exception_row.start_pc);
2540  }
2541  for(std::size_t handler = 0; handler < start_positions.size(); ++handler)
2542  {
2543  // Append a CATCH-POP instruction before the end of the block
2544  code = code_blockt({ code, code_pop_catcht() });
2545  }
2546  }
2547 
2548  return code;
2549 }
2550 
2552  const source_locationt &location,
2553  const exprt &arg0,
2554  const exprt::operandst &op,
2555  exprt::operandst &results)
2556 {
2557  const reference_typet ref_type = java_reference_type(arg0.type());
2558  side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2559  java_new_array.operands() = op;
2560 
2561  code_blockt create;
2562 
2563  if(max_array_length != 0)
2564  {
2566  binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2567  code_assumet assume_le_max_size(le_max_size);
2568  create.add(assume_le_max_size);
2569  }
2570 
2571  const exprt tmp = tmp_variable("newarray", ref_type);
2572  create.add(code_assignt(tmp, java_new_array));
2573  results[0] = tmp;
2574 
2575  return create;
2576 }
2577 
2579  const source_locationt &location,
2580  const irep_idt &statement,
2581  const exprt &arg0,
2582  const exprt::operandst &op,
2583  exprt::operandst &results)
2584 {
2585  java_reference_typet ref_type = [&]() {
2586  if(statement == "newarray")
2587  {
2588  irep_idt id = arg0.type().id();
2589 
2590  char element_type;
2591  if(id == ID_bool)
2592  element_type = 'z';
2593  else if(id == ID_char)
2594  element_type = 'c';
2595  else if(id == ID_float)
2596  element_type = 'f';
2597  else if(id == ID_double)
2598  element_type = 'd';
2599  else if(id == ID_byte)
2600  element_type = 'b';
2601  else if(id == ID_short)
2602  element_type = 's';
2603  else if(id == ID_int)
2604  element_type = 'i';
2605  else if(id == ID_long)
2606  element_type = 'j';
2607  else
2608  element_type = '?';
2609  return java_array_type(element_type);
2610  }
2611  else
2612  {
2614  }
2615  }();
2616 
2617  side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2618  java_new_array.copy_to_operands(op[0]);
2619 
2620  code_blockt block;
2621 
2622  if(max_array_length != 0)
2623  {
2625  binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2626  code_assumet assume_le_max_size(le_max_size);
2627  block.add(std::move(assume_le_max_size));
2628  }
2629  const exprt tmp = tmp_variable("newarray", ref_type);
2630  block.add(code_assignt(tmp, java_new_array));
2631  results[0] = tmp;
2632 
2633  return block;
2634 }
2635 
2637  const source_locationt &location,
2638  const exprt &arg0,
2639  codet &c,
2640  exprt::operandst &results)
2641 {
2642  const reference_typet ref_type = java_reference_type(arg0.type());
2643  side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
2644 
2645  if(!location.get_line().empty())
2646  java_new_expr.add_source_location() = location;
2647 
2648  const exprt tmp = tmp_variable("new", ref_type);
2649  c = code_assignt(tmp, java_new_expr);
2650  c.add_source_location() = location;
2651  codet clinit_call =
2653  if(clinit_call.get_statement() != ID_skip)
2654  {
2655  c = code_blockt({clinit_call, c});
2656  }
2657  results[0] = tmp;
2658 }
2659 
2661  const source_locationt &location,
2662  const exprt &arg0,
2663  const exprt::operandst &op,
2664  const symbol_exprt &symbol_expr)
2665 {
2666  if(needed_lazy_methods && arg0.type().id() == ID_struct_tag)
2667  {
2668  needed_lazy_methods->add_needed_class(
2670  }
2671 
2672  code_blockt block;
2673  block.add_source_location() = location;
2674 
2675  // Note this initializer call deliberately inits the class used to make
2676  // the reference, which may be a child of the class that actually defines
2677  // the field.
2678  codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2679  if(clinit_call.get_statement() != ID_skip)
2680  block.add(clinit_call);
2681 
2683  "stack_static_field",
2684  block,
2686  symbol_expr.get_identifier());
2687  block.add(code_assignt(symbol_expr, op[0]));
2688  return block;
2689 }
2690 
2692  const fieldref_exprt &arg0,
2693  const exprt::operandst &op)
2694 {
2695  code_blockt block;
2697  "stack_field", block, bytecode_write_typet::FIELD, arg0.component_name());
2698  block.add(code_assignt(to_member(op[0], arg0, ns), op[1]));
2699  return block;
2700 }
2701 
2703  const source_locationt &source_location,
2704  const exprt &arg0,
2705  const symbol_exprt &symbol_expr,
2706  const bool is_assertions_disabled_field,
2707  codet &c,
2708  exprt::operandst &results)
2709 {
2711  {
2712  if(arg0.type().id() == ID_struct_tag)
2713  {
2714  needed_lazy_methods->add_needed_class(
2716  }
2717  else if(arg0.type().id() == ID_pointer)
2718  {
2719  const auto &pointer_type = to_pointer_type(arg0.type());
2720  if(pointer_type.subtype().id() == ID_struct_tag)
2721  {
2722  needed_lazy_methods->add_needed_class(
2724  }
2725  }
2726  else if(is_assertions_disabled_field)
2727  {
2728  needed_lazy_methods->add_needed_class(arg0.get_string(ID_class));
2729  }
2730  }
2731  symbol_exprt symbol_with_location = symbol_expr;
2732  symbol_with_location.add_source_location() = source_location;
2733  results[0] = java_bytecode_promotion(symbol_with_location);
2734 
2735  // Note this initializer call deliberately inits the class used to make
2736  // the reference, which may be a child of the class that actually defines
2737  // the field.
2738  codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2739  if(clinit_call.get_statement() != ID_skip)
2740  c = clinit_call;
2741  else if(is_assertions_disabled_field)
2742  {
2743  // set $assertionDisabled to false
2744  c = code_assignt(symbol_expr, false_exprt());
2745  }
2746 }
2747 
2749  const irep_idt &statement,
2750  const exprt::operandst &op,
2751  exprt::operandst &results) const
2752 {
2753  const int nan_value(statement[4] == 'l' ? -1 : 1);
2754  const typet result_type(java_int_type());
2755  const exprt nan_result(from_integer(nan_value, result_type));
2756 
2757  // (value1 == NaN || value2 == NaN) ?
2758  // nan_value : value1 < value2 ? -1 : value2 < value1 1 ? 1 : 0;
2759  // (value1 == NaN || value2 == NaN) ?
2760  // nan_value : value1 == value2 ? 0 : value1 < value2 -1 ? 1 : 0;
2761 
2762  isnan_exprt nan_op0(op[0]);
2763  isnan_exprt nan_op1(op[1]);
2764  exprt one = from_integer(1, result_type);
2765  exprt minus_one = from_integer(-1, result_type);
2766  results[0] = if_exprt(
2767  or_exprt(nan_op0, nan_op1),
2768  nan_result,
2769  if_exprt(
2770  ieee_float_equal_exprt(op[0], op[1]),
2771  from_integer(0, result_type),
2772  if_exprt(binary_relation_exprt(op[0], ID_lt, op[1]), minus_one, one)));
2773  return results;
2774 }
2775 
2777  const exprt::operandst &op,
2778  exprt::operandst &results) const
2779 { // The integer result on the stack is:
2780  // 0 if op[0] equals op[1]
2781  // -1 if op[0] is less than op[1]
2782  // 1 if op[0] is greater than op[1]
2783 
2784  const typet t = java_int_type();
2785  exprt one = from_integer(1, t);
2786  exprt minus_one = from_integer(-1, t);
2787 
2788  if_exprt greater =
2789  if_exprt(binary_relation_exprt(op[0], ID_gt, op[1]), one, minus_one);
2790 
2791  results[0] = if_exprt(
2792  binary_relation_exprt(op[0], ID_equal, op[1]), from_integer(0, t), greater);
2793  return results;
2794 }
2795 
2797  const irep_idt &statement,
2798  const exprt::operandst &op,
2799  exprt::operandst &results) const
2800 {
2801  const typet type = java_type_from_char(statement[0]);
2802 
2803  const std::size_t width = get_bytecode_type_width(type);
2804 
2805  // According to JLS 15.19 Shift Operators
2806  // a mask 0b11111 is applied for int and 0b111111 for long.
2807  exprt mask = from_integer(width - 1, op[1].type());
2808 
2809  results[0] = shl_exprt(op[0], bitand_exprt(op[1], mask));
2810  return results;
2811 }
2812 
2814  const irep_idt &statement,
2815  const exprt::operandst &op,
2816  exprt::operandst &results) const
2817 {
2818  const typet type = java_type_from_char(statement[0]);
2819 
2820  const std::size_t width = get_bytecode_type_width(type);
2821  typet target = unsignedbv_typet(width);
2822 
2823  exprt lhs = typecast_exprt::conditional_cast(op[0], target);
2824  exprt rhs = typecast_exprt::conditional_cast(op[1], target);
2825 
2826  results[0] =
2827  typecast_exprt::conditional_cast(lshr_exprt(lhs, rhs), op[0].type());
2828 
2829  return results;
2830 }
2831 
2833  const exprt &arg0,
2834  const exprt &arg1,
2835  const source_locationt &location,
2836  const method_offsett address)
2837 {
2838  code_blockt block;
2839  block.add_source_location() = location;
2840  // search variable on stack
2841  const exprt &locvar = variable(arg0, 'i', address);
2843  "stack_iinc",
2844  block,
2846  to_symbol_expr(locvar).get_identifier());
2847 
2848  const exprt arg1_int_type =
2850  code_assignt code_assign(
2851  variable(arg0, 'i', address),
2852  plus_exprt(
2854  variable(arg0, 'i', address), java_int_type()),
2855  arg1_int_type));
2856  block.add(std::move(code_assign));
2857 
2858  return block;
2859 }
2860 
2863  const exprt::operandst &op,
2864  const mp_integer &number,
2865  const source_locationt &location) const
2866 {
2867  const typecast_exprt lhs(op[0], java_reference_type(java_void_type()));
2868  const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2869 
2870  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2871 
2872  code_ifthenelset code_branch(
2873  binary_relation_exprt(lhs, ID_equal, rhs),
2874  code_gotot(label(std::to_string(label_number))));
2875 
2876  code_branch.then_case().add_source_location() =
2877  address_map.at(label_number).source->source_location;
2878  code_branch.add_source_location() = location;
2879 
2880  return code_branch;
2881 }
2882 
2885  const exprt::operandst &op,
2886  const mp_integer &number,
2887  const source_locationt &location) const
2888 {
2889  const typecast_exprt lhs(op[0], java_reference_type(java_void_type()));
2890  const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2891 
2892  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2893 
2894  code_ifthenelset code_branch(
2895  binary_relation_exprt(lhs, ID_notequal, rhs),
2896  code_gotot(label(std::to_string(label_number))));
2897 
2898  code_branch.then_case().add_source_location() =
2899  address_map.at(label_number).source->source_location;
2900  code_branch.add_source_location() = location;
2901 
2902  return code_branch;
2903 }
2904 
2907  const exprt::operandst &op,
2908  const irep_idt &id,
2909  const mp_integer &number,
2910  const source_locationt &location) const
2911 {
2912  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2913 
2914  code_ifthenelset code_branch(
2915  binary_relation_exprt(op[0], id, from_integer(0, op[0].type())),
2916  code_gotot(label(std::to_string(label_number))));
2917 
2918  code_branch.cond().add_source_location() = location;
2920  code_branch.then_case().add_source_location() =
2921  address_map.at(label_number).source->source_location;
2923  code_branch.add_source_location() = location;
2925 
2926  return code_branch;
2927 }
2928 
2931  const u1 bytecode,
2932  const exprt::operandst &op,
2933  const mp_integer &number,
2934  const source_locationt &location) const
2935 {
2936  const irep_idt cmp_op = get_if_cmp_operator(bytecode);
2937  binary_relation_exprt condition(
2938  op[0], cmp_op, typecast_exprt::conditional_cast(op[1], op[0].type()));
2939  condition.add_source_location() = location;
2940 
2941  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2942 
2943  code_ifthenelset code_branch(
2944  std::move(condition), code_gotot(label(std::to_string(label_number))));
2945 
2946  code_branch.then_case().add_source_location() =
2947  address_map.at(label_number).source->source_location;
2948  code_branch.add_source_location() = location;
2949 
2950  return code_branch;
2951 }
2952 
2954  const std::vector<method_offsett> &jsr_ret_targets,
2955  const exprt &arg0,
2956  const source_locationt &location,
2957  const method_offsett address)
2958 {
2959  code_blockt c;
2960  auto retvar = variable(arg0, 'a', address);
2961  for(size_t idx = 0, idxlim = jsr_ret_targets.size(); idx != idxlim; ++idx)
2962  {
2963  irep_idt number = std::to_string(jsr_ret_targets[idx]);
2964  code_gotot g(label(number));
2965  g.add_source_location() = location;
2966  if(idx == idxlim - 1)
2967  c.add(g);
2968  else
2969  {
2970  auto address_ptr =
2971  from_integer(jsr_ret_targets[idx], unsignedbv_typet(64));
2972  address_ptr.type() = pointer_type(java_void_type());
2973 
2974  code_ifthenelset branch(equal_exprt(retvar, address_ptr), std::move(g));
2975 
2976  branch.cond().add_source_location() = location;
2977  branch.add_source_location() = location;
2978 
2979  c.add(std::move(branch));
2980  }
2981  }
2982  return c;
2983 }
2984 
2990 static exprt conditional_array_cast(const exprt &expr, char type_char)
2991 {
2992  const auto ref_type =
2993  type_try_dynamic_cast<java_reference_typet>(expr.type());
2994  const bool typecast_not_needed =
2995  ref_type && ((type_char == 'b' && ref_type->subtype().get_identifier() ==
2996  "java::array[boolean]") ||
2997  *ref_type == java_array_type(type_char));
2998  return typecast_not_needed ? expr
2999  : typecast_exprt(expr, java_array_type(type_char));
3000 }
3001 
3003  const irep_idt &statement,
3004  const exprt::operandst &op)
3005 {
3006  PRECONDITION(op.size() == 2);
3007  const char type_char = statement[0];
3008  const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
3009  dereference_exprt deref{op_with_right_type};
3010  deref.set(ID_java_member_access, true);
3011 
3012  auto java_array_type = type_try_dynamic_cast<struct_tag_typet>(deref.type());
3013  INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3014  member_exprt data_ptr{
3016  plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3017  // tag it so it's easy to identify during instrumentation
3018  data_plus_offset.set(ID_java_array_access, true);
3019  return java_bytecode_promotion(dereference_exprt{data_plus_offset});
3020 }
3021 
3023  const exprt &index,
3024  char type_char,
3025  size_t address)
3026 {
3027  const exprt var = variable(index, type_char, address);
3028  if(type_char == 'i')
3029  {
3030  INVARIANT(
3032  type_try_dynamic_cast<bitvector_typet>(var.type())->get_width() <= 32,
3033  "iload can be used for boolean, byte, short, int and char");
3035  }
3036  INVARIANT(
3037  (type_char == 'a' && can_cast_type<reference_typet>(var.type())) ||
3038  var.type() == java_type_from_char(type_char),
3039  "Variable type must match [adflv]load return type");
3040  return var;
3041 }
3042 
3044  const irep_idt &statement,
3045  const exprt &arg0,
3046  const exprt::operandst &op,
3047  const method_offsett address,
3048  const source_locationt &location)
3049 {
3050  const exprt var = variable(arg0, statement[0], address);
3051  const irep_idt &var_name = to_symbol_expr(var).get_identifier();
3052 
3053  code_blockt block;
3054  block.add_source_location() = location;
3055 
3057  "stack_store",
3058  block,
3060  var_name);
3061 
3062  block.add(
3064  location);
3065  return block;
3066 }
3067 
3069  const irep_idt &statement,
3070  const exprt::operandst &op,
3071  const source_locationt &location)
3072 {
3073  PRECONDITION(op.size() == 3);
3074  const char type_char = statement[0];
3075  const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
3076  dereference_exprt deref{op_with_right_type};
3077  deref.set(ID_java_member_access, true);
3078 
3079  auto java_array_type = type_try_dynamic_cast<struct_tag_typet>(deref.type());
3080  INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3081  member_exprt data_ptr{
3083  plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3084  // tag it so it's easy to identify during instrumentation
3085  data_plus_offset.set(ID_java_array_access, true);
3086 
3087  code_blockt block;
3088  block.add_source_location() = location;
3089 
3091  "stack_astore", block, bytecode_write_typet::ARRAY_REF, "");
3092 
3093  code_assignt array_put{dereference_exprt{data_plus_offset}, op[2]};
3094  block.add(std::move(array_put), location);
3095  return block;
3096 }
3097 
3099  const source_locationt &location,
3100  std::size_t instruction_address,
3101  const exprt &arg0,
3102  codet &result_code)
3103 {
3104  const java_method_typet &method_type = to_java_method_type(arg0.type());
3105  const java_method_typet::parameterst &parameters(method_type.parameters());
3106  const typet &return_type = method_type.return_type();
3107 
3108  // Note these must be popped regardless of whether we understand the lambda
3109  // method or not
3110  code_function_callt::argumentst arguments = pop(parameters.size());
3111 
3112  irep_idt synthetic_class_name =
3113  lambda_synthetic_class_name(method_id, instruction_address);
3114 
3115  if(!symbol_table.has_symbol(synthetic_class_name))
3116  {
3117  // We failed to parse the invokedynamic handle as a Java 8+ lambda;
3118  // give up and return null.
3119  const auto value = zero_initializer(return_type, location, ns);
3120  if(!value.has_value())
3121  {
3122  log.error().source_location = location;
3123  log.error() << "failed to zero-initialize return type" << messaget::eom;
3124  throw 0;
3125  }
3126  return value;
3127  }
3128 
3129  // Construct an instance of the synthetic class created for this invokedynamic
3130  // site:
3131 
3132  irep_idt constructor_name = id2string(synthetic_class_name) + ".<init>";
3133 
3134  const symbolt &constructor_symbol = ns.lookup(constructor_name);
3135 
3136  code_blockt result;
3137 
3138  // SyntheticType lambda_new = new SyntheticType;
3139  const reference_typet ref_type =
3140  java_reference_type(struct_tag_typet(synthetic_class_name));
3141  side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
3142  const exprt new_instance = tmp_variable("lambda_new", ref_type);
3143  result.add(code_assignt(new_instance, java_new_expr, location));
3144 
3145  // lambda_new.<init>(capture_1, capture_2, ...);
3146  // Add the implicit 'this' parameter:
3147  arguments.insert(arguments.begin(), new_instance);
3150 
3151  code_function_callt constructor_call(
3152  constructor_symbol.symbol_expr(), arguments);
3153  constructor_call.add_source_location() = location;
3154  result.add(constructor_call);
3156  {
3157  needed_lazy_methods->add_needed_method(constructor_symbol.name);
3158  needed_lazy_methods->add_needed_class(synthetic_class_name);
3159  }
3160 
3161  result_code = std::move(result);
3162 
3163  if(return_type.id() == ID_empty)
3164  return {};
3165  else
3166  return new_instance;
3167 }
3168 
3171  const std::vector<method_offsett> &jsr_ret_targets,
3172  const std::vector<
3173  std::vector<java_bytecode_parse_treet::instructiont>::const_iterator>
3174  &ret_instructions) const
3175 { // Draw edges from every `ret` to every `jsr` successor. Could do better with
3176  // flow analysis to distinguish multiple subroutines within the same function.
3177  for(const auto &retinst : ret_instructions)
3178  {
3179  auto &a_entry = address_map.at(retinst->address);
3180  a_entry.successors.insert(
3181  a_entry.successors.end(), jsr_ret_targets.begin(), jsr_ret_targets.end());
3182  }
3183 }
3184 
3185 std::vector<java_bytecode_convert_methodt::method_offsett>
3187  const method_offsett address,
3189  const
3190 {
3191  std::vector<method_offsett> result;
3192  for(const auto &exception_row : exception_table)
3193  {
3194  if(address >= exception_row.start_pc && address < exception_row.end_pc)
3195  result.push_back(exception_row.handler_pc);
3196  }
3197  return result;
3198 }
3199 
3213  symbolt &method_symbol,
3215  &local_variable_table,
3216  symbol_table_baset &symbol_table)
3217 {
3218  // Obtain a std::vector of java_method_typet::parametert objects from the
3219  // (function) type of the symbol
3220  java_method_typet &method_type = to_java_method_type(method_symbol.type);
3221  java_method_typet::parameterst &parameters = method_type.parameters();
3222 
3223  // Find number of parameters
3224  unsigned slots_for_parameters = java_method_parameter_slots(method_type);
3225 
3226  // Find parameter names in the local variable table:
3227  typedef std::pair<irep_idt, irep_idt> base_name_and_identifiert;
3228  std::map<std::size_t, base_name_and_identifiert> param_names;
3229  for(const auto &v : local_variable_table)
3230  {
3231  if(v.index < slots_for_parameters)
3232  param_names.emplace(
3233  v.index,
3234  make_pair(
3235  v.name, id2string(method_symbol.name) + "::" + id2string(v.name)));
3236  }
3237 
3238  // Assign names to parameters
3239  std::size_t param_index = 0;
3240  for(auto &param : parameters)
3241  {
3242  irep_idt base_name, identifier;
3243 
3244  // Construct a sensible base name (base_name) and a fully qualified name
3245  // (identifier) for every parameter of the method under translation,
3246  // regardless of whether we have an LVT or not; and assign it to the
3247  // parameter object (which is stored in the type of the symbol, not in the
3248  // symbol table)
3249  if(param_index == 0 && param.get_this())
3250  {
3251  // my.package.ClassName.myMethodName:(II)I::this
3252  base_name = ID_this;
3253  identifier = id2string(method_symbol.name) + "::" + id2string(base_name);
3254  }
3255  else
3256  {
3257  auto param_name = param_names.find(param_index);
3258  if(param_name != param_names.end())
3259  {
3260  base_name = param_name->second.first;
3261  identifier = param_name->second.second;
3262  }
3263  else
3264  {
3265  // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
3266  // variable slot where the parameter is stored and T is a character
3267  // indicating the type
3268  const typet &type = param.type();
3269  char suffix = java_char_from_type(type);
3270  base_name = "arg" + std::to_string(param_index) + suffix;
3271  identifier =
3272  id2string(method_symbol.name) + "::" + id2string(base_name);
3273  }
3274  }
3275  param.set_base_name(base_name);
3276  param.set_identifier(identifier);
3277 
3278  // Build a new symbol for the parameter and add it to the symbol table
3279  parameter_symbolt parameter_symbol;
3280  parameter_symbol.base_name = base_name;
3281  parameter_symbol.mode = ID_java;
3282  parameter_symbol.name = identifier;
3283  parameter_symbol.type = param.type();
3284  symbol_table.insert(parameter_symbol);
3285 
3286  param_index += java_local_variable_slots(param.type());
3287  }
3288 }
3289 
3291  const symbolt &class_symbol,
3292  const java_bytecode_parse_treet::methodt &method,
3293  symbol_table_baset &symbol_table,
3294  message_handlert &message_handler,
3295  size_t max_array_length,
3296  bool throw_assertion_error,
3297  optionalt<ci_lazy_methods_neededt> needed_lazy_methods,
3298  java_string_library_preprocesst &string_preprocess,
3299  const class_hierarchyt &class_hierarchy,
3300  bool threading_support,
3301  const optionalt<prefix_filtert> &method_context,
3302  bool assert_no_exceptions_thrown)
3303 
3304 {
3306  symbol_table,
3307  message_handler,
3308  max_array_length,
3309  throw_assertion_error,
3310  needed_lazy_methods,
3311  string_preprocess,
3312  class_hierarchy,
3313  threading_support,
3314  assert_no_exceptions_thrown);
3315 
3316  java_bytecode_convert_method(class_symbol, method, method_context);
3317 }
3318 
3325  const irep_idt &classname,
3326  const irep_idt &mangled_method_name) const
3327 {
3328  const auto inherited_method = get_inherited_method_implementation(
3329  mangled_method_name, classname, symbol_table);
3330 
3331  return inherited_method.has_value();
3332 }
3333 
3340  const irep_idt &class_identifier,
3341  const irep_idt &component_name) const
3342 {
3343  const auto inherited_method = get_inherited_component(
3344  class_identifier, component_name, symbol_table, true);
3345 
3346  INVARIANT(
3347  inherited_method.has_value(), "static field should be in symbol table");
3348 
3349  return inherited_method->get_full_component_identifier();
3350 }
3351 
3359  const std::string &tmp_var_prefix,
3360  code_blockt &block,
3361  const bytecode_write_typet write_type,
3362  const irep_idt &identifier)
3363 {
3364  const std::function<bool(
3365  const std::function<tvt(const exprt &expr)>, const exprt &expr)>
3366  entry_matches = [&entry_matches](
3367  const std::function<tvt(const exprt &expr)> predicate,
3368  const exprt &expr) {
3369  const tvt &tvres = predicate(expr);
3370  if(tvres.is_unknown())
3371  {
3372  return std::any_of(
3373  expr.operands().begin(),
3374  expr.operands().end(),
3375  [&predicate, &entry_matches](const exprt &expr) {
3376  return entry_matches(predicate, expr);
3377  });
3378  }
3379  else
3380  {
3381  return tvres.is_true();
3382  }
3383  };
3384 
3385  // Function that checks whether the expression accesses a member with the
3386  // given identifier name. These accesses are created in the case of `iinc`, or
3387  // non-array `?store` instructions.
3388  const std::function<tvt(const exprt &expr)> has_member_entry = [&identifier](
3389  const exprt &expr) {
3390  const auto member_expr = expr_try_dynamic_cast<member_exprt>(expr);
3391  return !member_expr ? tvt::unknown()
3392  : tvt(member_expr->get_component_name() == identifier);
3393  };
3394 
3395  // Function that checks whether the expression is a symbol with the given
3396  // identifier name. These accesses are created in the case of `putstatic` or
3397  // `putfield` instructions.
3398  const std::function<tvt(const exprt &expr)> is_symbol_entry =
3399  [&identifier](const exprt &expr) {
3400  const auto symbol_expr = expr_try_dynamic_cast<symbol_exprt>(expr);
3401  return !symbol_expr ? tvt::unknown()
3402  : tvt(symbol_expr->get_identifier() == identifier);
3403  };
3404 
3405  // Function that checks whether the expression is a dereference
3406  // expression. These accesses are created in `?astore` array write
3407  // instructions.
3408  const std::function<tvt(const exprt &expr)> is_dereference_entry =
3409  [](const exprt &expr) {
3410  const auto dereference_expr =
3411  expr_try_dynamic_cast<dereference_exprt>(expr);
3412  return !dereference_expr ? tvt::unknown() : tvt(true);
3413  };
3414 
3415  for(auto &stack_entry : stack)
3416  {
3417  bool replace = false;
3418  switch(write_type)
3419  {
3422  replace = entry_matches(is_symbol_entry, stack_entry);
3423  break;
3425  replace = entry_matches(is_dereference_entry, stack_entry);
3426  break;
3428  replace = entry_matches(has_member_entry, stack_entry);
3429  break;
3430  }
3431  if(replace)
3432  {
3434  tmp_var_prefix, stack_entry.type(), block, stack_entry);
3435  }
3436  }
3437 }
3438 
3441  const std::string &tmp_var_prefix,
3442  const typet &tmp_var_type,
3443  code_blockt &block,
3444  exprt &stack_entry)
3445 {
3446  const exprt tmp_var=
3447  tmp_variable(tmp_var_prefix, tmp_var_type);
3448  block.add(code_assignt(tmp_var, stack_entry));
3449  stack_entry=tmp_var;
3450 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
void branch(goto_modelt &goto_model, const irep_idt &id)
Definition: branch.cpp:20
struct bytecode_infot const bytecode_info[]
#define BC_ifnull
#define BC_jsr
#define BC_invokestatic
#define BC_iinc
#define BC_tableswitch
#define BC_drem
#define BC_d2l
#define BC_dup_x1
#define BC_invokespecial
#define BC_getfield
#define BC_nop
Definition: bytecode_info.h:65
#define BC_pop2
#define BC_instanceof
#define BC_goto
#define BC_arraylength
#define BC_dup_x2
#define BC_ldc
Definition: bytecode_info.h:83
#define BC_ldc2_w
Definition: bytecode_info.h:85
#define BC_ldiv
#define BC_iconst_m1
Definition: bytecode_info.h:67
#define BC_monitorexit
#define BC_new
#define BC_lookupswitch
#define BC_ifge
#define BC_newarray
#define BC_getstatic
#define BC_jsr_w
#define BC_anewarray
#define BC_lrem
#define BC_dup2
#define BC_aconst_null
Definition: bytecode_info.h:66
#define BC_pop
#define BC_return
#define BC_d2i
#define BC_dup
#define BC_athrow
#define BC_ifgt
#define BC_putfield
#define BC_f2i
#define BC_idiv
#define BC_ifeq
#define BC_monitorenter
#define BC_ret
#define BC_goto_w
#define BC_swap
#define BC_iflt
#define BC_checkcast
#define BC_putstatic
#define BC_ldc_w
Definition: bytecode_info.h:84
#define BC_multianewarray
#define BC_dup2_x2
uint8_t u1
Definition: bytecode_info.h:55
#define BC_dup2_x1
#define BC_invokedynamic
#define BC_irem
#define BC_invokeinterface
#define BC_frem
#define BC_ifnonnull
#define BC_invokevirtual
#define BC_ifle
#define BC_f2l
#define BC_ifne
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
Control Flow Graph.
Compute dominators for CFG of goto_function.
Class Hierarchy.
Arithmetic right shift.
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition: symbol.h:147
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:675
Bit-wise AND.
Bit-wise OR.
std::size_t get_width() const
Definition: std_types.h:1048
Bit-wise XOR.
The Boolean type.
Definition: std_types.h:37
Non-graph-based representation of the class hierarchy.
An expression describing a method on a class.
Definition: std_expr.h:3111
const irep_idt & get_identifier() const
A unique identifier of the combination of class and method overload to which this expression refers.
Definition: std_expr.h:3171
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition: std_expr.h:3156
const irep_idt & base_method_name() const
The name of the method to which this expression is applied as would be seen in the source code.
Definition: std_expr.h:3163
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition: std_expr.h:3148
A non-fatal assertion, which checks a condition then permits execution to continue.
Definition: std_code.h:619
A codet representing an assignment in the program.
Definition: std_code.h:295
An assumption, which must hold in subsequent code.
Definition: std_code.h:567
A codet representing sequential composition of program statements.
Definition: std_code.h:170
void append(const code_blockt &extra_block)
Add all the codets from extra_block to the current code_blockt.
Definition: std_code.cpp:88
void add(const codet &code)
Definition: std_code.h:208
code_operandst & statements()
Definition: std_code.h:178
codet & find_last_statement()
Definition: std_code.cpp:98
A codet representing the declaration of a local variable.
Definition: std_code.h:402
codet representation of an expression statement.
Definition: std_code.h:1842
codet representation of a function call statement.
Definition: std_code.h:1215
exprt & function()
Definition: std_code.h:1250
exprt::operandst argumentst
Definition: std_code.h:1224
codet representation of a goto statement.
Definition: std_code.h:1159
codet representation of an if-then-else statement.
Definition: std_code.h:778
const codet & then_case() const
Definition: std_code.h:806
const exprt & cond() const
Definition: std_code.h:796
codet representation of a label for branch targets.
Definition: std_code.h:1407
codet & code()
Definition: std_code.h:1425
void set_label(const irep_idt &label)
Definition: std_code.h:1420
A statement that catches an exception, assigning the exception in flight to an expression (e....
Definition: std_code.h:2384
Pops an exception handler from the stack of active handlers (i.e.
Definition: std_code.h:2347
Pushes an exception handler, of the form: exception_tag1 -> label1 exception_tag2 -> label2 ....
Definition: std_code.h:2253
std::vector< exception_list_entryt > exception_listt
Definition: std_code.h:2297
exception_listt & exception_list()
Definition: std_code.h:2308
codet representation of a "return from a function" statement.
Definition: std_code.h:1342
A codet representing a skip statement.
Definition: std_code.h:270
codet representation of a switch-case, i.e. a case statement within a switch.
Definition: std_code.h:1471
void set_default()
Definition: std_code.h:1483
codet representing a switch statement.
Definition: std_code.h:866
void set_base_name(const irep_idt &name)
Definition: std_types.h:790
void set_is_constructor()
Definition: std_types.h:895
const typet & return_type() const
Definition: std_types.h:850
const irep_idt & get_access() const
Definition: std_types.h:880
bool has_this() const
Definition: std_types.h:821
void set_access(const irep_idt &access)
Definition: std_types.h:885
bool get_is_constructor() const
Definition: std_types.h:890
const parameterst & parameters() const
Definition: std_types.h:860
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:35
const irep_idt & get_statement() const
Definition: std_code.h:71
A constant literal expression.
Definition: std_expr.h:2668
Operator to dereference a pointer.
Definition: pointer_expr.h:256
Division.
Definition: std_expr.h:981
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
bool empty() const
Definition: dstring.h:88
Equality.
Definition: std_expr.h:1140
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:133
source_locationt & add_source_location()
Definition: expr.h:239
const source_locationt & source_location() const
Definition: expr.h:234
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:96
std::size_t size() const
Amount of nodes this expression tree contains.
Definition: expr.cpp:26
The Boolean constant false.
Definition: std_expr.h:2726
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
irep_idt class_name() const
irep_idt component_name() const
IEEE-floating-point equality.
Definition: floatbv_expr.h:264
static ieee_float_spect single_precision()
Definition: ieee_float.h:74
static ieee_float_spect double_precision()
Definition: ieee_float.h:80
constant_exprt to_expr() const
Definition: ieee_float.cpp:698
void from_expr(const constant_exprt &expr)
void from_integer(const mp_integer &i)
Definition: ieee_float.cpp:511
The trinary if-then-else operator.
Definition: std_expr.h:2087
constant_exprt largest_expr() const
Return an expression representing the largest value of this type.
Definition: std_types.cpp:200
constant_exprt smallest_expr() const
Return an expression representing the smallest value of this type.
Definition: std_types.cpp:195
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:383
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:431
const std::string & get_string(const irep_namet &name) const
Definition: irep.h:420
const irept & find(const irep_namet &name) const
Definition: irep.cpp:103
const irep_idt & id() const
Definition: irep.h:407
void make_nil()
Definition: irep.h:464
void swap(irept &irep)
Definition: irep.h:452
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:51
Evaluates to true if the operand is NaN.
Definition: floatbv_expr.h:88
void convert_dup2_x2(exprt::operandst &op, exprt::operandst &results)
optionalt< ci_lazy_methods_neededt > needed_lazy_methods
method_offsett slots_for_parameters
Number of local variable slots used by the JVM to pass parameters upon invocation of the method under...
irep_idt get_static_field(const irep_idt &class_identifier, const irep_idt &component_name) const
Get static field identifier referred to by class_identifier.component_name Note this may be inherited...
void draw_edges_from_ret_to_jsr(address_mapt &address_map, const std::vector< method_offsett > &jsr_ret_targets, const std::vector< std::vector< java_bytecode_parse_treet::instructiont >::const_iterator > &ret_instructions) const
code_ifthenelset convert_if_cmp(const java_bytecode_convert_methodt::address_mapt &address_map, const u1 bytecode, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void create_stack_tmp_var(const std::string &, const typet &, code_blockt &, exprt &)
actually create a temporary variable to hold the value of a stack entry
codet & do_exception_handling(const methodt &method, const std::set< method_offsett > &working_set, method_offsett cur_pc, codet &c)
exprt::operandst & convert_ushr(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt convert_load(const exprt &index, char type_char, size_t address)
Load reference from local variable.
void setup_local_variables(const methodt &m, const address_mapt &amap)
See find_initializers_for_slot above for more detail.
const variablet & find_variable_for_slot(size_t address, variablest &var_list)
See above.
void push(const exprt::operandst &o)
code_blockt convert_store(const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, const method_offsett address, const source_locationt &location)
code_blockt convert_astore(const irep_idt &statement, const exprt::operandst &op, const source_locationt &location)
static irep_idt label(const irep_idt &address)
std::vector< method_offsett > try_catch_handler(method_offsett address, const java_bytecode_parse_treet::methodt::exception_tablet &exception_table) const
code_blockt convert_instructions(const methodt &)
codet & replace_call_to_cprover_assume(source_locationt location, codet &c)
java_string_library_preprocesst & string_preprocess
code_blockt convert_ret(const std::vector< method_offsett > &jsr_ret_targets, const exprt &arg0, const source_locationt &location, const method_offsett address)
codet get_clinit_call(const irep_idt &classname)
Each static access to classname should be prefixed with a check for necessary static init; this retur...
code_blockt convert_putstatic(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, const symbol_exprt &symbol_expr)
code_ifthenelset convert_if(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const irep_idt &id, const mp_integer &number, const source_locationt &location) const
code_ifthenelset convert_ifnull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
code_blockt & get_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address)
'tree' describes a tree of code_blockt objects; this_block is the corresponding block (thus they are ...
codet convert_pop(const irep_idt &statement, const exprt::operandst &op)
exprt::operandst pop(std::size_t n)
exprt::operandst & convert_const(const irep_idt &statement, const constant_exprt &arg0, exprt::operandst &results) const
void convert_checkcast(const exprt &arg0, const exprt::operandst &op, codet &c, exprt::operandst &results) const
void convert_dup2_x1(exprt::operandst &op, exprt::operandst &results)
code_blockt convert_putfield(const fieldref_exprt &arg0, const exprt::operandst &op)
code_blockt convert_multianewarray(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
code_ifthenelset convert_ifnonull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void convert(const symbolt &class_symbol, const methodt &, const optionalt< prefix_filtert > &method_context)
code_blockt convert_parameter_annotations(const methodt &method, const java_method_typet &method_type)
code_switcht convert_switch(const exprt::operandst &op, const java_bytecode_parse_treet::instructiont::argst &args, const source_locationt &location)
void save_stack_entries(const std::string &, code_blockt &, const bytecode_write_typet, const irep_idt &)
Create temporary variables if a write instruction can have undesired side- effects.
void convert_new(const source_locationt &location, const exprt &arg0, codet &c, exprt::operandst &results)
exprt variable(const exprt &arg, char type_char, size_t address)
Returns an expression indicating a local variable suitable to load/store from a bytecode at address a...
std::map< method_offsett, converted_instructiont > address_mapt
static void replace_goto_target(codet &repl, const irep_idt &old_label, const irep_idt &new_label)
Find all goto statements in 'repl' that target 'old_label' and redirect them to 'new_label'.
code_blockt convert_iinc(const exprt &arg0, const exprt &arg1, const source_locationt &location, method_offsett address)
exprt::operandst & convert_cmp2(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
void convert_dup2(exprt::operandst &op, exprt::operandst &results)
optionalt< exprt > convert_invoke_dynamic(const source_locationt &location, std::size_t instruction_address, const exprt &arg0, codet &result_code)
bool is_method_inherited(const irep_idt &classname, const irep_idt &mangled_method_name) const
Returns true iff method methodid from class classname is a method inherited from a class or interface...
void convert_athrow(const source_locationt &location, const exprt::operandst &op, codet &c, exprt::operandst &results) const
typet method_return_type
Return type of the method under conversion.
void convert_getstatic(const source_locationt &source_location, const exprt &arg0, const symbol_exprt &symbol_expr, bool is_assertions_disabled_field, codet &c, exprt::operandst &results)
irep_idt method_id
Fully qualified name of the method under translation.
code_blockt convert_newarray(const source_locationt &location, const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
exprt::operandst & convert_shl(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt::operandst & convert_cmp(const exprt::operandst &op, exprt::operandst &results) const
void pop_residue(std::size_t n)
removes minimum(n, stack.size()) elements from the stack
void convert_invoke(source_locationt location, const irep_idt &statement, class_method_descriptor_exprt &class_method_descriptor, codet &c, exprt::operandst &results)
code_blockt & get_or_create_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address, const address_mapt &amap, bool allow_merge=true)
As above, but this version can additionally create a new branch in the block_tree-node and code_block...
codet convert_monitorenterexit(const irep_idt &statement, const exprt::operandst &op, const source_locationt &source_location)
irep_idt current_method
A copy of method_id :/.
static exprt convert_aload(const irep_idt &statement, const exprt::operandst &op)
symbol_exprt tmp_variable(const std::string &prefix, const typet &type)
const methodst & methods() const
Definition: java_types.h:302
void add_throws_exception(irep_idt exception)
Definition: java_types.h:134
std::vector< parametert > parameterst
Definition: std_types.h:746
void set_is_varargs(bool is_varargs)
Definition: java_types.h:164
void set_is_synthetic(bool is_synthetic)
Definition: java_types.h:174
void set_is_final(bool is_final)
Definition: java_types.h:144
void set_native(bool is_native)
Definition: java_types.h:154
This is a specialization of reference_typet.
Definition: java_types.h:605
codet replace_character_call(code_function_callt call)
Logical right shift.
Extract member of struct or union.
Definition: std_expr.h:2528
source_locationt source_location
Definition: message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & error() const
Definition: message.h:399
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
static eomt eom
Definition: message.h:297
Binary minus.
Definition: std_expr.h:890
Modulo.
Definition: std_expr.h:1050
Binary multiplication Associativity is not specified.
Definition: std_expr.h:936
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:51
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:140
The NIL expression.
Definition: std_expr.h:2735
Disequality.
Definition: std_expr.h:1198
The null pointer constant.
Definition: std_expr.h:2751
Boolean OR.
Definition: std_expr.h:1943
Symbol table entry of function parameterThis is a symbol generated as part of type checking.
Definition: symbol.h:171
Given a string of the format '?blah?', will return true when compared against a string that matches a...
Definition: pattern.h:21
The plus expression Associativity is not specified.
Definition: std_expr.h:831
The reference type.
Definition: std_types.h:1560
Remainder of division.
Definition: std_expr.h:1095
Left shift.
A side_effect_exprt representation of a side effect that throws an exception.
Definition: std_code.h:2205
An expression containing a side effect.
Definition: std_code.h:1898
void set_comment(const irep_idt &comment)
void set_property_class(const irep_idt &property_class)
const irep_idt & get_line() const
std::string as_string() const
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:498
Expression to hold a symbol (variable)
Definition: std_expr.h:81
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition: std_expr.h:100
const irep_idt & get_identifier() const
Definition: std_expr.h:110
The symbol table base class interface.
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual std::pair< symbolt &, bool > insert(symbolt symbol)=0
Move or copy a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:20
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
bool is_file_local
Definition: symbol.h:66
irep_idt module
Name of module the symbol belongs to.
Definition: symbol.h:43
bool is_static_lifetime
Definition: symbol.h:65
bool is_thread_local
Definition: symbol.h:65
bool is_type
Definition: symbol.h:61
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:122
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
const irep_idt & get_identifier() const
Definition: std_types.h:459
Definition: threeval.h:20
bool is_unknown() const
Definition: threeval.h:27
bool is_true() const
Definition: threeval.h:25
static tvt unknown()
Definition: threeval.h:33
Semantic type conversion.
Definition: std_expr.h:1781
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1789
The type of an expression, extends irept.
Definition: type.h:28
const typet & subtype() const
Definition: type.h:47
The unary minus expression.
Definition: std_expr.h:391
static irep_idt get_exception_type(const typet &type)
Returns the compile type of an exception.
Fixed-width bit-vector with unsigned binary interpretation.
Definition: std_types.h:1223
An exception that is raised for unsupported class signature.
Definition: java_types.h:1132
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
#define forall_operands(it, expr)
Definition: expr.h:18
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
API to expression classes for floating-point arithmetic.
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:49
static int8_t r
Definition: irep_hash.h:59
void convert_annotations(const java_bytecode_parse_treet::annotationst &parsed_annotations, std::vector< java_annotationt > &java_annotations)
Convert parsed annotations into the symbol table.
void java_bytecode_convert_method_lazy(symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, symbol_tablet &symbol_table, message_handlert &message_handler)
This creates a method symbol in the symtab, but doesn't actually perform method conversion just yet.
static void gather_symbol_live_ranges(java_bytecode_convert_methodt::method_offsett pc, const exprt &e, std::map< irep_idt, java_bytecode_convert_methodt::variablet > &result)
static bool is_constructor(const irep_idt &method_name)
static irep_idt get_method_identifier(const irep_idt &class_identifier, const java_bytecode_parse_treet::methodt &method)
void java_bytecode_initialize_parameter_names(symbolt &method_symbol, const java_bytecode_parse_treet::methodt::local_variable_tablet &local_variable_table, symbol_table_baset &symbol_table)
This uses a cut-down version of the logic in java_bytecode_convert_methodt::convert to initialize sym...
static member_exprt to_member(const exprt &pointer, const fieldref_exprt &field_reference, const namespacet &ns)
Build a member exprt for accessing a specific field that may come from a base class.
static exprt conditional_array_cast(const exprt &expr, char type_char)
Add typecast if necessary to expr to make it compatible with array type corresponding to type_char (s...
void create_parameter_symbols(const java_method_typet::parameterst &parameters, expanding_vectort< std::vector< java_bytecode_convert_methodt::variablet >> &variables, symbol_table_baset &symbol_table)
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, bool throw_assertion_error, optionalt< ci_lazy_methods_neededt > needed_lazy_methods, java_string_library_preprocesst &string_preprocess, const class_hierarchyt &class_hierarchy, bool threading_support, const optionalt< prefix_filtert > &method_context, bool assert_no_exceptions_thrown)
java_method_typet member_type_lazy(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name, const std::string &method_name, message_handlert &message_handler)
Returns the member type for a method, based on signature or descriptor.
void create_parameter_names(const java_bytecode_parse_treet::methodt &m, const irep_idt &method_identifier, java_method_typet::parameterst &parameters, const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
Extracts the names of parameters from the local variable table in the method, and uses it to construc...
void create_method_stub_symbol(const irep_idt &identifier, const irep_idt &base_name, const irep_idt &pretty_name, const typet &type, const irep_idt &declaring_class, symbol_table_baset &symbol_table, message_handlert &message_handler)
static std::size_t get_bytecode_type_width(const typet &ty)
static void adjust_invoke_argument_types(const java_method_typet::parameterst &parameters, code_function_callt::argumentst &arguments)
static void assign_parameter_names(java_method_typet &ftype, const irep_idt &name_prefix, symbol_table_baset &symbol_table)
Iterates through the parameters of the function type ftype, finds a new new name for each parameter a...
static irep_idt get_if_cmp_operator(const u1 bytecode)
JAVA Bytecode Language Conversion.
JAVA Bytecode Language Conversion.
Java-specific exprt subclasses.
irep_idt clinit_wrapper_name(const irep_idt &class_name)
Get the Java static initializer wrapper name for a given class (the wrapper checks if static initiali...
Produce code for simple implementation of String Java libraries.
Representation of a constant Java string.
bool can_cast_expr< java_string_literal_exprt >(const exprt &base)
signedbv_typet java_int_type()
Definition: java_types.cpp:32
typet java_type_from_char(char t)
Constructs a type indicated by the given character:
Definition: java_types.cpp:247
java_reference_typet java_reference_array_type(const struct_tag_typet &subtype)
Definition: java_types.cpp:540
empty_typet java_void_type()
Definition: java_types.cpp:38
char java_char_from_type(const typet &type)
Definition: java_types.cpp:706
optionalt< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
Definition: java_types.cpp:560
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
Definition: java_types.cpp:145
signedbv_typet java_byte_type()
Definition: java_types.cpp:56
std::string pretty_signature(const java_method_typet &method_type)
typet java_bytecode_promotion(const typet &type)
Java does not support byte/short return types. These are always promoted.
Definition: java_types.cpp:268
java_reference_typet java_array_type(const char subtype)
Construct an array pointer type.
Definition: java_types.cpp:110
signedbv_typet java_short_type()
Definition: java_types.cpp:50
reference_typet java_reference_type(const typet &subtype)
Definition: java_types.cpp:89
c_bool_typet java_boolean_type()
Definition: java_types.cpp:80
unsignedbv_typet java_char_type()
Definition: java_types.cpp:62
const java_class_typet & to_java_class_type(const typet &type)
Definition: java_types.h:584
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:186
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_component(const irep_idt &component_class_id, const irep_idt &component_name, const symbol_tablet &symbol_table, bool include_interfaces)
Finds an inherited component (method or field), taking component visibility into account.
Definition: java_utils.cpp:451
optionalt< irep_idt > declaring_class(const symbolt &symbol)
Gets the identifier of the class which declared a given symbol.
Definition: java_utils.cpp:571
void set_declaring_class(symbolt &symbol, const irep_idt &declaring_class)
Sets the identifier of the class which declared a given symbol to declaring_class.
Definition: java_utils.cpp:577
unsigned java_method_parameter_slots(const java_method_typet &t)
Returns the the number of JVM local variables (slots) used by the JVM to pass, upon call,...
Definition: java_utils.cpp:146
irep_idt strip_java_namespace_prefix(const irep_idt &to_strip)
Strip java:: prefix from given identifier.
Definition: java_utils.cpp:410
dereference_exprt checked_dereference(const exprt &expr)
Dereference an expression and flag it for a null-pointer check.
Definition: java_utils.cpp:284
unsigned java_local_variable_slots(const typet &t)
Returns the number of JVM local variables (slots) taken by a local variable that, when translated to ...
Definition: java_utils.cpp:127
void merge_source_location_rec(exprt &expr, const source_locationt &source_location)
Attaches a source location to an expression and all of its subexpressions.
Definition: java_utils.cpp:200
irep_idt lambda_synthetic_class_name(const irep_idt &method_identifier, std::size_t instruction_address)
static symbolt constructor_symbol(synthetic_methods_mapt &synthetic_methods, const irep_idt &synthetic_class_name, java_method_typet constructor_type)
Java lambda code synthesis.
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:106
BigInt mp_integer
Definition: mp_arith.h:19
nonstd::optional< T > optionalt
Definition: optional.h:35
Pattern matching for bytecode instructions.
Remove function exceptional returns.
Replace function returns by assignments to global variables.
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:511
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:424
static const char * message(const statust &status)
Makes a status message string from a status.
const code_gotot & to_code_goto(const codet &code)
Definition: std_code.h:1193
const code_labelt & to_code_label(const codet &code)
Definition: std_code.h:1452
const codet & to_code(const exprt &expr)
Definition: std_code.h:155
const code_blockt & to_code_block(const codet &code)
Definition: std_code.h:256
API to expression classes.
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:2701
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:190
bool can_cast_type< reference_typet >(const typet &type)
Check whether a reference to a typet is a reference_typet.
Definition: std_types.h:1586
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
Definition: std_types.h:1096
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:303
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:949
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1533
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:523
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
Definition: std_types.h:1206
bool can_cast_type< bitvector_typet >(const typet &type)
Check whether a reference to a typet is a bitvector_typet.
Definition: std_types.h:1071
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
const char * mnemonic
Definition: bytecode_info.h:46
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.
std::vector< local_variablet > local_variable_tablet
static optionalt< annotationt > find_annotation(const annotationst &annotations, const irep_idt &annotation_type_name)
Find an annotation given its name.
Over-approximative uncaught exceptions analysis.