Skip to content

Resolving signature/descriptor missmatch #1489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/java_bytecode/java_bytecode_convert_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ void java_bytecode_convert_classt::convert(const classt &c)
*class_symbol,
method_identifier,
method,
symbol_table);
symbol_table,
get_message_handler());
lazy_methods[method_identifier]=std::make_pair(class_symbol, &method);
}

Expand Down
75 changes: 73 additions & 2 deletions src/java_bytecode/java_bytecode_convert_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,69 @@ const exprt java_bytecode_convert_methodt::variable(
}
}

/// Returns the member type for a method, based on signature or descriptor
/// \param descriptor
/// descriptor of the method
/// \param signature
/// signature of the method
/// \param class_name
/// string containing the name of the corresponding class
/// \param method_name
/// string containing the name of the method
/// \param message_handler
/// message handler to collect warnings
/// \return
/// the constructed member type
typet member_type_lazy(const std::string &descriptor,
const optionalt<std::string> &signature,
const std::string &class_name,
const std::string &method_name,
message_handlert &message_handler)
{
// In order to construct the method type, we can either use signature or
// descriptor. Since only signature contains the generics info, we want to
// use signature whenever possible. We revert to using descriptor if (1) the
// signature does not exist, (2) an unsupported generics are present or
// (3) in the special case when the number of parameters in the descriptor
// does not match the number of parameters in the signature - e.g. for
// certain types of inner classes and enums (see unit tests for examples).

messaget message(message_handler);

typet member_type_from_descriptor=java_type_from_string(descriptor);
INVARIANT(member_type_from_descriptor.id()==ID_code, "Must be code type");
if(signature.has_value())
{
try
{
typet member_type_from_signature=java_type_from_string(
signature.value(),
class_name);
INVARIANT(member_type_from_signature.id()==ID_code, "Must be code type");
if(to_code_type(member_type_from_signature).parameters().size()==
to_code_type(member_type_from_descriptor).parameters().size())
{
return member_type_from_signature;
}
else
{
message.warning() << "method: " << class_name << "." << method_name
<< "\n signature: " << signature.value() << "\n descriptor: "
<< descriptor << "\n different number of parameters, reverting to "
"descriptor" << message.eom;
}
}
catch(unsupported_java_class_signature_exceptiont &e)
{
message.warning() << "method: " << class_name << "." << method_name
<< "\n could not parse signature: " << signature.value() << "\n "
<< e.what() << "\n" << " reverting to descriptor: "
<< descriptor << message.eom;
}
}
return member_type_from_descriptor;
}

/// This creates a method symbol in the symtab, but doesn't actually perform
/// method conversion just yet. The caller should call
/// java_bytecode_convert_method later to give the symbol/method a body.
Expand All @@ -256,14 +319,22 @@ const exprt java_bytecode_convert_methodt::variable(
/// (e.g. "x.y.z.f:(I)")
/// `m`: parsed method object to convert
/// `symbol_table`: global symbol table (will be modified)
/// `message_handler`: message handler to collect warnings
void java_bytecode_convert_method_lazy(
const symbolt &class_symbol,
const irep_idt &method_identifier,
const java_bytecode_parse_treet::methodt &m,
symbol_tablet &symbol_table)
symbol_tablet &symbol_table,
message_handlert &message_handler)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the Doxygen comments for this function to reflect the extra parameter.

{
symbolt method_symbol;
typet member_type=java_type_from_string(m.descriptor);

typet member_type=member_type_lazy(
m.descriptor,
m.signature,
id2string(class_symbol.name),
id2string(m.base_name),
message_handler);

method_symbol.name=method_identifier;
method_symbol.base_name=m.base_name;
Expand Down
3 changes: 2 additions & 1 deletion src/java_bytecode/java_bytecode_convert_method.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ void java_bytecode_convert_method_lazy(
const symbolt &class_symbol,
const irep_idt &method_identifier,
const java_bytecode_parse_treet::methodt &,
symbol_tablet &symbol_table);
symbol_tablet &symbol_table,
message_handlert &);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Even though it's not strictly necessary, I tend to prefer naming the parameters in the declaration - e.g. message_handlert &message_handler - but I note that the java_bytecode_parse_treet::methodt parameter hasn't been named either (I wouldn't be against you doing a drive-by cleanup of that at the same time ;-) )

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I asked about the general guidelines for this recently and I was told that the names can be left out unless it's not clear from the type what the parameter represents. For example, from const irep_idt & it would not be clear that the parameter is the method identifier, thus the name is given. But for const java_bytecode_parse_treet::methodt & it's pretty clear what it is. Since the message handler does not have any 'hidden' role here, I would probably leave it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine by me then :-)


#endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_H
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
public abstract class AbstractGenericClass<T>
{

}
Binary file not shown.
Binary file not shown.
Binary file not shown.
17 changes: 17 additions & 0 deletions unit/java_bytecode/java_bytecode_parse_generics/Outer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
public class Outer<T>
{
private class Inner
{
private final AbstractGenericClass<T> u;

Inner (AbstractGenericClass<T> t)
{
this.u = t;
}
}

private enum InnerEnum
{

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <java_bytecode/java_bytecode_language.h>
#include <iostream>
#include <testing-utils/load_java_class.h>
#include <testing-utils/require_symbol.h>

SCENARIO(
"java_bytecode_parse_derived_generic_class",
Expand All @@ -33,12 +34,8 @@ SCENARIO(
REQUIRE(new_symbol_table.has_symbol(class_prefix));

const symbolt &derived_symbol=new_symbol_table.lookup_ref(class_prefix);
REQUIRE(derived_symbol.is_type);
const typet &derived_type=derived_symbol.type;
REQUIRE(derived_type.id()==ID_struct);
const class_typet &class_type=to_class_type(derived_type);
REQUIRE(class_type.is_class());
REQUIRE_FALSE(class_type.get_bool(ID_incomplete_class));
const class_typet &derived_class_type=
require_symbol::require_complete_class(derived_symbol);

// TODO(tkiley): Currently we do not support extracting information
// about the base classes generic information.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,57 +174,52 @@ SCENARIO(
.has_symbol("java::generics$bound_element.f:()Ljava/lang/Number;"));

// TODO: methods should have generic return type (the tests needs to be
// extended), reintroduce when the issue of signature/descriptor for methods is
// resolved
// THEN("The method should have generic return type")
// {
// const symbolt &method_symbol=
// new_symbol_table
// .lookup("java::generics$bound_element.f:()Ljava/lang/Number;")
// .value().get();
// const typet &symbol_type=method_symbol.type;
//
// REQUIRE(symbol_type.id()==ID_code);
//
// const code_typet &code=to_code_type(symbol_type);
// }
// extended)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Above comment can be removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incorrect - comment relates to a different issue

THEN("The method should have generic return type")
{
const symbolt &method_symbol=
new_symbol_table
.lookup_ref("java::generics$bound_element.f:()Ljava/lang/Number;");
const typet &symbol_type=method_symbol.type;

REQUIRE(symbol_type.id()==ID_code);

const code_typet &code=to_code_type(symbol_type);
}

REQUIRE(
new_symbol_table
.has_symbol("java::generics$bound_element.g:(Ljava/lang/Number;)V"));
THEN("The method should have a generic parameter.")
{
const symbolt &method_symbol=
new_symbol_table
.lookup_ref("java::generics$bound_element.g:(Ljava/lang/Number;)V");
const typet &symbol_type=method_symbol.type;

// TODO: methods are not recognized as generic, reintroduce when
// the issue of signature/descriptor for methods is resolved
// THEN("The method should have a generic parameter.")
// {
// const symbolt &method_symbol=
// new_symbol_table
// .lookup("java::generics$bound_element.g:(Ljava/lang/Number;)V");
// const typet &symbol_type=method_symbol.type;
//
// REQUIRE(symbol_type.id()==ID_code);
//
// const code_typet &code=to_code_type(symbol_type);
//
// bool found=false;
// for(const auto &p : code.parameters())
// {
// if(p.get_identifier()==
// "java::generics$bound_element.g:(Ljava/lang/Number;)V::e")
// {
// found=true;
// const typet &t=p.type();
// REQUIRE(is_java_generic_parameter(p.type()));
// const java_generic_parametert &gen_type=
// to_java_generic_parameter(p.type());
// const symbol_typet &type_var=gen_type.type_variable();
// REQUIRE(type_var.get_identifier()==
// "java::generics$bound_element::NUM");
// break;
// }
// }
// REQUIRE(found);
// }
REQUIRE(symbol_type.id()==ID_code);

const code_typet &code=to_code_type(symbol_type);

bool found=false;
for(const auto &p : code.parameters())
{
if(p.get_identifier()==
"java::generics$bound_element.g:(Ljava/lang/Number;)V::e")
{
found=true;
const typet &t=p.type();
REQUIRE(is_java_generic_parameter(p.type()));
const java_generic_parametert &gen_type=
to_java_generic_parameter(p.type());
const symbol_typet &type_var=gen_type.type_variable();
REQUIRE(type_var.get_identifier()==
"java::generics$bound_element::NUM");
break;
}
}
REQUIRE(found);
}
}
}
GIVEN("A class with multiple bounds")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*******************************************************************\

Module: Unit tests for parsing generic classes

Author: DiffBlue Limited. All rights reserved.

\*******************************************************************/

#include <testing-utils/catch.hpp>

#include <memory>

#include <util/config.h>
#include <util/language.h>
#include <java_bytecode/java_bytecode_language.h>
#include <iostream>
#include <testing-utils/load_java_class.h>
#include <testing-utils/require_symbol.h>

SCENARIO(
"java_bytecode_parse_signature_descriptor_mismatch",
"[core][java_bytecode][java_bytecode_parse_generics]")
{
const symbol_tablet &new_symbol_table=
load_java_class(
"Outer",
"./java_bytecode/java_bytecode_parse_generics");

const std::string class_prefix="java::Outer";
REQUIRE(new_symbol_table.has_symbol(class_prefix));

const std::string inner_prefix=class_prefix+"$Inner";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice :-) I should copy that pattern for my unit tests...

THEN("There is a (complete) symbol for the inner class Inner")
{
REQUIRE(new_symbol_table.has_symbol(inner_prefix));

const symbolt &inner_symbol=new_symbol_table.lookup_ref(inner_prefix);
const class_typet &inner_class_type=
require_symbol::require_complete_class(inner_symbol);
}

THEN("There is a symbol for the constructor of the inner class with correct"
" descriptor")
{
const std::string func_name=".<init>";
const std::string func_descriptor=":(LOuter;LAbstractGenericClass;)V";
const std::string process_func_name=
inner_prefix+func_name+func_descriptor;
REQUIRE(new_symbol_table.has_symbol(process_func_name));

const code_typet func_code=
to_code_type(new_symbol_table.lookup_ref(process_func_name).type);
REQUIRE(func_code.parameters().size()==3);
}

const std::string inner_enum_prefix=class_prefix+"$InnerEnum";
THEN("There is a (complete) symbol for the inner enum InnerEnum")
{
REQUIRE(new_symbol_table.has_symbol(inner_enum_prefix));

const symbolt &inner_enum_symbol=
new_symbol_table.lookup_ref(inner_enum_prefix);
const class_typet &inner_enum_class_type=
require_symbol::require_complete_class(inner_enum_symbol);
}

THEN("There is a symbol for the constructor of the inner enum with correct"
" number of parameters")
{
const std::string func_name=".<init>";
const std::string func_descriptor=":(Ljava/lang/String;I)V";
const std::string process_func_name=
inner_enum_prefix+func_name+func_descriptor;
REQUIRE(new_symbol_table.has_symbol(process_func_name));

const code_typet func_code=
to_code_type(new_symbol_table.lookup_ref(process_func_name).type);
REQUIRE(func_code.parameters().size()==3);
}
}
29 changes: 29 additions & 0 deletions unit/testing-utils/require_symbol.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*******************************************************************\

Module: Unit test utilities

Author: DiffBlue Limited. All rights reserved.

\*******************************************************************/

/// \file
/// Helper functions for requiring properties of symbols

#include "require_symbol.h"

#include <testing-utils/catch.hpp>

const class_typet &require_symbol::require_complete_class(
const symbolt &class_symbol)
{
REQUIRE(class_symbol.is_type);

const typet &class_symbol_type=class_symbol.type;
REQUIRE(class_symbol_type.id()==ID_struct);

const class_typet &class_class_type=to_class_type(class_symbol_type);
REQUIRE(class_class_type.is_class());
REQUIRE_FALSE(class_class_type.get_bool(ID_incomplete_class));

return class_class_type;
}
25 changes: 25 additions & 0 deletions unit/testing-utils/require_symbol.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*******************************************************************\

Module: Unit test utilities

Author: DiffBlue Limited. All rights reserved.

\*******************************************************************/

/// \file
/// Helper functions for requiring properties of symbols

#include <util/symbol.h>
#include <util/std_types.h>

#ifndef CPROVER_TESTING_UTILS_REQUIRE_SYMBOL_H
#define CPROVER_TESTING_UTILS_REQUIRE_SYMBOL_H

// NOLINTNEXTLINE(readability/namespace)
namespace require_symbol
{
const class_typet &require_complete_class(
const symbolt &class_symbol);
}

#endif // CPROVER_TESTING_UTILS_REQUIRE_SYMBOL_H