Skip to content

Feature/parse int#613 #1077

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions regression/strings-smoke-tests/java_parseint/test.desc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ test_parseint.class
--refine-strings
^EXIT=10$
^SIGNAL=0$
^\[.*assertion.1\].* line 7.* SUCCESS$
^\[.*assertion.2\].* line 8.* FAILURE$
--
^\[.*assertion.1\].* line 8.* SUCCESS$
^\[.*assertion.2\].* line 9.* FAILURE$
Binary file modified regression/strings-smoke-tests/java_parseint/test_parseint.class
Binary file not shown.
16 changes: 9 additions & 7 deletions regression/strings-smoke-tests/java_parseint/test_parseint.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
public class test_parseint
{
public static void main(String[] argv)
{
String twelve = new String("12");
int parsed = Integer.parseInt(twelve);
assert(parsed == 12);
assert(parsed != 12);
}
public static void main(String[] args)
{
if (args.length == 1) {
String twelve = new String("12");
int parsed1 = Integer.parseInt(twelve);
assert(parsed1 == 12);
assert(parsed1 != 12);
}
}
}
12 changes: 12 additions & 0 deletions regression/strings-smoke-tests/java_parseint_knownbug/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
KNOWNBUG
test_parseint.class
--refine-strings
^EXIT=10$
^SIGNAL=0$
^\[.*assertion.1\].* line 9.* SUCCESS$
^\[.*assertion.2\].* line 10.* FAILURE$
^\[.*assertion.3\].* line 17.* SUCCESS$
^\[.*assertion.4\].* line 18.* FAILURE$
--
--
Issue #664 is about turning these tests on
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
public class test_parseint
{
public static void main(String[] args)
{
if (args.length == 2) {
// 2^31-1, max value of Integer
String max_int = new String("2147483647");
int parsed2 = Integer.parseInt(max_int);
assert(parsed2 == 2147483647);
assert(parsed2 != 2147483647);
}
else if (args.length == 3) {
// -2^31, min value of Integer, and longest string we could have without
// leading zeroes
String min_int = new String("-2147483648");
int parsed3 = Integer.parseInt(min_int);
assert(parsed3 == -2147483648);
assert(parsed3 != -2147483648);
}
}
}
16 changes: 16 additions & 0 deletions regression/strings-smoke-tests/java_parseint_with_radix/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
CORE
test_parseint_with_radix.class
--refine-strings
^EXIT=10$
^SIGNAL=0$
^\[.*assertion.1\].* line 8.* SUCCESS$
^\[.*assertion.2\].* line 9.* FAILURE$
^\[.*assertion.3\].* line 14.* SUCCESS$
^\[.*assertion.4\].* line 15.* FAILURE$
^\[.*assertion.5\].* line 20.* SUCCESS$
^\[.*assertion.6\].* line 21.* FAILURE$
^\[.*assertion.7\].* line 26.* SUCCESS$
^\[.*assertion.8\].* line 27.* FAILURE$
^\[.*assertion.9\].* line 32.* SUCCESS$
^\[.*assertion.10\].* line 33.* FAILURE$
--
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
public class test_parseint_with_radix
{
public static void main(String[] args)
{
if (args.length == 1) {
String str1 = new String("F");
int parsed1 = Integer.parseInt(str1, 16);
assert(parsed1 == 15);
assert(parsed1 != 15);
}
else if (args.length == 2) {
String str2 = new String("123");
int parsed2 = Integer.parseInt(str2, 10);
assert(parsed2 == 123);
assert(parsed2 != 123);
}
else if (args.length == 3) {
String str3 = new String("77");
int parsed3 = Integer.parseInt(str3, 8);
assert(parsed3 == 63);
assert(parsed3 != 63);
}
else if (args.length == 4) {
String str4 = new String("-101");
int parsed4 = Integer.parseInt(str4, 2);
assert(parsed4 == -5);
assert(parsed4 != -5);
}
else if (args.length == 5) {
String str5 = new String("00aB");
int parsed5 = Integer.parseInt(str5, 16);
assert(parsed5 == 171);
assert(parsed5 != 171);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
KNOWNBUG
test_parseint_with_radix.class
--refine-strings
^EXIT=10$
^SIGNAL=0$
^\[.*assertion.1\].* line 9.* SUCCESS$
^\[.*assertion.2\].* line 10.* FAILURE$
^\[.*assertion.3\].* line 16.* SUCCESS$
^\[.*assertion.4\].* line 17.* FAILURE$
--
--
Issue #664 is about turning these tests on
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
public class test_parseint_with_radix
{
public static void main(String[] args)
{
if (args.length == 1) {
// 2^31-1, max value of Integer
String str1 = new String("7FFFFFFF");
int parsed1 = Integer.parseInt(str1, 16);
assert(parsed1 == 2147483647);
assert(parsed1 != 2147483647);
}
else if (args.length == 2) {
// -2^31, min value of Integer, and longest string we could have
String str2 = new String("-100000000000000000000000000000000");
int parsed2 = Integer.parseInt(str2, 2);
assert(parsed2 == -2147483648);
assert(parsed2 != -2147483648);
}
}
}
3 changes: 3 additions & 0 deletions src/java_bytecode/java_string_library_preprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,9 @@ void java_string_library_preprocesst::initialize_conversion_table()
cprover_equivalent_to_java_function
["java::java.lang.Integer.parseInt:(Ljava/lang/String;)I"]=
ID_cprover_string_parse_int_func;
cprover_equivalent_to_java_function
["java::java.lang.Integer.parseInt:(Ljava/lang/String;I)I"]=
ID_cprover_string_parse_int_func;
cprover_equivalent_to_java_string_returning_function
["java::java.lang.Integer.toHexString:(I)Ljava/lang/String;"]=
ID_cprover_string_of_int_hex_func;
Expand Down
6 changes: 5 additions & 1 deletion src/solvers/refinement/string_constraint_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ class string_constraint_generatort

exprt add_axioms_for_parse_int(const function_application_exprt &f);
exprt add_axioms_for_correct_number_format(
const string_exprt &str, std::size_t max_size=10);
const string_exprt &str, const exprt &radix, std::size_t max_size=10);
exprt add_axioms_for_to_char_array(const function_application_exprt &f);
exprt add_axioms_for_compare_to(const function_application_exprt &f);

Expand Down Expand Up @@ -328,4 +328,8 @@ class string_constraint_generatort
bool is_constant_string(const string_exprt &expr) const;
};

exprt is_digit_with_radix(exprt chr, exprt radix);
exprt get_numeric_value_from_character(
const exprt &chr, const typet &char_type, const typet &type);

#endif
131 changes: 102 additions & 29 deletions src/solvers/refinement/string_constraint_generator_valueof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,11 +357,14 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of(
}

/// Add axioms making the return value true if the given string is a correct
/// number.
/// \param f: function application with one string expression
/// \return an boolean expression
/// number in the given radix
/// \param str: string expression
/// \param radix: the radix
/// \param max_size: maximum number of characters
/// \return a boolean expression saying whether the string does represent a
/// number with the given radix
exprt string_constraint_generatort::add_axioms_for_correct_number_format(
const string_exprt &str, std::size_t max_size)
const string_exprt &str, const exprt &radix, std::size_t max_size)
{
symbol_exprt correct=fresh_boolean("correct_number_format");
const refined_string_typet &ref_type=to_refined_string_type(str.type());
Expand All @@ -375,16 +378,14 @@ exprt string_constraint_generatort::add_axioms_for_correct_number_format(
exprt chr=str[0];
equal_exprt starts_with_minus(chr, minus_char);
equal_exprt starts_with_plus(chr, plus_char);
and_exprt starts_with_digit(
binary_relation_exprt(chr, ID_ge, zero_char),
binary_relation_exprt(chr, ID_le, nine_char));
exprt starts_with_digit=is_digit_with_radix(chr, radix);

// TODO: we should have implications in the other direction for correct
// correct => |str| > 0
exprt non_empty=str.axiom_for_is_longer_than(from_integer(1, index_type));
axioms.push_back(implies_exprt(correct, non_empty));

// correct => (str[0] = '+' or '-' || '0' <= str[0] <= '9')
// correct => (str[0] = '+' or '-' || is_digit_with_radix(str[0], radix))
or_exprt correct_first(
or_exprt(starts_with_minus, starts_with_plus), starts_with_digit);
axioms.push_back(implies_exprt(correct, correct_first));
Expand All @@ -399,11 +400,9 @@ exprt string_constraint_generatort::add_axioms_for_correct_number_format(
axioms.push_back(
implies_exprt(correct, str.axiom_for_is_shorter_than(max_size)));

// forall 1 <= qvar < |str| . correct => '0'<= str[qvar] <= '9'
// forall 1 <= qvar < |str| . correct => is_digit_with_radix(str[qvar], radix)
symbol_exprt qvar=fresh_univ_index("number_format", index_type);
and_exprt is_digit(
binary_relation_exprt(str[qvar], ID_ge, zero_char),
binary_relation_exprt(str[qvar], ID_le, nine_char));
exprt is_digit=is_digit_with_radix(str[qvar], radix);
string_constraintt all_digits(
qvar, from_integer(1, index_type), str.length(), correct, is_digit);
axioms.push_back(all_digits);
Expand All @@ -412,58 +411,66 @@ exprt string_constraint_generatort::add_axioms_for_correct_number_format(
}

/// add axioms corresponding to the Integer.parseInt java function
/// \param f: function application with one string expression
/// \param f: a function application with either one string expression or one
/// string expression and an expression for the radix
/// \return an integer expression
exprt string_constraint_generatort::add_axioms_for_parse_int(
const function_application_exprt &f)
{
string_exprt str=get_string_expr(args(f, 1)[0]);
PRECONDITION(f.arguments().size()==1 || f.arguments().size()==2);
string_exprt str=get_string_expr(f.arguments()[0]);
const exprt radix=
f.arguments().size()==1?from_integer(10, f.type()):f.arguments()[1];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the ternary conditional is the one blessed operator we're permitted spaces around

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see anything indicating that in the coding standard. I had a quick look in the codebase and some sections use spaces and some don't. @tautschnig seems to be the authority on formatting so I found a PR of mine that he reviewed with a ternary operator in it and he didn't disapprove of having no spaces: https://github.com/diffblue/cbmc/pull/767/files#diff-ffd0a317958e24d1f65f21ed40f67c99R55

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear: I'd like to use spaces, but if we are allowed to then it should be in the coding standard, so we end getting more consistent.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somewhat similarly to @smowton I inferred that we should use spaces with ternary from the passage that says that we should use spaces around logical operators (I see ternary as logical operator). In the code that @owen-jones-diffblue linked there is a line break after ? so it is not that bad.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It lists the three operators that you can have spaces around (&&, || and <<)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two of them are logical, would you also say that we are not permitted to use spaces around >> because it is not on the list?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe you are meant to use spaces around << when used for streams, and not use spaces around << or >> when used for bit shifting

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.
That was my interpretation, too.
Thank you for the clarification.


const typet &type=f.type();
symbol_exprt i=fresh_symbol("parsed_int", type);
const refined_string_typet &ref_type=to_refined_string_type(str.type());
const typet &char_type=ref_type.get_char_type();
exprt zero_char=constant_char('0', char_type);
exprt minus_char=constant_char('-', char_type);
exprt plus_char=constant_char('+', char_type);
assert(type.id()==ID_signedbv);
exprt ten=from_integer(10, type);

exprt chr=str[0];
exprt starts_with_minus=equal_exprt(chr, minus_char);
exprt starts_with_plus=equal_exprt(chr, plus_char);
exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char);
exprt starts_with_digit=
not_exprt(or_exprt(starts_with_minus, starts_with_plus));

// TODO: we should throw an exception when this does not hold:
exprt correct=add_axioms_for_correct_number_format(str);
/// TODO: we should throw an exception when this does not hold:
exprt correct=add_axioms_for_correct_number_format(str, radix);
axioms.push_back(correct);

/// TODO(OJones): size should depend on the radix
/// TODO(OJones): we should deal with overflow properly
for(unsigned size=1; size<=10; size++)
{
exprt sum=from_integer(0, type);
exprt first_value=typecast_exprt(minus_exprt(chr, zero_char), type);
exprt first_value=get_numeric_value_from_character(chr, char_type, type);
equal_exprt premise=str.axiom_for_has_length(size);

for(unsigned j=1; j<size; j++)
{
mult_exprt ten_sum(sum, ten);
mult_exprt radix_sum(sum, radix);
if(j>=9)
{
// We have to be careful about overflows
div_exprt div(sum, ten);
equal_exprt no_overflow(div, sum);
div_exprt div(sum, radix);
implies_exprt no_overflow(premise, (equal_exprt(div, sum)));
axioms.push_back(no_overflow);
}

sum=plus_exprt_with_overflow_check(
ten_sum,
typecast_exprt(minus_exprt(str[j], zero_char), type));
radix_sum,
get_numeric_value_from_character(str[j], char_type, type));

mult_exprt first(first_value, ten);
mult_exprt first(first_value, radix);
if(j>=9)
{
// We have to be careful about overflows
div_exprt div_first(first, ten);
div_exprt div_first(first, radix);
implies_exprt no_overflow_first(
starts_with_digit, equal_exprt(div_first, first_value));
and_exprt(starts_with_digit, premise),
equal_exprt(div_first, first_value));
axioms.push_back(no_overflow_first);
}
first_value=first;
Expand All @@ -474,7 +481,6 @@ exprt string_constraint_generatort::add_axioms_for_parse_int(
// a2 : starts_with_plus => i=sum
// a3 : starts_with_minus => i=-sum

equal_exprt premise=str.axiom_for_has_length(size);
implies_exprt a1(
and_exprt(premise, starts_with_digit),
equal_exprt(i, plus_exprt(sum, first_value)));
Expand All @@ -490,3 +496,70 @@ exprt string_constraint_generatort::add_axioms_for_parse_int(
}
return i;
}

/// Check if a character is a digit with respect to the given radix, e.g. if the
/// radix is 10 then check if the character is in the range 0-9.
/// \param chr: the character
/// \param radix: the radix
/// \return an expression for the condition
exprt is_digit_with_radix(exprt chr, exprt radix)
{
const typet &char_type=chr.type();
exprt zero_char=from_integer('0', char_type);
exprt nine_char=from_integer('9', char_type);
exprt a_char=from_integer('a', char_type);
exprt A_char=from_integer('A', char_type);

and_exprt is_digit_when_radix_le_10(
binary_relation_exprt(chr, ID_ge, zero_char),
binary_relation_exprt(
chr, ID_lt, plus_exprt(zero_char, typecast_exprt(radix, char_type))));

minus_exprt radix_minus_ten(
typecast_exprt(radix, char_type), from_integer(10, char_type));

or_exprt is_digit_when_radix_gt_10(
and_exprt(
binary_relation_exprt(chr, ID_ge, zero_char),
binary_relation_exprt(chr, ID_le, nine_char)),
and_exprt(
binary_relation_exprt(chr, ID_ge, a_char),
binary_relation_exprt(chr, ID_lt, plus_exprt(a_char, radix_minus_ten))),
and_exprt(
binary_relation_exprt(chr, ID_ge, A_char),
binary_relation_exprt(chr, ID_lt, plus_exprt(A_char, radix_minus_ten))));

return if_exprt(
binary_relation_exprt(radix, ID_le, from_integer(10, radix.type())),
is_digit_when_radix_le_10,
is_digit_when_radix_gt_10);
}

/// Get the numeric value of a character, assuming that the radix is large
/// enough
/// \param chr: the character to get the numeric value of
/// \param char_type: the type to use for characters
/// \param type: the type to use for the return value
/// \return an integer expression of the given type with the numeric value of
/// the char
exprt get_numeric_value_from_character(
const exprt &chr, const typet &char_type, const typet &type)
{
constant_exprt zero_char=from_integer('0', char_type);
constant_exprt a_char=from_integer('a', char_type);
constant_exprt A_char=from_integer('A', char_type);
constant_exprt ten_int=from_integer(10, char_type);

binary_relation_exprt upper_case(chr, ID_ge, A_char);
binary_relation_exprt lower_case(chr, ID_ge, a_char);

return typecast_exprt(
if_exprt(
lower_case,
plus_exprt(minus_exprt(chr, a_char), ten_int),
if_exprt(
upper_case,
plus_exprt(minus_exprt(chr, A_char), ten_int),
minus_exprt(chr, zero_char))),
type);
}
Loading