Skip to content

Commit 3d41879

Browse files
authored
Merge pull request #1077 from owen-jones-diffblue/feature/parseInt#613
Feature/parse int#613
2 parents 2a7e25c + bdb1a3c commit 3d41879

File tree

18 files changed

+358
-42
lines changed

18 files changed

+358
-42
lines changed

regression/strings-smoke-tests/java_parseint/test.desc

+2-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,5 @@ test_parseint.class
33
--refine-strings
44
^EXIT=10$
55
^SIGNAL=0$
6-
^\[.*assertion.1\].* line 7.* SUCCESS$
7-
^\[.*assertion.2\].* line 8.* FAILURE$
8-
--
6+
^\[.*assertion.1\].* line 8.* SUCCESS$
7+
^\[.*assertion.2\].* line 9.* FAILURE$
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
public class test_parseint
22
{
3-
public static void main(String[] argv)
4-
{
5-
String twelve = new String("12");
6-
int parsed = Integer.parseInt(twelve);
7-
assert(parsed == 12);
8-
assert(parsed != 12);
9-
}
3+
public static void main(String[] args)
4+
{
5+
if (args.length == 1) {
6+
String twelve = new String("12");
7+
int parsed1 = Integer.parseInt(twelve);
8+
assert(parsed1 == 12);
9+
assert(parsed1 != 12);
10+
}
11+
}
1012
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
KNOWNBUG
2+
test_parseint.class
3+
--refine-strings
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
^\[.*assertion.1\].* line 9.* SUCCESS$
7+
^\[.*assertion.2\].* line 10.* FAILURE$
8+
^\[.*assertion.3\].* line 17.* SUCCESS$
9+
^\[.*assertion.4\].* line 18.* FAILURE$
10+
--
11+
--
12+
Issue #664 is about turning these tests on
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
public class test_parseint
2+
{
3+
public static void main(String[] args)
4+
{
5+
if (args.length == 2) {
6+
// 2^31-1, max value of Integer
7+
String max_int = new String("2147483647");
8+
int parsed2 = Integer.parseInt(max_int);
9+
assert(parsed2 == 2147483647);
10+
assert(parsed2 != 2147483647);
11+
}
12+
else if (args.length == 3) {
13+
// -2^31, min value of Integer, and longest string we could have without
14+
// leading zeroes
15+
String min_int = new String("-2147483648");
16+
int parsed3 = Integer.parseInt(min_int);
17+
assert(parsed3 == -2147483648);
18+
assert(parsed3 != -2147483648);
19+
}
20+
}
21+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
CORE
2+
test_parseint_with_radix.class
3+
--refine-strings
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
^\[.*assertion.1\].* line 8.* SUCCESS$
7+
^\[.*assertion.2\].* line 9.* FAILURE$
8+
^\[.*assertion.3\].* line 14.* SUCCESS$
9+
^\[.*assertion.4\].* line 15.* FAILURE$
10+
^\[.*assertion.5\].* line 20.* SUCCESS$
11+
^\[.*assertion.6\].* line 21.* FAILURE$
12+
^\[.*assertion.7\].* line 26.* SUCCESS$
13+
^\[.*assertion.8\].* line 27.* FAILURE$
14+
^\[.*assertion.9\].* line 32.* SUCCESS$
15+
^\[.*assertion.10\].* line 33.* FAILURE$
16+
--
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
public class test_parseint_with_radix
2+
{
3+
public static void main(String[] args)
4+
{
5+
if (args.length == 1) {
6+
String str1 = new String("F");
7+
int parsed1 = Integer.parseInt(str1, 16);
8+
assert(parsed1 == 15);
9+
assert(parsed1 != 15);
10+
}
11+
else if (args.length == 2) {
12+
String str2 = new String("123");
13+
int parsed2 = Integer.parseInt(str2, 10);
14+
assert(parsed2 == 123);
15+
assert(parsed2 != 123);
16+
}
17+
else if (args.length == 3) {
18+
String str3 = new String("77");
19+
int parsed3 = Integer.parseInt(str3, 8);
20+
assert(parsed3 == 63);
21+
assert(parsed3 != 63);
22+
}
23+
else if (args.length == 4) {
24+
String str4 = new String("-101");
25+
int parsed4 = Integer.parseInt(str4, 2);
26+
assert(parsed4 == -5);
27+
assert(parsed4 != -5);
28+
}
29+
else if (args.length == 5) {
30+
String str5 = new String("00aB");
31+
int parsed5 = Integer.parseInt(str5, 16);
32+
assert(parsed5 == 171);
33+
assert(parsed5 != 171);
34+
}
35+
}
36+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
KNOWNBUG
2+
test_parseint_with_radix.class
3+
--refine-strings
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
^\[.*assertion.1\].* line 9.* SUCCESS$
7+
^\[.*assertion.2\].* line 10.* FAILURE$
8+
^\[.*assertion.3\].* line 16.* SUCCESS$
9+
^\[.*assertion.4\].* line 17.* FAILURE$
10+
--
11+
--
12+
Issue #664 is about turning these tests on
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
public class test_parseint_with_radix
2+
{
3+
public static void main(String[] args)
4+
{
5+
if (args.length == 1) {
6+
// 2^31-1, max value of Integer
7+
String str1 = new String("7FFFFFFF");
8+
int parsed1 = Integer.parseInt(str1, 16);
9+
assert(parsed1 == 2147483647);
10+
assert(parsed1 != 2147483647);
11+
}
12+
else if (args.length == 2) {
13+
// -2^31, min value of Integer, and longest string we could have
14+
String str2 = new String("-100000000000000000000000000000000");
15+
int parsed2 = Integer.parseInt(str2, 2);
16+
assert(parsed2 == -2147483648);
17+
assert(parsed2 != -2147483648);
18+
}
19+
}
20+
}

src/java_bytecode/java_string_library_preprocess.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,9 @@ void java_string_library_preprocesst::initialize_conversion_table()
19541954
cprover_equivalent_to_java_function
19551955
["java::java.lang.Integer.parseInt:(Ljava/lang/String;)I"]=
19561956
ID_cprover_string_parse_int_func;
1957+
cprover_equivalent_to_java_function
1958+
["java::java.lang.Integer.parseInt:(Ljava/lang/String;I)I"]=
1959+
ID_cprover_string_parse_int_func;
19571960
cprover_equivalent_to_java_string_returning_function
19581961
["java::java.lang.Integer.toHexString:(I)Ljava/lang/String;"]=
19591962
ID_cprover_string_of_int_hex_func;

src/solvers/refinement/string_constraint_generator.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ class string_constraint_generatort
294294

295295
exprt add_axioms_for_parse_int(const function_application_exprt &f);
296296
exprt add_axioms_for_correct_number_format(
297-
const string_exprt &str, std::size_t max_size=10);
297+
const string_exprt &str, const exprt &radix, std::size_t max_size=10);
298298
exprt add_axioms_for_to_char_array(const function_application_exprt &f);
299299
exprt add_axioms_for_compare_to(const function_application_exprt &f);
300300

@@ -328,4 +328,8 @@ class string_constraint_generatort
328328
bool is_constant_string(const string_exprt &expr) const;
329329
};
330330

331+
exprt is_digit_with_radix(exprt chr, exprt radix);
332+
exprt get_numeric_value_from_character(
333+
const exprt &chr, const typet &char_type, const typet &type);
334+
331335
#endif

src/solvers/refinement/string_constraint_generator_valueof.cpp

+102-29
Original file line numberDiff line numberDiff line change
@@ -357,11 +357,14 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of(
357357
}
358358

359359
/// Add axioms making the return value true if the given string is a correct
360-
/// number.
361-
/// \param f: function application with one string expression
362-
/// \return an boolean expression
360+
/// number in the given radix
361+
/// \param str: string expression
362+
/// \param radix: the radix
363+
/// \param max_size: maximum number of characters
364+
/// \return a boolean expression saying whether the string does represent a
365+
/// number with the given radix
363366
exprt string_constraint_generatort::add_axioms_for_correct_number_format(
364-
const string_exprt &str, std::size_t max_size)
367+
const string_exprt &str, const exprt &radix, std::size_t max_size)
365368
{
366369
symbol_exprt correct=fresh_boolean("correct_number_format");
367370
const refined_string_typet &ref_type=to_refined_string_type(str.type());
@@ -375,16 +378,14 @@ exprt string_constraint_generatort::add_axioms_for_correct_number_format(
375378
exprt chr=str[0];
376379
equal_exprt starts_with_minus(chr, minus_char);
377380
equal_exprt starts_with_plus(chr, plus_char);
378-
and_exprt starts_with_digit(
379-
binary_relation_exprt(chr, ID_ge, zero_char),
380-
binary_relation_exprt(chr, ID_le, nine_char));
381+
exprt starts_with_digit=is_digit_with_radix(chr, radix);
381382

382383
// TODO: we should also add the converse implications (conditions => correct)
383384
// correct => |str| > 0
384385
exprt non_empty=str.axiom_for_is_longer_than(from_integer(1, index_type));
385386
axioms.push_back(implies_exprt(correct, non_empty));
386387

387-
// correct => (str[0] = '+' or '-' || '0' <= str[0] <= '9')
388+
// correct => (str[0] = '+' or '-' || is_digit_with_radix(str[0], radix))
388389
or_exprt correct_first(
389390
or_exprt(starts_with_minus, starts_with_plus), starts_with_digit);
390391
axioms.push_back(implies_exprt(correct, correct_first));
@@ -399,11 +400,9 @@ exprt string_constraint_generatort::add_axioms_for_correct_number_format(
399400
axioms.push_back(
400401
implies_exprt(correct, str.axiom_for_is_shorter_than(max_size)));
401402

402-
// forall 1 <= qvar < |str| . correct => '0'<= str[qvar] <= '9'
403+
// forall 1 <= qvar < |str| . correct => is_digit_with_radix(str[qvar], radix)
403404
symbol_exprt qvar=fresh_univ_index("number_format", index_type);
404-
and_exprt is_digit(
405-
binary_relation_exprt(str[qvar], ID_ge, zero_char),
406-
binary_relation_exprt(str[qvar], ID_le, nine_char));
405+
exprt is_digit=is_digit_with_radix(str[qvar], radix);
407406
string_constraintt all_digits(
408407
qvar, from_integer(1, index_type), str.length(), correct, is_digit);
409408
axioms.push_back(all_digits);
@@ -412,58 +411,66 @@ exprt string_constraint_generatort::add_axioms_for_correct_number_format(
412411
}
413412

414413
/// Add axioms corresponding to the Java function Integer.parseInt
415-
/// \param f: function application with one string expression
414+
/// \param f: a function application with either one string expression or one
415+
/// string expression and an expression for the radix
416416
/// \return an integer expression
417417
exprt string_constraint_generatort::add_axioms_for_parse_int(
418418
const function_application_exprt &f)
419419
{
420-
string_exprt str=get_string_expr(args(f, 1)[0]);
420+
PRECONDITION(f.arguments().size()==1 || f.arguments().size()==2);
421+
string_exprt str=get_string_expr(f.arguments()[0]);
422+
const exprt radix=
423+
f.arguments().size()==1?from_integer(10, f.type()):f.arguments()[1];
424+
421425
const typet &type=f.type();
422426
symbol_exprt i=fresh_symbol("parsed_int", type);
423427
const refined_string_typet &ref_type=to_refined_string_type(str.type());
424428
const typet &char_type=ref_type.get_char_type();
425-
exprt zero_char=constant_char('0', char_type);
426429
exprt minus_char=constant_char('-', char_type);
427430
exprt plus_char=constant_char('+', char_type);
428431
assert(type.id()==ID_signedbv);
429-
exprt ten=from_integer(10, type);
430432

431433
exprt chr=str[0];
432434
exprt starts_with_minus=equal_exprt(chr, minus_char);
433435
exprt starts_with_plus=equal_exprt(chr, plus_char);
434-
exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char);
436+
exprt starts_with_digit=
437+
not_exprt(or_exprt(starts_with_minus, starts_with_plus));
435438

436-
// TODO: we should throw an exception when this does not hold:
437-
exprt correct=add_axioms_for_correct_number_format(str);
439+
/// TODO: we should throw an exception when this does not hold:
440+
exprt correct=add_axioms_for_correct_number_format(str, radix);
438441
axioms.push_back(correct);
439442

443+
/// TODO(OJones): size should depend on the radix
444+
/// TODO(OJones): we should deal with overflow properly
440445
for(unsigned size=1; size<=10; size++)
441446
{
442447
exprt sum=from_integer(0, type);
443-
exprt first_value=typecast_exprt(minus_exprt(chr, zero_char), type);
448+
exprt first_value=get_numeric_value_from_character(chr, char_type, type);
449+
equal_exprt premise=str.axiom_for_has_length(size);
444450

445451
for(unsigned j=1; j<size; j++)
446452
{
447-
mult_exprt ten_sum(sum, ten);
453+
mult_exprt radix_sum(sum, radix);
448454
if(j>=9)
449455
{
450456
// We have to be careful about overflows
451-
div_exprt div(sum, ten);
452-
equal_exprt no_overflow(div, sum);
457+
div_exprt div(sum, radix);
458+
implies_exprt no_overflow(premise, (equal_exprt(div, sum)));
453459
axioms.push_back(no_overflow);
454460
}
455461

456462
sum=plus_exprt_with_overflow_check(
457-
ten_sum,
458-
typecast_exprt(minus_exprt(str[j], zero_char), type));
463+
radix_sum,
464+
get_numeric_value_from_character(str[j], char_type, type));
459465

460-
mult_exprt first(first_value, ten);
466+
mult_exprt first(first_value, radix);
461467
if(j>=9)
462468
{
463469
// We have to be careful about overflows
464-
div_exprt div_first(first, ten);
470+
div_exprt div_first(first, radix);
465471
implies_exprt no_overflow_first(
466-
starts_with_digit, equal_exprt(div_first, first_value));
472+
and_exprt(starts_with_digit, premise),
473+
equal_exprt(div_first, first_value));
467474
axioms.push_back(no_overflow_first);
468475
}
469476
first_value=first;
@@ -474,7 +481,6 @@ exprt string_constraint_generatort::add_axioms_for_parse_int(
474481
// a2 : starts_with_plus => i=sum
475482
// a3 : starts_with_minus => i=-sum
476483

477-
equal_exprt premise=str.axiom_for_has_length(size);
478484
implies_exprt a1(
479485
and_exprt(premise, starts_with_digit),
480486
equal_exprt(i, plus_exprt(sum, first_value)));
@@ -490,3 +496,70 @@ exprt string_constraint_generatort::add_axioms_for_parse_int(
490496
}
491497
return i;
492498
}
499+
500+
/// Check if a character is a digit with respect to the given radix, e.g. if the
501+
/// radix is 10 then check if the character is in the range 0-9.
502+
/// \param chr: the character
503+
/// \param radix: the radix
504+
/// \return an expression for the condition
505+
exprt is_digit_with_radix(exprt chr, exprt radix)
506+
{
507+
const typet &char_type=chr.type();
508+
exprt zero_char=from_integer('0', char_type);
509+
exprt nine_char=from_integer('9', char_type);
510+
exprt a_char=from_integer('a', char_type);
511+
exprt A_char=from_integer('A', char_type);
512+
513+
and_exprt is_digit_when_radix_le_10(
514+
binary_relation_exprt(chr, ID_ge, zero_char),
515+
binary_relation_exprt(
516+
chr, ID_lt, plus_exprt(zero_char, typecast_exprt(radix, char_type))));
517+
518+
minus_exprt radix_minus_ten(
519+
typecast_exprt(radix, char_type), from_integer(10, char_type));
520+
521+
or_exprt is_digit_when_radix_gt_10(
522+
and_exprt(
523+
binary_relation_exprt(chr, ID_ge, zero_char),
524+
binary_relation_exprt(chr, ID_le, nine_char)),
525+
and_exprt(
526+
binary_relation_exprt(chr, ID_ge, a_char),
527+
binary_relation_exprt(chr, ID_lt, plus_exprt(a_char, radix_minus_ten))),
528+
and_exprt(
529+
binary_relation_exprt(chr, ID_ge, A_char),
530+
binary_relation_exprt(chr, ID_lt, plus_exprt(A_char, radix_minus_ten))));
531+
532+
return if_exprt(
533+
binary_relation_exprt(radix, ID_le, from_integer(10, radix.type())),
534+
is_digit_when_radix_le_10,
535+
is_digit_when_radix_gt_10);
536+
}
537+
538+
/// Get the numeric value of a character, assuming it is a valid digit for the
539+
/// radix in use (the radix is not a parameter, so no range check is done here)
540+
/// \param chr: the character to get the numeric value of
541+
/// \param char_type: the type to use for characters
542+
/// \param type: the type to use for the return value
543+
/// \return an integer expression of the given type with the numeric value of
544+
/// the char
545+
exprt get_numeric_value_from_character(
546+
const exprt &chr, const typet &char_type, const typet &type)
547+
{
548+
constant_exprt zero_char=from_integer('0', char_type);
549+
constant_exprt a_char=from_integer('a', char_type);
550+
constant_exprt A_char=from_integer('A', char_type);
551+
constant_exprt ten_int=from_integer(10, char_type);
552+
553+
binary_relation_exprt upper_case(chr, ID_ge, A_char);
554+
binary_relation_exprt lower_case(chr, ID_ge, a_char);
555+
556+
return typecast_exprt(
557+
if_exprt(
558+
lower_case,
559+
plus_exprt(minus_exprt(chr, a_char), ten_int),
560+
if_exprt(
561+
upper_case,
562+
plus_exprt(minus_exprt(chr, A_char), ten_int),
563+
minus_exprt(chr, zero_char))),
564+
type);
565+
}

0 commit comments

Comments
 (0)