Skip to content

Commit c9b6c42

Browse files
Merge pull request diffblue#1513 from romainbrenguier/feature/input-string-printable
Input string printable TG-808
2 parents c4486f1 + d2c3752 commit c9b6c42

15 files changed

+170
-21
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
public class Test {
2+
3+
public static String check(String t) {
4+
String s = t.concat("$£¥\u0000\u0008\u0010\u001F");
5+
// This should be disallowed because "\u0000" is not printable
6+
assert(!s.equals("\u0000$£¥\u0000\u0008\u0010\u001F"));
7+
// This can fail when t="a" which is printable
8+
assert(!s.equals("a$£¥\u0000\u0008\u0010\u001F"));
9+
return s;
10+
}
11+
12+
public static boolean check_char(String s, char c) {
13+
if(s == null)
14+
return false;
15+
// This should be -1 for all non-printable character
16+
int i = s.indexOf(c);
17+
boolean b = (i == -1 || (c >= ' ' && c <= '~'));
18+
assert(b);
19+
return b;
20+
}
21+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Test.class
3+
--refine-strings --function Test.check --string-max-length 100 --string-printable --java-assume-inputs-non-null
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
assertion.* file Test.java line 6 .* SUCCESS
7+
assertion.* file Test.java line 8 .* FAILURE
8+
--
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CORE
2+
Test.class
3+
--refine-strings --function Test.check_char --string-max-length 100 --string-printable
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
assertion.* file Test.java line 18 .* SUCCESS
7+
--

src/cbmc/cbmc_solvers.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ std::unique_ptr<cbmc_solverst::solvert> cbmc_solverst::get_string_refinement()
180180
info.string_max_length=options.get_signed_int_option("string-max-length");
181181
info.string_non_empty=options.get_bool_option("string-non-empty");
182182
info.trace=options.get_bool_option("trace");
183-
info.string_printable=options.get_bool_option("string-printable");
184183
if(options.get_bool_option("max-node-refinement"))
185184
info.max_node_refinement=
186185
options.get_unsigned_int_option("max-node-refinement");

src/java_bytecode/java_bytecode_language.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
5151
if(cmd.isset("string-max-input-length"))
5252
object_factory_parameters.max_nondet_string_length=
5353
std::stoi(cmd.get_value("string-max-input-length"));
54+
object_factory_parameters.string_printable = cmd.isset("string-printable");
5455
if(cmd.isset("java-max-vla-length"))
5556
max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length"));
5657
if(cmd.isset("lazy-methods-context-sensitive"))

src/java_bytecode/java_bytecode_language.h

+3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ struct object_factory_parameterst final
7575
/// dereference a pointer using a 'depth counter'. We set a pointer to null if
7676
/// such depth becomes >= than this maximum value.
7777
size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH;
78+
79+
/// Force string content to be ASCII printable characters when set to true.
80+
bool string_printable = false;
7881
};
7982

8083
typedef std::pair<

src/java_bytecode/java_object_factory.cpp

+14-2
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,8 @@ codet initialize_nondet_string_struct(
542542
const exprt &obj,
543543
const std::size_t &max_nondet_string_length,
544544
const source_locationt &loc,
545-
symbol_tablet &symbol_table)
545+
symbol_tablet &symbol_table,
546+
bool printable)
546547
{
547548
PRECONDITION(
548549
java_string_library_preprocesst::implements_java_char_sequence(obj.type()));
@@ -605,6 +606,13 @@ codet initialize_nondet_string_struct(
605606

606607
add_array_to_length_association(
607608
data_expr, length_expr, symbol_table, loc, code);
609+
610+
// Printable ASCII characters are between ' ' and '~'.
611+
if(printable)
612+
{
613+
add_character_set_constraint(
614+
array_pointer, length_expr, " -~", symbol_table, loc, code);
615+
}
608616
}
609617

610618
// tmp_object = struct_expr;
@@ -616,6 +624,7 @@ codet initialize_nondet_string_struct(
616624
/// content and association of its address to the pointer `expr`.
617625
/// \param expr: pointer to be affected
618626
/// \param max_nondet_string_length: maximum length of strings to initialize
627+
/// \param printable: Boolean, true to force content to be printable
619628
/// \param symbol_table: the symbol table
620629
/// \param loc: location in the source
621630
/// \param [out] code: code block in which initialization code is added
@@ -625,6 +634,7 @@ codet initialize_nondet_string_struct(
625634
static bool add_nondet_string_pointer_initialization(
626635
const exprt &expr,
627636
const std::size_t &max_nondet_string_length,
637+
bool printable,
628638
symbol_tablet &symbol_table,
629639
const source_locationt &loc,
630640
code_blockt &code)
@@ -644,7 +654,8 @@ static bool add_nondet_string_pointer_initialization(
644654
dereference_exprt(expr, struct_type),
645655
max_nondet_string_length,
646656
loc,
647-
symbol_table));
657+
symbol_table,
658+
printable));
648659
return false;
649660
}
650661

@@ -784,6 +795,7 @@ void java_object_factoryt::gen_nondet_pointer_init(
784795
add_nondet_string_pointer_initialization(
785796
expr,
786797
object_factory_parameters.max_nondet_string_length,
798+
object_factory_parameters.string_printable,
787799
symbol_table,
788800
loc,
789801
assignments);

src/java_bytecode/java_object_factory.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ codet initialize_nondet_string_struct(
156156
const exprt &obj,
157157
const std::size_t &max_nondet_string_length,
158158
const source_locationt &loc,
159-
symbol_tablet &symbol_table);
159+
symbol_tablet &symbol_table,
160+
bool printable);
160161

161162
#endif // CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H

src/java_bytecode/java_string_library_preprocess.cpp

+32
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,38 @@ void add_array_to_length_association(
704704
symbol_table));
705705
}
706706

707+
/// Add a call to a primitive of the string solver which ensures all characters
708+
/// belong to the character set.
709+
/// \param pointer: a character pointer expression
710+
/// \param length: length of the character sequence pointed by `pointer`
711+
/// \param char_set: character set given by a range expression consisting of
712+
/// two characters separated by an hyphen.
713+
/// For instance "a-z" denotes all lower case ascii letters.
714+
/// \param symbol_table: the symbol table
715+
/// \param loc: source location
716+
/// \param code [out] : code block to which declaration and calls get added
717+
void add_character_set_constraint(
718+
const exprt &pointer,
719+
const exprt &length,
720+
const irep_idt &char_set,
721+
symbol_tablet &symbol_table,
722+
const source_locationt &loc,
723+
code_blockt &code)
724+
{
725+
PRECONDITION(pointer.type().id() == ID_pointer);
726+
symbolt &return_sym = get_fresh_aux_symbol(
727+
java_int_type(), "cnstr_added", "cnstr_added", loc, ID_java, symbol_table);
728+
const exprt return_expr = return_sym.symbol_expr();
729+
code.add(code_declt(return_expr));
730+
const constant_exprt char_set_expr(char_set, string_typet());
731+
code.add(
732+
code_assign_function_application(
733+
return_expr,
734+
ID_cprover_string_constrain_characters_func,
735+
{length, pointer, char_set_expr},
736+
symbol_table));
737+
}
738+
707739
/// Create a refined_string_exprt `str` whose content and length are fresh
708740
/// symbols, calls the string primitive with name `function_name`.
709741
/// In the arguments of the primitive `str` takes the place of the result and

src/java_bytecode/java_string_library_preprocess.h

+8
Original file line numberDiff line numberDiff line change
@@ -349,4 +349,12 @@ void add_array_to_length_association(
349349
const source_locationt &loc,
350350
code_blockt &code);
351351

352+
void add_character_set_constraint(
353+
const exprt &pointer,
354+
const exprt &length,
355+
const irep_idt &char_set,
356+
symbol_tablet &symbol_table,
357+
const source_locationt &loc,
358+
code_blockt &code);
359+
352360
#endif // CPROVER_JAVA_BYTECODE_JAVA_STRING_LIBRARY_PREPROCESS_H

src/solvers/refinement/string_constraint_generator.h

+8-3
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ class string_constraint_generatort final
4242
{
4343
/// Max length of non-deterministic strings
4444
size_t string_max_length=std::numeric_limits<size_t>::max();
45-
/// Prefer printable characters in non-deterministic strings
46-
bool string_printable=false;
4745
};
4846

4947
string_constraint_generatort(const infot& info, const namespacet& ns);
@@ -110,6 +108,14 @@ class string_constraint_generatort final
110108
void add_default_axioms(const array_string_exprt &s);
111109
exprt axiom_for_is_positive_index(const exprt &x);
112110

111+
void add_constraint_on_characters(
112+
const array_string_exprt &s,
113+
const exprt &start,
114+
const exprt &end,
115+
const std::string &char_set);
116+
exprt
117+
add_axioms_for_constrain_characters(const function_application_exprt &f);
118+
113119
// The following functions add axioms for the returned value
114120
// to be equal to the result of the function given as argument.
115121
// They are not accessed directly from other classes: they call
@@ -333,7 +339,6 @@ class string_constraint_generatort final
333339
std::set<array_string_exprt> created_strings;
334340
unsigned symbol_count=0;
335341
const messaget message;
336-
const bool force_printable_characters;
337342

338343
std::vector<exprt> axioms;
339344
std::vector<symbol_exprt> boolean_symbols;

src/solvers/refinement/string_constraint_generator_main.cpp

+64-13
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ string_constraint_generatort::string_constraint_generatort(
3131
const string_constraint_generatort::infot &info,
3232
const namespacet &ns)
3333
: max_string_length(info.string_max_length),
34-
force_printable_characters(info.string_printable),
3534
ns(ns)
3635
{
3736
}
@@ -319,21 +318,71 @@ void string_constraint_generatort::add_default_axioms(
319318
if(!created_strings.insert(s).second)
320319
return;
321320

322-
axioms.push_back(
323-
s.axiom_for_length_ge(from_integer(0, s.length().type())));
321+
const exprt index_zero = from_integer(0, s.length().type());
322+
axioms.push_back(s.axiom_for_length_ge(index_zero));
323+
324324
if(max_string_length!=std::numeric_limits<size_t>::max())
325325
axioms.push_back(s.axiom_for_length_le(max_string_length));
326+
}
326327

327-
if(force_printable_characters)
328-
{
329-
symbol_exprt qvar=fresh_univ_index("printable", s.length().type());
330-
exprt chr=s[qvar];
331-
and_exprt printable(
332-
binary_relation_exprt(chr, ID_ge, from_integer(' ', chr.type())),
333-
binary_relation_exprt(chr, ID_le, from_integer('~', chr.type())));
334-
string_constraintt sc(qvar, s.length(), printable);
335-
axioms.push_back(sc);
336-
}
328+
/// Add constraint on characters of a string.
329+
///
330+
/// This constraint is
331+
/// \f$ \forall i \in [start, end), low\_char \le s[i] \le high\_char \f$
332+
/// \param s: a string expression
333+
/// \param start: index of the first character to constrain
334+
/// \param end: index at which we stop adding constraints
335+
/// \param char_set: a string of the form "<low_char>-<high_char>" where
336+
/// `<low_char>` and `<high_char>` are two characters, which represents
337+
/// the set of characters that are between `low_char` and `high_char`.
338+
/// \return a string expression that is linked to the argument through axioms
339+
/// that are added to the list
340+
void string_constraint_generatort::add_constraint_on_characters(
341+
const array_string_exprt &s,
342+
const exprt &start,
343+
const exprt &end,
344+
const std::string &char_set)
345+
{
346+
// Parse char_set
347+
PRECONDITION(char_set.length() == 3);
348+
PRECONDITION(char_set[1] == '-');
349+
const char &low_char = char_set[0];
350+
const char &high_char = char_set[2];
351+
352+
// Add constraint
353+
const symbol_exprt qvar = fresh_univ_index("char_constr", s.length().type());
354+
const exprt chr = s[qvar];
355+
const and_exprt char_in_set(
356+
binary_relation_exprt(chr, ID_ge, from_integer(low_char, chr.type())),
357+
binary_relation_exprt(chr, ID_le, from_integer(high_char, chr.type())));
358+
const string_constraintt sc(qvar, start, end, true_exprt(), char_in_set);
359+
axioms.push_back(sc);
360+
}
361+
362+
/// Add axioms to ensure all characters of a string belong to a given set.
363+
///
364+
/// The axiom is: \f$\forall i \in [start, end).\ s[i] \in char_set \f$, where
365+
/// `char_set` is given by the string `char_set_string` composed of three
366+
/// characters `low_char`, `-` and `high_char`. Character `c` belongs to
367+
/// `char_set` if \f$low_char \le c \le high_char\f$.
368+
/// \param f: a function application with arguments: integer `|s|`, character
369+
/// pointer `&s[0]`, string `char_set_string`,
370+
/// optional integers `start` and `end`
371+
/// \return integer expression whose value is zero
372+
exprt string_constraint_generatort::add_axioms_for_constrain_characters(
373+
const function_application_exprt &f)
374+
{
375+
const auto &args = f.arguments();
376+
PRECONDITION(3 <= args.size() && args.size() <= 5);
377+
PRECONDITION(args[2].type().id() == ID_string);
378+
PRECONDITION(args[2].id() == ID_constant);
379+
const array_string_exprt s = char_array_of_pointer(args[1], args[0]);
380+
const irep_idt &char_set_string = to_constant_expr(args[2]).get_value();
381+
const exprt &start =
382+
args.size() >= 4 ? args[3] : from_integer(0, s.length().type());
383+
const exprt &end = args.size() >= 5 ? args[4] : s.length();
384+
add_constraint_on_characters(s, start, end, char_set_string.c_str());
385+
return from_integer(0, get_return_code_type());
337386
}
338387

339388
/// Adds creates a new array if it does not already exists
@@ -472,6 +521,8 @@ exprt string_constraint_generatort::add_axioms_for_function_application(
472521
res = associate_array_to_pointer(expr);
473522
else if(id == ID_cprover_associate_length_to_array_func)
474523
res = associate_length_to_array(expr);
524+
else if(id == ID_cprover_string_constrain_characters_func)
525+
res = add_axioms_for_constrain_characters(expr);
475526
else
476527
{
477528
std::string msg(

src/util/irep_ids.def

+1
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,7 @@ IREP_ID_ONE(cprover_string_concat_bool_func)
773773
IREP_ID_ONE(cprover_string_concat_double_func)
774774
IREP_ID_ONE(cprover_string_concat_float_func)
775775
IREP_ID_ONE(cprover_string_concat_code_point_func)
776+
IREP_ID_ONE(cprover_string_constrain_characters_func)
776777
IREP_ID_ONE(cprover_string_contains_func)
777778
IREP_ID_ONE(cprover_string_copy_func)
778779
IREP_ID_ONE(cprover_string_delete_func)

unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ SCENARIO(
4949
WHEN("Initialisation code for a string is generated")
5050
{
5151
const codet code =
52-
initialize_nondet_string_struct(expr, 20, loc, symbol_table);
52+
initialize_nondet_string_struct(expr, 20, loc, symbol_table, false);
5353

5454
THEN("Code is produced")
5555
{

0 commit comments

Comments
 (0)