Skip to content

Input string printable TG-808 #1513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Binary file not shown.
21 changes: 21 additions & 0 deletions regression/strings-smoke-tests/java_string_printable/Test.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
public class Test {

public static String check(String t) {
String s = t.concat("$£¥\u0000\u0008\u0010\u001F");
// This should be disallowed because "\u0000" is not printable
assert(!s.equals("\u0000$£¥\u0000\u0008\u0010\u001F"));
// This can fail when t="a" which is printable
assert(!s.equals("a$£¥\u0000\u0008\u0010\u001F"));
return s;
}

public static boolean check_char(String s, char c) {
if(s == null)
return false;
// This should be -1 for all non-printable character
int i = s.indexOf(c);
boolean b = (i == -1 || (c >= ' ' && c <= '~'));
assert(b);
return b;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
CORE
Test.class
--refine-strings --function Test.check --string-max-length 100 --string-printable --java-assume-inputs-non-null
^EXIT=10$
^SIGNAL=0$
assertion.* file Test.java line 6 .* SUCCESS
assertion.* file Test.java line 8 .* FAILURE
--
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CORE
Test.class
--refine-strings --function Test.check_char --string-max-length 100 --string-printable
^EXIT=0$
^SIGNAL=0$
assertion.* file Test.java line 18 .* SUCCESS
--
1 change: 0 additions & 1 deletion src/cbmc/cbmc_solvers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ std::unique_ptr<cbmc_solverst::solvert> cbmc_solverst::get_string_refinement()
info.string_max_length=options.get_signed_int_option("string-max-length");
info.string_non_empty=options.get_bool_option("string-non-empty");
info.trace=options.get_bool_option("trace");
info.string_printable=options.get_bool_option("string-printable");
if(options.get_bool_option("max-node-refinement"))
info.max_node_refinement=
options.get_unsigned_int_option("max-node-refinement");
Expand Down
1 change: 1 addition & 0 deletions src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
if(cmd.isset("string-max-input-length"))
object_factory_parameters.max_nondet_string_length=
std::stoi(cmd.get_value("string-max-input-length"));
object_factory_parameters.string_printable = cmd.isset("string-printable");
if(cmd.isset("java-max-vla-length"))
max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length"));
if(cmd.isset("lazy-methods-context-sensitive"))
Expand Down
3 changes: 3 additions & 0 deletions src/java_bytecode/java_bytecode_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ struct object_factory_parameterst final
/// dereference a pointer using a 'depth counter'. We set a pointer to null if
/// such depth becomes >= than this maximum value.
size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH;

/// Force string content to be ASCII printable characters when set to true.
bool string_printable = false;
};

typedef std::pair<
Expand Down
16 changes: 14 additions & 2 deletions src/java_bytecode/java_object_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,8 @@ codet initialize_nondet_string_struct(
const exprt &obj,
const std::size_t &max_nondet_string_length,
const source_locationt &loc,
symbol_tablet &symbol_table)
symbol_tablet &symbol_table,
bool printable)
{
PRECONDITION(
java_string_library_preprocesst::implements_java_char_sequence(obj.type()));
Expand Down Expand Up @@ -605,6 +606,13 @@ codet initialize_nondet_string_struct(

add_array_to_length_association(
data_expr, length_expr, symbol_table, loc, code);

// Printable ASCII characters are between ' ' and '~'.
if(printable)
{
add_character_set_constraint(
array_pointer, length_expr, " -~", symbol_table, loc, code);
}
}

// tmp_object = struct_expr;
Expand All @@ -616,6 +624,7 @@ codet initialize_nondet_string_struct(
/// content and association of its address to the pointer `expr`.
/// \param expr: pointer to be affected
/// \param max_nondet_string_length: maximum length of strings to initialize
/// \param printable: Boolean, true to force content to be printable
/// \param symbol_table: the symbol table
/// \param loc: location in the source
/// \param [out] code: code block in which initialization code is added
Expand All @@ -625,6 +634,7 @@ codet initialize_nondet_string_struct(
static bool add_nondet_string_pointer_initialization(
const exprt &expr,
const std::size_t &max_nondet_string_length,
bool printable,
symbol_tablet &symbol_table,
const source_locationt &loc,
code_blockt &code)
Expand All @@ -644,7 +654,8 @@ static bool add_nondet_string_pointer_initialization(
dereference_exprt(expr, struct_type),
max_nondet_string_length,
loc,
symbol_table));
symbol_table,
printable));
return false;
}

Expand Down Expand Up @@ -784,6 +795,7 @@ void java_object_factoryt::gen_nondet_pointer_init(
add_nondet_string_pointer_initialization(
expr,
object_factory_parameters.max_nondet_string_length,
object_factory_parameters.string_printable,
symbol_table,
loc,
assignments);
Expand Down
3 changes: 2 additions & 1 deletion src/java_bytecode/java_object_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ codet initialize_nondet_string_struct(
const exprt &obj,
const std::size_t &max_nondet_string_length,
const source_locationt &loc,
symbol_tablet &symbol_table);
symbol_tablet &symbol_table,
bool printable);

#endif // CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H
32 changes: 32 additions & 0 deletions src/java_bytecode/java_string_library_preprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,38 @@ void add_array_to_length_association(
symbol_table));
}

/// Add a call to a primitive of the string solver which ensures all characters
/// belong to the character set.
/// \param pointer: a character pointer expression
/// \param length: length of the character sequence pointed by `pointer`
/// \param char_set: character set given by a range expression consisting of
/// two characters separated by an hyphen.
/// For instance "a-z" denotes all lower case ascii letters.
/// \param symbol_table: the symbol table
/// \param loc: source location
/// \param code [out] : code block to which declaration and calls get added
void add_character_set_constraint(
const exprt &pointer,
const exprt &length,
const irep_idt &char_set,
symbol_tablet &symbol_table,
const source_locationt &loc,
code_blockt &code)
{
PRECONDITION(pointer.type().id() == ID_pointer);
symbolt &return_sym = get_fresh_aux_symbol(
java_int_type(), "cnstr_added", "cnstr_added", loc, ID_java, symbol_table);
const exprt return_expr = return_sym.symbol_expr();
code.add(code_declt(return_expr));
const constant_exprt char_set_expr(char_set, string_typet());
code.add(
code_assign_function_application(
return_expr,
ID_cprover_string_constrain_characters_func,
{length, pointer, char_set_expr},
symbol_table));
}

/// Create a refined_string_exprt `str` whose content and length are fresh
/// symbols, calls the string primitive with name `function_name`.
/// In the arguments of the primitive `str` takes the place of the result and
Expand Down
8 changes: 8 additions & 0 deletions src/java_bytecode/java_string_library_preprocess.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,4 +349,12 @@ void add_array_to_length_association(
const source_locationt &loc,
code_blockt &code);

void add_character_set_constraint(
const exprt &pointer,
const exprt &length,
const irep_idt &char_set,
symbol_tablet &symbol_table,
const source_locationt &loc,
code_blockt &code);

#endif // CPROVER_JAVA_BYTECODE_JAVA_STRING_LIBRARY_PREPROCESS_H
11 changes: 8 additions & 3 deletions src/solvers/refinement/string_constraint_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ class string_constraint_generatort final
{
/// Max length of non-deterministic strings
size_t string_max_length=std::numeric_limits<size_t>::max();
/// Prefer printable characters in non-deterministic strings
bool string_printable=false;
};

string_constraint_generatort(const infot& info, const namespacet& ns);
Expand Down Expand Up @@ -110,6 +108,14 @@ class string_constraint_generatort final
void add_default_axioms(const array_string_exprt &s);
exprt axiom_for_is_positive_index(const exprt &x);

void add_constraint_on_characters(
const array_string_exprt &s,
const exprt &start,
const exprt &end,
const std::string &char_set);
exprt
add_axioms_for_constrain_characters(const function_application_exprt &f);

// The following functions add axioms for the returned value
// to be equal to the result of the function given as argument.
// They are not accessed directly from other classes: they call
Expand Down Expand Up @@ -339,7 +345,6 @@ class string_constraint_generatort final
std::set<array_string_exprt> created_strings;
unsigned symbol_count=0;
const messaget message;
const bool force_printable_characters;

std::vector<exprt> axioms;
std::vector<symbol_exprt> boolean_symbols;
Expand Down
77 changes: 64 additions & 13 deletions src/solvers/refinement/string_constraint_generator_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ string_constraint_generatort::string_constraint_generatort(
const string_constraint_generatort::infot &info,
const namespacet &ns)
: max_string_length(info.string_max_length),
force_printable_characters(info.string_printable),
ns(ns)
{
}
Expand Down Expand Up @@ -319,21 +318,71 @@ void string_constraint_generatort::add_default_axioms(
if(!created_strings.insert(s).second)
return;

axioms.push_back(
s.axiom_for_length_ge(from_integer(0, s.length().type())));
const exprt index_zero = from_integer(0, s.length().type());
axioms.push_back(s.axiom_for_length_ge(index_zero));

if(max_string_length!=std::numeric_limits<size_t>::max())
axioms.push_back(s.axiom_for_length_le(max_string_length));
}

if(force_printable_characters)
{
symbol_exprt qvar=fresh_univ_index("printable", s.length().type());
exprt chr=s[qvar];
and_exprt printable(
binary_relation_exprt(chr, ID_ge, from_integer(' ', chr.type())),
binary_relation_exprt(chr, ID_le, from_integer('~', chr.type())));
string_constraintt sc(qvar, s.length(), printable);
axioms.push_back(sc);
}
/// Add constraint on characters of a string.
///
/// This constraint is
/// \f$ \forall i \in [start, end), low\_char \le s[i] \le high\_char \f$
/// \param s: a string expression
/// \param start: index of the first character to constrain
/// \param end: index at which we stop adding constraints
/// \param char_set: a string of the form "<low_char>-<high_char>" where
/// `<low_char>` and `<high_char>` are two characters, which represents
/// the set of characters that are between `low_char` and `high_char`.
/// \return a string expression that is linked to the argument through axioms
/// that are added to the list
void string_constraint_generatort::add_constraint_on_characters(
const array_string_exprt &s,
const exprt &start,
const exprt &end,
const std::string &char_set)
{
// Parse char_set
PRECONDITION(char_set.length() == 3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we check this when accepting the command-line option? If not we should; it's always better to fail early on bad user input so they don't have to wait until symex completes to learn they've done it wrong

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, this doesn't come directly from the command-line, when --string-printable is set java_object_factory will call this primitive with the fixed input " -~" so there is no risk of bad user input.
In the future, we could allow the user to configure that and have more complex char_set (as you have in regex for instance), but this is outside the scope of this PR.

e.g. restricting the charset to "a-b" , insist that the string is 5 chars long and not "aaaaa", and check that you get something matching [ab]{5}

Because this cannot really be set by the user, this would require some unit test. Alternatively I could add a test with something like this:
f(String s, char c){assert(s.find(c)=-1 || (c>= ' ' && c<= '~'));} which ensures we cannot find a non ascii-printable character in a string.

PRECONDITION(char_set[1] == '-');
const char &low_char = char_set[0];
const char &high_char = char_set[2];

// Add constraint
const symbol_exprt qvar = fresh_univ_index("char_constr", s.length().type());
const exprt chr = s[qvar];
const and_exprt char_in_set(
binary_relation_exprt(chr, ID_ge, from_integer(low_char, chr.type())),
binary_relation_exprt(chr, ID_le, from_integer(high_char, chr.type())));
const string_constraintt sc(qvar, start, end, true_exprt(), char_in_set);
axioms.push_back(sc);
}

/// Add axioms to ensure all characters of a string belong to a given set.
///
/// The axiom is: \f$\forall i \in [start, end).\ s[i] \in char_set \f$, where
/// `char_set` is given by the string `char_set_string` composed of three
/// characters `low_char`, `-` and `high_char`. Character `c` belongs to
/// `char_set` if \f$low_char \le c \le high_char\f$.
/// \param f: a function application with arguments: integer `|s|`, character
/// pointer `&s[0]`, string `char_set_string`,
/// optional integers `start` and `end`
/// \return integer expression whose value is zero
exprt string_constraint_generatort::add_axioms_for_constrain_characters(
const function_application_exprt &f)
{
const auto &args = f.arguments();
PRECONDITION(3 <= args.size() && args.size() <= 5);
PRECONDITION(args[2].type().id() == ID_string);
PRECONDITION(args[2].id() == ID_constant);
const array_string_exprt s = char_array_of_pointer(args[1], args[0]);
const irep_idt &char_set_string = to_constant_expr(args[2]).get_value();
const exprt &start =
args.size() >= 4 ? args[3] : from_integer(0, s.length().type());
const exprt &end = args.size() >= 5 ? args[4] : s.length();
add_constraint_on_characters(s, start, end, char_set_string.c_str());
return from_integer(0, get_return_code_type());
}

/// Adds creates a new array if it does not already exists
Expand Down Expand Up @@ -472,6 +521,8 @@ exprt string_constraint_generatort::add_axioms_for_function_application(
res = associate_array_to_pointer(expr);
else if(id == ID_cprover_associate_length_to_array_func)
res = associate_length_to_array(expr);
else if(id == ID_cprover_string_constrain_characters_func)
res = add_axioms_for_constrain_characters(expr);
else
{
std::string msg(
Expand Down
1 change: 1 addition & 0 deletions src/util/irep_ids.def
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ IREP_ID_ONE(cprover_string_concat_bool_func)
IREP_ID_ONE(cprover_string_concat_double_func)
IREP_ID_ONE(cprover_string_concat_float_func)
IREP_ID_ONE(cprover_string_concat_code_point_func)
IREP_ID_ONE(cprover_string_constrain_characters_func)
IREP_ID_ONE(cprover_string_contains_func)
IREP_ID_ONE(cprover_string_copy_func)
IREP_ID_ONE(cprover_string_delete_func)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ SCENARIO(
WHEN("Initialisation code for a string is generated")
{
const codet code =
initialize_nondet_string_struct(expr, 20, loc, symbol_table);
initialize_nondet_string_struct(expr, 20, loc, symbol_table, false);

THEN("Code is produced")
{
Expand Down