Skip to content

Eval function for string_to_lower/upper_case builtin functions #2678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/solvers/refinement/string_constraint_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ class string_constraint_generatort final
const array_string_exprt &str,
const exprt &position,
const exprt &character);
exprt add_axioms_for_to_lower_case(
const array_string_exprt &res,
const array_string_exprt &str);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why exported?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are going to use it in the string_builtin_function object evaluation function


private:
symbol_exprt fresh_boolean(const irep_idt &prefix);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,55 +251,69 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case(
const array_string_exprt res =
char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
const array_string_exprt str = get_string_expr(f.arguments()[2]);
const refined_string_typet &ref_type =
to_refined_string_type(f.arguments()[2].type());
const typet &char_type=ref_type.get_char_type();
const typet &index_type=ref_type.get_index_type();
const exprt char_A=constant_char('A', char_type);
const exprt char_Z=constant_char('Z', char_type);
return add_axioms_for_to_lower_case(res, str);
}

exprt string_constraint_generatort::add_axioms_for_to_lower_case(
const array_string_exprt &res,
const array_string_exprt &str)
{
const typet &char_type = res.type().subtype();
const typet &index_type = res.length().type();
const exprt char_A = from_integer('A', char_type);
const exprt char_Z = from_integer('Z', char_type);

// \todo for now, only characters in Basic Latin and Latin-1 supplement
// are supported (up to 0x100), we should add others using case mapping
// information from the UnicodeData file.

equal_exprt a1(res.length(), str.length());
lemmas.push_back(a1);
lemmas.push_back(equal_exprt(res.length(), str.length()));

symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
exprt::operandst upper_case;
// Characters between 'A' and 'Z' are upper-case
upper_case.push_back(and_exprt(
binary_relation_exprt(char_A, ID_le, str[idx]),
binary_relation_exprt(str[idx], ID_le, char_Z)));

// Characters between 0xc0 (latin capital A with grave)
// and 0xd6 (latin capital O with diaeresis) are upper-case
upper_case.push_back(and_exprt(
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type))));

// Characters between 0xd8 (latin capital O with stroke)
// and 0xde (latin capital thorn) are upper-case
upper_case.push_back(and_exprt(
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type))));

exprt is_upper_case=disjunction(upper_case);

// The difference between upper-case and lower-case for the basic latin and
// latin-1 supplement is 0x20.
exprt diff=from_integer(0x20, char_type);
equal_exprt converted(res[idx], plus_exprt(str[idx], diff));
and_exprt non_converted(
equal_exprt(res[idx], str[idx]),
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
if_exprt conditional_convert(is_upper_case, converted, non_converted);

string_constraintt a2(
idx, zero_if_negative(res.length()), conditional_convert);
constraints.push_back(a2);
constraints.push_back([&] {
const symbol_exprt idx = fresh_univ_index("QA_lower_case", index_type);

const exprt is_upper_case = disjunction([&] {
exprt::operandst upper_case;
// Characters between 'A' and 'Z' are upper-case
upper_case.push_back(
and_exprt(
binary_relation_exprt(char_A, ID_le, str[idx]),
binary_relation_exprt(str[idx], ID_le, char_Z)));

// Characters between 0xc0 (latin capital A with grave)
// and 0xd6 (latin capital O with diaeresis) are upper-case
upper_case.push_back(
and_exprt(
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
binary_relation_exprt(
str[idx], ID_le, from_integer(0xd6, char_type))));

// Characters between 0xd8 (latin capital O with stroke)
// and 0xde (latin capital thorn) are upper-case
upper_case.push_back(
and_exprt(
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
binary_relation_exprt(
str[idx], ID_le, from_integer(0xde, char_type))));
return upper_case;
}());

const exprt conditional_convert = [&] {
// The difference between upper-case and lower-case for the basic latin and
// latin-1 supplement is 0x20.
const exprt diff = from_integer(0x20, char_type);
const exprt converted = equal_exprt(res[idx], plus_exprt(str[idx], diff));
const exprt non_converted = and_exprt(
equal_exprt(res[idx], str[idx]),
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
return if_exprt(is_upper_case, converted, non_converted);
}();

return from_integer(0, f.type());
return string_constraintt(
idx, zero_if_negative(res.length()), conditional_convert);
}());

return from_integer(0, get_return_code_type());
}

/// Add axioms ensuring `res` corresponds to `str` in which lowercase characters
Expand Down