Skip to content

Commit 8c4801b

Browse files
Refactor add_axioms_for_to_lower_case
1 parent 4291232 commit 8c4801b

File tree

2 files changed

+59
-42
lines changed

2 files changed

+59
-42
lines changed

src/solvers/refinement/string_constraint_generator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ class string_constraint_generatort final
175175
const array_string_exprt &str,
176176
const exprt &position,
177177
const exprt &character);
178+
exprt add_axioms_for_to_lower_case(
179+
const array_string_exprt &res,
180+
const array_string_exprt &str);
178181

179182
private:
180183
symbol_exprt fresh_boolean(const irep_idt &prefix);

src/solvers/refinement/string_constraint_generator_transformation.cpp

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -251,55 +251,69 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case(
251251
const array_string_exprt res =
252252
char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
253253
const array_string_exprt str = get_string_expr(f.arguments()[2]);
254-
const refined_string_typet &ref_type =
255-
to_refined_string_type(f.arguments()[2].type());
256-
const typet &char_type=ref_type.get_char_type();
257-
const typet &index_type=ref_type.get_index_type();
258-
const exprt char_A=constant_char('A', char_type);
259-
const exprt char_Z=constant_char('Z', char_type);
254+
return add_axioms_for_to_lower_case(res, str);
255+
}
256+
257+
exprt string_constraint_generatort::add_axioms_for_to_lower_case(
258+
const array_string_exprt &res,
259+
const array_string_exprt &str)
260+
{
261+
const typet &char_type = res.type().subtype();
262+
const typet &index_type = res.length().type();
263+
const exprt char_A = from_integer('A', char_type);
264+
const exprt char_Z = from_integer('Z', char_type);
260265

261266
// \todo for now, only characters in Basic Latin and Latin-1 supplement
262267
// are supported (up to 0x100), we should add others using case mapping
263268
// information from the UnicodeData file.
264269

265-
equal_exprt a1(res.length(), str.length());
266-
lemmas.push_back(a1);
270+
lemmas.push_back(equal_exprt(res.length(), str.length()));
267271

268-
symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
269-
exprt::operandst upper_case;
270-
// Characters between 'A' and 'Z' are upper-case
271-
upper_case.push_back(and_exprt(
272-
binary_relation_exprt(char_A, ID_le, str[idx]),
273-
binary_relation_exprt(str[idx], ID_le, char_Z)));
274-
275-
// Characters between 0xc0 (latin capital A with grave)
276-
// and 0xd6 (latin capital O with diaeresis) are upper-case
277-
upper_case.push_back(and_exprt(
278-
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
279-
binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type))));
280-
281-
// Characters between 0xd8 (latin capital O with stroke)
282-
// and 0xde (latin capital thorn) are upper-case
283-
upper_case.push_back(and_exprt(
284-
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
285-
binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type))));
286-
287-
exprt is_upper_case=disjunction(upper_case);
288-
289-
// The difference between upper-case and lower-case for the basic latin and
290-
// latin-1 supplement is 0x20.
291-
exprt diff=from_integer(0x20, char_type);
292-
equal_exprt converted(res[idx], plus_exprt(str[idx], diff));
293-
and_exprt non_converted(
294-
equal_exprt(res[idx], str[idx]),
295-
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
296-
if_exprt conditional_convert(is_upper_case, converted, non_converted);
297-
298-
string_constraintt a2(
299-
idx, zero_if_negative(res.length()), conditional_convert);
300-
constraints.push_back(a2);
272+
constraints.push_back([&] {
273+
const symbol_exprt idx = fresh_univ_index("QA_lower_case", index_type);
274+
275+
const exprt is_upper_case = disjunction([&] {
276+
exprt::operandst upper_case;
277+
// Characters between 'A' and 'Z' are upper-case
278+
upper_case.push_back(
279+
and_exprt(
280+
binary_relation_exprt(char_A, ID_le, str[idx]),
281+
binary_relation_exprt(str[idx], ID_le, char_Z)));
282+
283+
// Characters between 0xc0 (latin capital A with grave)
284+
// and 0xd6 (latin capital O with diaeresis) are upper-case
285+
upper_case.push_back(
286+
and_exprt(
287+
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
288+
binary_relation_exprt(
289+
str[idx], ID_le, from_integer(0xd6, char_type))));
290+
291+
// Characters between 0xd8 (latin capital O with stroke)
292+
// and 0xde (latin capital thorn) are upper-case
293+
upper_case.push_back(
294+
and_exprt(
295+
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
296+
binary_relation_exprt(
297+
str[idx], ID_le, from_integer(0xde, char_type))));
298+
return upper_case;
299+
}());
300+
301+
const exprt conditional_convert = [&] {
302+
// The difference between upper-case and lower-case for the basic latin and
303+
// latin-1 supplement is 0x20.
304+
const exprt diff = from_integer(0x20, char_type);
305+
const exprt converted = equal_exprt(res[idx], plus_exprt(str[idx], diff));
306+
const exprt non_converted = and_exprt(
307+
equal_exprt(res[idx], str[idx]),
308+
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
309+
return if_exprt(is_upper_case, converted, non_converted);
310+
}();
301311

302-
return from_integer(0, f.type());
312+
return string_constraintt(
313+
idx, zero_if_negative(res.length()), conditional_convert);
314+
}());
315+
316+
return from_integer(0, get_return_code_type());
303317
}
304318

305319
/// Add axioms ensuring `res` corresponds to `str` in which lowercase characters

0 commit comments

Comments
 (0)