Skip to content

Commit fe9071b

Browse files
Make to_upper_case work for Latin-1 supplement
1 parent 53ccd3f commit fe9071b

File tree

1 file changed

+21
-32
lines changed

1 file changed

+21
-32
lines changed

src/solvers/refinement/string_constraint_generator_transformation.cpp

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,14 @@ static exprt is_upper_case(const exprt &character)
279279
return disjunction(upper_case);
280280
}
281281

282+
/// Expression which is true for lower_case characters of the Basic Latin and
283+
/// Latin-1 supplement of unicode.
284+
static exprt is_lower_case(const exprt &character)
285+
{
286+
return is_upper_case(
287+
minus_exprt(character, from_integer(0x20, character.type())));
288+
}
289+
282290
/// Add axioms ensuring `res` corresponds to `str` in which uppercase characters
283291
/// have been converted to lowercase.
284292
/// These axioms are:
@@ -323,19 +331,14 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case(
323331
}
324332

325333
/// Add axioms ensuring `res` corresponds to `str` in which lowercase characters
326-
/// have been converted to uppercase.
334+
/// of Basic Latin and Latin-1 supplement of unicode have been converted to
335+
/// uppercase.
327336
///
328337
/// These axioms are:
329-
/// 1. \f$ |{\tt res}| = |{\tt str}| \f$
330-
/// 2. \f$ \forall i<|{\tt str}|.\ 'a'\le {\tt str}[i]\le 'z'
331-
/// \Rightarrow {\tt res}[i]={\tt str}[i]+'A'-'a' \f$
332-
/// 3. \f$ \forall i<|{\tt str}|.\ \lnot ('a'\le {\tt str}[i] \le 'z')
333-
/// \Rightarrow {\tt res}[i]={\tt str}[i] \f$
334-
/// Note that index expressions are only allowed in the body of universal
335-
/// axioms, so we use a trivial premise and push our premise into the body.
338+
/// 1. res.length = str.length
339+
/// 2. forall i < str.length.
340+
/// is_lower_case(str[i]) ? res[i] = str[i] - 0x20 : res[i] = str[i]
336341
///
337-
/// \todo We can reduce the number of constraints by merging
338-
/// constraints 2 and 3.
339342
/// \param res: array of characters expression
340343
/// \param str: array of characters expression
341344
/// \return integer expression which is different from `0` when there is an
@@ -350,31 +353,17 @@ exprt string_constraint_generatort::add_axioms_for_to_upper_case(
350353
exprt char_A=constant_char('A', char_type);
351354
exprt char_z=constant_char('z', char_type);
352355

353-
// \todo Add support for locales using case mapping information
354-
// from the UnicodeData file.
355-
356356
lemmas.push_back(equal_exprt(res.length(), str.length()));
357357

358358
constraints.push_back([&] {
359-
const symbol_exprt idx1 = fresh_univ_index("QA_upper_case1", index_type);
360-
const exprt is_lower_case = and_exprt(
361-
binary_relation_exprt(char_a, ID_le, str[idx1]),
362-
binary_relation_exprt(str[idx1], ID_le, char_z));
363-
const exprt diff = minus_exprt(char_A, char_a);
364-
const exprt convert = equal_exprt(res[idx1], plus_exprt(str[idx1], diff));
365-
const exprt body = implies_exprt(is_lower_case, convert);
366-
return string_constraintt(idx1, zero_if_negative(res.length()), body);
367-
}());
368-
369-
constraints.push_back([&] {
370-
const symbol_exprt idx2 = fresh_univ_index("QA_upper_case2", index_type);
371-
const exprt is_not_lower_case = not_exprt(
372-
and_exprt(
373-
binary_relation_exprt(char_a, ID_le, str[idx2]),
374-
binary_relation_exprt(str[idx2], ID_le, char_z)));
375-
const exprt eq = equal_exprt(res[idx2], str[idx2]);
376-
const exprt body2 = implies_exprt(is_not_lower_case, eq);
377-
return string_constraintt(idx2, zero_if_negative(res.length()), body2);
359+
const symbol_exprt idx = fresh_univ_index("QA_upper_case", index_type);
360+
const exprt converted =
361+
minus_exprt(str[idx], from_integer(0x20, char_type));
362+
return string_constraintt(
363+
idx,
364+
zero_if_negative(res.length()),
365+
equal_exprt(
366+
res[idx], if_exprt(is_lower_case(str[idx]), converted, str[idx])));
378367
}());
379368

380369
return from_integer(0, get_return_code_type());

0 commit comments

Comments
 (0)