Skip to content

Commit 6a156a9

Browse files
Making toLowerCase more precise
We extend add_axioms_for_to_lower_case by taking into account latin characters up to u00FF and adding in the lemmas the property that the characters should not exceed u00FF, making the axioms correct (we will not generate wrong assertion violation) but incomplete (we may miss cases involving characters beyond u00FF).
1 parent c50f3d5 commit 6a156a9

File tree

1 file changed

+30
-16
lines changed

1 file changed

+30
-16
lines changed

src/solvers/refinement/string_constraint_generator_transformation.cpp

+30-16
Original file line numberDiff line numberDiff line change
@@ -216,34 +216,48 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case(
216216
string_exprt res=fresh_string(ref_type);
217217
exprt char_a=constant_char('a', char_type);
218218
exprt char_A=constant_char('A', char_type);
219-
exprt char_z=constant_char('z', char_type);
220-
exprt char_Z=constant_char('Z', char_type);
221219

222-
// TODO: add support for locales using case mapping information
223-
// from the UnicodeData file.
220+
// TODO: for now, only characters in Basic Latin and Latin-1 supplement
221+
// are supported (up to 0x100), we should add others using case mapping
222+
// information from the UnicodeData file.
224223

225224
// We add axioms:
226225
// a1 : |res| = |str|
227-
// a2 : forall idx<str.length, 'A'<=str[idx]<='Z' => res[idx]=str[idx]+'a'-'A'
228-
// a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
229-
// forall idx<str.length,
230-
// this[idx]='A'<=str[idx]<='Z' ? str[idx]+'a'-'A' : str[idx]
226+
// a2 : forall idx<str.length,
227+
// is_upper_case(str[idx])?
228+
// res[idx]=str[idx]+'a'-'A' : res[idx]=str[idx]<0x100
231229

232230
exprt a1=res.axiom_for_has_same_length_as(str);
233231
axioms.push_back(a1);
234232

235233
symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
236-
exprt is_upper_case=and_exprt(
234+
exprt::operandst upper_case;
235+
upper_case.push_back(and_exprt(
237236
binary_relation_exprt(char_A, ID_le, str[idx]),
238-
binary_relation_exprt(str[idx], ID_le, char_Z));
239-
minus_exprt diff(char_a, char_A);
240-
equal_exprt convert(res[idx], plus_exprt(str[idx], diff));
241-
string_constraintt a2(idx, res.length(), is_upper_case, convert);
237+
binary_relation_exprt(str[idx], ID_le, from_integer('Z', char_type))));
238+
239+
upper_case.push_back(and_exprt(
240+
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
241+
binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type))));
242+
243+
upper_case.push_back(and_exprt(
244+
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
245+
binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type))));
246+
247+
exprt is_upper_case=disjunction(upper_case);
248+
249+
// The difference between upper-case and lower-case for the basic latin and
250+
// latin-1 supplement is 0x20.
251+
exprt diff=from_integer(0x20, char_type);
252+
equal_exprt converted(res[idx], plus_exprt(str[idx], diff));
253+
and_exprt non_converted(
254+
equal_exprt(res[idx], str[idx]),
255+
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
256+
if_exprt conditional_convert(is_upper_case, converted, non_converted);
257+
258+
string_constraintt a2(idx, res.length(), conditional_convert);
242259
axioms.push_back(a2);
243260

244-
equal_exprt eq(res[idx], str[idx]);
245-
string_constraintt a3(idx, res.length(), not_exprt(is_upper_case), eq);
246-
axioms.push_back(a3);
247261
return res;
248262
}
249263

0 commit comments

Comments
 (0)