Skip to content

Commit 71ace43

Browse files
Merge pull request #1036 from romainbrenguier/feature/string-to-lower-case
Improvements in String.toLowerCase
2 parents 0610b0d + 695b3ce commit 71ace43

File tree

4 files changed

+71
-29
lines changed

4 files changed

+71
-29
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
CORE
22
test_case.class
33
--refine-strings
4-
^EXIT=0$
4+
^EXIT=10$
55
^SIGNAL=0$
6-
^VERIFICATION SUCCESSFUL$
6+
assertion.* file test_case.java line 10 .* SUCCESS$
7+
assertion.* file test_case.java line 11 .* SUCCESS$
8+
assertion.* file test_case.java line 12 .* SUCCESS$
9+
assertion.* file test_case.java line 16 .* FAILURE$
10+
assertion.* file test_case.java line 20 .* SUCCESS$
11+
assertion.* file test_case.java line 24 .* FAILURE$
712
--
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,27 @@
11
public class test_case
22
{
3-
public static void main(/*String[] argv*/)
4-
{
5-
String s = new String("Ab");
6-
String l = s.toLowerCase();
7-
String u = s.toUpperCase();
8-
assert(l.equals("ab"));
9-
assert(u.equals("AB"));
10-
assert(s.equalsIgnoreCase("aB"));
11-
}
3+
public static void main(int i)
4+
{
5+
String s = new String("Ab");
6+
String l = s.toLowerCase();
7+
String u = s.toUpperCase();
8+
if(i==1)
9+
{
10+
assert(l.equals("ab"));
11+
assert(u.equals("AB"));
12+
assert(s.equalsIgnoreCase("aB"));
13+
}
14+
else if(i==2)
15+
{
16+
assert(!u.equals("AB"));
17+
}
18+
else if(i==3)
19+
{
20+
assert("ÖÇ".toLowerCase().equals("öç"));
21+
}
22+
else
23+
{
24+
assert(!"ÖÇ".toLowerCase().equals("öç"));
25+
}
26+
}
1227
}

src/solvers/refinement/string_constraint_generator_transformation.cpp

+40-18
Original file line numberDiff line numberDiff line change
@@ -214,36 +214,58 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case(
214214
const typet &char_type=ref_type.get_char_type();
215215
const typet &index_type=ref_type.get_index_type();
216216
string_exprt res=fresh_string(ref_type);
217-
exprt char_a=constant_char('a', char_type);
218-
exprt char_A=constant_char('A', char_type);
219-
exprt char_z=constant_char('z', char_type);
220-
exprt char_Z=constant_char('Z', char_type);
217+
const exprt char_A=constant_char('A', char_type);
218+
const exprt char_Z=constant_char('Z', char_type);
221219

222-
// TODO: add support for locales using case mapping information
223-
// from the UnicodeData file.
220+
221+
// TODO: for now, only characters in Basic Latin and Latin-1 supplement
222+
// are supported (below 0x100); we should add others using case mapping
223+
// information from the UnicodeData file.
224224

225225
// We add axioms:
226226
// a1 : |res| = |str|
227-
// a2 : forall idx<str.length, 'A'<=str[idx]<='Z' => res[idx]=str[idx]+'a'-'A'
228-
// a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
229-
// forall idx<str.length,
230-
// this[idx]='A'<=str[idx]<='Z' ? str[idx]+'a'-'A' : str[idx]
227+
// a2 : forall idx<str.length,
228+
// is_upper_case(str[idx])?
229+
// res[idx]=str[idx]+diff : (res[idx]=str[idx] && str[idx]<0x100)
230+
// where diff is the difference between lower case and upper case characters:
231+
// diff = 'a'-'A' = 0x20
231232

232233
exprt a1=res.axiom_for_has_same_length_as(str);
233234
axioms.push_back(a1);
234235

235236
symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
236-
exprt is_upper_case=and_exprt(
237+
exprt::operandst upper_case;
238+
// Characters between 'A' and 'Z' are upper-case
239+
upper_case.push_back(and_exprt(
237240
binary_relation_exprt(char_A, ID_le, str[idx]),
238-
binary_relation_exprt(str[idx], ID_le, char_Z));
239-
minus_exprt diff(char_a, char_A);
240-
equal_exprt convert(res[idx], plus_exprt(str[idx], diff));
241-
string_constraintt a2(idx, res.length(), is_upper_case, convert);
241+
binary_relation_exprt(str[idx], ID_le, char_Z)));
242+
243+
// Characters between 0xc0 (latin capital A with grave)
244+
// and 0xd6 (latin capital O with diaeresis) are upper-case
245+
upper_case.push_back(and_exprt(
246+
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
247+
binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type))));
248+
249+
// Characters between 0xd8 (latin capital O with stroke)
250+
// and 0xde (latin capital thorn) are upper-case
251+
upper_case.push_back(and_exprt(
252+
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
253+
binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type))));
254+
255+
exprt is_upper_case=disjunction(upper_case);
256+
257+
// The difference between upper-case and lower-case for the basic latin and
258+
// latin-1 supplement is 0x20.
259+
exprt diff=from_integer(0x20, char_type);
260+
equal_exprt converted(res[idx], plus_exprt(str[idx], diff));
261+
and_exprt non_converted(
262+
equal_exprt(res[idx], str[idx]),
263+
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
264+
if_exprt conditional_convert(is_upper_case, converted, non_converted);
265+
266+
string_constraintt a2(idx, res.length(), conditional_convert);
242267
axioms.push_back(a2);
243268

244-
equal_exprt eq(res[idx], str[idx]);
245-
string_constraintt a3(idx, res.length(), not_exprt(is_upper_case), eq);
246-
axioms.push_back(a3);
247269
return res;
248270
}
249271

0 commit comments

Comments
 (0)