@@ -214,36 +214,58 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case(
214
214
const typet &char_type=ref_type.get_char_type ();
215
215
const typet &index_type=ref_type.get_index_type ();
216
216
string_exprt res=fresh_string (ref_type);
217
- exprt char_a=constant_char (' a' , char_type);
218
- exprt char_A=constant_char (' A' , char_type);
219
- exprt char_z=constant_char (' z' , char_type);
220
- exprt char_Z=constant_char (' Z' , char_type);
217
+ const exprt char_A=constant_char (' A' , char_type);
218
+ const exprt char_Z=constant_char (' Z' , char_type);
221
219
222
- // TODO: add support for locales using case mapping information
223
- // from the UnicodeData file.
220
+
221
+ // TODO: for now, only characters in Basic Latin and Latin-1 supplement
222
+ // are supported (up to 0x100), we should add others using case mapping
223
+ // information from the UnicodeData file.
224
224
225
225
// We add axioms:
226
226
// a1 : |res| = |str|
227
- // a2 : forall idx<str.length, 'A'<=str[idx]<='Z' => res[idx]=str[idx]+'a'-'A'
228
- // a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
229
- // forall idx<str.length,
230
- // this[idx]='A'<=str[idx]<='Z' ? str[idx]+'a'-'A' : str[idx]
227
+ // a2 : forall idx<str.length,
228
+ // is_upper_case(str[idx])?
229
+ // res[idx]=str[idx]+diff : res[idx]=str[idx]<0x100
230
+ // where diff is the difference between lower case and upper case characters:
231
+ // diff = 'a'-'A' = 0x20
231
232
232
233
exprt a1=res.axiom_for_has_same_length_as (str);
233
234
axioms.push_back (a1);
234
235
235
236
symbol_exprt idx=fresh_univ_index (" QA_lower_case" , index_type);
236
- exprt is_upper_case=and_exprt (
237
+ exprt::operandst upper_case;
238
+ // Characters between 'A' and 'Z' are upper-case
239
+ upper_case.push_back (and_exprt (
237
240
binary_relation_exprt (char_A, ID_le, str[idx]),
238
- binary_relation_exprt (str[idx], ID_le, char_Z));
239
- minus_exprt diff (char_a, char_A);
240
- equal_exprt convert (res[idx], plus_exprt (str[idx], diff));
241
- string_constraintt a2 (idx, res.length (), is_upper_case, convert);
241
+ binary_relation_exprt (str[idx], ID_le, char_Z)));
242
+
243
+ // Characters between 0xc0 (latin capital A with grave)
244
+ // and 0xd6 (latin capital O with diaeresis) are upper-case
245
+ upper_case.push_back (and_exprt (
246
+ binary_relation_exprt (from_integer (0xc0 , char_type), ID_le, str[idx]),
247
+ binary_relation_exprt (str[idx], ID_le, from_integer (0xd6 , char_type))));
248
+
249
+ // Characters between 0xd8 (latin capital O with stroke)
250
+ // and 0xde (latin capital thorn) are upper-case
251
+ upper_case.push_back (and_exprt (
252
+ binary_relation_exprt (from_integer (0xd8 , char_type), ID_le, str[idx]),
253
+ binary_relation_exprt (str[idx], ID_le, from_integer (0xde , char_type))));
254
+
255
+ exprt is_upper_case=disjunction (upper_case);
256
+
257
+ // The difference between upper-case and lower-case for the basic latin and
258
+ // latin-1 supplement is 0x20.
259
+ exprt diff=from_integer (0x20 , char_type);
260
+ equal_exprt converted (res[idx], plus_exprt (str[idx], diff));
261
+ and_exprt non_converted (
262
+ equal_exprt (res[idx], str[idx]),
263
+ binary_relation_exprt (str[idx], ID_lt, from_integer (0x100 , char_type)));
264
+ if_exprt conditional_convert (is_upper_case, converted, non_converted);
265
+
266
+ string_constraintt a2 (idx, res.length (), conditional_convert);
242
267
axioms.push_back (a2);
243
268
244
- equal_exprt eq (res[idx], str[idx]);
245
- string_constraintt a3 (idx, res.length (), not_exprt (is_upper_case), eq);
246
- axioms.push_back (a3);
247
269
return res;
248
270
}
249
271
0 commit comments