@@ -216,34 +216,48 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case(
216
216
string_exprt res=fresh_string (ref_type);
217
217
exprt char_a=constant_char (' a' , char_type);
218
218
exprt char_A=constant_char (' A' , char_type);
219
- exprt char_z=constant_char (' z' , char_type);
220
- exprt char_Z=constant_char (' Z' , char_type);
221
219
222
- // TODO: add support for locales using case mapping information
223
- // from the UnicodeData file.
220
+ // TODO: for now, only characters in Basic Latin and Latin-1 supplement
221
+ // are supported (up to 0x100), we should add others using case mapping
222
+ // information from the UnicodeData file.
224
223
225
224
// We add axioms:
226
225
// a1 : |res| = |str|
227
- // a2 : forall idx<str.length, 'A'<=str[idx]<='Z' => res[idx]=str[idx]+'a'-'A'
228
- // a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
229
- // forall idx<str.length,
230
- // this[idx]='A'<=str[idx]<='Z' ? str[idx]+'a'-'A' : str[idx]
226
+ // a2 : forall idx<str.length,
227
+ // is_upper_case(str[idx])?
228
+ // res[idx]=str[idx]+'a'-'A' : res[idx]=str[idx]<0x100
231
229
232
230
exprt a1=res.axiom_for_has_same_length_as (str);
233
231
axioms.push_back (a1);
234
232
235
233
symbol_exprt idx=fresh_univ_index (" QA_lower_case" , index_type);
236
- exprt is_upper_case=and_exprt (
234
+ exprt::operandst upper_case;
235
+ upper_case.push_back (and_exprt (
237
236
binary_relation_exprt (char_A, ID_le, str[idx]),
238
- binary_relation_exprt (str[idx], ID_le, char_Z));
239
- minus_exprt diff (char_a, char_A);
240
- equal_exprt convert (res[idx], plus_exprt (str[idx], diff));
241
- string_constraintt a2 (idx, res.length (), is_upper_case, convert);
237
+ binary_relation_exprt (str[idx], ID_le, from_integer (' Z' , char_type))));
238
+
239
+ upper_case.push_back (and_exprt (
240
+ binary_relation_exprt (from_integer (0xc0 , char_type), ID_le, str[idx]),
241
+ binary_relation_exprt (str[idx], ID_le, from_integer (0xd6 , char_type))));
242
+
243
+ upper_case.push_back (and_exprt (
244
+ binary_relation_exprt (from_integer (0xd8 , char_type), ID_le, str[idx]),
245
+ binary_relation_exprt (str[idx], ID_le, from_integer (0xde , char_type))));
246
+
247
+ exprt is_upper_case=disjunction (upper_case);
248
+
249
+ // The difference between upper-case and lower-case for the basic latin and
250
+ // latin-1 supplement is 0x20.
251
+ exprt diff=from_integer (0x20 , char_type);
252
+ equal_exprt converted (res[idx], plus_exprt (str[idx], diff));
253
+ and_exprt non_converted (
254
+ equal_exprt (res[idx], str[idx]),
255
+ binary_relation_exprt (str[idx], ID_lt, from_integer (0x100 , char_type)));
256
+ if_exprt conditional_convert (is_upper_case, converted, non_converted);
257
+
258
+ string_constraintt a2 (idx, res.length (), conditional_convert);
242
259
axioms.push_back (a2);
243
260
244
- equal_exprt eq (res[idx], str[idx]);
245
- string_constraintt a3 (idx, res.length (), not_exprt (is_upper_case), eq);
246
- axioms.push_back (a3);
247
261
return res;
248
262
}
249
263
0 commit comments