@@ -279,6 +279,14 @@ static exprt is_upper_case(const exprt &character)
279
279
return disjunction (upper_case);
280
280
}
281
281
282
+ // / Expression which is true for lower_case characters of the Basic Latin and
283
+ // / Latin-1 supplement of unicode.
284
+ static exprt is_lower_case (const exprt &character)
285
+ {
286
+ return is_upper_case (
287
+ minus_exprt (character, from_integer (0x20 , character.type ())));
288
+ }
289
+
282
290
// / Add axioms ensuring `res` corresponds to `str` in which uppercase characters
283
291
// / have been converted to lowercase.
284
292
// / These axioms are:
@@ -323,19 +331,14 @@ exprt string_constraint_generatort::add_axioms_for_to_lower_case(
323
331
}
324
332
325
333
// / Add axioms ensuring `res` corresponds to `str` in which lowercase characters
326
- // / have been converted to uppercase.
334
+ // / of Basic Latin and Latin-1 supplement of unicode have been converted to
335
+ // / uppercase.
327
336
// /
328
337
// / These axioms are:
329
- // / 1. \f$ |{\tt res}| = |{\tt str}| \f$
330
- // / 2. \f$ \forall i<|{\tt str}|.\ 'a'\le {\tt str}[i]\le 'z'
331
- // / \Rightarrow {\tt res}[i]={\tt str}[i]+'A'-'a' \f$
332
- // / 3. \f$ \forall i<|{\tt str}|.\ \lnot ('a'\le {\tt str}[i] \le 'z')
333
- // / \Rightarrow {\tt res}[i]={\tt str}[i] \f$
334
- // / Note that index expressions are only allowed in the body of universal
335
- // / axioms, so we use a trivial premise and push our premise into the body.
338
+ // / 1. res.length = str.length
339
+ // / 2. forall i < str.length.
340
+ // / is_lower_case(str[i]) ? res[i] = str[i] - 0x20 : res[i] = str[i]
336
341
// /
337
- // / \todo We can reduce the number of constraints by merging
338
- // / constraints 2 and 3.
339
342
// / \param res: array of characters expression
340
343
// / \param str: array of characters expression
341
344
// / \return integer expression which is different from `0` when there is an
@@ -350,31 +353,17 @@ exprt string_constraint_generatort::add_axioms_for_to_upper_case(
350
353
exprt char_A=constant_char (' A' , char_type);
351
354
exprt char_z=constant_char (' z' , char_type);
352
355
353
- // \todo Add support for locales using case mapping information
354
- // from the UnicodeData file.
355
-
356
356
lemmas.push_back (equal_exprt (res.length (), str.length ()));
357
357
358
358
constraints.push_back ([&] {
359
- const symbol_exprt idx1 = fresh_univ_index (" QA_upper_case1" , index_type);
360
- const exprt is_lower_case = and_exprt (
361
- binary_relation_exprt (char_a, ID_le, str[idx1]),
362
- binary_relation_exprt (str[idx1], ID_le, char_z));
363
- const exprt diff = minus_exprt (char_A, char_a);
364
- const exprt convert = equal_exprt (res[idx1], plus_exprt (str[idx1], diff));
365
- const exprt body = implies_exprt (is_lower_case, convert);
366
- return string_constraintt (idx1, zero_if_negative (res.length ()), body);
367
- }());
368
-
369
- constraints.push_back ([&] {
370
- const symbol_exprt idx2 = fresh_univ_index (" QA_upper_case2" , index_type);
371
- const exprt is_not_lower_case = not_exprt (
372
- and_exprt (
373
- binary_relation_exprt (char_a, ID_le, str[idx2]),
374
- binary_relation_exprt (str[idx2], ID_le, char_z)));
375
- const exprt eq = equal_exprt (res[idx2], str[idx2]);
376
- const exprt body2 = implies_exprt (is_not_lower_case, eq);
377
- return string_constraintt (idx2, zero_if_negative (res.length ()), body2);
359
+ const symbol_exprt idx = fresh_univ_index (" QA_upper_case" , index_type);
360
+ const exprt converted =
361
+ minus_exprt (str[idx], from_integer (0x20 , char_type));
362
+ return string_constraintt (
363
+ idx,
364
+ zero_if_negative (res.length ()),
365
+ equal_exprt (
366
+ res[idx], if_exprt (is_lower_case (str[idx]), converted, str[idx])));
378
367
}());
379
368
380
369
return from_integer (0 , get_return_code_type ());
0 commit comments