Skip to content

Commit 0e356c2

Browse files
authored
Merge pull request #4898 from danpoe/refactor/string-simplification
Factor out charAt() and startsWith() simplification into separate functions
2 parents c6c0eff + ca203b9 commit 0e356c2

File tree

1 file changed

+110
-86
lines changed

1 file changed

+110
-86
lines changed

src/util/simplify_expr.cpp

Lines changed: 110 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -366,76 +366,132 @@ static simplify_exprt::resultt<> simplify_string_index_of(
366366
}
367367
}
368368

369-
simplify_exprt::resultt<> simplify_exprt::simplify_function_application(
370-
const function_application_exprt &expr)
369+
/// Simplify String.charAt function when arguments are constant
370+
///
371+
/// \param expr: the expression to simplify
372+
/// \param ns: namespace
373+
/// \return: the modified expression or an unchanged expression
374+
static simplify_exprt::resultt<> simplify_string_char_at(
375+
const function_application_exprt &expr,
376+
const namespacet &ns)
371377
{
372-
if(expr.function().id() != ID_symbol)
373-
return unchanged(expr);
378+
if(expr.arguments().at(1).id() != ID_constant)
379+
{
380+
return simplify_exprt::unchanged(expr);
381+
}
374382

375-
const irep_idt &func_id = to_symbol_expr(expr.function()).get_identifier();
383+
const auto &index = to_constant_expr(expr.arguments().at(1));
376384

377-
// Starts-with is used for .equals.
378-
if(func_id == ID_cprover_string_startswith_func)
385+
const refined_string_exprt &s = to_string_expr(expr.arguments().at(0));
386+
387+
const auto char_seq_opt = try_get_string_data_array(s, ns);
388+
389+
if(!char_seq_opt)
379390
{
380-
// We want to get both arguments of any starts-with comparison, and
381-
// trace them back to the actual string instance. All variables on the
382-
// way must be constant for us to be sure this will work.
383-
auto &first_argument = to_string_expr(expr.arguments().at(0));
384-
auto &second_argument = to_string_expr(expr.arguments().at(1));
391+
return simplify_exprt::unchanged(expr);
392+
}
385393

386-
const auto first_value_opt = try_get_string_data_array(first_argument, ns);
394+
const array_exprt &char_seq = char_seq_opt->get();
387395

388-
if(!first_value_opt)
389-
{
390-
return unchanged(expr);
391-
}
396+
const auto i_opt = numeric_cast<std::size_t>(index);
392397

393-
const array_exprt &first_value = first_value_opt->get();
398+
if(!i_opt || *i_opt >= char_seq.operands().size())
399+
{
400+
return simplify_exprt::unchanged(expr);
401+
}
394402

395-
const auto second_value_opt =
396-
try_get_string_data_array(second_argument, ns);
403+
const auto &c = to_constant_expr(char_seq.operands().at(*i_opt));
397404

398-
if(!second_value_opt)
399-
{
400-
return unchanged(expr);
401-
}
405+
if(c.type() != expr.type())
406+
{
407+
return simplify_exprt::unchanged(expr);
408+
}
402409

403-
const array_exprt &second_value = second_value_opt->get();
410+
return c;
411+
}
404412

405-
mp_integer offset_int = 0;
406-
if(expr.arguments().size() == 3)
407-
{
408-
auto &offset = expr.arguments()[2];
409-
if(offset.id() != ID_constant)
410-
return unchanged(expr);
411-
offset_int = numeric_cast_v<mp_integer>(to_constant_expr(offset));
412-
}
413+
/// Simplify String.startsWith function when arguments are constant
414+
///
415+
/// \param expr: the expression to simplify
416+
/// \param ns: namespace
417+
/// \return: the modified expression or an unchanged expression
418+
static simplify_exprt::resultt<> simplify_string_startswith(
419+
const function_application_exprt &expr,
420+
const namespacet &ns)
421+
{
422+
// We want to get both arguments of any starts-with comparison, and
423+
// trace them back to the actual string instance. All variables on the
424+
// way must be constant for us to be sure this will work.
425+
auto &first_argument = to_string_expr(expr.arguments().at(0));
426+
auto &second_argument = to_string_expr(expr.arguments().at(1));
427+
428+
const auto first_value_opt = try_get_string_data_array(first_argument, ns);
413429

414-
// test whether second_value is a prefix of first_value
415-
bool is_prefix =
416-
offset_int >= 0 && mp_integer(first_value.operands().size()) >=
417-
offset_int + second_value.operands().size();
418-
if(is_prefix)
430+
if(!first_value_opt)
431+
{
432+
return simplify_exprt::unchanged(expr);
433+
}
434+
435+
const array_exprt &first_value = first_value_opt->get();
436+
437+
const auto second_value_opt = try_get_string_data_array(second_argument, ns);
438+
439+
if(!second_value_opt)
440+
{
441+
return simplify_exprt::unchanged(expr);
442+
}
443+
444+
const array_exprt &second_value = second_value_opt->get();
445+
446+
mp_integer offset_int = 0;
447+
if(expr.arguments().size() == 3)
448+
{
449+
auto &offset = expr.arguments()[2];
450+
if(offset.id() != ID_constant)
451+
return simplify_exprt::unchanged(expr);
452+
offset_int = numeric_cast_v<mp_integer>(to_constant_expr(offset));
453+
}
454+
455+
// test whether second_value is a prefix of first_value
456+
bool is_prefix =
457+
offset_int >= 0 && mp_integer(first_value.operands().size()) >=
458+
offset_int + second_value.operands().size();
459+
if(is_prefix)
460+
{
461+
exprt::operandst::const_iterator second_it =
462+
second_value.operands().begin();
463+
for(const auto &first_op : first_value.operands())
419464
{
420-
exprt::operandst::const_iterator second_it =
421-
second_value.operands().begin();
422-
for(const auto &first_op : first_value.operands())
465+
if(offset_int > 0)
466+
--offset_int;
467+
else if(second_it == second_value.operands().end())
468+
break;
469+
else if(first_op != *second_it)
423470
{
424-
if(offset_int > 0)
425-
--offset_int;
426-
else if(second_it == second_value.operands().end())
427-
break;
428-
else if(first_op != *second_it)
429-
{
430-
is_prefix = false;
431-
break;
432-
}
433-
else
434-
++second_it;
471+
is_prefix = false;
472+
break;
435473
}
474+
else
475+
++second_it;
436476
}
477+
}
437478

438-
return from_integer(is_prefix ? 1 : 0, expr.type());
479+
return from_integer(is_prefix ? 1 : 0, expr.type());
480+
}
481+
482+
simplify_exprt::resultt<> simplify_exprt::simplify_function_application(
483+
const function_application_exprt &expr)
484+
{
485+
if(expr.function().id() != ID_symbol)
486+
return unchanged(expr);
487+
488+
const irep_idt &func_id = to_symbol_expr(expr.function()).get_identifier();
489+
490+
// String.startsWith() is used to implement String.equals() in the models
491+
// library
492+
if(func_id == ID_cprover_string_startswith_func)
493+
{
494+
return simplify_string_startswith(expr, ns);
439495
}
440496
else if(func_id == ID_cprover_string_endswith_func)
441497
{
@@ -455,39 +511,7 @@ simplify_exprt::resultt<> simplify_exprt::simplify_function_application(
455511
}
456512
else if(func_id == ID_cprover_string_char_at_func)
457513
{
458-
if(expr.arguments().at(1).id() != ID_constant)
459-
{
460-
return unchanged(expr);
461-
}
462-
463-
const auto &index = to_constant_expr(expr.arguments().at(1));
464-
465-
const refined_string_exprt &s = to_string_expr(expr.arguments().at(0));
466-
467-
const auto char_seq_opt = try_get_string_data_array(s, ns);
468-
469-
if(!char_seq_opt)
470-
{
471-
return unchanged(expr);
472-
}
473-
474-
const array_exprt &char_seq = char_seq_opt->get();
475-
476-
const auto i_opt = numeric_cast<std::size_t>(index);
477-
478-
if(!i_opt || *i_opt >= char_seq.operands().size())
479-
{
480-
return unchanged(expr);
481-
}
482-
483-
const auto &c = to_constant_expr(char_seq.operands().at(*i_opt));
484-
485-
if(c.type() != expr.type())
486-
{
487-
return unchanged(expr);
488-
}
489-
490-
return c;
514+
return simplify_string_char_at(expr, ns);
491515
}
492516

493517
return unchanged(expr);

0 commit comments

Comments
 (0)