Skip to content

Commit 9701c54

Browse files
authored
Merge pull request #4777 from danpoe/feature/simplify-expression-char-at
Expression simplification for charAt()
2 parents 8da81c5 + bffa2bc commit 9701c54

File tree

5 files changed

+127
-47
lines changed

5 files changed

+127
-47
lines changed
Binary file not shown.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
public class Main {
2+
public void constantCharAt() {
3+
StringBuilder sb = new StringBuilder("abc");
4+
char c = sb.charAt(1);
5+
assert c == 'b';
6+
}
7+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Main.class
3+
--function Main.constantCharAt --show-vcc
4+
^Generated [0-9]+ VCC\(s\), 0 remaining after simplification$
5+
^EXIT=0$
6+
^SIGNAL=0$
7+
--
8+
--

src/util/simplify_expr.cpp

Lines changed: 93 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -181,67 +181,34 @@ bool simplify_exprt::simplify_function_application(exprt &expr)
181181
auto &first_argument = to_string_expr(function_app.arguments().at(0));
182182
auto &second_argument = to_string_expr(function_app.arguments().at(1));
183183

184-
if(
185-
first_argument.content().id() != ID_address_of ||
186-
second_argument.content().id() != ID_address_of)
184+
const auto first_value_opt = try_get_string_data_array(first_argument, ns);
185+
186+
if(!first_value_opt)
187187
{
188188
return true;
189189
}
190190

191-
mp_integer offset_int = 0;
192-
if(function_app.arguments().size() == 3)
193-
{
194-
auto &offset = function_app.arguments()[2];
195-
if(offset.id() != ID_constant)
196-
return true;
197-
offset_int = numeric_cast_v<mp_integer>(to_constant_expr(offset));
198-
}
191+
const array_exprt &first_value = first_value_opt->get();
199192

200-
const address_of_exprt &first_address_of =
201-
to_address_of_expr(first_argument.content());
202-
const address_of_exprt &second_address_of =
203-
to_address_of_expr(second_argument.content());
193+
const auto second_value_opt =
194+
try_get_string_data_array(second_argument, ns);
204195

205-
// Constants are got via address_of expressions, so this helps filter out
206-
// things that are non-constant.
207-
if(
208-
first_address_of.object().id() != ID_index ||
209-
second_address_of.object().id() != ID_index)
196+
if(!second_value_opt)
210197
{
211198
return true;
212199
}
213200

214-
const index_exprt &first_index_expression =
215-
to_index_expr(first_address_of.object());
216-
const index_exprt &second_index_expression =
217-
to_index_expr(second_address_of.object());
218-
219-
const exprt &first_string_array = first_index_expression.array();
220-
const exprt &second_string_array = second_index_expression.array();
201+
const array_exprt &second_value = second_value_opt->get();
221202

222-
// Check if our symbols type is an array.
223-
if(
224-
first_string_array.type().id() != ID_array ||
225-
second_string_array.type().id() != ID_array)
203+
mp_integer offset_int = 0;
204+
if(function_app.arguments().size() == 3)
226205
{
227-
return true;
206+
auto &offset = function_app.arguments()[2];
207+
if(offset.id() != ID_constant)
208+
return true;
209+
offset_int = numeric_cast_v<mp_integer>(to_constant_expr(offset));
228210
}
229211

230-
// get their initial values from the symbol table
231-
const symbolt *symbol_ptr = nullptr;
232-
if(ns.lookup(
233-
to_symbol_expr(first_string_array).get_identifier(), symbol_ptr))
234-
return true;
235-
const exprt &first_value = symbol_ptr->value;
236-
if(ns.lookup(
237-
to_symbol_expr(second_string_array).get_identifier(), symbol_ptr))
238-
return true;
239-
const exprt &second_value = symbol_ptr->value;
240-
241-
// only compare if both are arrays
242-
if(first_value.id() != ID_array || second_value.id() != ID_array)
243-
return true;
244-
245212
// test whether second_value is a prefix of first_value
246213
bool is_prefix =
247214
offset_int >= 0 && mp_integer(first_value.operands().size()) >=
@@ -268,6 +235,45 @@ bool simplify_exprt::simplify_function_application(exprt &expr)
268235
expr = from_integer(is_prefix ? 1 : 0, expr.type());
269236
return false;
270237
}
238+
else if(func_id == ID_cprover_string_char_at_func)
239+
{
240+
if(function_app.arguments().at(1).id() != ID_constant)
241+
{
242+
return true;
243+
}
244+
245+
const auto &index = to_constant_expr(function_app.arguments().at(1));
246+
247+
const refined_string_exprt &s =
248+
to_string_expr(function_app.arguments().at(0));
249+
250+
const auto char_seq_opt = try_get_string_data_array(s, ns);
251+
252+
if(!char_seq_opt)
253+
{
254+
return true;
255+
}
256+
257+
const array_exprt &char_seq = char_seq_opt->get();
258+
259+
const auto i_opt = numeric_cast<std::size_t>(index);
260+
261+
if(!i_opt || *i_opt >= char_seq.operands().size())
262+
{
263+
return true;
264+
}
265+
266+
const auto &c = to_constant_expr(char_seq.operands().at(*i_opt));
267+
268+
if(c.type() != expr.type())
269+
{
270+
return true;
271+
}
272+
273+
expr = c;
274+
275+
return false;
276+
}
271277

272278
return true;
273279
}
@@ -1826,6 +1832,46 @@ optionalt<std::string> simplify_exprt::expr2bits(
18261832
return {};
18271833
}
18281834

1835+
optionalt<std::reference_wrapper<const array_exprt>>
1836+
simplify_exprt::try_get_string_data_array(
1837+
const refined_string_exprt &s,
1838+
const namespacet &ns)
1839+
{
1840+
if(s.content().id() != ID_address_of)
1841+
{
1842+
return {};
1843+
}
1844+
1845+
const auto &content = to_address_of_expr(s.content());
1846+
1847+
if(content.object().id() != ID_index)
1848+
{
1849+
return {};
1850+
}
1851+
1852+
const auto &array_start = to_index_expr(content.object());
1853+
1854+
if(array_start.array().id() != ID_symbol ||
1855+
array_start.array().type().id() != ID_array)
1856+
{
1857+
return {};
1858+
}
1859+
1860+
const auto &array = to_symbol_expr(array_start.array());
1861+
1862+
const symbolt *symbol_ptr = nullptr;
1863+
1864+
if(ns.lookup(array.get_identifier(), symbol_ptr) ||
1865+
symbol_ptr->value.id() != ID_array)
1866+
{
1867+
return {};
1868+
}
1869+
1870+
const auto &char_seq = to_array_expr(symbol_ptr->value);
1871+
1872+
return optionalt<std::reference_wrapper<const array_exprt>>(char_seq);
1873+
}
1874+
18291875
bool simplify_exprt::simplify_byte_extract(byte_extract_exprt &expr)
18301876
{
18311877
// lift up any ID_if on the object

src/util/simplify_expr_class.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Author: Daniel Kroening, [email protected]
2525
#include "replace_expr.h"
2626
#endif
2727

28+
class array_exprt;
2829
class bswap_exprt;
2930
class byte_extract_exprt;
3031
class byte_update_exprt;
@@ -35,6 +36,7 @@ class index_exprt;
3536
class member_exprt;
3637
class namespacet;
3738
class popcount_exprt;
39+
class refined_string_exprt;
3840
class tvt;
3941
class typecast_exprt;
4042

@@ -215,6 +217,23 @@ class simplify_exprt
215217
#ifdef USE_LOCAL_REPLACE_MAP
216218
replace_mapt local_replace_map;
217219
#endif
220+
221+
/// Get char sequence from refined string expression
222+
///
223+
/// If `s.content()` is of the form `&id[e]`, where `id` is an array-typed
224+
/// symbol expression (and `e` is any expression), return the value of the
225+
/// symbol `id` (as given by the `value` field of the symbol in the namespace
226+
/// `ns`); otherwise return an empty optional.
227+
///
228+
/// \param s: refined string expression
229+
/// \param ns: namespace
230+
/// \return array expression representing the char sequence which forms the
231+
/// content of the refined string expression, empty optional if the content
232+
/// cannot be determined
233+
static optionalt<std::reference_wrapper<const array_exprt>>
234+
try_get_string_data_array(
235+
const refined_string_exprt &s,
236+
const namespacet &ns);
218237
};
219238

220239
#endif // CPROVER_UTIL_SIMPLIFY_EXPR_CLASS_H

0 commit comments

Comments
 (0)