diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 12e931ab3e2..cdc8b3a3db2 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -51,6 +51,16 @@ bool value_sett::field_sensitive( return ns.follow(type).id()==ID_struct; } +const value_sett::entryt *value_sett::find_entry(const value_sett::idt &id) + const +{ + auto found = values.find(id); + if(found != values.end()) + return &found->second; + else + return base_value_set ? base_value_set->find_entry(id) : nullptr; +} + value_sett::entryt &value_sett::get_entry( const entryt &e, const typet &type, @@ -66,6 +76,14 @@ value_sett::entryt &value_sett::get_entry( std::pair r= values.insert(std::pair(index, e)); + if(base_value_set && r.second) + { + // New entry; copy the existing base entry if found. + const entryt *base_entry = base_value_set->find_entry(index); + if(base_entry) + r.first->second = *base_entry; + } + return r.first->second; } @@ -181,6 +199,12 @@ void value_sett::output( out << " } \n"; } + + if(base_value_set) + { + out << "NB. the base value-set (" << base_value_set << ") has a further " + << base_value_set->size() << " entries\n"; + } } exprt value_sett::to_expr(const object_map_dt::value_type &it) const @@ -205,6 +229,10 @@ exprt value_sett::to_expr(const object_map_dt::value_type &it) const bool value_sett::make_union(const value_sett::valuest &new_values) { + // Note this method makes no reference to base_value_set -- we assume that + // we share the same base, if any, with new_values. This is enforced in the + // make_union(const value_sett &) overload. + bool result=false; valuest::iterator v_it=values.begin(); @@ -429,13 +457,11 @@ void value_sett::get_value_set_rec( expr_type.id()==ID_array) { // look it up - valuest::const_iterator v_it= - values.find(id2string(identifier)+suffix); + const entryt *entry = + find_entry(id2string(identifier) + suffix); // try first component name as suffix if not yet found - if(v_it==values.end() && - (expr_type.id()==ID_struct || - expr_type.id()==ID_union)) + if(!entry && (expr_type.id() == ID_struct || expr_type.id() == ID_union)) { const struct_union_typet &struct_union_type= to_struct_union_type(expr_type); @@ -443,17 +469,17 @@ void value_sett::get_value_set_rec( const irep_idt &first_component_name = struct_union_type.components().front().get_name(); - v_it = values.find( + entry = find_entry( id2string(identifier) + "." + id2string(first_component_name) + suffix); } // not found? try without suffix - if(v_it==values.end()) - v_it=values.find(identifier); + if(!entry) + entry = find_entry(identifier); - if(v_it!=values.end()) - make_union(dest, v_it->second.object_map); + if(entry) + make_union(dest, entry->object_map); else insert(dest, exprt(ID_unknown, original_type)); } @@ -857,16 +883,16 @@ void value_sett::get_value_set_rec( const std::string full_name=prefix+suffix; // look it up - valuest::const_iterator v_it=values.find(full_name); + const entryt *entry = find_entry(full_name); // not found? try without suffix - if(v_it==values.end()) - v_it=values.find(prefix); + if(!entry) + entry = find_entry(prefix); - if(v_it==values.end()) + if(!entry) insert(dest, exprt(ID_unknown, original_type)); else - make_union(dest, v_it->second.object_map); + make_union(dest, entry->object_map); } else if(expr.id()==ID_byte_extract_little_endian || expr.id()==ID_byte_extract_big_endian) @@ -1267,86 +1293,6 @@ void value_sett::assign( } } -void value_sett::do_free( - const exprt &op, - const namespacet &ns) -{ - // op must be a pointer - if(op.type().id()!=ID_pointer) - throw "free expected to have pointer-type operand"; - - // find out what it points to - object_mapt value_set; - get_value_set(op, value_set, ns, false); - - const object_map_dt &object_map=value_set.read(); - - // find out which *instances* interest us - dynamic_object_id_sett to_mark; - - for(object_map_dt::const_iterator - it=object_map.begin(); - it!=object_map.end(); - it++) - { - const exprt &object=object_numbering[it->first]; - - if(object.id()==ID_dynamic_object) - { - const dynamic_object_exprt &dynamic_object= - to_dynamic_object_expr(object); - - if(dynamic_object.valid().is_true()) - to_mark.insert(dynamic_object.get_instance()); - } - } - - // mark these as 'may be invalid' - // this, unfortunately, destroys the sharing - for(valuest::iterator v_it=values.begin(); - v_it!=values.end(); - v_it++) - { - object_mapt new_object_map; - - const object_map_dt &old_object_map= - v_it->second.object_map.read(); - - bool changed=false; - - for(object_map_dt::const_iterator - o_it=old_object_map.begin(); - o_it!=old_object_map.end(); - o_it++) - { - const exprt &object=object_numbering[o_it->first]; - - if(object.id()==ID_dynamic_object) - { - const dynamic_object_exprt &dynamic_object= - to_dynamic_object_expr(object); - - if(to_mark.count(dynamic_object.get_instance())==0) - set(new_object_map, *o_it); - else - { - // adjust - offsett o = o_it->second; - exprt tmp(object); - to_dynamic_object_expr(tmp).valid()=exprt(ID_unknown); - insert(new_object_map, tmp, o); - changed=true; - } - } - else - set(new_object_map, *o_it); - } - - if(changed) - v_it->second.object_map=new_object_map; - } -} - void value_sett::assign_rec( const exprt &lhs, const object_mapt &values_rhs, @@ -1603,15 +1549,6 @@ void value_sett::apply_code_rec( { // does nothing } - else if(statement==ID_free) - { - // this may kill a valid bit - - if(code.operands().size()!=1) - throw "free expected to have one operand"; - - do_free(code.op0(), ns); - } else if(statement=="lock" || statement=="unlock") { // ignore for now @@ -1750,3 +1687,13 @@ exprt value_sett::make_member( return member_expr; } + +void value_sett::set_base_value_set(const value_sett *base) +{ + // Simplifying assumptions -- this way we don't need to check for clashing + // keys, for example: + INVARIANT(size() == 0, "base_value_set cannot be set for a non-empty set"); + INVARIANT(!base_value_set, "base_value_set cannot be reset"); + + base_value_set = base; +} diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index c12d33bfa79..d86663a5d2e 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -38,10 +38,21 @@ class namespacet; /// pairs of an `exprt` and an optional offset if known (0 for both dynamic /// objects in the example given above). RHS expressions are represented using /// numbering to avoid storing redundant duplicate expressions. +/// +/// A value_sett may have a base (stored in field base_value_set), in which case +/// any lookup that fails in this value-set will check the base, and any write +/// to this value-set will copy any entry in the base into this value-set before +/// performing the write (i.e. we act as a copy-on-write layer on top of the +/// base). This makes copying or union'ing value-sets with a common base +/// cheaper -- the intended use case is for faster local analysis on top of a +/// large, expensive-to-copy global context. value_sett::make_union requires +/// that the value-sets being merged have the same base (or both have no base); +/// it is up to the user to make sure this is true. + class value_sett { public: - value_sett():location_number(0) + value_sett():location_number(0), base_value_set(nullptr) { } @@ -317,6 +328,15 @@ class value_sett values.clear(); } + /// Finds an entry in this value-set. The interface differs from get_entry + /// because get_value_set_rec wants to check for a struct's first component + /// before stripping the suffix as is done in get_entry. + /// \param id: identifier to find. + /// \return a constant pointer to an entry if found, or null otherwise. + /// Note the pointer may be invalidated by insert operations, including + /// get_entry. + const entryt *find_entry(const idt &id) const; + /// Gets or inserts an entry in this value-set. /// \param e: entry to find. Its `id` and `suffix` fields will be used /// to find a corresponding entry; if a fresh entry is created its @@ -331,6 +351,12 @@ class value_sett const entryt &e, const typet &type, const namespacet &ns); + /// Gets the number of entries in this value-set. + valuest::size_type size() const + { + return values.size(); + } + /// Pretty-print this value-set /// \param ns: global namespace /// \param [out] out: stream to write to @@ -357,6 +383,9 @@ class value_sett /// \return true if anything changed. bool make_union(const value_sett &new_values) { + INVARIANT( + new_values.base_value_set == base_value_set, + "Unioned value-sets must have the same base"); return make_union(new_values.values); } @@ -450,6 +479,11 @@ class value_sett exprt &expr, const namespacet &ns) const; + /// Set the base value-set, to which failing queries fall through. Note for + /// simplicity's sake, at the moment this value-set must be empty when this + /// is called, and the base value-set cannot be reset later. + void set_base_value_set(const value_sett *base); + protected: /// Reads the set of objects pointed to by `expr`, including making /// recursive lookups for dereference operations etc. @@ -484,13 +518,6 @@ class value_sett const exprt &src, exprt &dest) const; - /// Marks objects that may be pointed to by `op` as maybe-invalid - /// \param op: pointer to invalidate - /// \param ns: global namespace - void do_free( - const exprt &op, - const namespacet &ns); - /// Extracts a member from a struct- or union-typed expression. /// Usually that means making a `member_exprt`, but this can shortcut /// extracting members from constants or with-expressions. @@ -548,6 +575,8 @@ class value_sett const namespacet &) { } + + const value_sett *base_value_set; }; #endif // CPROVER_POINTER_ANALYSIS_VALUE_SET_H diff --git a/unit/pointer-analysis/value_set.cpp b/unit/pointer-analysis/value_set.cpp index 7c7b4a7e2d6..2d97a16f0c7 100644 --- a/unit/pointer-analysis/value_set.cpp +++ b/unit/pointer-analysis/value_set.cpp @@ -311,4 +311,91 @@ SCENARIO( } } } + + GIVEN("A base value-set") + { + value_sett base_value_set; + + // Create int i; int *ptr; + + signedbv_typet int32_type(32); + + symbolt i; + i.name = "i"; + i.base_name = "i"; + i.pretty_name = "i"; + i.type = int32_type; + i.is_static_lifetime = true; + symbol_table.add(i); + + symbolt ptr; + ptr.name = "ptr"; + ptr.base_name = "ptr"; + ptr.pretty_name = "ptr"; + ptr.type = pointer_typet(int32_type, 64); + ptr.is_static_lifetime = true; + symbol_table.add(ptr); + + // Assign ptr = &i (in the base value-set): + + code_assignt assign_ptr( + ptr.symbol_expr(), address_of_exprt(i.symbol_expr())); + base_value_set.apply_code(assign_ptr, ns); + + value_set.set_base_value_set(&base_value_set); + + WHEN("Querying ptr via the base value-set") + { + value_setst::valuest result; + base_value_set.get_value_set(ptr.symbol_expr(), result, ns); + + THEN("The answer should be { &i }") + { + REQUIRE(result.size() == 1); + REQUIRE(object_descriptor_matches(*result.begin(), i.symbol_expr())); + } + + WHEN("Querying ptr via the child value-set") + { + value_setst::valuest child_result; + value_set.get_value_set(ptr.symbol_expr(), child_result, ns); + + THEN("The answer should also be { &i }") + { + REQUIRE(result == child_result); + } + } + + WHEN("ptr is weakly updated in the child value-set") + { + null_pointer_exprt null_pointer(to_pointer_type(ptr.type)); + + // Final 'true' parameter makes this a weak write: + value_set.assign(ptr.symbol_expr(), null_pointer, ns, false, true); + + WHEN("ptr is queried again") + { + value_setst::valuest requery_result; + value_set.get_value_set(ptr.symbol_expr(), requery_result, ns); + + THEN("The answer should be { null, &i }") + { + REQUIRE(requery_result.size() == 2); + + bool found_i = false, found_null = false; + for(const exprt &result : requery_result) + { + if(object_descriptor_matches(result, i.symbol_expr())) + found_i = true; + else if(object_descriptor_matches(result, null_pointer)) + found_null = true; + } + + REQUIRE(found_i); + REQUIRE(found_null); + } + } + } + } + } }