Skip to content

Sharing map sharing statistics #2509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 31, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 201 additions & 5 deletions src/util/sharing_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ Author: Daniel Poetzl
template <class keyT, class valueT, class hashT, class equalT> \
CV typename sharing_mapt<keyT, valueT, hashT, equalT>::ST \
sharing_mapt<keyT, valueT, hashT, equalT>

// Prefix for out-of-class definitions of member function templates of
// sharing_mapt. T names the type parameter of the member template, CV is an
// optional qualifier emitted in front of the return type (may be left empty),
// and ST is a type nested in sharing_mapt that is used as the return type.
// Usage: SHARING_MAPT3(T, CV, ST)::member_name(...) { ... }
#define SHARING_MAPT3(T, CV, ST) \
template <class keyT, class valueT, class hashT, class equalT> \
template <class T> \
CV typename sharing_mapt<keyT, valueT, hashT, equalT>::ST \
sharing_mapt<keyT, valueT, hashT, equalT>
// clang-format on

// Note: Due to a bug in Visual Studio we need to add an additional "const"
Expand Down Expand Up @@ -124,11 +130,6 @@ template <class keyT,
class sharing_mapt
{
public:
friend void sharing_map_interface_test();
friend void sharing_map_copy_test();
friend void sharing_map_collision_test();
friend void sharing_map_view_test();

~sharing_mapt()
{
}
Expand Down Expand Up @@ -291,6 +292,35 @@ class sharing_mapt
delta_viewt &delta_view,
const bool only_common = true) const;

/// Sharing statistics gathered over a collection of sharing map instances.
/// Objects of this type are returned by the get_sharing_stats* functions.
///
/// - num_nodes: total number of nodes over all given maps; a node that is
///   part of n of the maps contributes n to this number.
/// - num_unique_nodes: number of distinct nodes over all given maps; a node
///   that is part of several maps contributes only 1.
/// - num_leafs / num_unique_leafs: same as the two fields above, but only
///   leafs are counted.
///
/// All counters start out at zero.
struct sharing_map_statst
{
  std::size_t num_nodes{0};
  std::size_t num_unique_nodes{0};
  std::size_t num_leafs{0};
  std::size_t num_unique_leafs{0};
};

/// Compute sharing statistics for the sharing maps reachable from the
/// iterator range [begin, end).
/// \param begin: begin iterator
/// \param end: end iterator
/// \param f: function applied to an iterator to obtain the sharing map it
///   refers to; by default the iterator is simply dereferenced
/// \return: sharing stats
template <class Iterator>
static sharing_map_statst get_sharing_stats(
Iterator begin,
Iterator end,
std::function<sharing_mapt &(const Iterator)> f =
[](const Iterator it) -> sharing_mapt & { return *it; });

/// Compute sharing statistics for sharing maps that are stored as the
/// `second` component of the elements in the iterator range [begin, end)
/// (e.g., the mapped values of a map).
/// \param begin: begin iterator of a map
/// \param end: end iterator of a map
/// \return: sharing stats
template <class Iterator>
static sharing_map_statst get_sharing_stats_map(Iterator begin, Iterator end);

protected:
// helpers

Expand All @@ -317,6 +347,11 @@ class sharing_mapt
void gather_all(const baset &n, const unsigned depth, delta_viewt &delta_view)
const;

/// Count the nodes of this map that are not contained in `marked`.
/// \param leafs_only: if true, only leafs are counted; otherwise internal
///   and container nodes are counted as well
/// \param marked: set of raw node pointers that are skipped during counting
/// \param mark: if true, visited nodes with a use count of at least 2 are
///   added to `marked`
/// \return: number of nodes counted
std::size_t count_unmarked_nodes(
bool leafs_only,
std::set<void *> &marked,
bool mark = true) const;

// dummy element returned when no element was found
static mapped_type dummy;

Expand Down Expand Up @@ -386,6 +421,167 @@ ::iterate(
while(!stack.empty());
}

/// Count the nodes of this map that are not contained in `marked`.
///
/// The tree is traversed iteratively with an explicit stack. A node whose use
/// count is at least 2 is looked up in `marked`; if it is found there, the
/// node (and, for internal/container nodes, everything below it) is skipped.
/// Nodes with a use count below 2 are never looked up or recorded.
///
/// \param leafs_only: if true, only leafs are counted; otherwise internal
///   and container nodes are counted as well
/// \param marked: set of raw node pointers that are skipped during counting
/// \param mark: if true, every visited node with a use count of at least 2
///   is added to `marked`
/// \return: number of nodes counted; 0 if the map is empty
SHARING_MAPT(std::size_t)
::count_unmarked_nodes(bool leafs_only, std::set<void *> &marked, bool mark)
  const
{
  if(empty())
    return 0;

  // the counter has the same type as the return value so that it cannot wrap
  // around earlier than the return type would
  std::size_t count = 0;

  typedef std::pair<unsigned, const baset *> stack_itemt;

  std::stack<stack_itemt> stack;
  stack.push({0, &map});

  do
  {
    // take a copy: a reference to the top element would dangle after pop()
    const stack_itemt si = stack.top();
    stack.pop();

    const unsigned depth = si.first;
    const baset *bp = si.second;

    // internal node or container node
    const innert *ip = static_cast<const innert *>(bp);
    const unsigned use_count = ip->data.use_count();
    void *raw_ptr = ip->data.get();

    if(use_count >= 2)
    {
      // already seen via another path or map: skip the whole subtree
      if(marked.find(raw_ptr) != marked.end())
      {
        continue;
      }

      if(mark)
      {
        marked.insert(raw_ptr);
      }
    }

    if(!leafs_only)
    {
      count++;
    }

    if(depth < steps) // internal
    {
      const to_mapt &m = ip->get_to_map();
      SM_ASSERT(!m.empty());

      for(const auto &item : m)
      {
        const innert *i = &item.second;
        stack.push({depth + 1, i});
      }
    }
    else // container
    {
      SM_ASSERT(depth == steps);

      const leaf_listt &ll = ip->get_container();
      SM_ASSERT(!ll.empty());

      for(const auto &l : ll)
      {
        // distinct names to avoid shadowing the variables of the parent node
        const unsigned leaf_use_count = l.data.use_count();
        void *leaf_raw_ptr = l.data.get();

        if(leaf_use_count >= 2)
        {
          if(marked.find(leaf_raw_ptr) != marked.end())
          {
            continue;
          }

          if(mark)
          {
            marked.insert(leaf_raw_ptr);
          }
        }

        // leafs are counted regardless of leafs_only
        count++;
      }
    }
  } while(!stack.empty());

  return count;
}

/// Compute sharing statistics over a range of sharing maps
///
/// Complexity:
/// - Worst case: O(N * H * log(S))
/// - Best case: O(N + H)
///
/// \param begin: begin iterator
/// \param end: end iterator
/// \param f: function applied to the iterator to get a sharing map
/// \return: sharing stats
SHARING_MAPT3(Iterator, , sharing_map_statst)
::get_sharing_stats(
  Iterator begin,
  Iterator end,
  std::function<sharing_mapt &(const Iterator)> f)
{
  std::set<void *> marked;

  // Sums up count_unmarked_nodes() over all maps in [begin, end). The tree is
  // walked once per statistic; that is not very efficient, but this function
  // is meant for diagnosis only.
  const auto tally = [&](const bool leafs_only, const bool mark) -> std::size_t
  {
    std::size_t total = 0;

    for(Iterator it = begin; it != end; it++)
    {
      total += f(it).count_unmarked_nodes(leafs_only, marked, mark);
    }

    return total;
  };

  sharing_map_statst sms;

  // number of nodes (nothing gets marked, so shared nodes count repeatedly)
  sms.num_nodes = tally(false, false);
  SM_ASSERT(marked.empty());

  // number of unique nodes
  sms.num_unique_nodes = tally(false, true);

  // start from a clean slate for the leaf statistics
  marked.clear();

  // number of leafs
  sms.num_leafs = tally(true, false);
  SM_ASSERT(marked.empty());

  // number of unique leafs
  sms.num_unique_leafs = tally(true, true);

  return sms;
}

/// Compute sharing statistics over sharing maps that are stored as the
/// `second` component of the elements of a map-like range
///
/// Complexity:
/// - Worst case: O(N * H * log(S))
/// - Best case: O(N + H)
///
/// \param begin: begin iterator of a map
/// \param end: end iterator of a map
/// \return: sharing stats
SHARING_MAPT3(Iterator, , sharing_map_statst)
::get_sharing_stats_map(Iterator begin, Iterator end)
{
  // picks the sharing map out of the (key, sharing map) pair
  const auto select_mapped = [](const Iterator it) -> sharing_mapt &
  {
    return it->second;
  };

  return get_sharing_stats<Iterator>(begin, end, select_mapped);
}

/// Get a view of the elements in the map
/// A view is a list of pairs with the components being const references to the
/// keys and values in the map.
Expand Down
11 changes: 5 additions & 6 deletions src/util/sharing_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ SN_TYPE_PAR_DEF class sharing_node_innert : public sharing_node_baset

/// Check whether this node and `other` refer to the same data.
/// Both nodes are asserted to hold data.
/// \param other: node to compare against
/// \return: true iff the data pointers of the two nodes compare equal
bool shares_with(const sharing_node_innert &other) const
{
  SN_ASSERT(data && other.data);

  const bool same_data = (data == other.data);
  return same_data;
}

Expand All @@ -151,8 +153,6 @@ SN_TYPE_PAR_DEF class sharing_node_innert : public sharing_node_baset

d_it &write_internal()
{
SN_ASSERT(data.use_count() > 0);

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any test that shows that this is wrong (and is now correct)?

Copy link
Contributor Author

@danpoe danpoe Jul 23, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code was already exercised by several tests, but I used the wrong macro names to activate the sharing map/node internal assertions in the unit tests. Thus, none of the internal assertions were enabled and we didn't see the assertion failures. This is corrected by the first commit in this PR.

if(data == empty_data)
{
data = make_shared_derived_u<SN_PTR_TYPE_ARGS>();
Expand All @@ -176,8 +176,6 @@ SN_TYPE_PAR_DEF class sharing_node_innert : public sharing_node_baset

d_ct &write_container()
{
SN_ASSERT(data.use_count() > 0);

if(data == empty_data)
{
data = make_shared_derived_v<SN_PTR_TYPE_ARGS>();
Expand Down Expand Up @@ -259,8 +257,9 @@ SN_TYPE_PAR_DEF class sharing_node_innert : public sharing_node_baset
leaft *place_leaf(const keyT &k, const valueT &v)
{
SN_ASSERT(is_container());

SN_ASSERT(as_const(this)->find_leaf(k) == nullptr);
// we need to check empty() first as the const version of find_leaf() must
// not be called on an empty node
SN_ASSERT(empty() || as_const(this)->find_leaf(k) == nullptr);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, is there a test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(see comment above)


leaf_listt &c = get_container();
c.push_front(leaft(k, v));
Expand Down
Loading