Skip to content

Commit f2afe15

Browse files
committed
Sharing map documentation
1 parent c220dae commit f2afe15

File tree

1 file changed

+229
-0
lines changed

1 file changed

+229
-0
lines changed

src/util/sharing_map.h

+229
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,66 @@ Author: Daniel Poetzl
3939
CV typename sharing_mapt<keyT, valueT, hashT, predT>::ST \
4040
sharing_mapt<keyT, valueT, hashT, predT>
4141

42+
/// A map implemented as a tree where subtrees can be shared between different
43+
/// maps.
44+
///
45+
/// The map is implemented as a fixed-height n-ary trie. The height H and the
46+
/// maximum number of children per inner node S are determined by the two
47+
/// configuration parameters `bits` and `chunk` in sharing_map.h. It holds
48+
/// that H = `bits` / `chunk` and S = 2 ^ `chunk`.
49+
///
50+
/// When inserting a key-value pair into the map, first the hash of its key is
51+
/// computed. The `bits` number of lower order bits of the hash are deemed
52+
/// significant, and are grouped into `bits` / `chunk` chunks. The hash is then
53+
/// treated as a string (with each chunk representing a character) for the
54+
/// purposes of determining the position of the key-value pair in the trie. The
55+
/// actual key-value pairs are stored in the leaf nodes. Collisions (i.e., two
56+
/// different keys yield the same "string") are handled by chaining the
57+
/// corresponding key-value pairs in a `std::list`.
58+
///
59+
/// The use of a trie in combination with hashing has the advantage that the
60+
/// tree is unlikely to degenerate (if the number of hash collisions is low).
61+
/// This makes re-balancing operations unnecessary which do not interact well
62+
/// with sharing. A disadvantage is that the height of the tree is likely
63+
/// greater than if the elements had been stored in a balanced tree (with
64+
/// greater differences for sparser maps).
65+
///
66+
/// The nodes in the sharing map are objects of type sharing_nodet. Each sharing
67+
/// node has a `shared_ptr` to an object of type `dt` which can be shared
68+
/// between nodes.
69+
///
70+
/// Retrieval, insertion, and removal operations interact with sharing as
71+
/// follows:
72+
/// - When a non-const reference to a value in the map that is contained in a
73+
/// shared subtree is retrieved, the nodes on the path from the root of the
74+
/// subtree to the corresponding key-value pair (and the key-value pair itself)
75+
/// are copied and integrated with the map.
76+
/// - When a key-value pair is inserted into the map and its position is in a
77+
/// shared subtree, already existing nodes from the root of the subtree to the
78+
/// position of the key-value pair are copied and integrated with the map, and
79+
/// new nodes are created as needed.
80+
/// - When a key-value pair is erased from the map that is in a shared subtree,
81+
/// nodes from the root of the subtree to the last node that will still exist on
82+
/// the path to the erased element after the element has been removed are
83+
/// copied and integrated with the map, and the remaining nodes are removed.
84+
///
85+
/// Several methods take a hint indicating whether the element is known not to
86+
/// be in the map (`false`), known to be in the map (`true`), or it is unknown
87+
/// whether the element is in the map (`unknown`). The value `unknown` is always
88+
/// valid. When `true` or `false` are given they need to be accurate, otherwise
89+
/// the behavior is undefined. A correct hint can prevent the need to follow a
90+
/// path from the root to a key-value pair twice (e.g., once for checking that
91+
/// it exists, and a second time for copying nodes).
92+
///
93+
/// In the descriptions of the methods of the sharing map we also give the
94+
/// complexity of the operations. We use the following symbols:
95+
/// - N: number of key-value pairs in the map
96+
/// - M: maximum number of key-value pairs that are chained in a leaf node
97+
/// - H: height of the tree
98+
/// - S: maximum number of children per internal node
99+
///
100+
/// The first two symbols denote dynamic properties of a given map, whereas the
101+
/// last two symbols are static configuration parameters of the map class.
42102
template <
43103
class keyT,
44104
class valueT,
@@ -68,7 +128,16 @@ class sharing_mapt
68128

69129
typedef size_t size_type;
70130

131+
/// Return type of methods that retrieve a const reference to a value. First
132+
/// component is a reference to the value (or a dummy value if the given key
133+
/// does not exist), and the second component indicates if the value with the
134+
/// given key was found.
71135
typedef const std::pair<const mapped_type &, const bool> const_find_type;
136+
137+
/// Return type of methods that retrieve a reference to a value. First
138+
/// component is a reference to the value (or a dummy value if the given key
139+
/// does not exist), and the second component indicates if the value with the
140+
/// given key was found.
72141
typedef const std::pair<mapped_type &, const bool> find_type;
73142

74143
typedef std::vector<key_type> keyst;
@@ -89,7 +158,10 @@ class sharing_mapt
89158

90159
static const std::string not_found_msg;
91160

161+
/// Number of bits in the hash deemed significant
92162
static const size_t bits;
163+
164+
/// Size of a chunk of the hash that represents a character
93165
static const size_t chunk;
94166

95167
static const size_t mask;
@@ -136,6 +208,9 @@ class sharing_mapt
136208

137209
mapped_type &operator[](const key_type &k);
138210

211+
/// Swap with other map
212+
///
213+
/// Complexity: O(1)
139214
void swap(self_type &other)
140215
{
141216
map.swap(other.map);
@@ -145,22 +220,32 @@ class sharing_mapt
145220
other.num=tmp;
146221
}
147222

223+
/// Get number of elements in map
224+
///
225+
/// Complexity: O(1)
///
/// \return The current value of the element counter `num`
148226
size_type size() const
149227
{
150228
return num;
151229
}
152230

231+
/// Check if map is empty
///
/// Complexity: O(1)
///
/// \return `true` if the element counter `num` is zero, `false` otherwise
153232
bool empty() const
154233
{
155234
return num==0;
156235
}
157236

237+
/// Clear map
///
/// Calls `clear()` on the `map` member and resets the element counter `num`
/// to zero, so that size() returns 0 and empty() returns `true` afterwards.
158238
void clear()
159239
{
160240
map.clear();
161241
num=0;
162242
}
163243

244+
/// Check if key is in map
245+
///
246+
/// Complexity:
247+
/// - Worst case: O(H * log(S) + M)
248+
/// - Best case: O(H)
164249
bool has_key(const key_type &k) const
165250
{
166251
return get_leaf_node(k)!=nullptr;
@@ -169,6 +254,9 @@ class sharing_mapt
169254
// views
170255

171256
typedef std::pair<const key_type &, const mapped_type &> view_itemt;
257+
258+
/// View of the key-value pairs in the map. A view is a list of pairs with
259+
/// the components being const references to the keys and values in the map.
172260
typedef std::vector<view_itemt> viewt;
173261

174262
class delta_view_itemt
@@ -194,6 +282,9 @@ class sharing_mapt
194282
const mapped_type &other_m;
195283
};
196284

285+
/// Delta view of the key-value pairs in two maps. A delta view of two maps is
286+
/// a view of the key-value pairs in the maps that are contained in subtrees
287+
/// that are not shared between them (also see get_delta_view()).
197288
typedef std::vector<delta_view_itemt> delta_viewt;
198289

199290
void get_view(viewt &view) const;
@@ -214,6 +305,15 @@ class sharing_mapt
214305
void gather_all(const node_type &n, delta_viewt &delta_view) const;
215306
};
216307

308+
/// Get a view of the elements in the map
309+
/// A view is a list of pairs with the components being const references to the
310+
/// keys and values in the map.
311+
///
312+
/// Complexity:
313+
/// - Worst case: O(N * H * log(S))
314+
/// - Best case: O(N + H)
315+
///
316+
/// \param[out] view: Empty view
217317
SHARING_MAPT(void)::get_view(viewt &view) const
218318
{
219319
assert(view.empty());
@@ -286,6 +386,39 @@ SHARING_MAPT(void)::gather_all(const node_type &n, delta_viewt &delta_view)
286386
while(!stack.empty());
287387
}
288388

389+
/// Get a delta view of the elements in the map
390+
///
391+
/// Informally, a delta view of two maps is a view of the key-value pairs in the
392+
/// maps that are contained in subtrees that are not shared between them.
393+
///
394+
/// A delta view is represented as a list of structs, with each struct having
395+
/// four members (`in_both`, `key`, `value1`, `value2`). The elements `key`,
396+
/// `value1`, and `value2` are const references to the corresponding elements in
397+
/// the map. The first element indicates whether the key exists in both maps,
398+
/// the second element is the key, the third element is the mapped value of the
399+
/// first map, and the fourth element is the mapped value of the second map, or
400+
/// a dummy element if the key exists only in the first map (in which case
401+
/// `in_both` is false).
402+
///
403+
/// Calling `A.get_delta_view(B, ...)` yields a view such that for each element
404+
/// in the view one of two things holds:
405+
/// - the key is contained in both A and B, and in the maps the corresponding
406+
/// key-value pairs are not contained in a subtree that is shared between them
407+
/// - the key is only contained in A
408+
///
409+
/// When `only_common=true`, the first case above holds for every element in the
410+
/// view.
411+
///
412+
/// Complexity:
413+
/// - Worst case: O(max(N1, N2) * H * log(S) * M1 * M2) (no sharing)
414+
/// - Best case: O(1) (maximum sharing)
415+
///
416+
/// The symbols N1, M1 refer to map A, and symbols N2, M2 refer to map B.
417+
///
418+
/// \param other: other map
419+
/// \param[out] delta_view: Empty delta view
420+
/// \param only_common: Indicates if the returned delta view should only
421+
/// contain key-value pairs for keys that exist in both maps
289422
SHARING_MAPT(void)::get_delta_view(
290423
const self_type &other,
291424
delta_viewt &delta_view,
@@ -439,6 +572,15 @@ SHARING_MAPT2(const, node_type *)::get_leaf_node(const key_type &k) const
439572
return p;
440573
}
441574

575+
/// Erase element
576+
///
577+
/// Complexity:
578+
/// - Worst case: O(H * S + M)
579+
/// - Best case: O(H)
580+
///
581+
/// \param k: The key of the element to erase
582+
/// \param key_exists: Hint to indicate whether the element is known to exist
583+
/// (possible values `unknown` or `true`)
442584
SHARING_MAPT2(, size_type)::erase(
443585
const key_type &k,
444586
const tvt &key_exists)
@@ -488,6 +630,17 @@ SHARING_MAPT2(, size_type)::erase(
488630
return 1;
489631
}
490632

633+
/// Erase all elements
634+
///
635+
/// Complexity:
636+
/// - Worst case: O(K * (H * S + M)), where K is the number of keys in `ks`
637+
/// - Best case: O(K * H)
638+
///
639+
/// \param ks: The keys of the elements to erase
640+
/// \param key_exists: Hint to indicate whether the elements are known to exist
641+
/// (possible values `unknown` or `true`). Applies to all elements (i.e., have
642+
/// to use `unknown` if for at least one element it is not known whether it
643+
/// exists)
491644
SHARING_MAPT2(, size_type)::erase_all(
492645
const keyst &ks,
493646
const tvt &key_exists)
@@ -502,6 +655,18 @@ SHARING_MAPT2(, size_type)::erase_all(
502655
return cnt;
503656
}
504657

658+
/// Insert element, return const reference
659+
///
660+
/// Complexity:
661+
/// - Worst case: O(H * S + M)
662+
/// - Best case: O(H)
663+
///
664+
/// \param k: The key of the element to insert
665+
/// \param m: The mapped value to insert
666+
/// \param key_exists: Hint to indicate whether the element is known to exist
667+
/// (possible values `false` or `unknown`)
668+
/// \return Pair of const reference to existing or newly inserted element, and
669+
/// boolean indicating if new element was inserted
505670
SHARING_MAPT2(, const_find_type)::insert(
506671
const key_type &k,
507672
const mapped_type &m,
@@ -525,13 +690,26 @@ SHARING_MAPT2(, const_find_type)::insert(
525690
return const_find_type(as_const(p)->get_value(), true);
526691
}
527692

693+
/// Insert element, return const reference
///
/// Convenience overload taking the key and the mapped value as a pair. It
/// forwards to `insert(p.first, p.second, key_exists)`.
///
/// \param p: Pair whose first component is the key and whose second component
///   is the mapped value to insert
/// \param key_exists: Hint passed on unchanged to the forwarded insert() call
/// \return The result of the forwarded insert() call
528694
SHARING_MAPT2(, const_find_type)::insert(
529695
const value_type &p,
530696
const tvt &key_exists)
531697
{
532698
return insert(p.first, p.second, key_exists);
533699
}
534700

701+
/// Insert element, return non-const reference
702+
///
703+
/// Complexity:
704+
/// - Worst case: O(H * S + M)
705+
/// - Best case: O(H)
706+
///
707+
/// \param k: The key of the element to insert
708+
/// \param m: The mapped value to insert
709+
/// \note Unlike insert(), this method has no `key_exists` hint parameter;
710+
/// it only takes the key and the mapped value.
711+
/// \return Pair of reference to existing or newly inserted element, and boolean
712+
/// indicating if new element was inserted
535713
SHARING_MAPT2(, find_type)::place(
536714
const key_type &k,
537715
const mapped_type &m)
@@ -550,12 +728,24 @@ SHARING_MAPT2(, find_type)::place(
550728
return find_type(p->get_value(), true);
551729
}
552730

731+
/// Insert element, return non-const reference
///
/// Convenience overload taking the key and the mapped value as a pair. It
/// forwards to `place(p.first, p.second)`.
///
/// \param p: Pair whose first component is the key and whose second component
///   is the mapped value to insert
/// \return The result of the forwarded place() call
553732
SHARING_MAPT2(, find_type)::place(
554733
const value_type &p)
555734
{
556735
return place(p.first, p.second);
557736
}
558737

738+
/// Find element
739+
///
740+
/// Complexity:
741+
/// - Worst case: O(H * S + M)
742+
/// - Best case: O(H)
743+
///
744+
/// \param k: The key of the element to search for
745+
/// \param key_exists: Hint to indicate whether the element is known to exist
746+
/// (possible values `unknown` or `true`)
747+
/// \return Pair of reference to found value (or dummy value if not found), and
748+
/// boolean indicating if element was found.
559749
SHARING_MAPT2(, find_type)::find(
560750
const key_type &k,
561751
const tvt &key_exists)
@@ -575,6 +765,17 @@ SHARING_MAPT2(, find_type)::find(
575765

576766
}
577767

768+
/// Find element
769+
///
770+
/// Complexity:
771+
/// - Worst case: O(H * log(S) + M)
772+
/// - Best case: O(H)
773+
///
774+
/// \param k: The key of the element to search for
775+
/// \note Unlike the non-const find(), this const overload has no
776+
/// `key_exists` hint parameter.
777+
/// \return Pair of const reference to found value (or dummy value if not
778+
/// found), and boolean indicating if element was found.
578779
SHARING_MAPT2(, const_find_type)::find(const key_type &k) const
579780
{
580781
const node_type *p=get_leaf_node(k);
@@ -585,6 +786,17 @@ SHARING_MAPT2(, const_find_type)::find(const key_type &k) const
585786
return const_find_type(p->get_value(), true);
586787
}
587788

789+
/// Get element at key
790+
///
791+
/// Complexity:
792+
/// - Worst case: O(H * S + M)
793+
/// - Best case: O(H)
794+
///
795+
/// \param k: The key of the element
796+
/// \param key_exists: Hint to indicate whether the element is known to exist
797+
/// (possible values `unknown` or `true`)
798+
/// \throw std::out_of_range if key not found
799+
/// \return The mapped value
588800
SHARING_MAPT2(, mapped_type &)::at(
589801
const key_type &k,
590802
const tvt &key_exists)
@@ -597,6 +809,15 @@ SHARING_MAPT2(, mapped_type &)::at(
597809
return r.first;
598810
}
599811

812+
/// Get element at key
813+
///
814+
/// Complexity:
815+
/// - Worst case: O(H * log(S) + M)
816+
/// - Best case: O(H)
817+
///
818+
/// \param k: The key of the element
819+
/// \throw std::out_of_range if key not found
820+
/// \return The mapped value
600821
SHARING_MAPT2(const, mapped_type &)::at(const key_type &k) const
601822
{
602823
const_find_type r=find(k);
@@ -606,6 +827,14 @@ SHARING_MAPT2(const, mapped_type &)::at(const key_type &k) const
606827
return r.first;
607828
}
608829

830+
/// Get element at key, insert new if non-existent
831+
///
832+
/// Complexity:
833+
/// - Worst case: O(H * S + M)
834+
/// - Best case: O(H)
835+
///
836+
/// \param k: The key of the element
837+
/// \return The mapped value
609838
SHARING_MAPT2(, mapped_type &)::operator[](const key_type &k)
610839
{
611840
return place(k, mapped_type()).first;

0 commit comments

Comments
 (0)