Skip to content

Commit ea3c9d2

Browse files
Implement an insertion-order preserving, efficient multi-map
1 parent 7710ae0 commit ea3c9d2

File tree

3 files changed

+249
-1
lines changed

3 files changed

+249
-1
lines changed

src/librustc_data_structures/sorted_map.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ use std::iter::FromIterator;
44
use std::mem;
55
use std::ops::{Bound, Index, IndexMut, RangeBounds};
66

7+
mod index_map;
8+
9+
pub use index_map::SortedIndexMultiMap;
10+
711
/// `SortedMap` is a data structure with similar characteristics as BTreeMap but
812
/// slightly different trade-offs: lookup, insertion, and removal are O(log(N))
913
/// and elements can be iterated in order cheaply.
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
//! A variant of `SortedMap` that preserves insertion order.
2+
3+
use std::borrow::Borrow;
4+
use std::hash::{Hash, Hasher};
5+
use std::iter::FromIterator;
6+
7+
use crate::stable_hasher::{HashStable, StableHasher};
8+
use rustc_index::vec::{Idx, IndexVec};
9+
10+
/// An indexed multi-map that preserves insertion order while permitting both `O(log n)` lookup of
11+
/// an item by key and `O(1)` lookup by index.
12+
///
13+
/// This data structure is a hybrid of an [`IndexVec`] and a [`SortedMap`]. Like `IndexVec`,
14+
/// `SortedIndexMultiMap` assigns a typed index to each item while preserving insertion order.
15+
/// Like `SortedMap`, `SortedIndexMultiMap` has efficient lookup of items by key. However, this
16+
/// is accomplished by sorting an array of item indices instead of the items themselves.
17+
///
18+
/// Unlike `SortedMap`, this data structure can hold multiple equivalent items at once, so the
19+
/// `get_by_key` method and its variants return an iterator instead of an `Option`. Equivalent
20+
/// items will be yielded in insertion order.
21+
///
22+
/// Unlike a general-purpose map like `BTreeSet` or `HashSet`, `SortedMap` and
23+
/// `SortedIndexMultiMap` require `O(n)` time to insert a single item. This is because we may need
24+
/// to insert into the middle of the sorted array. Users should avoid mutating this data structure
25+
/// in-place.
26+
///
27+
/// [`IndexVec`]: ../../rustc_index/vec/struct.IndexVec.html
28+
/// [`SortedMap`]: ../sorted_map/struct.SortedMap.html
29+
#[derive(Clone, Debug)]
30+
pub struct SortedIndexMultiMap<I: Idx, K, V> {
31+
/// The elements of the map in insertion order.
32+
items: IndexVec<I, (K, V)>,
33+
34+
/// Indices of the items in the set, sorted by the item's key.
35+
idx_sorted_by_item_key: Vec<I>,
36+
}
37+
38+
impl<I: Idx, K: Ord, V> SortedIndexMultiMap<I, K, V> {
39+
pub fn new() -> Self {
40+
SortedIndexMultiMap { items: IndexVec::new(), idx_sorted_by_item_key: Vec::new() }
41+
}
42+
43+
pub fn len(&self) -> usize {
44+
self.items.len()
45+
}
46+
47+
pub fn is_empty(&self) -> bool {
48+
self.items.is_empty()
49+
}
50+
51+
/// Returns an iterator over the items in the map in insertion order.
52+
pub fn into_iter(self) -> impl DoubleEndedIterator<Item = (K, V)> {
53+
self.items.into_iter()
54+
}
55+
56+
/// Returns an iterator over the items in the map in insertion order along with their indices.
57+
pub fn into_iter_enumerated(self) -> impl DoubleEndedIterator<Item = (I, (K, V))> {
58+
self.items.into_iter_enumerated()
59+
}
60+
61+
/// Returns an iterator over the items in the map in insertion order.
62+
pub fn iter(&self) -> impl '_ + DoubleEndedIterator<Item = (&K, &V)> {
63+
self.items.iter().map(|(ref k, ref v)| (k, v))
64+
}
65+
66+
/// Returns an iterator over the items in the map in insertion order along with their indices.
67+
pub fn iter_enumerated(&self) -> impl '_ + DoubleEndedIterator<Item = (I, (&K, &V))> {
68+
self.items.iter_enumerated().map(|(i, (ref k, ref v))| (i, (k, v)))
69+
}
70+
71+
/// Returns the item in the map with the given index.
72+
pub fn get(&self, idx: I) -> Option<&(K, V)> {
73+
self.items.get(idx)
74+
}
75+
76+
/// Returns an iterator over the items in the map that are equal to `key`.
77+
///
78+
/// If there are multiple items that are equivalent to `key`, they will be yielded in
79+
/// insertion order.
80+
pub fn get_by_key<Q: 'a>(&'a self, key: &Q) -> impl 'a + Iterator<Item = &'a V>
81+
where
82+
Q: Ord + ?Sized,
83+
K: Borrow<Q>,
84+
{
85+
self.get_by_key_enumerated(key).map(|(_, v)| v)
86+
}
87+
88+
/// Returns an iterator over the items in the map that are equal to `key` along with their
89+
/// indices.
90+
///
91+
/// If there are multiple items that are equivalent to `key`, they will be yielded in
92+
/// insertion order.
93+
pub fn get_by_key_enumerated<Q>(&self, key: &Q) -> impl '_ + Iterator<Item = (I, &V)>
94+
where
95+
Q: Ord + ?Sized,
96+
K: Borrow<Q>,
97+
{
98+
// FIXME: This should be in the standard library as `equal_range`. See rust-lang/rfcs#2184.
99+
match self.binary_search_idx(key) {
100+
Err(_) => self.idxs_to_items_enumerated(&[]),
101+
102+
Ok(idx) => {
103+
let start = self.find_lower_bound(key, idx);
104+
let end = self.find_upper_bound(key, idx);
105+
self.idxs_to_items_enumerated(&self.idx_sorted_by_item_key[start..end])
106+
}
107+
}
108+
}
109+
110+
fn binary_search_idx<Q>(&self, key: &Q) -> Result<usize, usize>
111+
where
112+
Q: Ord + ?Sized,
113+
K: Borrow<Q>,
114+
{
115+
self.idx_sorted_by_item_key.binary_search_by(|&idx| self.items[idx].0.borrow().cmp(key))
116+
}
117+
118+
/// Returns the index into the `idx_sorted_by_item_key` array of the first item equal to
119+
/// `key`.
120+
///
121+
/// `initial` must be an index into that same array for an item that is equal to `key`.
122+
fn find_lower_bound<Q>(&self, key: &Q, initial: usize) -> usize
123+
where
124+
Q: Ord + ?Sized,
125+
K: Borrow<Q>,
126+
{
127+
debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key);
128+
129+
// FIXME: At present, this uses linear search, meaning lookup is only `O(log n)` if duplicate
130+
// entries are rare. It would be better to start with a linear search for the common case but
131+
// fall back to an exponential search if many duplicates are found. This applies to
132+
// `upper_bound` as well.
133+
let mut start = initial;
134+
while start != 0 && self.items[self.idx_sorted_by_item_key[start - 1]].0.borrow() == key {
135+
start -= 1;
136+
}
137+
138+
start
139+
}
140+
141+
/// Returns the index into the `idx_sorted_by_item_key` array of the first item greater than
142+
/// `key`, or `self.len()` if no such item exists.
143+
///
144+
/// `initial` must be an index into that same array for an item that is equal to `key`.
145+
fn find_upper_bound<Q>(&self, key: &Q, initial: usize) -> usize
146+
where
147+
Q: Ord + ?Sized,
148+
K: Borrow<Q>,
149+
{
150+
debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key);
151+
152+
// See the FIXME for `find_lower_bound`.
153+
let mut end = initial + 1;
154+
let len = self.items.len();
155+
while end < len && self.items[self.idx_sorted_by_item_key[end]].0.borrow() == key {
156+
end += 1;
157+
}
158+
159+
end
160+
}
161+
162+
fn idxs_to_items_enumerated(&'a self, idxs: &'a [I]) -> impl 'a + Iterator<Item = (I, &'a V)> {
163+
idxs.iter().map(move |&idx| (idx, &self.items[idx].1))
164+
}
165+
}
166+
167+
impl<I: Idx, K: Eq, V: Eq> Eq for SortedIndexMultiMap<I, K, V> {}
168+
impl<I: Idx, K: PartialEq, V: PartialEq> PartialEq for SortedIndexMultiMap<I, K, V> {
169+
fn eq(&self, other: &Self) -> bool {
170+
// No need to compare the sorted index. If the items are the same, the index will be too.
171+
self.items == other.items
172+
}
173+
}
174+
175+
impl<I: Idx, K, V> Hash for SortedIndexMultiMap<I, K, V>
176+
where
177+
K: Hash,
178+
V: Hash,
179+
{
180+
fn hash<H: Hasher>(&self, hasher: &mut H) {
181+
self.items.hash(hasher)
182+
}
183+
}
184+
impl<I: Idx, K, V, C> HashStable<C> for SortedIndexMultiMap<I, K, V>
185+
where
186+
K: HashStable<C>,
187+
V: HashStable<C>,
188+
{
189+
fn hash_stable(&self, ctx: &mut C, hasher: &mut StableHasher) {
190+
self.items.hash_stable(ctx, hasher)
191+
}
192+
}
193+
194+
impl<I: Idx, K: Ord, V> FromIterator<(K, V)> for SortedIndexMultiMap<I, K, V> {
195+
fn from_iter<J>(iter: J) -> Self
196+
where
197+
J: IntoIterator<Item = (K, V)>,
198+
{
199+
let items = IndexVec::from_iter(iter);
200+
let mut idx_sorted_by_item_key: Vec<_> = items.indices().collect();
201+
202+
// `sort_by_key` is stable, so insertion order is preserved for duplicate items.
203+
idx_sorted_by_item_key.sort_by_key(|&idx| &items[idx].0);
204+
205+
SortedIndexMultiMap { items, idx_sorted_by_item_key }
206+
}
207+
}
208+
209+
impl<I: Idx, K, V> std::ops::Index<I> for SortedIndexMultiMap<I, K, V> {
    type Output = V;

    /// Returns the value (not the key) of the item stored at `idx`.
    fn index(&self, idx: I) -> &Self::Output {
        let (_, value) = &self.items[idx];
        value
    }
}
216+
217+
// NOTE(review): `tests` (plural) is not the cfg option that is set for test builds -- that
// option is `test` -- so unless something passes `--cfg tests` explicitly, this module
// declaration is never compiled. The test for this type that accompanies this change lives in
// `sorted_map/tests.rs`. TODO confirm whether this should be removed, or changed to
// `#[cfg(test)]` (which would require a `tests` module file for this module to exist).
#[cfg(tests)]
218+
mod tests;

src/librustc_data_structures/sorted_map/tests.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,30 @@
1-
use super::SortedMap;
1+
use super::{SortedIndexMultiMap, SortedMap};
2+
3+
/// Checks insertion-order iteration, indexing, and keyed lookup (including duplicate keys) of
/// `SortedIndexMultiMap`.
#[test]
fn test_sorted_index_multi_map() {
    let entries: Vec<_> = vec![(2, 0), (1, 0), (2, 1), (3, 0), (2, 2)];
    let set: SortedIndexMultiMap<usize, _, _> = entries.iter().copied().collect();

    // Iterating the map yields the entries exactly as they were inserted.
    assert!(set.iter().eq(entries.iter().map(|(k, v)| (k, v))));

    // Indexing by position returns the value inserted at that position.
    for (i, &(_, expected)) in entries.iter().enumerate() {
        assert_eq!(set[i], expected);
    }

    // A key that occurs once yields its single value; an absent key yields nothing.
    let threes: Vec<_> = set.get_by_key(&3).copied().collect();
    assert_eq!(threes, vec![0]);
    assert!(set.get_by_key(&4).next().is_none());

    // A duplicated key yields every match, in insertion order, with the matching indices.
    let (idxs, values): (Vec<usize>, Vec<usize>) =
        set.get_by_key_enumerated(&2).map(|(i, &v)| (i, v)).unzip();

    assert_eq!(idxs, vec![0, 2, 4]);
    assert_eq!(values, vec![0, 1, 2]);
}
228

329
#[test]
430
fn test_insert_and_iter() {

0 commit comments

Comments
 (0)