|
| 1 | +use std::iter::Step; |
| 2 | +use std::marker::PhantomData; |
| 3 | +use std::ops::Bound; |
| 4 | +use std::ops::RangeBounds; |
| 5 | + |
| 6 | +use crate::vec::Idx; |
| 7 | +use crate::vec::IndexVec; |
| 8 | +use smallvec::SmallVec; |
| 9 | + |
| 10 | +#[cfg(test)] |
| 11 | +mod tests; |
| 12 | + |
/// Stores a set of intervals on the indices.
#[derive(Debug, Clone)]
pub struct IntervalSet<I> {
    // Sorted list of disjoint `(start, end)` pairs with *inclusive* ends.
    // `insert_range` keeps entries coalesced: overlapping or adjacent
    // intervals are merged on insertion.
    map: SmallVec<[(u32, u32); 4]>,
    // Size of the index domain; valid elements are `0..domain`.
    domain: usize,
    // Ties the set to the index type `I` without storing any `I` values.
    _data: PhantomData<I>,
}
| 21 | + |
| 22 | +#[inline] |
| 23 | +fn inclusive_start<T: Idx>(range: impl RangeBounds<T>) -> u32 { |
| 24 | + match range.start_bound() { |
| 25 | + Bound::Included(start) => start.index() as u32, |
| 26 | + Bound::Excluded(start) => start.index() as u32 + 1, |
| 27 | + Bound::Unbounded => 0, |
| 28 | + } |
| 29 | +} |
| 30 | + |
| 31 | +#[inline] |
| 32 | +fn inclusive_end<T: Idx>(domain: usize, range: impl RangeBounds<T>) -> Option<u32> { |
| 33 | + let end = match range.end_bound() { |
| 34 | + Bound::Included(end) => end.index() as u32, |
| 35 | + Bound::Excluded(end) => end.index().checked_sub(1)? as u32, |
| 36 | + Bound::Unbounded => domain.checked_sub(1)? as u32, |
| 37 | + }; |
| 38 | + Some(end) |
| 39 | +} |
| 40 | + |
| 41 | +impl<I: Idx> IntervalSet<I> { |
| 42 | + pub fn new(domain: usize) -> IntervalSet<I> { |
| 43 | + IntervalSet { map: SmallVec::new(), domain, _data: PhantomData } |
| 44 | + } |
| 45 | + |
| 46 | + pub fn clear(&mut self) { |
| 47 | + self.map.clear(); |
| 48 | + } |
| 49 | + |
| 50 | + pub fn iter(&self) -> impl Iterator<Item = I> + '_ |
| 51 | + where |
| 52 | + I: Step, |
| 53 | + { |
| 54 | + self.iter_intervals().flatten() |
| 55 | + } |
| 56 | + |
| 57 | + /// Iterates through intervals stored in the set, in order. |
| 58 | + pub fn iter_intervals(&self) -> impl Iterator<Item = std::ops::Range<I>> + '_ |
| 59 | + where |
| 60 | + I: Step, |
| 61 | + { |
| 62 | + self.map.iter().map(|&(start, end)| I::new(start as usize)..I::new(end as usize + 1)) |
| 63 | + } |
| 64 | + |
| 65 | + /// Returns true if we increased the number of elements present. |
| 66 | + pub fn insert(&mut self, point: I) -> bool { |
| 67 | + self.insert_range(point..=point) |
| 68 | + } |
| 69 | + |
| 70 | + /// Returns true if we increased the number of elements present. |
| 71 | + pub fn insert_range(&mut self, range: impl RangeBounds<I> + Clone) -> bool { |
| 72 | + let start = inclusive_start(range.clone()); |
| 73 | + let Some(mut end) = inclusive_end(self.domain, range) else { |
| 74 | + // empty range |
| 75 | + return false; |
| 76 | + }; |
| 77 | + if start > end { |
| 78 | + return false; |
| 79 | + } |
| 80 | + |
| 81 | + loop { |
| 82 | + // This condition looks a bit weird, but actually makes sense. |
| 83 | + // |
| 84 | + // if r.0 == end + 1, then we're actually adjacent, so we want to |
| 85 | + // continue to the next range. We're looking here for the first |
| 86 | + // range which starts *non-adjacently* to our end. |
| 87 | + let next = self.map.partition_point(|r| r.0 <= end + 1); |
| 88 | + if let Some(last) = next.checked_sub(1) { |
| 89 | + let (prev_start, prev_end) = &mut self.map[last]; |
| 90 | + if *prev_end + 1 >= start { |
| 91 | + // If the start for the inserted range is adjacent to the |
| 92 | + // end of the previous, we can extend the previous range. |
| 93 | + if start < *prev_start { |
| 94 | + // Our range starts before the one we found. We'll need |
| 95 | + // to *remove* it, and then try again. |
| 96 | + // |
| 97 | + // FIXME: This is not so efficient; we may need to |
| 98 | + // recurse a bunch of times here. Instead, it's probably |
| 99 | + // better to do something like drain_filter(...) on the |
| 100 | + // map to be able to delete or modify all the ranges in |
| 101 | + // start..=end and then potentially re-insert a new |
| 102 | + // range. |
| 103 | + end = std::cmp::max(end, *prev_end); |
| 104 | + self.map.remove(last); |
| 105 | + } else { |
| 106 | + // We overlap with the previous range, increase it to |
| 107 | + // include us. |
| 108 | + // |
| 109 | + // Make sure we're actually going to *increase* it though -- |
| 110 | + // it may be that end is just inside the previously existing |
| 111 | + // set. |
| 112 | + return if end > *prev_end { |
| 113 | + *prev_end = end; |
| 114 | + true |
| 115 | + } else { |
| 116 | + false |
| 117 | + }; |
| 118 | + } |
| 119 | + } else { |
| 120 | + // Otherwise, we don't overlap, so just insert |
| 121 | + self.map.insert(last + 1, (start, end)); |
| 122 | + return true; |
| 123 | + } |
| 124 | + } else { |
| 125 | + if self.map.is_empty() { |
| 126 | + // Quite common in practice, and expensive to call memcpy |
| 127 | + // with length zero. |
| 128 | + self.map.push((start, end)); |
| 129 | + } else { |
| 130 | + self.map.insert(next, (start, end)); |
| 131 | + } |
| 132 | + return true; |
| 133 | + } |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + pub fn contains(&self, needle: I) -> bool { |
| 138 | + let needle = needle.index() as u32; |
| 139 | + let last = match self.map.partition_point(|r| r.0 <= needle).checked_sub(1) { |
| 140 | + Some(idx) => idx, |
| 141 | + None => { |
| 142 | + // All ranges in the map start after the new range's end |
| 143 | + return false; |
| 144 | + } |
| 145 | + }; |
| 146 | + let (_, prev_end) = &self.map[last]; |
| 147 | + needle <= *prev_end |
| 148 | + } |
| 149 | + |
| 150 | + pub fn superset(&self, other: &IntervalSet<I>) -> bool |
| 151 | + where |
| 152 | + I: Step, |
| 153 | + { |
| 154 | + // FIXME: Performance here is probably not great. We will be doing a lot |
| 155 | + // of pointless tree traversals. |
| 156 | + other.iter().all(|elem| self.contains(elem)) |
| 157 | + } |
| 158 | + |
| 159 | + pub fn is_empty(&self) -> bool { |
| 160 | + self.map.is_empty() |
| 161 | + } |
| 162 | + |
| 163 | + /// Returns the maximum (last) element present in the set from `range`. |
| 164 | + pub fn last_set_in(&self, range: impl RangeBounds<I> + Clone) -> Option<I> { |
| 165 | + let start = inclusive_start(range.clone()); |
| 166 | + let Some(end) = inclusive_end(self.domain, range) else { |
| 167 | + // empty range |
| 168 | + return None; |
| 169 | + }; |
| 170 | + if start > end { |
| 171 | + return None; |
| 172 | + } |
| 173 | + let last = match self.map.partition_point(|r| r.0 <= end).checked_sub(1) { |
| 174 | + Some(idx) => idx, |
| 175 | + None => { |
| 176 | + // All ranges in the map start after the new range's end |
| 177 | + return None; |
| 178 | + } |
| 179 | + }; |
| 180 | + let (_, prev_end) = &self.map[last]; |
| 181 | + if start <= *prev_end { Some(I::new(std::cmp::min(*prev_end, end) as usize)) } else { None } |
| 182 | + } |
| 183 | + |
| 184 | + pub fn insert_all(&mut self) { |
| 185 | + self.clear(); |
| 186 | + self.map.push((0, self.domain.try_into().unwrap())); |
| 187 | + } |
| 188 | + |
| 189 | + pub fn union(&mut self, other: &IntervalSet<I>) -> bool |
| 190 | + where |
| 191 | + I: Step, |
| 192 | + { |
| 193 | + assert_eq!(self.domain, other.domain); |
| 194 | + let mut did_insert = false; |
| 195 | + for range in other.iter_intervals() { |
| 196 | + did_insert |= self.insert_range(range); |
| 197 | + } |
| 198 | + did_insert |
| 199 | + } |
| 200 | +} |
| 201 | + |
/// This data structure optimizes for cases where the stored bits in each row
/// are expected to be highly contiguous (long ranges of 1s or 0s), in contrast
/// to BitMatrix and SparseBitMatrix which are optimized for
/// "random"/non-contiguous bits and cheap(er) point queries at the expense of
/// memory usage.
#[derive(Clone)]
pub struct SparseIntervalMatrix<R, C>
where
    R: Idx,
    C: Idx,
{
    // One `IntervalSet` per row; rows are created lazily by `ensure_row`.
    rows: IndexVec<R, IntervalSet<C>>,
    // Domain size handed to each row's `IntervalSet` (number of columns).
    column_size: usize,
}
| 216 | + |
| 217 | +impl<R: Idx, C: Step + Idx> SparseIntervalMatrix<R, C> { |
| 218 | + pub fn new(column_size: usize) -> SparseIntervalMatrix<R, C> { |
| 219 | + SparseIntervalMatrix { rows: IndexVec::new(), column_size } |
| 220 | + } |
| 221 | + |
| 222 | + pub fn rows(&self) -> impl Iterator<Item = R> { |
| 223 | + self.rows.indices() |
| 224 | + } |
| 225 | + |
| 226 | + pub fn row(&self, row: R) -> Option<&IntervalSet<C>> { |
| 227 | + self.rows.get(row) |
| 228 | + } |
| 229 | + |
| 230 | + fn ensure_row(&mut self, row: R) -> &mut IntervalSet<C> { |
| 231 | + self.rows.ensure_contains_elem(row, || IntervalSet::new(self.column_size)); |
| 232 | + &mut self.rows[row] |
| 233 | + } |
| 234 | + |
| 235 | + pub fn union_row(&mut self, row: R, from: &IntervalSet<C>) -> bool |
| 236 | + where |
| 237 | + C: Step, |
| 238 | + { |
| 239 | + self.ensure_row(row).union(from) |
| 240 | + } |
| 241 | + |
| 242 | + pub fn union_rows(&mut self, read: R, write: R) -> bool |
| 243 | + where |
| 244 | + C: Step, |
| 245 | + { |
| 246 | + if read == write || self.rows.get(read).is_none() { |
| 247 | + return false; |
| 248 | + } |
| 249 | + self.ensure_row(write); |
| 250 | + let (read_row, write_row) = self.rows.pick2_mut(read, write); |
| 251 | + write_row.union(read_row) |
| 252 | + } |
| 253 | + |
| 254 | + pub fn insert_all_into_row(&mut self, row: R) { |
| 255 | + self.ensure_row(row).insert_all(); |
| 256 | + } |
| 257 | + |
| 258 | + pub fn insert_range(&mut self, row: R, range: impl RangeBounds<C> + Clone) { |
| 259 | + self.ensure_row(row).insert_range(range); |
| 260 | + } |
| 261 | + |
| 262 | + pub fn insert(&mut self, row: R, point: C) -> bool { |
| 263 | + self.ensure_row(row).insert(point) |
| 264 | + } |
| 265 | + |
| 266 | + pub fn contains(&self, row: R, point: C) -> bool { |
| 267 | + self.row(row).map_or(false, |r| r.contains(point)) |
| 268 | + } |
| 269 | +} |
0 commit comments