Skip to content

Commit a4d2263

Browse files
committed
add an initial radix trie implementation
1 parent afdd0b8 commit a4d2263

File tree

2 files changed

+370
-0
lines changed

2 files changed

+370
-0
lines changed

src/libcore/core.rc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ pub mod dlist;
144144
pub mod dlist_iter;
145145
pub mod hashmap;
146146
pub mod cell;
147+
pub mod trie;
147148

148149

149150
/* Tasks and communication */

src/libcore/trie.rs

Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
//! A radix trie for storing integers in sorted order
12+
13+
use prelude::*;
14+
15+
// FIXME: #3469: need to manually update TrieNode when SHIFT changes
16+
// Number of key bits consumed at each level of the trie.
const SHIFT: uint = 4;
17+
// Number of distinct values a SHIFT-bit chunk can take (1 << SHIFT).
const SIZE: uint = 1 << SHIFT;
18+
// Bit mask that isolates one SHIFT-bit chunk after a key has been shifted.
const MASK: uint = SIZE - 1;
19+
20+
/// One slot of a trie node: a subtree, a stored entry, or nothing.
enum Child<T> {
21+
// Owned pointer to a node one level further down.
Internal(~TrieNode<T>),
22+
// A stored entry: the complete key together with its value.
External(uint, T),
23+
// Empty slot.
Nothing
24+
}
25+
26+
/// A map from `uint` keys to values of type `T`, stored in a radix trie.
pub struct TrieMap<T> {
27+
// Top-level node; lookups, inserts and removals all start here.
priv root: TrieNode<T>,
28+
// Number of key-value pairs; adjusted by `insert`/`remove`, reported by `len`.
priv length: uint
29+
}
30+
31+
impl<T> BaseIter<(uint, &T)> for TrieMap<T> {
32+
/// Visit all key-value pairs in order
33+
#[inline(always)]
34+
pure fn each(&self, f: fn(&(uint, &self/T)) -> bool) {
35+
self.root.each(f)
36+
}
37+
#[inline(always)]
38+
pure fn size_hint(&self) -> Option<uint> { Some(self.len()) }
39+
}
40+
41+
impl<T> ReverseIter<(uint, &T)> for TrieMap<T> {
42+
/// Visit all key-value pairs in reverse order.
///
/// The walk is delegated to the root node's `each_reverse`, which calls
/// `f` once per pair; `f` returns false to ask for the walk to stop.
43+
#[inline(always)]
44+
pure fn each_reverse(&self, f: fn(&(uint, &self/T)) -> bool) {
45+
self.root.each_reverse(f)
46+
}
47+
}
48+
49+
impl<T> Container for TrieMap<T> {
50+
/// Return the number of elements in the map
51+
#[inline(always)]
52+
pure fn len(&self) -> uint { self.length }
53+
54+
/// Return true if the map contains no elements
55+
#[inline(always)]
56+
pure fn is_empty(&self) -> bool { self.len() == 0 }
57+
}
58+
59+
impl<T: Copy> Mutable for TrieMap<T> {
60+
/// Clear the map, removing all values.
61+
#[inline(always)]
62+
fn clear(&mut self) {
63+
self.root = TrieNode::new();
64+
self.length = 0;
65+
}
66+
}
67+
68+
impl<T: Copy> Map<uint, T> for TrieMap<T> {
69+
/// Return true if the map contains a value for the specified key
70+
#[inline(always)]
71+
pure fn contains_key(&self, key: &uint) -> bool {
72+
self.find(key).is_some()
73+
}
74+
75+
/// Visit all keys in order
76+
#[inline(always)]
77+
pure fn each_key(&self, f: fn(&uint) -> bool) {
78+
self.each(|&(k, _)| f(&k))
79+
}
80+
81+
/// Visit all values in order
82+
#[inline(always)]
83+
pure fn each_value(&self, f: fn(&T) -> bool) { self.each(|&(_, v)| f(v)) }
84+
85+
/// Return the value corresponding to the key in the map
86+
#[inline(hint)]
87+
pure fn find(&self, key: &uint) -> Option<&self/T> {
88+
let mut node: &self/TrieNode<T> = &self.root;
89+
let mut idx = 0;
90+
loop {
91+
match node.children[chunk(*key, idx)] {
92+
Internal(ref x) => node = &**x,
93+
External(stored, ref value) => {
94+
if stored == *key {
95+
return Some(value)
96+
} else {
97+
return None
98+
}
99+
}
100+
Nothing => return None
101+
}
102+
idx += 1;
103+
}
104+
}
105+
106+
/// Insert a key-value pair into the map. An existing value for a
107+
/// key is replaced by the new value. Return true if the key did
108+
/// not already exist in the map.
109+
#[inline(always)]
110+
fn insert(&mut self, key: uint, value: T) -> bool {
111+
let ret = insert(&mut self.root.count,
112+
&mut self.root.children[chunk(key, 0)],
113+
key, value, 1);
114+
if ret { self.length += 1 }
115+
ret
116+
}
117+
118+
/// Remove a key-value pair from the map. Return true if the key
119+
/// was present in the map, otherwise false.
120+
#[inline(always)]
121+
fn remove(&mut self, key: &uint) -> bool {
122+
let ret = remove(&mut self.root.count,
123+
&mut self.root.children[chunk(*key, 0)],
124+
*key, 1);
125+
if ret { self.length -= 1 }
126+
ret
127+
}
128+
}
129+
130+
impl<T: Copy> TrieMap<T> {
131+
#[inline(always)]
132+
static pure fn new() -> TrieMap<T> {
133+
TrieMap{root: TrieNode::new(), length: 0}
134+
}
135+
}
136+
137+
impl<T> TrieMap<T> {
138+
/// Visit all keys in reverse order.
///
/// Runs `each_reverse` and passes only the key of each pair on to `f`.
139+
#[inline(always)]
140+
pure fn each_key_reverse(&self, f: fn(&uint) -> bool) {
141+
self.each_reverse(|&(k, _)| f(&k))
142+
}
143+
144+
/// Visit all values in reverse order.
///
/// Runs `each_reverse` and passes only the value of each pair on to `f`.
145+
#[inline(always)]
146+
pure fn each_value_reverse(&self, f: fn(&T) -> bool) {
147+
self.each_reverse(|&(_, v)| f(v))
148+
}
149+
150+
/// Iterate over the map and mutate the contained values.
///
/// `f` receives each key by value and a mutable reference to the value
/// stored under it; the walk itself is done by the root node's
/// `mutate_values`.
151+
fn mutate_values(&mut self, f: fn(uint, &mut T) -> bool) {
152+
self.root.mutate_values(f)
153+
}
154+
}
155+
156+
/// A set of `uint` values, implemented as a `TrieMap` whose values are `()`.
pub struct TrieSet {
157+
// Backing map; the set's members are this map's keys.
priv map: TrieMap<()>
158+
}
159+
160+
impl BaseIter<uint> for TrieSet {
161+
/// Visit all values in order
162+
pure fn each(&self, f: fn(&uint) -> bool) { self.map.each_key(f) }
163+
pure fn size_hint(&self) -> Option<uint> { Some(self.len()) }
164+
}
165+
166+
impl ReverseIter<uint> for TrieSet {
167+
/// Visit all values in reverse order, by walking the backing map's keys
/// with `each_key_reverse`.
168+
pure fn each_reverse(&self, f: fn(&uint) -> bool) {
169+
self.map.each_key_reverse(f)
170+
}
171+
}
172+
173+
impl Container for TrieSet {
174+
/// Return the number of elements in the set
175+
#[inline(always)]
176+
pure fn len(&self) -> uint { self.map.len() }
177+
178+
/// Return true if the set contains no elements
179+
#[inline(always)]
180+
pure fn is_empty(&self) -> bool { self.map.is_empty() }
181+
}
182+
183+
impl Mutable for TrieSet {
184+
/// Clear the set, removing all values, by clearing the backing map.
185+
#[inline(always)]
186+
fn clear(&mut self) { self.map.clear() }
187+
}
188+
189+
impl TrieSet {
190+
/// Return true if the set contains a value
191+
#[inline(always)]
192+
pure fn contains(&self, value: &uint) -> bool {
193+
self.map.contains_key(value)
194+
}
195+
196+
/// Add a value to the set. Return true if the value was not already
197+
/// present in the set.
198+
#[inline(always)]
199+
fn insert(&mut self, value: uint) -> bool { self.map.insert(value, ()) }
200+
201+
/// Remove a value from the set. Return true if the value was
202+
/// present in the set.
203+
#[inline(always)]
204+
fn remove(&mut self, value: &uint) -> bool { self.map.remove(value) }
205+
}
206+
207+
/// One level of the trie: a fixed array of child slots plus a count of how
/// many of them are in use.
struct TrieNode<T> {
208+
// Number of slots in `children` that are not `Nothing`; `insert` and
// `remove` maintain it and `check_integrity` verifies it.
count: uint,
209+
// One slot per possible chunk value; indexed with the result of `chunk`.
children: [Child<T> * 16] // FIXME: #3469: can't use the SIZE constant yet
210+
}
211+
212+
impl<T: Copy> TrieNode<T> {
213+
/// Create a node with a count of zero and every child slot `Nothing`.
// NOTE(review): the `T: Copy` bound is presumably needed by the
// `[Nothing, ..SIZE]` repeat expression -- confirm before relaxing it.
#[inline(always)]
214+
static pure fn new() -> TrieNode<T> {
215+
TrieNode{count: 0, children: [Nothing, ..SIZE]}
216+
}
217+
}
218+
219+
impl<T> TrieNode<T> {
220+
pure fn each(&self, f: fn(&(uint, &self/T)) -> bool) {
221+
for uint::range(0, self.children.len()) |idx| {
222+
match self.children[idx] {
223+
Internal(ref x) => x.each(f),
224+
External(k, ref v) => if !f(&(k, v)) { return },
225+
Nothing => ()
226+
}
227+
}
228+
}
229+
230+
pure fn each_reverse(&self, f: fn(&(uint, &self/T)) -> bool) {
231+
for uint::range_rev(self.children.len(), 0) |idx| {
232+
match self.children[idx - 1] {
233+
Internal(ref x) => x.each(f),
234+
External(k, ref v) => if !f(&(k, v)) { return },
235+
Nothing => ()
236+
}
237+
}
238+
}
239+
240+
fn mutate_values(&mut self, f: fn(uint, &mut T) -> bool) {
241+
for vec::each_mut(self.children) |child| {
242+
match *child {
243+
Internal(ref mut x) => x.mutate_values(f),
244+
External(k, ref mut v) => if !f(k, v) { return },
245+
Nothing => ()
246+
}
247+
}
248+
}
249+
}
250+
251+
// if this was done via a trait, the key could be generic
252+
#[inline(always)]
253+
pure fn chunk(n: uint, idx: uint) -> uint {
254+
let real_idx = uint::bytes - 1 - idx;
255+
(n >> (SHIFT * real_idx)) & MASK
256+
}
257+
258+
fn insert<T: Copy>(count: &mut uint, child: &mut Child<T>, key: uint,
259+
value: T, idx: uint) -> bool {
260+
match *child {
261+
External(stored_key, stored_value) => {
262+
if stored_key == key {
263+
false // already in the trie
264+
} else {
265+
// conflict - split the node
266+
let mut new = ~TrieNode::new();
267+
insert(&mut new.count,
268+
&mut new.children[chunk(stored_key, idx)],
269+
stored_key, stored_value, idx + 1);
270+
insert(&mut new.count, &mut new.children[chunk(key, idx)], key,
271+
value, idx + 1);
272+
*child = Internal(new);
273+
true
274+
}
275+
}
276+
Internal(ref mut x) => {
277+
insert(&mut x.count, &mut x.children[chunk(key, idx)], key, value,
278+
idx + 1)
279+
}
280+
Nothing => {
281+
*count += 1;
282+
*child = External(key, value);
283+
true
284+
}
285+
}
286+
}
287+
288+
fn remove<T>(count: &mut uint, child: &mut Child<T>, key: uint,
289+
idx: uint) -> bool {
290+
let (ret, this) = match *child {
291+
External(stored, _) => {
292+
if stored == key { (true, true) } else { (false, false) }
293+
}
294+
Internal(ref mut x) => {
295+
let ret = remove(&mut x.count, &mut x.children[chunk(key, idx)],
296+
key, idx + 1);
297+
(ret, x.count == 0)
298+
}
299+
Nothing => (false, false)
300+
};
301+
302+
if this {
303+
*child = Nothing;
304+
*count -= 1;
305+
}
306+
ret
307+
}
308+
309+
#[cfg(test)]
310+
pub fn check_integrity<T>(trie: &TrieNode<T>) {
311+
assert trie.count != 0;
312+
313+
let mut sum = 0;
314+
315+
for trie.children.each |x| {
316+
match *x {
317+
Nothing => (),
318+
Internal(ref y) => {
319+
check_integrity(&**y);
320+
sum += 1
321+
}
322+
External(_, _) => { sum += 1 }
323+
}
324+
}
325+
326+
assert sum == trie.count;
327+
}
328+
329+
#[cfg(test)]
330+
mod tests {
331+
use super::*;
332+
use uint;
333+
334+
#[test]
335+
fn test_step() {
336+
let mut trie = TrieMap::new();
337+
let n = 300;
338+
339+
for uint::range_step(1, n, 2) |x| {
340+
assert trie.insert(x, x + 1);
341+
assert trie.contains_key(&x);
342+
check_integrity(&trie.root);
343+
}
344+
345+
for uint::range_step(0, n, 2) |x| {
346+
assert !trie.contains_key(&x);
347+
assert trie.insert(x, x + 1);
348+
check_integrity(&trie.root);
349+
}
350+
351+
for uint::range(0, n) |x| {
352+
assert trie.contains_key(&x);
353+
assert !trie.insert(x, x + 1);
354+
check_integrity(&trie.root);
355+
}
356+
357+
for uint::range_step(1, n, 2) |x| {
358+
assert trie.remove(&x);
359+
assert !trie.contains_key(&x);
360+
check_integrity(&trie.root);
361+
}
362+
363+
for uint::range_step(0, n, 2) |x| {
364+
assert trie.contains_key(&x);
365+
assert !trie.insert(x, x + 1);
366+
check_integrity(&trie.root);
367+
}
368+
}
369+
}

0 commit comments

Comments
 (0)