Skip to content

Commit c68f356

Browse files
committed
major btree refactor
1 parent d229980 commit c68f356

File tree

1 file changed

+98
-76
lines changed

1 file changed

+98
-76
lines changed

src/libcollections/btree.rs

Lines changed: 98 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,30 @@ use core::prelude::*;
1919

2020
use alloc::boxed::Box;
2121
use vec::Vec;
22-
use core::mem;
23-
use core::iter::range_inclusive;
22+
use core::{mem, ptr};
23+
use core::slice::Items;
2424
use {Mutable, MutableMap, Map, MutableSeq};
2525

26-
/// "Order" of the B-tree, from which all other properties are derived
27-
static B: uint = 6;
26+
/// Generate an array of None<$typ>'s of size $count
27+
macro_rules! nones(
28+
($typ: ty, $count: expr) => (
29+
unsafe {
30+
let mut tmp: [Option<$typ>, .. $count] = mem::uninitialized();
31+
for i in tmp.as_mut_slice().mut_iter() {
32+
ptr::write(i, None);
33+
}
34+
tmp
35+
}
36+
);
37+
)
38+
39+
/// "Order" of the B-tree, from which all other properties are derived. In experiments with
40+
/// different values of B on a BTree<uint, uint> on 64-bit linux, `B = 5` struck the best
41+
/// balance between search and mutation time. Lowering B improves mutation time (less array
42+
/// shifting), and raising B improves search time (less depth and pointer following).
43+
/// However, increasing B higher than 5 had marginal search gains compared to mutation costs.
44+
/// This value should be re-evaluated whenever the tree is significantly refactored.
45+
static B: uint = 5;
2846
/// Maximum number of elements in a node
2947
static CAPACITY: uint = 2 * B - 1;
3048
/// Minimum number of elements in a node
@@ -34,8 +52,7 @@ static EDGE_CAPACITY: uint = CAPACITY + 1;
3452
/// Amount to take off the tail of a node being split
3553
static SPLIT_LEN: uint = B - 1;
3654

37-
/// Represents a search path for mutating. The rawptrs here should never be
38-
/// null or dangling, and should be accessed one-at-a-time via pops.
55+
/// Represents a search path for mutating
3956
type SearchStack<K,V> = Vec<(*mut Node<K,V>, uint)>;
4057

4158
/// Represents the result of an Insertion: either the item fit, or the node had to split
@@ -46,12 +63,18 @@ enum InsertionResult<K,V>{
4663

4764
/// Represents the result of a search for a key in a single node
4865
enum SearchResult {
49-
Found(uint), Bound(uint),
66+
Found(uint), GoDown(uint),
5067
}
5168

52-
/// A B-Tree Node
69+
/// A B-Tree Node. We keep keys/edges/values separate to optimize searching for keys.
5370
struct Node<K,V> {
5471
length: uint,
72+
// FIXME(Gankro): We use Options here because there currently isn't a safe way to deal
73+
// with partially initialized [T, ..n]'s. #16998 is one solution to this. Other alternatives
74+
// include Vec's or heap-allocating a raw buffer of bytes, similar to HashMap's RawTable.
75+
// However, those solutions introduce an unfortunate extra level of indirection (unless the whole
76+
// node is inlined into this one mega-buffer). We consider this solution to be sufficient for a
77+
// first-draft, and it has the benefit of being a nice safe starting point to optimize from.
5578
keys: [Option<K>, ..CAPACITY],
5679
edges: [Option<Box<Node<K,V>>>, ..EDGE_CAPACITY],
5780
vals: [Option<V>, ..CAPACITY],
@@ -91,10 +114,10 @@ impl<K: Ord, V> Map<K,V> for BTree<K,V> {
91114
let mut cur_node = &**root;
92115
loop {
93116
match cur_node.search(key) {
94-
Found(i) => return cur_node.vals[i].as_ref(), // Found the key
95-
Bound(i) => match cur_node.edges[i].as_ref() { // Didn't find the key
96-
None => return None, // We're a leaf, it's not in here
97-
Some(next_node) => { // We're an internal node, search the subtree
117+
Found(i) => return cur_node.vals[i].as_ref(),
118+
GoDown(i) => match cur_node.edges[i].as_ref() {
119+
None => return None,
120+
Some(next_node) => {
98121
cur_node = &**next_node;
99122
continue;
100123
}
@@ -118,7 +141,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
118141
let cur_node = temp_node;
119142
match cur_node.search(key) {
120143
Found(i) => return cur_node.vals[i].as_mut(),
121-
Bound(i) => match cur_node.edges[i].as_mut() {
144+
GoDown(i) => match cur_node.edges[i].as_mut() {
122145
None => return None,
123146
Some(next_node) => {
124147
temp_node = &mut **next_node;
@@ -134,7 +157,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
134157
// Insertion in a B-Tree is a bit complicated.
135158
//
136159
// First we do the same kind of search described in
137-
// `find`, but we need to maintain a stack of all the nodes/edges in our search path.
160+
// `find`. But we need to maintain a stack of all the nodes/edges in our search path.
138161
// If we find a match for the key we're trying to insert, just swap the vals and return the
139162
// old ones. However, when we bottom out in a leaf, we attempt to insert our key-value pair
140163
// at the same location we would want to follow another edge.
@@ -147,8 +170,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
147170
//
148171
// Note that we subtly deviate from Open Data Structures in our implementation of split.
149172
// ODS describes inserting into the node *regardless* of its capacity, and then
150-
// splitting *afterwards* if it happens to be overfull. However, this is inefficient
151-
// (or downright impossible, depending on the design).
173+
// splitting *afterwards* if it happens to be overfull. However, this is inefficient.
152174
// Instead, we split beforehand, and then insert the key-value pair into the appropriate
153175
// result node. This has two consequences:
154176
//
@@ -169,21 +191,14 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
169191
None
170192
} else {
171193
let visit_stack = {
172-
// Borrowck hack, see `find_mut`
194+
// We need this temp_node for borrowck wrangling
173195
let mut temp_node = &mut **self.root.as_mut().unwrap();
174196
// visit_stack is a stack of rawptrs to nodes paired with indices, respectively
175197
// representing the nodes and edges of our search path. We have to store rawptrs
176198
// because as far as Rust is concerned, we can mutate aliased data with such a
177-
// stack. It is of course correct, but what it doesn't know is the following:
178-
//
179-
// * The nodes in the visit_stack don't move in memory (at least, don't move
180-
// in memory between now and when we've finished handling the raw pointer to it)
181-
//
182-
// * We don't mutate anything through a given ptr until we've popped and forgotten
183-
// all the ptrs after it, at which point we don't have any pointers to children of
184-
// that node
185-
//
186-
// An alternative is to take the Node boxes from their parents. This actually makes
199+
// stack. It is of course correct, but what it doesn't know is that we will only
200+
// be popping and using these ptrs one at a time in `insert_stack`. The alternative
201+
// to doing this is to take the Node boxes from their parents. This actually makes
187202
// borrowck *really* happy and everything is pretty smooth. However, this creates
188203
// *tons* of pointless writes, and requires us to always walk all the way back to
189204
// the root after an insertion, even if we only needed to change a leaf. Therefore,
@@ -202,7 +217,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
202217
mem::swap(cur_node.keys[i].as_mut().unwrap(), &mut key);
203218
return Some(value);
204219
},
205-
Bound(i) => {
220+
GoDown(i) => {
206221
visit_stack.push((cur_node_ptr, i));
207222
match cur_node.edges[i].as_mut() {
208223
None => {
@@ -229,10 +244,10 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
229244

230245
// Deletion is the most complicated operation for a B-Tree.
231246
//
232-
// First we do the same kind of search described in `find`, but we need to maintain a stack
233-
// of all the nodes/edges in our search path. If we don't find the key, then we just return
234-
// `None` and do nothing. If we do find the key, we perform two operations: remove the item,
235-
// and then possibly handle underflow.
247+
// First we do the same kind of search described in
248+
// `find`. But we need to maintain a stack of all the nodes/edges in our search path.
249+
// If we don't find the key, then we just return `None` and do nothing. If we do find the
250+
// key, we perform two operations: remove the item, and then possibly handle underflow.
236251
//
237252
// # removing the item
238253
// If the node is a leaf, we just remove the item, and shift
@@ -269,7 +284,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
269284
None
270285
} else {
271286
let visit_stack = {
272-
// Borrowck hack, see `find_mut`
287+
// We need this temp_node for borrowck wrangling
273288
let mut temp_node = &mut **self.root.as_mut().unwrap();
274289
// See `pop` for a description of this variable
275290
let mut visit_stack = Vec::with_capacity(self.depth);
@@ -295,7 +310,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
295310
}
296311
break;
297312
},
298-
Bound(i) => match cur_node.edges[i].as_mut() {
313+
GoDown(i) => match cur_node.edges[i].as_mut() {
299314
None => return None, // We're at a leaf; the key isn't in this tree
300315
Some(next_node) => {
301316
// We've found the subtree the key must be in
@@ -340,7 +355,7 @@ impl<K: Ord, V> BTree<K,V> {
340355
}
341356
Split(key, value, right) => match stack.pop() {
342357
// The last insertion triggered a split, so get the next element on the
343-
// stack to recursively insert the split node into.
358+
// stack to recursively insert the split node into.
344359
None => {
345360
// The stack was empty; we've split the root, and need to make a new one.
346361
let left = self.root.take().unwrap();
@@ -411,10 +426,9 @@ impl<K: Ord, V> Node<K,V> {
411426
fn new() -> Node<K,V> {
412427
Node {
413428
length: 0,
414-
// FIXME(Gankro): this is gross, I guess you need a macro? [None, ..capacity] uses copy
415-
keys: [None, None, None, None, None, None, None, None, None, None, None],
416-
vals: [None, None, None, None, None, None, None, None, None, None, None],
417-
edges: [None, None, None, None, None, None, None, None, None, None, None, None],
429+
keys: nones!(K, CAPACITY),
430+
vals: nones!(V, CAPACITY),
431+
edges: nones!(Box<Node<K,V>>, CAPACITY + 1),
418432
}
419433
}
420434

@@ -427,18 +441,21 @@ impl<K: Ord, V> Node<K,V> {
427441
/// `Found` will be yielded with the matching index. If it fails to find an exact match,
428442
/// `GoDown` will be yielded with the index of the subtree the key must lie in.
429443
fn search(&self, key: &K) -> SearchResult {
430-
// linear search the node's keys because we're small
431-
// FIXME(Gankro): if we ever get generic integer arguments
432-
// to support variable choices of `B`, then this should be
433-
// tuned to fall into binary search at some arbitrary level
444+
// FIXME(Gankro): Tune when to search linear or binary when B becomes configurable.
445+
// For the B configured as of this writing (B = 5), binary search was *significantly*
446+
// worse.
447+
self.search_linear(key)
448+
}
449+
450+
fn search_linear(&self, key: &K) -> SearchResult {
434451
for (i, k) in self.keys().enumerate() {
435452
match k.cmp(key) {
436453
Less => {}, // keep walkin' son, she's too small
437454
Equal => return Found(i),
438-
Greater => return Bound(i),
455+
Greater => return GoDown(i),
439456
}
440457
}
441-
Bound(self.length)
458+
GoDown(self.length)
442459
}
443460

444461
/// Make a leaf root from scratch
@@ -705,7 +722,7 @@ impl<'a, K> Iterator<&'a K> for Keys<'a, K> {
705722
}
706723

707724
/// Subroutine for removal. Takes a search stack for a key that terminates at an
708-
/// internal node, and mutates the tree and search stack to make it a search
725+
/// internal node, and mutates the tree and search stack to make it a search
709726
/// stack for that key that terminates at a leaf. This leaves the tree in an inconsistent
710727
/// state that must be repaired by the caller by removing the key in question.
711728
fn leafify_stack<K,V>(stack: &mut SearchStack<K,V>) {
@@ -743,21 +760,29 @@ fn leafify_stack<K,V>(stack: &mut SearchStack<K,V>) {
743760
/// Basically `Vec.insert(index)`. Assumes that the last element in the slice is
744761
/// somehow "empty" and can be overwritten.
745762
fn shift_and_insert<T>(slice: &mut [T], index: uint, elem: T) {
746-
// FIXME(Gankro): This should probably be a copy_memory and a write?
747-
for i in range(index, slice.len() - 1).rev() {
748-
slice.swap(i, i + 1);
763+
unsafe {
764+
let start = slice.as_mut_ptr().offset(index as int);
765+
let len = slice.len();
766+
if index < len - 1 {
767+
ptr::copy_memory(start.offset(1), start as *const _, len - index - 1);
768+
}
769+
ptr::write(start, elem);
749770
}
750-
slice[index] = elem;
751771
}
752772

753773
/// Basically `Vec.remove(index)`.
754774
fn remove_and_shift<T>(slice: &mut [Option<T>], index: uint) -> Option<T> {
755-
let result = slice[index].take();
756-
// FIXME(Gankro): This should probably be a copy_memory and write?
757-
for i in range(index, slice.len() - 1) {
758-
slice.swap(i, i + 1);
775+
unsafe {
776+
let first = slice.as_mut_ptr();
777+
let start = first.offset(index as int);
778+
let result = ptr::read(start as *const _);
779+
let len = slice.len();
780+
if len > 1 && index < len - 1 {
781+
ptr::copy_memory(start, start.offset(1) as *const _, len - index - 1);
782+
}
783+
ptr::write(first.offset((len - 1) as int), None);
784+
result
759785
}
760-
result
761786
}
762787

763788
/// Subroutine for splitting a node. Put the `SPLIT_LEN` last elements from left,
@@ -774,11 +799,11 @@ fn steal_last<T>(left: &mut[T], right: &mut[T], amount: uint) {
774799

775800
/// Subroutine for merging the contents of right into left
776801
/// Assumes left has space for all of right
777-
fn merge<T>(left: &mut[Option<T>], right: &mut[Option<T>]) {
778-
let left_len = left.len();
779-
let right_len = right.len();
780-
for i in range(0, right_len) {
781-
left[left_len - right_len + i] = right[i].take();
802+
fn merge<T>(left: &mut[T], right: &mut[T]) {
803+
let offset = left.len() - right.len();
804+
for (a,b) in left.mut_slice_from(offset).mut_iter()
805+
.zip(right.mut_iter()) {
806+
mem::swap(a, b);
782807
}
783808
}
784809

@@ -802,63 +827,60 @@ impl<K,V> Mutable for BTree<K,V> {
802827

803828

804829

805-
806-
807830
#[cfg(test)]
808831
mod test {
809-
use std::prelude::*;
810-
811832
use super::BTree;
812-
use {Map, MutableMap, Mutable, MutableSeq};
813833

814834
#[test]
815835
fn test_basic() {
816836
let mut map = BTree::new();
837+
let size = 10000u;
817838
assert_eq!(map.len(), 0);
818839

819-
for i in range(0u, 10000) {
840+
for i in range(0, size) {
820841
assert_eq!(map.swap(i, 10*i), None);
821842
assert_eq!(map.len(), i + 1);
822843
}
823844

824-
for i in range(0u, 10000) {
845+
for i in range(0, size) {
825846
assert_eq!(map.find(&i).unwrap(), &(i*10));
826847
}
827848

828-
for i in range(10000, 20000) {
849+
for i in range(size, size*2) {
829850
assert_eq!(map.find(&i), None);
830851
}
831852

832-
for i in range(0u, 10000) {
853+
for i in range(0, size) {
833854
assert_eq!(map.swap(i, 100*i), Some(10*i));
834-
assert_eq!(map.len(), 10000);
855+
assert_eq!(map.len(), size);
835856
}
836857

837-
for i in range(0u, 10000) {
858+
for i in range(0, size) {
838859
assert_eq!(map.find(&i).unwrap(), &(i*100));
839860
}
840861

841-
for i in range(0u, 5000) {
862+
for i in range(0, size/2) {
842863
assert_eq!(map.pop(&(i*2)), Some(i*200));
843-
assert_eq!(map.len(), 10000 - i - 1);
864+
assert_eq!(map.len(), size - i - 1);
844865
}
845866

846-
for i in range(0u, 5000) {
867+
for i in range(0, size/2) {
847868
assert_eq!(map.find(&(2*i)), None);
848869
assert_eq!(map.find(&(2*i+1)).unwrap(), &(i*200 + 100));
849870
}
850871

851-
for i in range(0u, 5000) {
872+
for i in range(0, size/2) {
852873
assert_eq!(map.pop(&(2*i)), None);
853874
assert_eq!(map.pop(&(2*i+1)), Some(i*200 + 100));
854-
assert_eq!(map.len(), 5000 - i - 1);
875+
assert_eq!(map.len(), size/2 - i - 1);
855876
}
856877
}
857878
}
858879

859880

860881

861882

883+
862884
#[cfg(test)]
863885
mod bench {
864886
use test::Bencher;

0 commit comments

Comments
 (0)