@@ -19,12 +19,30 @@ use core::prelude::*;
19
19
20
20
use alloc:: boxed:: Box ;
21
21
use vec:: Vec ;
22
- use core:: mem;
23
- use core:: iter :: range_inclusive ;
22
+ use core:: { mem, ptr } ;
23
+ use core:: slice :: Items ;
24
24
use { Mutable , MutableMap , Map , MutableSeq } ;
25
25
26
- /// "Order" of the B-tree, from which all other properties are derived
27
- static B : uint = 6 ;
26
+ /// Generate an array of $count `None`s of type Option<$typ>
27
+ macro_rules! nones(
28
+ ( $typ: ty, $count: expr) => (
29
+ unsafe {
30
+ let mut tmp: [ Option <$typ>, .. $count] = mem:: uninitialized( ) ;
31
+ for i in tmp. as_mut_slice( ) . mut_iter( ) {
32
+ ptr:: write( i, None ) ;
33
+ }
34
+ tmp
35
+ }
36
+ ) ;
37
+ )
38
+
39
+ /// "Order" of the B-tree, from which all other properties are derived. In experiments with
40
+ /// different values of B on a BTree<uint, uint> on 64-bit linux, `B = 5` struck the best
41
+ /// balance between search and mutation time. Lowering B improves mutation time (less array
42
+ /// shifting), and raising B improves search time (less depth and pointer following).
43
+ /// However, increasing B higher than 5 had marginal search gains compared to mutation costs.
44
+ /// This value should be re-evaluated whenever the tree is significantly refactored.
45
+ static B : uint = 5 ;
28
46
/// Maximum number of elements in a node
29
47
static CAPACITY : uint = 2 * B - 1 ;
30
48
/// Minimum number of elements in a node
@@ -34,8 +52,7 @@ static EDGE_CAPACITY: uint = CAPACITY + 1;
34
52
/// Amount to take off the tail of a node being split
35
53
static SPLIT_LEN : uint = B - 1 ;
36
54
37
- /// Represents a search path for mutating. The rawptrs here should never be
38
- /// null or dangling, and should be accessed one-at-a-time via pops.
55
+ /// Represents a search path for mutating
39
56
type SearchStack < K , V > = Vec < ( * mut Node < K , V > , uint ) > ;
40
57
41
58
/// Represents the result of an Insertion: either the item fit, or the node had to split
@@ -46,12 +63,18 @@ enum InsertionResult<K,V>{
46
63
47
64
/// Represents the result of a search for a key in a single node
48
65
enum SearchResult {
49
- Found ( uint ) , Bound ( uint ) ,
66
+ Found ( uint ) , GoDown ( uint ) ,
50
67
}
51
68
52
- /// A B-Tree Node
69
+ /// A B-Tree Node. We keep keys/edges/values separate to optimize searching for keys.
53
70
struct Node < K , V > {
54
71
length : uint ,
72
+ // FIXME(Gankro): We use Options here because there currently isn't a safe way to deal
73
+ // with partially initialized [T, ..n]'s. #16998 is one solution to this. Other alternatives
74
+ // include Vec's or heap-allocating a raw buffer of bytes, similar to HashMap's RawTable.
75
+ // However, those solutions introduce an unfortunate extra level of indirection (unless the whole
76
+ // node is inlined into this one mega-buffer). We consider this solution to be sufficient for a
77
+ // first-draft, and it has the benefit of being a nice safe starting point to optimize from.
55
78
keys : [ Option < K > , ..CAPACITY ] ,
56
79
edges : [ Option < Box < Node < K , V > > > , ..EDGE_CAPACITY ] ,
57
80
vals : [ Option < V > , ..CAPACITY ] ,
@@ -91,10 +114,10 @@ impl<K: Ord, V> Map<K,V> for BTree<K,V> {
91
114
let mut cur_node = & * * root;
92
115
loop {
93
116
match cur_node. search ( key) {
94
- Found ( i) => return cur_node. vals [ i] . as_ref ( ) , // Found the key
95
- Bound ( i) => match cur_node. edges [ i] . as_ref ( ) { // Didn't find the key
96
- None => return None , // We're a leaf, it's not in here
97
- Some ( next_node) => { // We're an internal node, search the subtree
117
+ Found ( i) => return cur_node. vals [ i] . as_ref ( ) ,
118
+ GoDown ( i) => match cur_node. edges [ i] . as_ref ( ) {
119
+ None => return None ,
120
+ Some ( next_node) => {
98
121
cur_node = & * * next_node;
99
122
continue ;
100
123
}
@@ -118,7 +141,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
118
141
let cur_node = temp_node;
119
142
match cur_node. search ( key) {
120
143
Found ( i) => return cur_node. vals [ i] . as_mut ( ) ,
121
- Bound ( i) => match cur_node. edges [ i] . as_mut ( ) {
144
+ GoDown ( i) => match cur_node. edges [ i] . as_mut ( ) {
122
145
None => return None ,
123
146
Some ( next_node) => {
124
147
temp_node = & mut * * next_node;
@@ -134,7 +157,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
134
157
// Insertion in a B-Tree is a bit complicated.
135
158
//
136
159
// First we do the same kind of search described in
137
- // `find`, but we need to maintain a stack of all the nodes/edges in our search path.
160
+ // `find`. But we need to maintain a stack of all the nodes/edges in our search path.
138
161
// If we find a match for the key we're trying to insert, just swap the vals and return the
139
162
// old ones. However, when we bottom out in a leaf, we attempt to insert our key-value pair
140
163
// at the same location we would want to follow another edge.
@@ -147,8 +170,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
147
170
//
148
171
// Note that we subtly deviate from Open Data Structures in our implementation of split.
149
172
// ODS describes inserting into the node *regardless* of its capacity, and then
150
- // splitting *afterwards* if it happens to be overfull. However, this is inefficient
151
- // (or downright impossible, depending on the design).
173
+ // splitting *afterwards* if it happens to be overfull. However, this is inefficient.
152
174
// Instead, we split beforehand, and then insert the key-value pair into the appropriate
153
175
// result node. This has two consequences:
154
176
//
@@ -169,21 +191,14 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
169
191
None
170
192
} else {
171
193
let visit_stack = {
172
- // Borrowck hack, see `find_mut`
194
+ // We need this temp_node for borrowck wrangling
173
195
let mut temp_node = & mut * * self . root . as_mut ( ) . unwrap ( ) ;
174
196
// visit_stack is a stack of rawptrs to nodes paired with indices, respectively
175
197
// representing the nodes and edges of our search path. We have to store rawptrs
176
198
// because as far as Rust is concerned, we can mutate aliased data with such a
177
- // stack. It is of course correct, but what it doesn't know is the following:
178
- //
179
- // * The nodes in the visit_stack don't move in memory (at least, don't move
180
- // in memory between now and when we've finished handling the raw pointer to it)
181
- //
182
- // * We don't mutate anything through a given ptr until we've popped and forgotten
183
- // all the ptrs after it, at which point we don't have any pointers to children of
184
- // that node
185
- //
186
- // An alternative is to take the Node boxes from their parents. This actually makes
199
+ // stack. It is of course correct, but what it doesn't know is that we will only
200
+ // be popping and using these ptrs one at a time in `insert_stack`. The alternative
201
+ // to doing this is to take the Node boxes from their parents. This actually makes
187
202
// borrowck *really* happy and everything is pretty smooth. However, this creates
188
203
// *tons* of pointless writes, and requires us to always walk all the way back to
189
204
// the root after an insertion, even if we only needed to change a leaf. Therefore,
@@ -202,7 +217,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
202
217
mem:: swap ( cur_node. keys [ i] . as_mut ( ) . unwrap ( ) , & mut key) ;
203
218
return Some ( value) ;
204
219
} ,
205
- Bound ( i) => {
220
+ GoDown ( i) => {
206
221
visit_stack. push ( ( cur_node_ptr, i) ) ;
207
222
match cur_node. edges [ i] . as_mut ( ) {
208
223
None => {
@@ -229,10 +244,10 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
229
244
230
245
// Deletion is the most complicated operation for a B-Tree.
231
246
//
232
- // First we do the same kind of search described in `find`, but we need to maintain a stack
233
- // of all the nodes/edges in our search path. If we don't find the key, then we just return
234
- // `None` and do nothing. If we do find the key, we perform two operations: remove the item,
235
- // and then possibly handle underflow.
247
+ // First we do the same kind of search described in
248
+ // `find`. But we need to maintain a stack of all the nodes/edges in our search path.
249
+ // If we don't find the key, then we just return `None` and do nothing. If we do find the
250
+ // key, we perform two operations: remove the item, and then possibly handle underflow.
236
251
//
237
252
// # removing the item
238
253
// If the node is a leaf, we just remove the item, and shift
@@ -269,7 +284,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
269
284
None
270
285
} else {
271
286
let visit_stack = {
272
- // Borrowck hack, see `find_mut`
287
+ // We need this temp_node for borrowck wrangling
273
288
let mut temp_node = & mut * * self . root . as_mut ( ) . unwrap ( ) ;
274
289
// See `pop` for a description of this variable
275
290
let mut visit_stack = Vec :: with_capacity ( self . depth ) ;
@@ -295,7 +310,7 @@ impl<K: Ord, V> MutableMap<K,V> for BTree<K,V> {
295
310
}
296
311
break ;
297
312
} ,
298
- Bound ( i) => match cur_node. edges [ i] . as_mut ( ) {
313
+ GoDown ( i) => match cur_node. edges [ i] . as_mut ( ) {
299
314
None => return None , // We're at a leaf; the key isn't in this tree
300
315
Some ( next_node) => {
301
316
// We've found the subtree the key must be in
@@ -340,7 +355,7 @@ impl<K: Ord, V> BTree<K,V> {
340
355
}
341
356
Split ( key, value, right) => match stack. pop ( ) {
342
357
// The last insertion triggered a split, so get the next element on the
343
- // stack to recursively insert the split node into.
358
+ // stack to recursively insert the split node into.
344
359
None => {
345
360
// The stack was empty; we've split the root, and need to make a new one.
346
361
let left = self . root . take ( ) . unwrap ( ) ;
@@ -411,10 +426,9 @@ impl<K: Ord, V> Node<K,V> {
411
426
fn new ( ) -> Node < K , V > {
412
427
Node {
413
428
length : 0 ,
414
- // FIXME(Gankro): this is gross, I guess you need a macro? [None, ..capacity] uses copy
415
- keys : [ None , None , None , None , None , None , None , None , None , None , None ] ,
416
- vals : [ None , None , None , None , None , None , None , None , None , None , None ] ,
417
- edges : [ None , None , None , None , None , None , None , None , None , None , None , None ] ,
429
+ keys : nones ! ( K , CAPACITY ) ,
430
+ vals : nones ! ( V , CAPACITY ) ,
431
+ edges : nones ! ( Box <Node <K , V >>, CAPACITY + 1 ) ,
418
432
}
419
433
}
420
434
@@ -427,18 +441,21 @@ impl<K: Ord, V> Node<K,V> {
427
441
/// `Found` will be yielded with the matching index. If it fails to find an exact match,
428
442
/// `GoDown` will be yielded with the index of the subtree the key must lie in.
429
443
fn search ( & self , key : & K ) -> SearchResult {
430
- // linear search the node's keys because we're small
431
- // FIXME(Gankro): if we ever get generic integer arguments
432
- // to support variable choices of `B`, then this should be
433
- // tuned to fall into binary search at some arbitrary level
444
+ // FIXME(Gankro): Tune when to search linear or binary when B becomes configurable.
445
+ // For the B configured as of this writing (B = 5), binary search was *significantly*
446
+ // worse.
447
+ self . search_linear ( key)
448
+ }
449
+
450
+ fn search_linear ( & self , key : & K ) -> SearchResult {
434
451
for ( i, k) in self . keys ( ) . enumerate ( ) {
435
452
match k. cmp ( key) {
436
453
Less => { } , // keep walkin' son, she's too small
437
454
Equal => return Found ( i) ,
438
- Greater => return Bound ( i) ,
455
+ Greater => return GoDown ( i) ,
439
456
}
440
457
}
441
- Bound ( self . length )
458
+ GoDown ( self . length )
442
459
}
443
460
444
461
/// Make a leaf root from scratch
@@ -705,7 +722,7 @@ impl<'a, K> Iterator<&'a K> for Keys<'a, K> {
705
722
}
706
723
707
724
/// Subroutine for removal. Takes a search stack for a key that terminates at an
708
- /// internal node, and mutates the tree and search stack to make it a search
725
+ /// internal node, and mutates the tree and search stack to make it a search
709
726
/// stack for that key that terminates at a leaf. This leaves the tree in an inconsistent
710
727
/// state that must be repaired by the caller by removing the key in question.
711
728
fn leafify_stack < K , V > ( stack : & mut SearchStack < K , V > ) {
@@ -743,21 +760,29 @@ fn leafify_stack<K,V>(stack: &mut SearchStack<K,V>) {
743
760
/// Basically `Vec.insert(index)`. Assumes that the last element in the slice is
744
761
/// somehow "empty" and can be overwritten.
745
762
fn shift_and_insert < T > ( slice : & mut [ T ] , index : uint , elem : T ) {
746
- // FIXME(Gankro): This should probably be a copy_memory and a write?
747
- for i in range ( index, slice. len ( ) - 1 ) . rev ( ) {
748
- slice. swap ( i, i + 1 ) ;
763
+ unsafe {
764
+ let start = slice. as_mut_ptr ( ) . offset ( index as int ) ;
765
+ let len = slice. len ( ) ;
766
+ if index < len - 1 {
767
+ ptr:: copy_memory ( start. offset ( 1 ) , start as * const _ , len - index - 1 ) ;
768
+ }
769
+ ptr:: write ( start, elem) ;
749
770
}
750
- slice[ index] = elem;
751
771
}
752
772
753
773
/// Basically `Vec.remove(index)`.
754
774
fn remove_and_shift < T > ( slice : & mut [ Option < T > ] , index : uint ) -> Option < T > {
755
- let result = slice[ index] . take ( ) ;
756
- // FIXME(Gankro): This should probably be a copy_memory and write?
757
- for i in range ( index, slice. len ( ) - 1 ) {
758
- slice. swap ( i, i + 1 ) ;
775
+ unsafe {
776
+ let first = slice. as_mut_ptr ( ) ;
777
+ let start = first. offset ( index as int ) ;
778
+ let result = ptr:: read ( start as * const _ ) ;
779
+ let len = slice. len ( ) ;
780
+ if len > 1 && index < len - 1 {
781
+ ptr:: copy_memory ( start, start. offset ( 1 ) as * const _ , len - index - 1 ) ;
782
+ }
783
+ ptr:: write ( first. offset ( ( len - 1 ) as int ) , None ) ;
784
+ result
759
785
}
760
- result
761
786
}
762
787
763
788
/// Subroutine for splitting a node. Put the `SPLIT_LEN` last elements from left,
@@ -774,11 +799,11 @@ fn steal_last<T>(left: &mut[T], right: &mut[T], amount: uint) {
774
799
775
800
/// Subroutine for merging the contents of right into left
776
801
/// Assumes left has space for all of right
777
- fn merge < T > ( left : & mut [ Option < T > ] , right : & mut [ Option < T > ] ) {
778
- let left_len = left. len ( ) ;
779
- let right_len = right . len ( ) ;
780
- for i in range ( 0 , right_len ) {
781
- left [ left_len - right_len + i ] = right [ i ] . take ( ) ;
802
+ fn merge < T > ( left : & mut [ T ] , right : & mut [ T ] ) {
803
+ let offset = left. len ( ) - right . len ( ) ;
804
+ for ( a , b ) in left . mut_slice_from ( offset ) . mut_iter ( )
805
+ . zip ( right . mut_iter ( ) ) {
806
+ mem :: swap ( a , b ) ;
782
807
}
783
808
}
784
809
@@ -802,63 +827,60 @@ impl<K,V> Mutable for BTree<K,V> {
802
827
803
828
804
829
805
-
806
-
807
830
#[ cfg( test) ]
808
831
mod test {
809
- use std:: prelude:: * ;
810
-
811
832
use super :: BTree ;
812
- use { Map , MutableMap , Mutable , MutableSeq } ;
813
833
814
834
#[ test]
815
835
fn test_basic ( ) {
816
836
let mut map = BTree :: new ( ) ;
837
+ let size = 10000 u;
817
838
assert_eq ! ( map. len( ) , 0 ) ;
818
839
819
- for i in range ( 0 u , 10000 ) {
840
+ for i in range ( 0 , size ) {
820
841
assert_eq ! ( map. swap( i, 10 * i) , None ) ;
821
842
assert_eq ! ( map. len( ) , i + 1 ) ;
822
843
}
823
844
824
- for i in range ( 0 u , 10000 ) {
845
+ for i in range ( 0 , size ) {
825
846
assert_eq ! ( map. find( & i) . unwrap( ) , & ( i* 10 ) ) ;
826
847
}
827
848
828
- for i in range ( 10000 , 20000 ) {
849
+ for i in range ( size , size * 2 ) {
829
850
assert_eq ! ( map. find( & i) , None ) ;
830
851
}
831
852
832
- for i in range ( 0 u , 10000 ) {
853
+ for i in range ( 0 , size ) {
833
854
assert_eq ! ( map. swap( i, 100 * i) , Some ( 10 * i) ) ;
834
- assert_eq ! ( map. len( ) , 10000 ) ;
855
+ assert_eq ! ( map. len( ) , size ) ;
835
856
}
836
857
837
- for i in range ( 0 u , 10000 ) {
858
+ for i in range ( 0 , size ) {
838
859
assert_eq ! ( map. find( & i) . unwrap( ) , & ( i* 100 ) ) ;
839
860
}
840
861
841
- for i in range ( 0 u , 5000 ) {
862
+ for i in range ( 0 , size/ 2 ) {
842
863
assert_eq ! ( map. pop( & ( i* 2 ) ) , Some ( i* 200 ) ) ;
843
- assert_eq ! ( map. len( ) , 10000 - i - 1 ) ;
864
+ assert_eq ! ( map. len( ) , size - i - 1 ) ;
844
865
}
845
866
846
- for i in range ( 0 u , 5000 ) {
867
+ for i in range ( 0 , size/ 2 ) {
847
868
assert_eq ! ( map. find( & ( 2 * i) ) , None ) ;
848
869
assert_eq ! ( map. find( & ( 2 * i+1 ) ) . unwrap( ) , & ( i* 200 + 100 ) ) ;
849
870
}
850
871
851
- for i in range ( 0 u , 5000 ) {
872
+ for i in range ( 0 , size/ 2 ) {
852
873
assert_eq ! ( map. pop( & ( 2 * i) ) , None ) ;
853
874
assert_eq ! ( map. pop( & ( 2 * i+1 ) ) , Some ( i* 200 + 100 ) ) ;
854
- assert_eq ! ( map. len( ) , 5000 - i - 1 ) ;
875
+ assert_eq ! ( map. len( ) , size/ 2 - i - 1 ) ;
855
876
}
856
877
}
857
878
}
858
879
859
880
860
881
861
882
883
+
862
884
#[ cfg( test) ]
863
885
mod bench {
864
886
use test:: Bencher ;
0 commit comments