From 3938af80756aeb32a894d4d3ab23f3dedeb99c72 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Sun, 31 Aug 2014 13:47:20 -0400 Subject: [PATCH 1/5] WIP btree rewrite --- src/libcollections/btree.rs | 1538 ++++++++++++++++++----------------- 1 file changed, 779 insertions(+), 759 deletions(-) diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs index f6011976b65d0..8db7cf2427a0f 100644 --- a/src/libcollections/btree.rs +++ b/src/libcollections/btree.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -7,334 +7,451 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// - -// NB. this is not deprecated for removal, just deprecating the -// current implementation. If the major pain-points are addressed -// (overuse of by-value self and .clone), this can be removed. -#![deprecated = "the current implementation is extremely inefficient, \ - prefer a HashMap, TreeMap or TrieMap"] -#![allow(deprecated)] -//! Starting implementation of a B-tree for Rust. -//! Structure inspired by Github user davidhalperin's gist. - -// A B-tree contains a root node (which contains a vector of elements), -// a length (the height of the tree), and lower and upper bounds on the -// number of elements that a given node can contain. +// This implementation is largely based on the one described in *Open Data Structures*, which +// can be freely downloaded at http://opendatastructures.org/, and whose contents are as of this +// writing (August 2014) freely licensed under the following Creative Commons Attribution +// License: [CC BY 2.5 CA](http://creativecommons.org/licenses/by/2.5/ca/). 
use core::prelude::*; use alloc::boxed::Box; -use core::fmt; -use core::fmt::Show; - -use MutableSeq; use vec::Vec; +use core::mem; +use core::iter::range_inclusive; +use {Mutable, MutableMap, Map, MutableSeq}; -#[allow(missing_doc)] -pub struct BTree { - root: Node, - len: uint, - lower_bound: uint, - upper_bound: uint -} +/// "Order" of the B-tree, from which all other properties are derived +static B: uint = 6; +/// Maximum number of elements in a node +static capacity: uint = 2 * B - 1; +/// Minimum number of elements in a node +static min_load: uint = B - 1; +/// Maximum number of children in a node +static edge_capacity: uint = capacity + 1; +/// Amount to take off the tail of a node being split +static split_len: uint = B - 1; -impl BTree { - /// Returns new `BTree` with root node (leaf) and user-supplied lower bound - /// The lower bound applies to every node except the root node. - pub fn new(k: K, v: V, lb: uint) -> BTree { - BTree { - root: Node::new_leaf(vec!(LeafElt::new(k, v))), - len: 1, - lower_bound: lb, - upper_bound: 2 * lb - } - } +/// Represents a search path for mutating +type SearchStack = Vec<(*mut Node, uint)>; - /// Helper function for `clone`: returns new BTree with supplied root node, - /// length, and lower bound. For use when the length is known already. - fn new_with_node_len(n: Node, - length: uint, - lb: uint) -> BTree { - BTree { - root: n, - len: length, - lower_bound: lb, - upper_bound: 2 * lb - } - } +/// Represents the result of an Insertion: either the item fit, or the node had to split +enum InsertionResult{ + Fit, + Split(K, V, Box>), } -// We would probably want to remove the dependence on the Clone trait in the future. -// It is here as a crutch to ensure values can be passed around through the tree's nodes -// especially during insertions and deletions. -impl BTree { - /// Returns the value of a given key, which may not exist in the tree. - /// Calls the root node's get method. 
- pub fn get(self, k: K) -> Option { - return self.root.get(k); - } - - /// An insert method that uses the `clone` method for support. - pub fn insert(mut self, k: K, v: V) -> BTree { - let (a, b) = self.root.clone().insert(k, v, self.upper_bound.clone()); - if b { - match a.clone() { - LeafNode(leaf) => { - self.root = Node::new_leaf(leaf.clone().elts); - } - BranchNode(branch) => { - self.root = Node::new_branch(branch.clone().elts, - branch.clone().rightmost_child); - } - } - } - self - } +/// A B-Tree Node +struct Node { + length: uint, + keys: [Option, ..capacity], + edges: [Option>>, ..edge_capacity], + vals: [Option, ..capacity], } -impl Clone for BTree { - fn clone(&self) -> BTree { - BTree::new_with_node_len(self.root.clone(), self.len, self.lower_bound) - } -} -impl PartialEq for BTree { - fn eq(&self, other: &BTree) -> bool { - self.root.cmp(&other.root) == Equal - } +/// A B-Tree of Order 6 +pub struct BTree{ + root: Option>>, + length: uint, + depth: uint, } -impl Eq for BTree {} - -impl PartialOrd for BTree { - fn partial_cmp(&self, other: &BTree) -> Option { - Some(self.cmp(other)) +impl BTree { + /// Make a new empty BTree + pub fn new() -> BTree { + BTree { + length: 0, + depth: 0, + root: None, + } } } -impl Ord for BTree { - /// Returns an ordering based on the root nodes of each `BTree`. - fn cmp(&self, other: &BTree) -> Ordering { - self.root.cmp(&other.root) - } -} +impl Map for BTree { + // Searching in a B-Tree is pretty straightforward. + // + // Start at the root. Try to find the key in the current node. If we find it, return it. + // If it's not in there, follow the edge *before* the smallest key larger than + // the search key. If no such key exists (they're *all* smaller), then just take the last + // edge in the node. If we're in a leaf and we don't find our key, then it's not + // in the tree. 
+ fn find(&self, key: &K) -> Option<&V> { + match self.root.as_ref() { + None => None, + Some(root) => { + let mut cur_node = &**root; + let leaf_depth = self.depth; + + 'main: for cur_depth in range_inclusive(1, leaf_depth) { + let is_leaf = leaf_depth == cur_depth; + let node_len = cur_node.length; + + // linear search the node's keys because we're small + // FIXME(Gankro): if we ever get generic integer arguments + // to support variable choices of `B`, then this should be + // tuned to fall into binary search at some arbitrary level + for i in range(0, node_len) { + match cur_node.keys[i].as_ref().unwrap().cmp(key) { + Less => {}, // keep walkin' son + Equal => return cur_node.vals[i].as_ref(), + Greater => if is_leaf { + return None + } else { + cur_node = &**cur_node.edges[i].as_ref().unwrap(); + continue 'main; + } + } + } -impl fmt::Show for BTree { - /// Returns a string representation of the `BTree`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.root.fmt(f) + // all the keys are smaller than the one we're searching for + if is_leaf { + // We're a leaf, so that's it, it's just not in here + return None + } else { + // We're an internal node, so we can always fall back to + // the "everything bigger than my keys" edge: the last one + cur_node = &**cur_node.edges[node_len].as_ref().unwrap(); + } + } + unreachable!(); + } + } } } +impl MutableMap for BTree { + // See `find` for implementation notes, this is basically a copy-paste with mut's added + fn find_mut(&mut self, key: &K) -> Option<&mut V> { + match self.root.as_mut() { + None => None, + Some(root) => { + // Borrowck hack + let mut temp_node = &mut **root; + let leaf_depth = self.depth; + + 'main: for cur_depth in range_inclusive(1, leaf_depth) { + let cur_node = temp_node; + let is_leaf = leaf_depth == cur_depth; + let node_len = cur_node.length; + + for i in range(0, node_len) { + match cur_node.keys[i].as_ref().unwrap().cmp(key) { + Less => {}, + Equal => return 
cur_node.vals[i].as_mut(), + Greater => if is_leaf { + return None + } else { + temp_node = &mut **cur_node.edges[i].as_mut().unwrap(); + continue 'main; + } + } + } -// Node types -// -// A node is either a LeafNode or a BranchNode, which contain either a Leaf or a Branch. -// Branches contain BranchElts, which contain a left child (another node) and a key-value -// pair. Branches also contain the rightmost child of the elements in the array. -// Leaves contain LeafElts, which do not have children. -enum Node { - LeafNode(Leaf), - BranchNode(Branch) -} - - -impl Node { - /// Creates a new leaf node given a vector of elements. - fn new_leaf(vec: Vec>) -> Node { - LeafNode(Leaf::new(vec)) - } - - /// Creates a new branch node given a vector of an elements and a pointer to a rightmost child. - fn new_branch(vec: Vec>, right: Box>) - -> Node { - BranchNode(Branch::new(vec, right)) - } - - /// Determines whether the given Node contains a Branch or a Leaf. - /// Used in testing. - fn is_leaf(&self) -> bool { - match self { - &LeafNode(..) => true, - &BranchNode(..) => false + if is_leaf { + return None + } else { + temp_node = &mut **cur_node.edges[node_len].as_mut().unwrap(); + } + } + unreachable!(); + } } } - /// A binary search function for Nodes. - /// Calls either the Branch's or the Leaf's bsearch function. - fn bsearch_node(&self, k: K) -> Option { - match self { - &LeafNode(ref leaf) => leaf.bsearch_leaf(k), - &BranchNode(ref branch) => branch.bsearch_branch(k) - } - } -} + // Insertion in a B-Tree is a bit complicated. + // + // First we do the same kind of search described in + // `find`. But we need to maintain a stack of all the nodes/edges in our search path. + // If we find a match for the key we're trying to insert, just swap the.vals and return the + // old ones. However, when we bottom out in a leaf, we attempt to insert our key-value pair + // at the same location we would want to follow another edge. 
+ // + // If the node has room, then this is done in the obvious way by shifting elements. However, + // if the node itself is full, we split node into two, and give its median + // key-value pair to its parent to insert the new node with. Of course, the parent may also be + // full, and insertion can propagate until we reach the root. If we reach the root, and + // it is *also* full, then we split the root and place the two nodes under a newly made root. + // + // Note that we subtly deviate from Open Data Structures in our implementation of split. + // ODS describes inserting into the node *regardless* of its capacity, and then + // splitting *afterwards* if it happens to be overfull. However, this is inefficient. + // Instead, we split beforehand, and then insert the key-value pair into the appropriate + // result node. This has two consequences: + // + // 1) While ODS produces a left node of size B-1, and a right node of size B, + // we may potentially reverse this. However, this shouldn't affect the analysis. + // + // 2) While ODS may potentially return the pair we *just* inserted after + // the split, we will never do this. Again, this shouldn't affect the analysis. + + fn swap(&mut self, mut key: K, mut value: V) -> Option { + // FIXME(Gankro): this is gross because of the lexical borrows + // if pcwalton's work pans out, this can be made much better! + // See `find` for a more idealized structure + if self.root.is_none() { + self.root = Some(Node::make_leaf_root(key, value)); + self.length += 1; + self.depth += 1; + None + } else { + let visit_stack = { + // We need this temp_node for borrowck wrangling + let mut temp_node = &mut **self.root.as_mut().unwrap(); + let leaf_depth = self.depth; + // visit_stack is a stack of rawptrs to nodes paired with indices, respectively + // representing the nodes and edges of our search path. We have to store rawptrs + // because as far as Rust is concerned, we can mutate aliased data with such a + // stack. 
It is of course correct, but what it doesn't know is that we will only + // be popping and using these ptrs one at a time in `insert_stack`. The alternative + // to doing this is to take the Node boxes from their parents. This actually makes + // borrowck *really* happy and everything is pretty smooth. However, this creates + // *tons* of pointless writes, and requires us to always walk all the way back to + // the root after an insertion, even if we only needed to change a leaf. Therefore, + // we accept this potential unsafety and complexity in the name of performance. + let mut visit_stack = Vec::with_capacity(self.depth); + + 'main: for cur_depth in range_inclusive(1, leaf_depth) { + let is_leaf = leaf_depth == cur_depth; + let cur_node = temp_node; + let node_len = cur_node.length; + let cur_node_ptr = cur_node as *mut _; + + // See `find` for a description of this search + for i in range(0, node_len) { + let cmp = cur_node.keys[i].as_ref().unwrap().cmp(&key); + match cmp { + Less => {}, // keep walkin' son, she's too small + Equal => { + // Perfect match, swap the contents and return the old ones + mem::swap(cur_node.vals[i].as_mut().unwrap(), &mut value); + mem::swap(cur_node.keys[i].as_mut().unwrap(), &mut key); + return Some(value); + }, + Greater => if is_leaf { + // We've found where to insert this key/value pair + visit_stack.push((cur_node_ptr, i)); + break 'main; + } else { + // We've found the subtree to insert this key/value pair in + visit_stack.push((cur_node_ptr, i)); + temp_node = &mut **cur_node.edges[i].as_mut().unwrap(); + continue 'main; + } + } + } -impl Node { - /// Returns the corresponding value to the provided key. - /// `get()` is called in different ways on a branch or a leaf. 
- fn get(&self, k: K) -> Option { - match *self { - LeafNode(ref leaf) => return leaf.get(k), - BranchNode(ref branch) => return branch.get(k) - } - } + // all the keys are smaller than the one we're searching for, so try to go down + // the last edge in our node + visit_stack.push((cur_node_ptr, node_len)); + + if is_leaf { + // We're at a leaf, so we're done + break 'main; + } else { + // We're at an internal node, so we need to keep going + temp_node = &mut **cur_node.edges[node_len].as_mut().unwrap(); + continue 'main; + } + } + visit_stack + }; - /// Matches on the `Node`, then performs and returns the appropriate insert method. - fn insert(self, k: K, v: V, ub: uint) -> (Node, bool) { - match self { - LeafNode(leaf) => leaf.insert(k, v, ub), - BranchNode(branch) => branch.insert(k, v, ub) + // If we get here then we need to insert a new element + self.insert_stack(visit_stack, key, value); + None } } -} -impl Clone for Node { - /// Returns a new `Node` based on whether or not it is a branch or a leaf. - fn clone(&self) -> Node { - match *self { - LeafNode(ref leaf) => { - Node::new_leaf(leaf.elts.clone()) - } - BranchNode(ref branch) => { - Node::new_branch(branch.elts.clone(), - branch.rightmost_child.clone()) - } - } - } -} + // Deletion is the most complicated operation for a B-Tree. + // + // First we do the same kind of search described in + // `find`. But we need to maintain a stack of all the nodes/edges in our search path. + // If we don't find the key, then we just return `None` and do nothing. If we do find the + // key, we perform two operations: remove the item, and then possibly handle underflow. + // + // # removing the item + // If the node is a leaf, we just remove the item, and shift + // any items after it back to fill the hole. 
+ // + // If the node is an internal node, we *swap* the item with the smallest item + // in its right subtree (which must reside in a leaf), and then revert to the leaf + // case + // + // # handling underflow + // After removing an item, there may be too few items in the node. We want nodes + // to be mostly full for efficiency, although we make an exception for the root, which + // may have as few as one item. If this is the case, we may first try to steal + // an item from our left or right neighbour. + // + // To steal from the left (right) neighbour, + // we take the largest (smallest) item and child from it. We then swap the taken item + // with the item in their mutual parent that separates them, and then insert the + // parent's item and the taken child into the first (last) index of the underflowed node. + // + // However, stealing has the possibility of underflowing our neighbour. If this is the + // case, we instead *merge* with our neighbour. This of course reduces the number of + // children in the parent. Therefore, we also steal the item that separates the now + // merged nodes, and insert it into the merged node. + // + // Merging may cause the parent to underflow. If this is the case, then we must repeat + // the underflow handling process on the parent. If merging merges the last two children + // of the root, then we replace the root with the merged node. + + fn pop(&mut self, key: &K) -> Option { + // See `swap` for a discussion of why this is gross + if self.root.is_none() { + // We're empty, get lost! 
+ None + } else { + let visit_stack = { + // We need this temp_node for borrowck wrangling + let mut temp_node = &mut **self.root.as_mut().unwrap(); + let leaf_depth = self.depth; + // See `pop` for a description of this variable + let mut visit_stack = Vec::with_capacity(self.depth); + + 'main: for cur_depth in range_inclusive(1, leaf_depth) { + let is_leaf = leaf_depth == cur_depth; + let cur_node = temp_node; + let node_len = cur_node.length; + let cur_node_ptr = cur_node as *mut _; + + // See `find` for a description of this search + for i in range(0, node_len) { + let cmp = cur_node.keys[i].as_ref().unwrap().cmp(key); + match cmp { + Less => {}, // keep walkin' son, she's too small + Equal => { + // Perfect match. Terminate the stack here, and move to the + // next phase (remove_stack). + visit_stack.push((cur_node_ptr, i)); + break 'main; + }, + Greater => if is_leaf { + // The key isn't in this tree + return None; + } else { + // We've found the subtree the key must be in + visit_stack.push((cur_node_ptr, i)); + temp_node = &mut **cur_node.edges[i].as_mut().unwrap(); + continue 'main; + } + } + } -impl PartialEq for Node { - fn eq(&self, other: &Node) -> bool { - match *self{ - BranchNode(ref branch) => { - if other.is_leaf() { - return false; - } - match *other { - BranchNode(ref branch2) => branch.cmp(branch2) == Equal, - LeafNode(..) => false - } - } - LeafNode(ref leaf) => { - match *other { - LeafNode(ref leaf2) => leaf.cmp(leaf2) == Equal, - BranchNode(..) 
=> false + // all the keys are smaller than the one we're searching for, so try to go down + // the last edge in our node + if is_leaf { + // We're at a leaf, so it's just not in here + return None; + } else { + // We're at an internal node, so we need to keep going + visit_stack.push((cur_node_ptr, node_len)); + temp_node = &mut **cur_node.edges[node_len].as_mut().unwrap(); + continue 'main; + } } - } + visit_stack + }; + + // If we get here then we found the key, let's remove it + Some(self.remove_stack(visit_stack)) } } } -impl Eq for Node {} +impl BTree { + /// insert the key and value into the top element in the stack, and if that node has to split + /// recursively insert the split contents into the stack until splits stop. Then replace the + /// stack back into the tree. + /// + /// Assumes that the stack represents a search path from the root to a leaf, and that the + /// search path is non-empty + fn insert_stack(&mut self, mut stack: SearchStack, key: K, value: V) { + self.length += 1; -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Node) -> Option { - Some(self.cmp(other)) - } -} + // Insert the key and value into the leaf at the top of the stack + let (node, index) = stack.pop().unwrap(); + let mut insertion = unsafe { + (*node).insert_as_leaf(index, key, value) + }; -impl Ord for Node { - /// Implementation of `Ord` for `Node`s. - fn cmp(&self, other: &Node) -> Ordering { - match *self { - LeafNode(ref leaf) => { - match *other { - LeafNode(ref leaf2) => leaf.cmp(leaf2), - BranchNode(_) => Less + loop { + match insertion { + Fit => { + // The last insertion went off without a hitch, no splits! We can stop + // inserting now. + return; } - } - BranchNode(ref branch) => { - match *other { - BranchNode(ref branch2) => branch.cmp(branch2), - LeafNode(_) => Greater + Split(key, value, right) => match stack.pop() { + // The last insertion triggered a split, so get the next element on the + // stack to revursively insert the split node into. 
+ None => { + // The stack was empty; we've split the root, and need to make a new one. + let left = self.root.take().unwrap(); + self.root = Some(Node::make_internal_root(key, value, left, right)); + self.depth += 1; + return; + } + Some((node, index)) => { + // The stack wasn't empty, do the insertion and recurse + unsafe { + insertion = (*node).insert_as_internal(index, key, value, right); + } + continue; + } } } } } -} - -impl fmt::Show for Node { - /// Returns a string representation of a `Node`. - /// Will iterate over the Node and show `Key: x, value: y, child: ()` - /// for all elements in the `Node`. `child` only exists if the `Node` contains - /// a branch. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - LeafNode(ref leaf) => leaf.fmt(f), - BranchNode(ref branch) => branch.fmt(f), - } - } -} - - -// A leaf is a vector with elements that contain no children. A leaf also -// does not contain a rightmost child. -struct Leaf { - elts: Vec> -} - -// Vector of values with children, plus a rightmost child (greater than all) -struct Branch { - elts: Vec>, - rightmost_child: Box>, -} + /// Remove the key and value in the top element of the stack, then handle underflows + fn remove_stack(&mut self, mut stack: SearchStack) -> V { + self.length -= 1; -impl Leaf { - /// Creates a new `Leaf` from a vector of `LeafElts`. - fn new(vec: Vec>) -> Leaf { - Leaf { - elts: vec + if stack.len() < self.depth { + // We found the key in an internal node, but that's annoying, + // so let's swap it with a leaf key and pretend we *did* find it in a leaf. + // Note that after calling this, the tree is in an inconsistent state, but will + // be consistent after we remove the swapped value just below + leafify_stack(&mut stack); } - } - /// Searches a leaf for a spot for a new element using a binary search. - /// Returns `None` if the element is already in the vector. 
- fn bsearch_leaf(&self, k: K) -> Option { - let mut high: uint = self.elts.len(); - let mut low: uint = 0; - let mut midpoint: uint = (high - low) / 2 ; - if midpoint == high { - midpoint = 0; - } + // Remove the key-value pair from the leaf, check if the node is underfull, and then + // promptly forget the leaf and ptr to avoid ownership issues + let (value, mut underflow) = unsafe { + let (node_ptr, index) = stack.pop().unwrap(); + let node = &mut *node_ptr; + let (_key, value) = node.remove_as_leaf(index); + let underflow = node.length < min_load; + (value, underflow) + }; + loop { - let order = self.elts[midpoint].key.cmp(&k); - match order { - Equal => { - return None; - } - Greater => { - if midpoint > 0 { - if self.elts[midpoint - 1].key.cmp(&k) == Less { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = midpoint / 2; - high = tmp; - continue; - } - } - else { - return Some(0); + match stack.pop() { + None => { + // We've reached the root, so no matter what, we're done. We manually access + // the root via the tree itself to avoid creating any dangling pointers. + if self.root.as_ref().unwrap().length == 0 { + // We've emptied out the root, so make its only child the new root. + // If the root is a leaf, this will set the root to `None` + self.depth -= 1; + self.root = self.root.take().unwrap().edges[0].take(); } + return value; } - Less => { - if midpoint + 1 < self.elts.len() { - if self.elts[midpoint + 1].key.cmp(&k) == Greater { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (high + low) / 2; - low = tmp; + Some((parent_ptr, index)) => { + if underflow { + // Underflow! Handle it! + unsafe { + let parent = &mut *parent_ptr; + parent.handle_underflow(index); + underflow = parent.length < min_load; } - } - else { - return Some(self.elts.len()); + } else { + // All done! 
+ return value; } } } @@ -342,578 +459,481 @@ impl Leaf { } } +impl Node { + /// Make a new node + fn new() -> Node { + Node { + length: 0, + // FIXME(Gankro): this is gross, I guess you need a macro? [None, ..capacity] uses copy + keys: [None, None, None, None, None, None, None, None, None, None, None], + vals: [None, None, None, None, None, None, None, None, None, None, None], + edges: [None, None, None, None, None, None, None, None, None, None, None, None], + } + } + -impl Leaf { - /// Returns the corresponding value to the supplied key. - fn get(&self, k: K) -> Option { - for s in self.elts.iter() { - let order = s.key.cmp(&k); - match order { - Equal => return Some(s.value.clone()), - _ => {} + /// Make a leaf root from scratch + fn make_leaf_root(key: K, value: V) -> Box> { + let mut node = box Node::new(); + node.insert_fit_as_leaf(0, key, value); + node + } + + /// Make an internal root from scratch + fn make_internal_root(key: K, value: V, left: Box>, right: Box>) + -> Box> { + let mut node = box Node::new(); + node.keys[0] = Some(key); + node.vals[0] = Some(value); + node.edges[0] = Some(left); + node.edges[1] = Some(right); + node.length = 1; + node + } + + /// Try to insert this key-value pair at the given index in this internal node + /// If the node is full, we have to split it. + fn insert_as_leaf(&mut self, index: uint, key: K, value: V) -> InsertionResult { + let len = self.length; + if len < capacity { + // The element can fit, just insert it + self.insert_fit_as_leaf(index, key, value); + Fit + } else { + // The element can't fit, this node is full. Split it into two nodes. + let (new_key, new_val, mut new_right) = self.split(); + let left_len = self.length; + + if index <= left_len { + self.insert_fit_as_leaf(index, key, value); + } else { + new_right.insert_fit_as_leaf(index - left_len - 1, key, value); } + + Split(new_key, new_val, new_right) } - return None; - } - - /// Uses `clone()` to facilitate inserting new elements into a tree. 
- fn insert(mut self, k: K, v: V, ub: uint) -> (Node, bool) { - let to_insert = LeafElt::new(k, v); - let index: Option = self.bsearch_leaf(to_insert.clone().key); - //Check index to see whether we actually inserted the element into the vector. - match index { - //If the index is None, the new element already exists in the vector. - None => { - return (Node::new_leaf(self.clone().elts), false); - } - //If there is an index, insert at that index. - Some(i) => { - if i >= self.elts.len() { - self.elts.push(to_insert.clone()); - } - else { - self.elts.insert(i, to_insert.clone()); - } + } + + /// Try to insert this key-value pair at the given index in this internal node + /// If the node is full, we have to split it. + fn insert_as_internal(&mut self, index: uint, key: K, value: V, right: Box>) + -> InsertionResult { + let len = self.length; + if len < capacity { + // The element can fit, just insert it + self.insert_fit_as_internal(index, key, value, right); + Fit + } else { + // The element can't fit, this node is full. Split it into two nodes. + let (new_key, new_val, mut new_right) = self.split(); + let left_len = self.length; + + if index <= left_len { + self.insert_fit_as_internal(index, key, value, right); + } else { + new_right.insert_fit_as_internal(index - left_len - 1, key, value, right); } + + Split(new_key, new_val, new_right) } - //If we have overfilled the vector (by making its size greater than the - //upper bound), we return a new Branch with one element and two children. 
- if self.elts.len() > ub { - let midpoint_opt = self.elts.remove(ub / 2); - let midpoint = midpoint_opt.unwrap(); - let (left_leaf, right_leaf) = self.elts.partition(|le| - le.key.cmp(&midpoint.key.clone()) - == Less); - let branch_return = Node::new_branch(vec!(BranchElt::new(midpoint.key.clone(), - midpoint.value.clone(), - box Node::new_leaf(left_leaf))), - box Node::new_leaf(right_leaf)); - return (branch_return, true); - } - (Node::new_leaf(self.elts.clone()), true) } -} -impl Clone for Leaf { - /// Returns a new `Leaf` with the same elts. - fn clone(&self) -> Leaf { - Leaf::new(self.elts.clone()) + /// We have somehow verified that this key-value pair will fit in this internal node, + /// so insert under that assumption. + fn insert_fit_as_leaf(&mut self, index: uint, key: K, value: V) { + let len = self.length; + shift_and_insert(self.keys.mut_slice_to(len + 1), index, Some(key)); + shift_and_insert(self.vals.mut_slice_to(len + 1), index, Some(value)); + self.length += 1; } -} -impl PartialEq for Leaf { - fn eq(&self, other: &Leaf) -> bool { - self.elts == other.elts + /// We have somehow verified that this key-value pair will fit in this internal node, + /// so insert under that assumption + fn insert_fit_as_internal(&mut self, index: uint, key: K, value: V, right: Box>) { + let len = self.length; + shift_and_insert(self.keys.mut_slice_to(len + 1), index, Some(key)); + shift_and_insert(self.vals.mut_slice_to(len + 1), index, Some(value)); + shift_and_insert(self.edges.mut_slice_to(len + 2), index + 1, Some(right)); + self.length += 1; } -} -impl Eq for Leaf {} + /// node is full, so split it into two nodes, and yield the middle-most key-value pair + /// because we have one too many, and our parent now has one too few + fn split(&mut self) -> (K, V, Box>) { + let mut right = box Node::new(); -impl PartialOrd for Leaf { - fn partial_cmp(&self, other: &Leaf) -> Option { - Some(self.cmp(other)) - } -} + steal_last(self.vals.as_mut_slice(), 
right.vals.as_mut_slice(), split_len); + steal_last(self.keys.as_mut_slice(), right.keys.as_mut_slice(), split_len); + // FIXME(Gankro): This isn't necessary for leaf nodes + steal_last(self.edges.as_mut_slice(), right.edges.as_mut_slice(), split_len + 1); -impl Ord for Leaf { - /// Returns an ordering based on the first element of each `Leaf`. - fn cmp(&self, other: &Leaf) -> Ordering { - if self.elts.len() > other.elts.len() { - return Greater; - } - if self.elts.len() < other.elts.len() { - return Less; - } - self.elts[0].cmp(&other.elts[0]) - } -} + // How much each node got + let left_len = capacity - split_len; + let right_len = split_len; + // But we're gonna pop one off the end of the left one, so subtract one + self.length = left_len - 1; + right.length = right_len; -impl fmt::Show for Leaf { - /// Returns a string representation of a `Leaf`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, s) in self.elts.iter().enumerate() { - if i != 0 { try!(write!(f, " // ")) } - try!(write!(f, "{}", *s)) - } - Ok(()) + // Pop it + let key = self.keys[left_len - 1].take().unwrap(); + let val = self.vals[left_len - 1].take().unwrap(); + + (key, val, right) } -} -impl Branch { - /// Creates a new `Branch` from a vector of `BranchElts` and a rightmost child (a node). 
- fn new(vec: Vec>, right: Box>) - -> Branch { - Branch { - elts: vec, - rightmost_child: right - } + /// Remove the key-value pair at the given index + fn remove_as_leaf(&mut self, index: uint) -> (K, V) { + let len = self.length; + let key = remove_and_shift(self.keys.mut_slice_to(len), index).unwrap(); + let value = remove_and_shift(self.vals.mut_slice_to(len), index).unwrap(); + self.length -= 1; + (key, value) } - fn bsearch_branch(&self, k: K) -> Option { - let mut midpoint: uint = self.elts.len() / 2; - let mut high: uint = self.elts.len(); - let mut low: uint = 0u; - if midpoint == high { - midpoint = 0u; - } - loop { - let order = self.elts[midpoint].key.cmp(&k); - match order { - Equal => { - return None; - } - Greater => { - if midpoint > 0 { - if self.elts[midpoint - 1].key.cmp(&k) == Less { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (midpoint - low) / 2; - high = tmp; - continue; - } - } - else { - return Some(0); - } - } - Less => { - if midpoint + 1 < self.elts.len() { - if self.elts[midpoint + 1].key.cmp(&k) == Greater { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (high - midpoint) / 2; - low = tmp; - } - } - else { - return Some(self.elts.len()); - } - } - } + /// Handle an underflow in this node's child. We favour handling "to the left" because we know + /// we're empty, but our neighbour can be full. Handling to the left means when we choose to + /// steal, we pop off the end of our neighbour (always fast) and "unshift" ourselves + /// (always slow, but at least faster since we know we're half-empty). + /// Handling "to the right" reverses these roles. Of course, we merge whenever possible + /// because we want dense nodes, and merging is about equal work regardless of direction. 
+ fn handle_underflow(&mut self, underflowed_child_index: uint) { + if underflowed_child_index > 0 { + self.handle_underflow_to_left(underflowed_child_index); + } else { + self.handle_underflow_to_right(underflowed_child_index); } } -} -impl Branch { - /// Returns the corresponding value to the supplied key. - /// If the key is not there, find the child that might hold it. - fn get(&self, k: K) -> Option { - for s in self.elts.iter() { - let order = s.key.cmp(&k); - match order { - Less => return s.left.get(k), - Equal => return Some(s.value.clone()), - _ => {} - } - } - self.rightmost_child.get(k) - } - - /// An insert method that uses `.clone()` for support. - fn insert(mut self, k: K, v: V, ub: uint) -> (Node, bool) { - let mut new_branch = Node::new_branch(self.clone().elts, self.clone().rightmost_child); - let mut outcome = false; - let index: Option = new_branch.bsearch_node(k.clone()); - //First, find which path down the tree will lead to the appropriate leaf - //for the key-value pair. - match index.clone() { - None => { - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - outcome); - } - Some(i) => { - if i == self.elts.len() { - let new_outcome = self.clone().rightmost_child.insert(k.clone(), - v.clone(), - ub.clone()); - new_branch = new_outcome.clone().val0(); - outcome = new_outcome.val1(); - } - else { - let new_outcome = self.elts[i].left.clone().insert(k.clone(), - v.clone(), - ub.clone()); - new_branch = new_outcome.clone().val0(); - outcome = new_outcome.val1(); - } - //Check to see whether a branch or a leaf was returned from the - //tree traversal. - match new_branch.clone() { - //If we have a leaf, we do not need to resize the tree, - //so we can return false. - LeafNode(..) 
=> { - if i == self.elts.len() { - self.rightmost_child = box new_branch.clone(); - } - else { - self.elts.get_mut(i).left = box new_branch.clone(); - } - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - true); - } - //If we have a branch, we might need to refactor the tree. - BranchNode(..) => {} - } - } - } - //If we inserted something into the tree, do the following: - if outcome { - match new_branch.clone() { - //If we have a new leaf node, integrate it into the current branch - //and return it, saying we have inserted a new element. - LeafNode(..) => { - if index.unwrap() == self.elts.len() { - self.rightmost_child = box new_branch; - } - else { - self.elts.get_mut(index.unwrap()).left = box new_branch; - } - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - true); - } - //If we have a new branch node, attempt to insert it into the tree - //as with the key-value pair, then check to see if the node is overfull. - BranchNode(branch) => { - let new_elt = branch.elts[0].clone(); - let new_elt_index = self.bsearch_branch(new_elt.clone().key); - match new_elt_index { - None => { - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - false); - } - Some(i) => { - self.elts.insert(i, new_elt); - if i + 1 >= self.elts.len() { - self.rightmost_child = branch.clone().rightmost_child; - } - else { - self.elts.get_mut(i + 1).left = - branch.clone().rightmost_child; - } - } - } - } - } - //If the current node is overfilled, create a new branch with one element - //and two children. 
- if self.elts.len() > ub { - let midpoint = self.elts.remove(ub / 2).unwrap(); - let (new_left, new_right) = self.clone().elts.partition(|le| - midpoint.key.cmp(&le.key) - == Greater); - new_branch = Node::new_branch( - vec!(BranchElt::new(midpoint.clone().key, - midpoint.clone().value, - box Node::new_branch(new_left, - midpoint.clone().left))), - box Node::new_branch(new_right, self.clone().rightmost_child)); - return (new_branch, true); + fn handle_underflow_to_left(&mut self, underflowed_child_index: uint) { + // Right is underflowed. Try to steal from left, + // but merge left and right if left is low too. + let mut left = self.edges[underflowed_child_index - 1].take().unwrap(); + let left_len = left.length; + if left_len > min_load { + // Steal! Stealing is roughly analagous to a binary tree rotation. + // In this case, we're "rotating" right. + + // Take the biggest stuff off left + let mut key = remove_and_shift(left.keys.mut_slice_to(left_len), left_len - 1); + let mut val = remove_and_shift(left.vals.mut_slice_to(left_len), left_len - 1); + let edge = remove_and_shift(left.edges.mut_slice_to(left_len + 1), left_len); + left.length -= 1; + + // Swap the parent's seperating key-value pair with left's + mem::swap(&mut self.keys[underflowed_child_index - 1], &mut key); + mem::swap(&mut self.vals[underflowed_child_index - 1], &mut val); + + // Put them at the start of right + { + let right = self.edges[underflowed_child_index].as_mut().unwrap(); + let right_len = right.length; + shift_and_insert(right.keys.mut_slice_to(right_len + 1), 0, key); + shift_and_insert(right.vals.mut_slice_to(right_len + 1), 0, val); + shift_and_insert(right.edges.mut_slice_to(right_len + 2), 0, edge); + right.length += 1; } + + // Put left back where we found it + self.edges[underflowed_child_index - 1] = Some(left); + } else { + // Merge! Left and right will be smooshed into one node, along with the key-value + // pair that seperated them in their parent. 
+ let len = self.length; + + // Permanently remove left's index, and the key-value pair that seperates + // left and right + let key = remove_and_shift(self.keys.mut_slice_to(len), underflowed_child_index - 1); + let val = remove_and_shift(self.vals.mut_slice_to(len), underflowed_child_index - 1); + remove_and_shift(self.edges.mut_slice_to(len + 1), underflowed_child_index - 1); + + self.length -= 1; + + // Give left right's stuff, and put left where right was. Note that all the indices + // in the parent have been shifted left at this point. + let right = self.edges[underflowed_child_index - 1].take().unwrap(); + left.absorb(key, val, right); + self.edges[underflowed_child_index - 1] = Some(left); } - (Node::new_branch(self.elts.clone(), self.rightmost_child.clone()), outcome) } -} -impl Clone for Branch { - /// Returns a new branch using the clone methods of the `Branch`'s internal variables. - fn clone(&self) -> Branch { - Branch::new(self.elts.clone(), self.rightmost_child.clone()) - } -} + fn handle_underflow_to_right(&mut self, underflowed_child_index: uint) { + // Left is underflowed. Try to steal from the right, + // but merge left and right if right is low too. + let mut right = self.edges[underflowed_child_index + 1].take().unwrap(); + let right_len = right.length; + if right_len > min_load { + // Steal! Stealing is roughly analagous to a binary tree rotation. + // In this case, we're "rotating" left. 
+ + // Take the smallest stuff off right + let mut key = remove_and_shift(right.keys.mut_slice_to(right_len), 0); + let mut val = remove_and_shift(right.vals.mut_slice_to(right_len), 0); + let edge = remove_and_shift(right.edges.mut_slice_to(right_len + 1), 0); + right.length -= 1; + + // Swap the parent's seperating key-value pair with right's + mem::swap(&mut self.keys[underflowed_child_index], &mut key); + mem::swap(&mut self.vals[underflowed_child_index], &mut val); + + // Put them at the end of left + { + let left = self.edges[underflowed_child_index].as_mut().unwrap(); + let left_len = left.length; + shift_and_insert(left.keys.mut_slice_to(left_len + 1), left_len, key); + shift_and_insert(left.vals.mut_slice_to(left_len + 1), left_len, val); + shift_and_insert(left.edges.mut_slice_to(left_len + 2), left_len + 1, edge); + left.length += 1; + } -impl PartialEq for Branch { - fn eq(&self, other: &Branch) -> bool { - self.elts == other.elts + // Put right back where we found it + self.edges[underflowed_child_index + 1] = Some(right); + } else { + // Merge! Left and right will be smooshed into one node, along with the key-value + // pair that seperated them in their parent. + let len = self.length; + + // Permanently remove right's index, and the key-value pair that seperates + // left and right + let key = remove_and_shift(self.keys.mut_slice_to(len), underflowed_child_index); + let val = remove_and_shift(self.vals.mut_slice_to(len), underflowed_child_index); + remove_and_shift(self.edges.mut_slice_to(len + 1), underflowed_child_index + 1); + + self.length -= 1; + + // Give left right's stuff. Note that unlike handle_underflow_to_left, we don't need + // to compensate indices, and we don't need to put left "back". 
+ let left = self.edges[underflowed_child_index].as_mut().unwrap(); + left.absorb(key, val, right); + } } -} -impl Eq for Branch {} + /// Take all the values from right, seperated by the given key and value + fn absorb(&mut self, key: Option, value: Option, mut right: Box>) { + let len = self.length; + let r_len = right.length; -impl PartialOrd for Branch { - fn partial_cmp(&self, other: &Branch) -> Option { - Some(self.cmp(other)) - } -} + self.keys[len] = key; + self.vals[len] = value; -impl Ord for Branch { - /// Compares the first elements of two `Branch`es to determine an - /// `Ordering`. - fn cmp(&self, other: &Branch) -> Ordering { - if self.elts.len() > other.elts.len() { - return Greater; - } - if self.elts.len() < other.elts.len() { - return Less; - } - self.elts[0].cmp(&other.elts[0]) - } -} + merge(self.keys.mut_slice_to(len + r_len + 1), right.keys.mut_slice_to(r_len)); + merge(self.vals.mut_slice_to(len + r_len + 1), right.vals.mut_slice_to(r_len)); + merge(self.edges.mut_slice_to(len + r_len + 2), right.edges.mut_slice_to(r_len + 1)); -impl fmt::Show for Branch { - /// Returns a string representation of a `Branch`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, s) in self.elts.iter().enumerate() { - if i != 0 { try!(write!(f, " // ")) } - try!(write!(f, "{}", *s)) - } - write!(f, " // rightmost child: ({}) ", *self.rightmost_child) + self.length += r_len + 1; } } -//A LeafElt contains no left child, but a key-value pair. -struct LeafElt { - key: K, - value: V -} +/// Subroutine for removal. Takes a search stack for a key that terminates at an +/// internal node, and makes it mutates the tree and search stack to make it a search +/// stack for that key that terminates at a leaf. This leaves the tree in an inconsistent +/// state that must be repaired by the caller by removing the key in question. 
+fn leafify_stack(stack: &mut SearchStack) { + let (node_ptr, index) = stack.pop().unwrap(); + unsafe { + // First, get ptrs to the found key-value pair + let node = &mut *node_ptr; + let (key_ptr, val_ptr) = { + (&mut node.keys[index] as *mut _, &mut node.vals[index] as *mut _) + }; -//A BranchElt has a left child in insertion to a key-value pair. -struct BranchElt { - left: Box>, - key: K, - value: V -} + // Go into the right subtree of the found key + stack.push((node_ptr, index + 1)); + let mut temp_node = &mut **node.edges[index + 1].as_mut().unwrap(); -impl LeafElt { - /// Creates a new `LeafElt` from a supplied key-value pair. - fn new(k: K, v: V) -> LeafElt { - LeafElt { - key: k, - value: v + loop { + // Walk into the smallest subtree of this + let node = temp_node; + let node_ptr = node as *mut _; + stack.push((node_ptr, 0)); + let next = node.edges[0].as_mut(); + if next.is_some() { + // This node is internal, go deeper + temp_node = &mut **next.unwrap(); + } else { + // This node is a leaf, do the swap and return + mem::swap(&mut *key_ptr, &mut node.keys[0]); + mem::swap(&mut *val_ptr, &mut node.vals[0]); + break; + } } } } -impl Clone for LeafElt { - /// Returns a new `LeafElt` by cloning the key and value. - fn clone(&self) -> LeafElt { - LeafElt::new(self.key.clone(), self.value.clone()) +/// Basically `Vec.insert(index)`. Assumes that the last element in the slice is +/// Somehow "empty" and can be overwritten. +fn shift_and_insert(slice: &mut [T], index: uint, elem: T) { + // FIXME(Gankro): This should probably be a copy_memory and a write? + for i in range(index, slice.len() - 1).rev() { + slice.swap(i, i + 1); } + slice[index] = elem; } -impl PartialEq for LeafElt { - fn eq(&self, other: &LeafElt) -> bool { - self.key == other.key && self.value == other.value +/// Basically `Vec.remove(index)`. 
+fn remove_and_shift(slice: &mut [Option], index: uint) -> Option { + let result = slice[index].take(); + // FIXME(Gankro): This should probably be a copy_memory and write? + for i in range(index, slice.len() - 1) { + slice.swap(i, i + 1); } + result } -impl Eq for LeafElt {} - -impl PartialOrd for LeafElt { - fn partial_cmp(&self, other: &LeafElt) -> Option { - Some(self.cmp(other)) +/// Subroutine for splitting a node. Put the `split_len` last elements from left, +/// (which should be full) and put them at the start of right (which should be empty) +fn steal_last(left: &mut[T], right: &mut[T], amount: uint) { + // Is there a better way to do this? + // Maybe copy_nonoverlapping_memory and then bulk None out the old Location? + let offset = left.len() - amount; + for (a,b) in left.mut_slice_from(offset).mut_iter() + .zip(right.mut_slice_to(amount).mut_iter()) { + mem::swap(a, b); } } -impl Ord for LeafElt { - /// Returns an ordering based on the keys of the `LeafElt`s. - fn cmp(&self, other: &LeafElt) -> Ordering { - self.key.cmp(&other.key) +/// Subroutine for merging the contents of right into left +/// Assumes left has space for all of right +fn merge(left: &mut[Option], right: &mut[Option]) { + let left_len = left.len(); + let right_len = right.len(); + for i in range(0, right_len) { + left[left_len - right_len + i] = right[i].take(); } } -impl fmt::Show for LeafElt { - /// Returns a string representation of a `LeafElt`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Key: {}, value: {};", self.key, self.value) +impl Collection for BTree{ + fn len(&self) -> uint { + self.length } } -impl BranchElt { - /// Creates a new `BranchElt` from a supplied key, value, and left child. 
- fn new(k: K, v: V, n: Box>) -> BranchElt { - BranchElt { - left: n, - key: k, - value: v - } +impl Mutable for BTree { + fn clear(&mut self) { + // Note that this will trigger a lot of recursive destructors, but BTrees can't get + // very deep, so we won't worry about it for now. + self.root = None; + self.depth = 0; + self.length = 0; } } -impl Clone for BranchElt { - /// Returns a new `BranchElt` by cloning the key, value, and left child. - fn clone(&self) -> BranchElt { - BranchElt::new(self.key.clone(), - self.value.clone(), - self.left.clone()) - } -} -impl PartialEq for BranchElt{ - fn eq(&self, other: &BranchElt) -> bool { - self.key == other.key && self.value == other.value - } -} -impl Eq for BranchElt{} -impl PartialOrd for BranchElt { - fn partial_cmp(&self, other: &BranchElt) -> Option { - Some(self.cmp(other)) - } -} +#[cfg(test)] +mod test { + use std::prelude::*; -impl Ord for BranchElt { - /// Fulfills `Ord` for `BranchElts`. - fn cmp(&self, other: &BranchElt) -> Ordering { - self.key.cmp(&other.key) - } -} + use super::BTree; + use {Map, MutableMap, Mutable, MutableSeq}; -impl fmt::Show for BranchElt { - /// Formats as a string containing the key, value, and child (which should recur to a - /// leaf). Consider changing in future to be more readable. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Key: {}, value: {}, (child: {})", - self.key, self.value, *self.left) - } -} + #[test] + fn test_basic() { + let mut map = BTree::new(); + assert_eq!(map.len(), 0); -#[cfg(test)] -mod test_btree { - use std::prelude::*; + for i in range(0u, 10000) { + assert_eq!(map.swap(i, 10*i), None); + assert_eq!(map.len(), i + 1); + } - use super::{BTree, Node, LeafElt}; + for i in range(0u, 10000) { + assert_eq!(map.find(&i).unwrap(), &(i*10)); + } - use MutableSeq; + for i in range(10000, 20000) { + assert_eq!(map.find(&i), None); + } - //Tests the functionality of the insert methods (which are unfinished). 
- #[test] - fn insert_test_one() { - let b = BTree::new(1i, "abc".to_string(), 2); - let is_insert = b.insert(2i, "xyz".to_string()); - assert!(is_insert.root.is_leaf()); - } + for i in range(0u, 10000) { + assert_eq!(map.swap(i, 100*i), Some(10*i)); + assert_eq!(map.len(), 10000); + } - #[test] - fn insert_test_two() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3)); - let b = BTree::new_with_node_len(n, 3, 2); - //println!("{}", b.clone().insert(4, "ddd".to_string()).to_string()); - assert!(b.insert(4, "ddd".to_string()).root.is_leaf()); - } + for i in range(0u, 10000) { + assert_eq!(map.find(&i).unwrap(), &(i*100)); + } - #[test] - fn insert_test_three() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(4i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - //println!("{}", b.clone().insert(5, "eee".to_string()).to_string()); - assert!(!b.insert(5, "eee".to_string()).root.is_leaf()); - } + for i in range(0u, 5000) { + assert_eq!(map.pop(&(i*2)), Some(i*200)); + assert_eq!(map.len(), 10000 - i - 1); + } - #[test] - fn insert_test_four() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(4i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let mut b = BTree::new_with_node_len(n, 3, 2); - b = b.clone().insert(5, "eee".to_string()); - b = b.clone().insert(6, "fff".to_string()); - b = b.clone().insert(7, "ggg".to_string()); - b = 
b.clone().insert(8, "hhh".to_string()); - b = b.clone().insert(0, "omg".to_string()); - //println!("{}", b.clone().to_string()); - assert!(!b.root.is_leaf()); - } + for i in range(0u, 5000) { + assert_eq!(map.find(&(2*i)), None); + assert_eq!(map.find(&(2*i+1)).unwrap(), &(i*200 + 100)); + } - #[test] - fn bsearch_test_one() { - let b = BTree::new(1i, "abc".to_string(), 2u); - assert_eq!(Some(1), b.root.bsearch_node(2)); + for i in range(0u, 5000) { + assert_eq!(map.pop(&(2*i)), None); + assert_eq!(map.pop(&(2*i+1)), Some(i*200 + 100)); + assert_eq!(map.len(), 5000 - i - 1); + } } +} - #[test] - fn bsearch_test_two() { - let b = BTree::new(1i, "abc".to_string(), 2u); - assert_eq!(Some(0), b.root.bsearch_node(0)); - } - #[test] - fn bsearch_test_three() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(4i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(5i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - assert_eq!(Some(2), b.root.bsearch_node(3)); - } - #[test] - fn bsearch_test_four() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(4i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(5i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - assert_eq!(Some(4), b.root.bsearch_node(800)); + +#[cfg(test)] +mod bench { + use test::Bencher; + + use super::BTree; + use deque::bench::{insert_rand_n, insert_seq_n, find_rand_n, find_seq_n}; + + // Find seq + #[bench] + pub fn insert_rand_100(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + insert_rand_n(100, &mut m, b); } - //Tests the functionality of the get method. 
- #[test] - fn get_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - let val = b.get(1); - assert_eq!(val, Some("abc".to_string())); + #[bench] + pub fn insert_rand_10_000(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + insert_rand_n(10_000, &mut m, b); } - //Tests the BTree's clone() method. - #[test] - fn btree_clone_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = b.clone(); - assert!(b.root == b2.root) + // Insert seq + #[bench] + pub fn insert_seq_100(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + insert_seq_n(100, &mut m, b); } - //Tests the BTree's cmp() method when one node is "less than" another. - #[test] - fn btree_cmp_test_less() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(2i, "bcd".to_string(), 2); - assert!(&b.cmp(&b2) == &Less) + #[bench] + pub fn insert_seq_10_000(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + insert_seq_n(10_000, &mut m, b); } - //Tests the BTree's cmp() method when two nodes are equal. - #[test] - fn btree_cmp_test_eq() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(1i, "bcd".to_string(), 2); - assert!(&b.cmp(&b2) == &Equal) + // Find rand + #[bench] + pub fn find_rand_100(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + find_rand_n(100, &mut m, b); } - //Tests the BTree's cmp() method when one node is "greater than" another. - #[test] - fn btree_cmp_test_greater() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(2i, "bcd".to_string(), 2); - assert!(&b2.cmp(&b) == &Greater) + #[bench] + pub fn find_rand_10_000(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + find_rand_n(10_000, &mut m, b); } - //Tests the BTree's to_string() method. 
- #[test] - fn btree_tostr_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - assert_eq!(b.to_string(), "Key: 1, value: abc;".to_string()) + // Find seq + #[bench] + pub fn find_seq_100(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + find_seq_n(100, &mut m, b); } -} + #[bench] + pub fn find_seq_10_000(b: &mut Bencher) { + let mut m : BTree = BTree::new(); + find_seq_n(10_000, &mut m, b); + } +} \ No newline at end of file From 9d27059bf4fe6492603d2151a06c8c079814655a Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Wed, 3 Sep 2014 11:30:39 -0400 Subject: [PATCH 2/5] fix constants --- src/libcollections/btree.rs | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs index 8db7cf2427a0f..fdf4b818def15 100644 --- a/src/libcollections/btree.rs +++ b/src/libcollections/btree.rs @@ -24,13 +24,13 @@ use {Mutable, MutableMap, Map, MutableSeq}; /// "Order" of the B-tree, from which all other properties are derived static B: uint = 6; /// Maximum number of elements in a node -static capacity: uint = 2 * B - 1; +static CAPACITY: uint = 2 * B - 1; /// Minimum number of elements in a node -static min_load: uint = B - 1; +static MIN_LOAD: uint = B - 1; /// Maximum number of children in a node -static edge_capacity: uint = capacity + 1; +static EDGE_CAPACITY: uint = CAPACITY + 1; /// Amount to take off the tail of a node being split -static split_len: uint = B - 1; +static SPLIT_LEN: uint = B - 1; /// Represents a search path for mutating type SearchStack = Vec<(*mut Node, uint)>; @@ -44,9 +44,9 @@ enum InsertionResult{ /// A B-Tree Node struct Node { length: uint, - keys: [Option, ..capacity], - edges: [Option>>, ..edge_capacity], - vals: [Option, ..capacity], + keys: [Option, ..CAPACITY], + edges: [Option>>, ..EDGE_CAPACITY], + vals: [Option, ..CAPACITY], } @@ -424,7 +424,7 @@ impl BTree { let (node_ptr, index) = stack.pop().unwrap(); let node = 
&mut *node_ptr; let (_key, value) = node.remove_as_leaf(index); - let underflow = node.length < min_load; + let underflow = node.length < MIN_LOAD; (value, underflow) }; @@ -447,7 +447,7 @@ impl BTree { unsafe { let parent = &mut *parent_ptr; parent.handle_underflow(index); - underflow = parent.length < min_load; + underflow = parent.length < MIN_LOAD; } } else { // All done! @@ -495,7 +495,7 @@ impl Node { /// If the node is full, we have to split it. fn insert_as_leaf(&mut self, index: uint, key: K, value: V) -> InsertionResult { let len = self.length; - if len < capacity { + if len < CAPACITY { // The element can fit, just insert it self.insert_fit_as_leaf(index, key, value); Fit @@ -519,7 +519,7 @@ impl Node { fn insert_as_internal(&mut self, index: uint, key: K, value: V, right: Box>) -> InsertionResult { let len = self.length; - if len < capacity { + if len < CAPACITY { // The element can fit, just insert it self.insert_fit_as_internal(index, key, value, right); Fit @@ -562,14 +562,14 @@ impl Node { fn split(&mut self) -> (K, V, Box>) { let mut right = box Node::new(); - steal_last(self.vals.as_mut_slice(), right.vals.as_mut_slice(), split_len); - steal_last(self.keys.as_mut_slice(), right.keys.as_mut_slice(), split_len); + steal_last(self.vals.as_mut_slice(), right.vals.as_mut_slice(), SPLIT_LEN); + steal_last(self.keys.as_mut_slice(), right.keys.as_mut_slice(), SPLIT_LEN); // FIXME(Gankro): This isn't necessary for leaf nodes - steal_last(self.edges.as_mut_slice(), right.edges.as_mut_slice(), split_len + 1); + steal_last(self.edges.as_mut_slice(), right.edges.as_mut_slice(), SPLIT_LEN + 1); // How much each node got - let left_len = capacity - split_len; - let right_len = split_len; + let left_len = CAPACITY - SPLIT_LEN; + let right_len = SPLIT_LEN; // But we're gonna pop one off the end of the left one, so subtract one self.length = left_len - 1; @@ -611,7 +611,7 @@ impl Node { // but merge left and right if left is low too. 
let mut left = self.edges[underflowed_child_index - 1].take().unwrap(); let left_len = left.length; - if left_len > min_load { + if left_len > MIN_LOAD { // Steal! Stealing is roughly analagous to a binary tree rotation. // In this case, we're "rotating" right. @@ -663,7 +663,7 @@ impl Node { // but merge left and right if right is low too. let mut right = self.edges[underflowed_child_index + 1].take().unwrap(); let right_len = right.length; - if right_len > min_load { + if right_len > MIN_LOAD { // Steal! Stealing is roughly analagous to a binary tree rotation. // In this case, we're "rotating" left. @@ -781,7 +781,7 @@ fn remove_and_shift(slice: &mut [Option], index: uint) -> Option { result } -/// Subroutine for splitting a node. Put the `split_len` last elements from left, +/// Subroutine for splitting a node. Put the `SPLIT_LEN` last elements from left, /// (which should be full) and put them at the start of right (which should be empty) fn steal_last(left: &mut[T], right: &mut[T], amount: uint) { // Is there a better way to do this? From d2299807a2d8f1b2a0575c2dc1a3589ffffb8c6c Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Thu, 4 Sep 2014 20:39:12 -0400 Subject: [PATCH 3/5] tons of doc fixes, refactor search into a method --- src/libcollections/btree.rs | 287 +++++++++++++++++------------------- 1 file changed, 134 insertions(+), 153 deletions(-) diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs index fdf4b818def15..0f1c0181b4bb4 100644 --- a/src/libcollections/btree.rs +++ b/src/libcollections/btree.rs @@ -1,4 +1,4 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -8,8 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// This implementation is largely based on the one described in *Open Data Structures*, which -// can be freely downloaded at http://opendatastructures.org/, and whose contents are as of this +// This implementation is largely based on the high-level description and analysis of B-Trees +// found in *Open Data Structures* (ODS). Although our implementation does not use any of +// the source found in ODS, if one wishes to review the high-level design of this structure, it +// can be freely downloaded at http://opendatastructures.org/. Its contents are as of this // writing (August 2014) freely licensed under the following Creative Commons Attribution // License: [CC BY 2.5 CA](http://creativecommons.org/licenses/by/2.5/ca/). @@ -32,7 +34,8 @@ static EDGE_CAPACITY: uint = CAPACITY + 1; /// Amount to take off the tail of a node being split static SPLIT_LEN: uint = B - 1; -/// Represents a search path for mutating +/// Represents a search path for mutating. The rawptrs here should never be +/// null or dangling, and should be accessed one-at-a-time via pops. 
type SearchStack = Vec<(*mut Node, uint)>; /// Represents the result of an Insertion: either the item fit, or the node had to split @@ -41,6 +44,11 @@ enum InsertionResult{ Split(K, V, Box>), } +/// Represents the result of a search for a key in a single node +enum SearchResult { + Found(uint), Bound(uint), +} + /// A B-Tree Node struct Node { length: uint, @@ -81,40 +89,18 @@ impl Map for BTree { None => None, Some(root) => { let mut cur_node = &**root; - let leaf_depth = self.depth; - - 'main: for cur_depth in range_inclusive(1, leaf_depth) { - let is_leaf = leaf_depth == cur_depth; - let node_len = cur_node.length; - - // linear search the node's keys because we're small - // FIXME(Gankro): if we ever get generic integer arguments - // to support variable choices of `B`, then this should be - // tuned to fall into binary search at some arbitrary level - for i in range(0, node_len) { - match cur_node.keys[i].as_ref().unwrap().cmp(key) { - Less => {}, // keep walkin' son - Equal => return cur_node.vals[i].as_ref(), - Greater => if is_leaf { - return None - } else { - cur_node = &**cur_node.edges[i].as_ref().unwrap(); - continue 'main; + loop { + match cur_node.search(key) { + Found(i) => return cur_node.vals[i].as_ref(), // Found the key + Bound(i) => match cur_node.edges[i].as_ref() { // Didn't find the key + None => return None, // We're a leaf, it's not in here + Some(next_node) => { // We're an internal node, search the subtree + cur_node = &**next_node; + continue; } } } - - // all the keys are smaller than the one we're searching for - if is_leaf { - // We're a leaf, so that's it, it's just not in here - return None - } else { - // We're an internal node, so we can always fall back to - // the "everything bigger than my keys" edge: the last one - cur_node = &**cur_node.edges[node_len].as_ref().unwrap(); - } } - unreachable!(); } } } @@ -126,35 +112,21 @@ impl MutableMap for BTree { match self.root.as_mut() { None => None, Some(root) => { - // Borrowck hack + 
// temp_node is a Borrowck hack for having a mutable value outlive a loop iteration let mut temp_node = &mut **root; - let leaf_depth = self.depth; - - 'main: for cur_depth in range_inclusive(1, leaf_depth) { + loop { let cur_node = temp_node; - let is_leaf = leaf_depth == cur_depth; - let node_len = cur_node.length; - - for i in range(0, node_len) { - match cur_node.keys[i].as_ref().unwrap().cmp(key) { - Less => {}, - Equal => return cur_node.vals[i].as_mut(), - Greater => if is_leaf { - return None - } else { - temp_node = &mut **cur_node.edges[i].as_mut().unwrap(); - continue 'main; + match cur_node.search(key) { + Found(i) => return cur_node.vals[i].as_mut(), + Bound(i) => match cur_node.edges[i].as_mut() { + None => return None, + Some(next_node) => { + temp_node = &mut **next_node; + continue; } } } - - if is_leaf { - return None - } else { - temp_node = &mut **cur_node.edges[node_len].as_mut().unwrap(); - } } - unreachable!(); } } } @@ -162,7 +134,7 @@ impl MutableMap for BTree { // Insertion in a B-Tree is a bit complicated. // // First we do the same kind of search described in - // `find`. But we need to maintain a stack of all the nodes/edges in our search path. + // `find`, but we need to maintain a stack of all the nodes/edges in our search path. // If we find a match for the key we're trying to insert, just swap the.vals and return the // old ones. However, when we bottom out in a leaf, we attempt to insert our key-value pair // at the same location we would want to follow another edge. @@ -175,7 +147,8 @@ impl MutableMap for BTree { // // Note that we subtly deviate from Open Data Structures in our implementation of split. // ODS describes inserting into the node *regardless* of its capacity, and then - // splitting *afterwards* if it happens to be overfull. However, this is inefficient. + // splitting *afterwards* if it happens to be overfull. However, this is inefficient + // (or downright impossible, depending on the design). 
// Instead, we split beforehand, and then insert the key-value pair into the appropriate // result node. This has two consequences: // @@ -186,8 +159,8 @@ impl MutableMap for BTree { // the split, we will never do this. Again, this shouldn't effect the analysis. fn swap(&mut self, mut key: K, mut value: V) -> Option { - // FIXME(Gankro): this is gross because of the lexical borrows - // if pcwalton's work pans out, this can be made much better! + // FIXME(Gankro): this is gross because of lexical borrows. + // If pcwalton's work pans out, this can be made much better! // See `find` for a more idealized structure if self.root.is_none() { self.root = Some(Node::make_leaf_root(key, value)); @@ -196,63 +169,54 @@ impl MutableMap for BTree { None } else { let visit_stack = { - // We need this temp_node for borrowck wrangling + // Borrowck hack, see `find_mut` let mut temp_node = &mut **self.root.as_mut().unwrap(); - let leaf_depth = self.depth; // visit_stack is a stack of rawptrs to nodes paired with indices, respectively // representing the nodes and edges of our search path. We have to store rawptrs // because as far as Rust is concerned, we can mutate aliased data with such a - // stack. It is of course correct, but what it doesn't know is that we will only - // be popping and using these ptrs one at a time in `insert_stack`. The alternative - // to doing this is to take the Node boxes from their parents. This actually makes + // stack. It is of course correct, but what it doesn't know is the following: + // + // * The nodes in the visit_stack don't move in memory (at least, don't move + // in memory between now and when we've finished handling the raw pointer to it) + // + // * We don't mutate anything through a given ptr until we've popped and forgotten + // all the ptrs after it, at which point we don't have any pointers to children of + // that node + // + // An alternative is to take the Node boxes from their parents. 
This actually makes // borrowck *really* happy and everything is pretty smooth. However, this creates // *tons* of pointless writes, and requires us to always walk all the way back to // the root after an insertion, even if we only needed to change a leaf. Therefore, // we accept this potential unsafety and complexity in the name of performance. let mut visit_stack = Vec::with_capacity(self.depth); - 'main: for cur_depth in range_inclusive(1, leaf_depth) { - let is_leaf = leaf_depth == cur_depth; + loop { let cur_node = temp_node; - let node_len = cur_node.length; let cur_node_ptr = cur_node as *mut _; // See `find` for a description of this search - for i in range(0, node_len) { - let cmp = cur_node.keys[i].as_ref().unwrap().cmp(&key); - match cmp { - Less => {}, // keep walkin' son, she's too small - Equal => { - // Perfect match, swap the contents and return the old ones - mem::swap(cur_node.vals[i].as_mut().unwrap(), &mut value); - mem::swap(cur_node.keys[i].as_mut().unwrap(), &mut key); - return Some(value); - }, - Greater => if is_leaf { - // We've found where to insert this key/value pair - visit_stack.push((cur_node_ptr, i)); - break 'main; - } else { - // We've found the subtree to insert this key/value pair in - visit_stack.push((cur_node_ptr, i)); - temp_node = &mut **cur_node.edges[i].as_mut().unwrap(); - continue 'main; + match cur_node.search(&key) { + Found(i) => { + // Perfect match, swap the contents and return the old ones + mem::swap(cur_node.vals[i].as_mut().unwrap(), &mut value); + mem::swap(cur_node.keys[i].as_mut().unwrap(), &mut key); + return Some(value); + }, + Bound(i) => { + visit_stack.push((cur_node_ptr, i)); + match cur_node.edges[i].as_mut() { + None => { + // We've found where to insert this key/value pair + break; + } + Some(next_node) => { + // We've found the subtree to insert this key/value pair in + temp_node = &mut **next_node; + continue; + } } } } - - // all the keys are smaller than the one we're searching for, so try to go 
down - // the last edge in our node - visit_stack.push((cur_node_ptr, node_len)); - - if is_leaf { - // We're at a leaf, so we're done - break 'main; - } else { - // We're at an internal node, so we need to keep going - temp_node = &mut **cur_node.edges[node_len].as_mut().unwrap(); - continue 'main; - } } visit_stack }; @@ -265,10 +229,10 @@ impl MutableMap for BTree { // Deletion is the most complicated operation for a B-Tree. // - // First we do the same kind of search described in - // `find`. But we need to maintain a stack of all the nodes/edges in our search path. - // If we don't find the key, then we just return `None` and do nothing. If we do find the - // key, we perform two operations: remove the item, and then possibly handle underflow. + // First we do the same kind of search described in `find`, but we need to maintain a stack + // of all the nodes/edges in our search path. If we don't find the key, then we just return + // `None` and do nothing. If we do find the key, we perform two operations: remove the item, + // and then possibly handle underflow. // // # removing the item // If the node is a leaf, we just remove the item, and shift @@ -305,52 +269,42 @@ impl MutableMap for BTree { None } else { let visit_stack = { - // We need this temp_node for borrowck wrangling + // Borrowck hack, see `find_mut` let mut temp_node = &mut **self.root.as_mut().unwrap(); - let leaf_depth = self.depth; // See `pop` for a description of this variable let mut visit_stack = Vec::with_capacity(self.depth); - 'main: for cur_depth in range_inclusive(1, leaf_depth) { - let is_leaf = leaf_depth == cur_depth; + loop { let cur_node = temp_node; - let node_len = cur_node.length; let cur_node_ptr = cur_node as *mut _; // See `find` for a description of this search - for i in range(0, node_len) { - let cmp = cur_node.keys[i].as_ref().unwrap().cmp(key); - match cmp { - Less => {}, // keep walkin' son, she's too small - Equal => { - // Perfect match. 
Terminate the stack here, and move to the - // next phase (remove_stack). - visit_stack.push((cur_node_ptr, i)); - break 'main; - }, - Greater => if is_leaf { - // The key isn't in this tree - return None; - } else { + match cur_node.search(key) { + Found(i) => { + // Perfect match. Terminate the stack here, and move to the + // next phase (remove_stack). + visit_stack.push((cur_node_ptr, i)); + + if cur_node.edges[i].is_some() { + // We found the key in an internal node, but that's annoying, + // so let's swap it with a leaf key and pretend we *did* find + // it in a leaf. Note that after calling this, the tree is in an + // inconsistent state, but will be consistent after we remove the + // swapped value in `remove_stack` + leafify_stack(&mut visit_stack); + } + break; + }, + Bound(i) => match cur_node.edges[i].as_mut() { + None => return None, // We're at a leaf; the key isn't in this tree + Some(next_node) => { // We've found the subtree the key must be in visit_stack.push((cur_node_ptr, i)); - temp_node = &mut **cur_node.edges[i].as_mut().unwrap(); - continue 'main; + temp_node = &mut **next_node; + continue; } } } - - // all the keys are smaller than the one we're searching for, so try to go down - // the last edge in our node - if is_leaf { - // We're at a leaf, so it's just not in here - return None; - } else { - // We're at an internal node, so we need to keep going - visit_stack.push((cur_node_ptr, node_len)); - temp_node = &mut **cur_node.edges[node_len].as_mut().unwrap(); - continue 'main; - } } visit_stack }; @@ -361,7 +315,7 @@ impl MutableMap for BTree { } } -impl BTree { +impl BTree { /// insert the key and value into the top element in the stack, and if that node has to split /// recursively insert the split contents into the stack until splits stop. Then replace the /// stack back into the tree. 
@@ -386,7 +340,7 @@ impl BTree { } Split(key, value, right) => match stack.pop() { // The last insertion triggered a split, so get the next element on the - // stack to revursively insert the split node into. + // stack to recursively insert the split node into. None => { // The stack was empty; we've split the root, and need to make a new one. let left = self.root.take().unwrap(); @@ -406,18 +360,11 @@ impl BTree { } } - /// Remove the key and value in the top element of the stack, then handle underflows + /// Remove the key and value in the top element of the stack, then handle underflows. + /// Assumes the stack represents a search path from the root to a leaf. fn remove_stack(&mut self, mut stack: SearchStack) -> V { self.length -= 1; - if stack.len() < self.depth { - // We found the key in an internal node, but that's annoying, - // so let's swap it with a leaf key and pretend we *did* find it in a leaf. - // Note that after calling this, the tree is in an inconsistent state, but will - // be consistent after we remove the swapped value just below - leafify_stack(&mut stack); - } - // Remove the key-value pair from the leaf, check if the node is underfull, and then // promptly forget the leaf and ptr to avoid ownership issues let (value, mut underflow) = unsafe { @@ -459,7 +406,7 @@ impl BTree { } } -impl Node { +impl Node { /// Make a new node fn new() -> Node { Node { @@ -471,6 +418,28 @@ impl Node { } } + /// An iterator for the keys of this node + fn keys<'a>(&'a self) -> Keys<'a, K> { + Keys{ it: self.keys.iter() } + } + + /// Searches for the given key in the node. If it finds an exact match, + /// `Found` will be yielded with the matching index. If it fails to find an exact match, + /// `Bound` will be yielded with the index of the subtree the key must lie in. 
+ fn search(&self, key: &K) -> SearchResult { + // linear search the node's keys because we're small + // FIXME(Gankro): if we ever get generic integer arguments + // to support variable choices of `B`, then this should be + // tuned to fall into binary search at some arbitrary level + for (i, k) in self.keys().enumerate() { + match k.cmp(key) { + Less => {}, // keep walkin' son, she's too small + Equal => return Found(i), + Greater => return Bound(i), + } + } + Bound(self.length) + } /// Make a leaf root from scratch fn make_leaf_root(key: K, value: V) -> Box> { @@ -725,8 +694,18 @@ impl Node { } } +struct Keys<'a, K>{ + it: Items<'a, Option> +} + +impl<'a, K> Iterator<&'a K> for Keys<'a, K> { + fn next(&mut self) -> Option<&'a K> { + self.it.next().and_then(|x| x.as_ref()) + } +} + /// Subroutine for removal. Takes a search stack for a key that terminates at an -/// internal node, and makes it mutates the tree and search stack to make it a search +/// internal node, and mutates the tree and search stack to make it a search /// stack for that key that terminates at a leaf. This leaves the tree in an inconsistent /// state that must be repaired by the caller by removing the key in question. fn leafify_stack(stack: &mut SearchStack) { @@ -814,8 +793,8 @@ impl Mutable for BTree { // Note that this will trigger a lot of recursive destructors, but BTrees can't get // very deep, so we won't worry about it for now. 
self.root = None; - self.depth = 0; self.length = 0; + self.depth = 0; } } @@ -823,6 +802,8 @@ impl Mutable for BTree { + + #[cfg(test)] mod test { use std::prelude::*; @@ -936,4 +917,4 @@ mod bench { let mut m : BTree = BTree::new(); find_seq_n(10_000, &mut m, b); } -} \ No newline at end of file +} From c68f35658dff6447b245432224474ea96d230c48 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Sun, 7 Sep 2014 13:17:48 -0400 Subject: [PATCH 4/5] major btree refactor --- src/libcollections/btree.rs | 174 ++++++++++++++++++++---------------- 1 file changed, 98 insertions(+), 76 deletions(-) diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs index 0f1c0181b4bb4..6ce3291e72bb3 100644 --- a/src/libcollections/btree.rs +++ b/src/libcollections/btree.rs @@ -19,12 +19,30 @@ use core::prelude::*; use alloc::boxed::Box; use vec::Vec; -use core::mem; -use core::iter::range_inclusive; +use core::{mem, ptr}; +use core::slice::Items; use {Mutable, MutableMap, Map, MutableSeq}; -/// "Order" of the B-tree, from which all other properties are derived -static B: uint = 6; +/// Generate an array of None<$typ>'s of size $count +macro_rules! nones( + ($typ: ty, $count: expr) => ( + unsafe { + let mut tmp: [Option<$typ>, .. $count] = mem::uninitialized(); + for i in tmp.as_mut_slice().mut_iter() { + ptr::write(i, None); + } + tmp + } + ); +) + +/// "Order" of the B-tree, from which all other properties are derived. In experiments with +/// different values of B on a BTree on 64-bit linux, `B = 5` struck the best +/// balance between search and mutation time. Lowering B improves mutation time (less array +/// shifting), and raising B improves search time (less depth and pointer following). +/// However, increasing B higher than 5 had marginal search gains compared to mutation costs. +/// This value should be re-evaluated whenever the tree is significantly refactored. 
+static B: uint = 5; /// Maximum number of elements in a node static CAPACITY: uint = 2 * B - 1; /// Minimum number of elements in a node @@ -34,8 +52,7 @@ static EDGE_CAPACITY: uint = CAPACITY + 1; /// Amount to take off the tail of a node being split static SPLIT_LEN: uint = B - 1; -/// Represents a search path for mutating. The rawptrs here should never be -/// null or dangling, and should be accessed one-at-a-time via pops. +/// Represents a search path for mutating type SearchStack = Vec<(*mut Node, uint)>; /// Represents the result of an Insertion: either the item fit, or the node had to split @@ -46,12 +63,18 @@ enum InsertionResult{ /// Represents the result of a search for a key in a single node enum SearchResult { - Found(uint), Bound(uint), + Found(uint), GoDown(uint), } -/// A B-Tree Node +/// A B-Tree Node. We keep keys/edges/values separate to optimize searching for keys. struct Node { length: uint, + // FIXME(Gankro): We use Options here because there currently isn't a safe way to deal + // with partially initialized [T, ..n]'s. #16998 is one solution to this. Other alternatives + // include Vec's or heap-allocating a raw buffer of bytes, similar to HashMap's RawTable. + // However, those solutions introduce an unfortunate extra of indirection (unless the whole + // node is inlined into this one mega-buffer). We consider this solution to be sufficient for a + // first-draft, and it has the benefit of being a nice safe starting point to optimize from. 
keys: [Option, ..CAPACITY], edges: [Option>>, ..EDGE_CAPACITY], vals: [Option, ..CAPACITY], @@ -91,10 +114,10 @@ impl Map for BTree { let mut cur_node = &**root; loop { match cur_node.search(key) { - Found(i) => return cur_node.vals[i].as_ref(), // Found the key - Bound(i) => match cur_node.edges[i].as_ref() { // Didn't find the key - None => return None, // We're a leaf, it's not in here - Some(next_node) => { // We're an internal node, search the subtree + Found(i) => return cur_node.vals[i].as_ref(), + GoDown(i) => match cur_node.edges[i].as_ref() { + None => return None, + Some(next_node) => { cur_node = &**next_node; continue; } @@ -118,7 +141,7 @@ impl MutableMap for BTree { let cur_node = temp_node; match cur_node.search(key) { Found(i) => return cur_node.vals[i].as_mut(), - Bound(i) => match cur_node.edges[i].as_mut() { + GoDown(i) => match cur_node.edges[i].as_mut() { None => return None, Some(next_node) => { temp_node = &mut **next_node; @@ -134,7 +157,7 @@ impl MutableMap for BTree { // Insertion in a B-Tree is a bit complicated. // // First we do the same kind of search described in - // `find`, but we need to maintain a stack of all the nodes/edges in our search path. + // `find`. But we need to maintain a stack of all the nodes/edges in our search path. // If we find a match for the key we're trying to insert, just swap the.vals and return the // old ones. However, when we bottom out in a leaf, we attempt to insert our key-value pair // at the same location we would want to follow another edge. @@ -147,8 +170,7 @@ impl MutableMap for BTree { // // Note that we subtly deviate from Open Data Structures in our implementation of split. // ODS describes inserting into the node *regardless* of its capacity, and then - // splitting *afterwards* if it happens to be overfull. However, this is inefficient - // (or downright impossible, depending on the design). + // splitting *afterwards* if it happens to be overfull. However, this is inefficient. 
// Instead, we split beforehand, and then insert the key-value pair into the appropriate // result node. This has two consequences: // @@ -169,21 +191,14 @@ impl MutableMap for BTree { None } else { let visit_stack = { - // Borrowck hack, see `find_mut` + // We need this temp_node for borrowck wrangling let mut temp_node = &mut **self.root.as_mut().unwrap(); // visit_stack is a stack of rawptrs to nodes paired with indices, respectively // representing the nodes and edges of our search path. We have to store rawptrs // because as far as Rust is concerned, we can mutate aliased data with such a - // stack. It is of course correct, but what it doesn't know is the following: - // - // * The nodes in the visit_stack don't move in memory (at least, don't move - // in memory between now and when we've finished handling the raw pointer to it) - // - // * We don't mutate anything through a given ptr until we've popped and forgotten - // all the ptrs after it, at which point we don't have any pointers to children of - // that node - // - // An alternative is to take the Node boxes from their parents. This actually makes + // stack. It is of course correct, but what it doesn't know is that we will only + // be popping and using these ptrs one at a time in `insert_stack`. The alternative + // to doing this is to take the Node boxes from their parents. This actually makes // borrowck *really* happy and everything is pretty smooth. However, this creates // *tons* of pointless writes, and requires us to always walk all the way back to // the root after an insertion, even if we only needed to change a leaf. Therefore, @@ -202,7 +217,7 @@ impl MutableMap for BTree { mem::swap(cur_node.keys[i].as_mut().unwrap(), &mut key); return Some(value); }, - Bound(i) => { + GoDown(i) => { visit_stack.push((cur_node_ptr, i)); match cur_node.edges[i].as_mut() { None => { @@ -229,10 +244,10 @@ impl MutableMap for BTree { // Deletion is the most complicated operation for a B-Tree. 
// - // First we do the same kind of search described in `find`, but we need to maintain a stack - // of all the nodes/edges in our search path. If we don't find the key, then we just return - // `None` and do nothing. If we do find the key, we perform two operations: remove the item, - // and then possibly handle underflow. + // First we do the same kind of search described in + // `find`. But we need to maintain a stack of all the nodes/edges in our search path. + // If we don't find the key, then we just return `None` and do nothing. If we do find the + // key, we perform two operations: remove the item, and then possibly handle underflow. // // # removing the item // If the node is a leaf, we just remove the item, and shift @@ -269,7 +284,7 @@ impl MutableMap for BTree { None } else { let visit_stack = { - // Borrowck hack, see `find_mut` + // We need this temp_node for borrowck wrangling let mut temp_node = &mut **self.root.as_mut().unwrap(); // See `pop` for a description of this variable let mut visit_stack = Vec::with_capacity(self.depth); @@ -295,7 +310,7 @@ impl MutableMap for BTree { } break; }, - Bound(i) => match cur_node.edges[i].as_mut() { + GoDown(i) => match cur_node.edges[i].as_mut() { None => return None, // We're at a leaf; the key isn't in this tree Some(next_node) => { // We've found the subtree the key must be in @@ -340,7 +355,7 @@ impl BTree { } Split(key, value, right) => match stack.pop() { // The last insertion triggered a split, so get the next element on the - // stack to recursively insert the split node into. + // stack to revursively insert the split node into. None => { // The stack was empty; we've split the root, and need to make a new one. let left = self.root.take().unwrap(); @@ -411,10 +426,9 @@ impl Node { fn new() -> Node { Node { length: 0, - // FIXME(Gankro): this is gross, I guess you need a macro? 
[None, ..capacity] uses copy - keys: [None, None, None, None, None, None, None, None, None, None, None], - vals: [None, None, None, None, None, None, None, None, None, None, None], - edges: [None, None, None, None, None, None, None, None, None, None, None, None], + keys: nones!(K, CAPACITY), + vals: nones!(V, CAPACITY), + edges: nones!(Box>, CAPACITY + 1), } } @@ -427,18 +441,21 @@ impl Node { /// `Found` will be yielded with the matching index. If it fails to find an exact match, /// `Bound` will be yielded with the index of the subtree the key must lie in. fn search(&self, key: &K) -> SearchResult { - // linear search the node's keys because we're small - // FIXME(Gankro): if we ever get generic integer arguments - // to support variable choices of `B`, then this should be - // tuned to fall into binary search at some arbitrary level + // FIXME(Gankro): Tune when to search linear or binary when B becomes configurable. + // For the B configured as of this writing (B = 5), binary search was *singnificantly* + // worse. + self.search_linear(key) + } + + fn search_linear(&self, key: &K) -> SearchResult { for (i, k) in self.keys().enumerate() { match k.cmp(key) { Less => {}, // keep walkin' son, she's too small Equal => return Found(i), - Greater => return Bound(i), + Greater => return GoDown(i), } } - Bound(self.length) + GoDown(self.length) } /// Make a leaf root from scratch @@ -705,7 +722,7 @@ impl<'a, K> Iterator<&'a K> for Keys<'a, K> { } /// Subroutine for removal. Takes a search stack for a key that terminates at an -/// internal node, and mutates the tree and search stack to make it a search +/// internal node, and makes it mutates the tree and search stack to make it a search /// stack for that key that terminates at a leaf. This leaves the tree in an inconsistent /// state that must be repaired by the caller by removing the key in question. 
fn leafify_stack(stack: &mut SearchStack) { @@ -743,21 +760,29 @@ fn leafify_stack(stack: &mut SearchStack) { /// Basically `Vec.insert(index)`. Assumes that the last element in the slice is /// Somehow "empty" and can be overwritten. fn shift_and_insert(slice: &mut [T], index: uint, elem: T) { - // FIXME(Gankro): This should probably be a copy_memory and a write? - for i in range(index, slice.len() - 1).rev() { - slice.swap(i, i + 1); + unsafe { + let start = slice.as_mut_ptr().offset(index as int); + let len = slice.len(); + if index < len - 1 { + ptr::copy_memory(start.offset(1), start as *const _, len - index - 1); + } + ptr::write(start, elem); } - slice[index] = elem; } /// Basically `Vec.remove(index)`. fn remove_and_shift(slice: &mut [Option], index: uint) -> Option { - let result = slice[index].take(); - // FIXME(Gankro): This should probably be a copy_memory and write? - for i in range(index, slice.len() - 1) { - slice.swap(i, i + 1); + unsafe { + let first = slice.as_mut_ptr(); + let start = first.offset(index as int); + let result = ptr::read(start as *const _); + let len = slice.len(); + if len > 1 && index < len - 1 { + ptr::copy_memory(start, start.offset(1) as *const _, len - index - 1); + } + ptr::write(first.offset((len - 1) as int), None); + result } - result } /// Subroutine for splitting a node. 
Put the `SPLIT_LEN` last elements from left, @@ -774,11 +799,11 @@ fn steal_last(left: &mut[T], right: &mut[T], amount: uint) { /// Subroutine for merging the contents of right into left /// Assumes left has space for all of right -fn merge(left: &mut[Option], right: &mut[Option]) { - let left_len = left.len(); - let right_len = right.len(); - for i in range(0, right_len) { - left[left_len - right_len + i] = right[i].take(); +fn merge(left: &mut[T], right: &mut[T]) { + let offset = left.len() - right.len(); + for (a,b) in left.mut_slice_from(offset).mut_iter() + .zip(right.mut_iter()) { + mem::swap(a, b); } } @@ -802,56 +827,52 @@ impl Mutable for BTree { - - #[cfg(test)] mod test { - use std::prelude::*; - use super::BTree; - use {Map, MutableMap, Mutable, MutableSeq}; #[test] fn test_basic() { let mut map = BTree::new(); + let size = 10000u; assert_eq!(map.len(), 0); - for i in range(0u, 10000) { + for i in range(0, size) { assert_eq!(map.swap(i, 10*i), None); assert_eq!(map.len(), i + 1); } - for i in range(0u, 10000) { + for i in range(0, size) { assert_eq!(map.find(&i).unwrap(), &(i*10)); } - for i in range(10000, 20000) { + for i in range(size, size*2) { assert_eq!(map.find(&i), None); } - for i in range(0u, 10000) { + for i in range(0, size) { assert_eq!(map.swap(i, 100*i), Some(10*i)); - assert_eq!(map.len(), 10000); + assert_eq!(map.len(), size); } - for i in range(0u, 10000) { + for i in range(0, size) { assert_eq!(map.find(&i).unwrap(), &(i*100)); } - for i in range(0u, 5000) { + for i in range(0, size/2) { assert_eq!(map.pop(&(i*2)), Some(i*200)); - assert_eq!(map.len(), 10000 - i - 1); + assert_eq!(map.len(), size - i - 1); } - for i in range(0u, 5000) { + for i in range(0, size/2) { assert_eq!(map.find(&(2*i)), None); assert_eq!(map.find(&(2*i+1)).unwrap(), &(i*200 + 100)); } - for i in range(0u, 5000) { + for i in range(0, size/2) { assert_eq!(map.pop(&(2*i)), None); assert_eq!(map.pop(&(2*i+1)), Some(i*200 + 100)); - assert_eq!(map.len(), 5000 - i - 
1); + assert_eq!(map.len(), size/2 - i - 1); } } } @@ -859,6 +880,7 @@ mod test { + #[cfg(test)] mod bench { use test::Bencher; From 810e06c2471af33609fa7533bc27ffa2220f3c58 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Sun, 7 Sep 2014 14:14:49 -0400 Subject: [PATCH 5/5] fix import --- src/libcollections/btree.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs index 6ce3291e72bb3..e26f4d76773ef 100644 --- a/src/libcollections/btree.rs +++ b/src/libcollections/btree.rs @@ -829,7 +829,10 @@ impl Mutable for BTree { #[cfg(test)] mod test { + use std::prelude::*; + use super::BTree; + use {Map, MutableMap, Mutable, MutableSeq}; #[test] fn test_basic() {