From f67d5e719c09ee1d4cb6944de0cd7333d9d9f7d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 21:12:49 -0700 Subject: [PATCH 1/2] remove unused parts of skiplist (most of it) --- pandas/_libs/skiplist.pxd | 22 ------ pandas/_libs/skiplist.pyx | 141 -------------------------------------- 2 files changed, 163 deletions(-) diff --git a/pandas/_libs/skiplist.pxd b/pandas/_libs/skiplist.pxd index a273d2c445d18..ce7aab80487cc 100644 --- a/pandas/_libs/skiplist.pxd +++ b/pandas/_libs/skiplist.pxd @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t - - cdef extern from "src/skiplist.h": ctypedef struct node_t: node_t **next @@ -24,22 +21,3 @@ cdef extern from "src/skiplist.h": double skiplist_get(skiplist_t*, int, int*) nogil int skiplist_insert(skiplist_t*, double) nogil int skiplist_remove(skiplist_t*, double) nogil - - -# Note: Node is declared here so that IndexableSkiplist can be exposed; -# Node itself not intended to be exposed. -cdef class Node: - cdef public: - double value - list next - list width - - -cdef class IndexableSkiplist: - cdef: - Py_ssize_t size, maxlevels - Node head - - cpdef get(self, Py_ssize_t i) - cpdef insert(self, double value) - cpdef remove(self, double value) diff --git a/pandas/_libs/skiplist.pyx b/pandas/_libs/skiplist.pyx index 2fdee72f9d588..eb750a478415a 100644 --- a/pandas/_libs/skiplist.pyx +++ b/pandas/_libs/skiplist.pyx @@ -5,144 +5,3 @@ # Link: http://code.activestate.com/recipes/576930/ # Cython version: Wes McKinney -from random import random - -from libc.math cimport log - -import numpy as np - - -# MSVC does not have log2! - -cdef double Log2(double x): - return log(x) / log(2.) - - -# TODO: optimize this, make less messy - -cdef class Node: - # cdef public: - # double value - # list next - # list width - - def __init__(self, double value, list next, list width): - self.value = value - self.next = next - self.width = width - - -# Singleton terminator node -NIL = Node(np.inf, [], []) - - -cdef class IndexableSkiplist: - """ - Sorted collection supporting O(lg n) insertion, removal, and - lookup by rank. - """ - # cdef: - # Py_ssize_t size, maxlevels - # Node head - - def __init__(self, expected_size=100): - self.size = 0 - self.maxlevels = int(1 + Log2(expected_size)) - self.head = Node(np.NaN, [NIL] * self.maxlevels, [1] * self.maxlevels) - - def __len__(self): - return self.size - - def __getitem__(self, i): - return self.get(i) - - cpdef get(self, Py_ssize_t i): - cdef: - Py_ssize_t level - Node node - - node = self.head - i += 1 - - for level in range(self.maxlevels - 1, -1, -1): - while node.width[level] <= i: - i -= node.width[level] - node = node.next[level] - - return node.value - - cpdef insert(self, double value): - cdef: - Py_ssize_t level, steps, d - Node node, prevnode, newnode, next_at_level, tmp - list chain, steps_at_level - - # find first node on each level where node.next[levels].value > value - chain = [None] * self.maxlevels - steps_at_level = [0] * self.maxlevels - node = self.head - - for level in range(self.maxlevels - 1, -1, -1): - next_at_level = node.next[level] - - while next_at_level.value <= value: - steps_at_level[level] = (steps_at_level[level] + - node.width[level]) - node = next_at_level - next_at_level = node.next[level] - - chain[level] = node - - # insert a link to the newnode at each level - d = min(self.maxlevels, 1 - int(Log2(random()))) - newnode = Node(value, [None] * d, [None] * d) - steps = 0 - - for level in range(d): - prevnode = chain[level] - newnode.next[level] = prevnode.next[level] - prevnode.next[level] = newnode - newnode.width[level] = (prevnode.width[level] - steps) - prevnode.width[level] = steps + 1 - steps += steps_at_level[level] - - for level in range(d, self.maxlevels): - (chain[level]).width[level] += 1 - - self.size += 1 - - cpdef remove(self, double value): - cdef: - Py_ssize_t level, d - Node node, prevnode, tmpnode, next_at_level - list chain - - # find first node on each level where node.next[levels].value >= value - chain = [None] * self.maxlevels - node = self.head - - for level in range(self.maxlevels - 1, -1, -1): - next_at_level = node.next[level] - while next_at_level.value < value: - node = next_at_level - next_at_level = node.next[level] - - chain[level] = node - - if value != (((chain[0]).next)[0]).value: - raise KeyError('Not Found') - - # remove one link at each level - d = len((((chain[0]).next)[0]).next) - - for level in range(d): - prevnode = chain[level] - tmpnode = prevnode.next[level] - prevnode.width[level] += tmpnode.width[level] - 1 - prevnode.next[level] = tmpnode.next[level] - - for level in range(d, self.maxlevels): - tmpnode = chain[level] - tmpnode.width[level] -= 1 - - self.size -= 1 From f87a404eea6cf42d04ec86ba0d4bd4fd8593ea27 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Sep 2019 10:04:18 -0700 Subject: [PATCH 2/2] comment pointing back to removed unused code --- pandas/_libs/skiplist.pxd | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/skiplist.pxd b/pandas/_libs/skiplist.pxd index ce7aab80487cc..e827223bbe0a7 100644 --- a/pandas/_libs/skiplist.pxd +++ b/pandas/_libs/skiplist.pxd @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# See GH#27465 for reference on related-but-unused cython code cdef extern from "src/skiplist.h": ctypedef struct node_t: