Skip to content

CLN: remove unused parts of skiplist (most of it) #27465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 11, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions pandas/_libs/skiplist.pxd
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-

from cython cimport Py_ssize_t


cdef extern from "src/skiplist.h":
ctypedef struct node_t:
node_t **next
Expand All @@ -24,22 +21,3 @@ cdef extern from "src/skiplist.h":
double skiplist_get(skiplist_t*, int, int*) nogil
int skiplist_insert(skiplist_t*, double) nogil
int skiplist_remove(skiplist_t*, double) nogil


# Note: Node is declared here so that IndexableSkiplist can be exposed;
# Node itself not intended to be exposed.
# Declaration of the skiplist node extension type; the matching
# implementation (its __init__) is in skiplist.pyx.
cdef class Node:
# `public` exposes the attributes below to Python code as well.
cdef public:
# the value stored in this node
double value
# next[level] is the following node on that level
list next
# width[level] is the number of rank positions skipped by following
# next[level] (get() subtracts it from the remaining rank)
list width


# Declaration of the skiplist extension type; the method bodies are in
# skiplist.pyx.
cdef class IndexableSkiplist:
cdef:
# size: number of stored values; maxlevels: number of link levels
Py_ssize_t size, maxlevels
# header node from which every traversal starts
Node head

# Rank lookup, insertion and removal. Declared cpdef so they can be
# called from both Cython and Python code.
cpdef get(self, Py_ssize_t i)
cpdef insert(self, double value)
cpdef remove(self, double value)
141 changes: 0 additions & 141 deletions pandas/_libs/skiplist.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,144 +5,3 @@
# Link: http://code.activestate.com/recipes/576930/

# Cython version: Wes McKinney
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason that we need this file (now without any code)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, we'd need to update setup.py

from random import random

from libc.math cimport log

import numpy as np


# MSVC does not have log2!

cdef double Log2(double x):
    """
    Return the base-2 logarithm of ``x``.

    Computed as a ratio of natural logarithms instead of calling a
    native ``log2`` (see the MSVC note in this file).
    """
    cdef double ln_x = log(x)
    cdef double ln_two = log(2.)
    return ln_x / ln_two


# TODO: optimize this, make less messy

cdef class Node:
    """
    A single skiplist element.

    Parameters
    ----------
    value : double
        The value stored in the node.
    next : list
        ``next[level]`` is the following node on that level.
    width : list
        ``width[level]`` is the number of rank positions covered by the
        ``next[level]`` link.
    """
    # The attributes are declared in skiplist.pxd:
    # cdef public:
    #     double value
    #     list next
    #     list width

    def __init__(self, double value, list next, list width):
        # The lists are stored as given, not copied.
        self.width = width
        self.next = next
        self.value = value


# Shared end-of-list sentinel: every level of the skiplist terminates in
# this one node. It holds +inf and has no links or widths of its own.
NIL = Node(value=np.inf, next=[], width=[])


cdef class IndexableSkiplist:
    """
    Sorted collection supporting O(lg n) insertion, removal, and
    lookup by rank.

    Parameters
    ----------
    expected_size : number, default 100
        Anticipated number of elements. It fixes the number of levels at
        ``int(1 + Log2(expected_size))`` and must be at least 1.

    Raises
    ------
    ValueError
        If ``expected_size`` is smaller than 1.

    Notes
    -----
    NaN values are not supported: every comparison against NaN is false,
    so a NaN has no defined position and ``remove(nan)`` always raises
    ``KeyError``.
    """
    # The attributes are declared in skiplist.pxd:
    # cdef:
    #     Py_ssize_t size, maxlevels
    #     Node head

    def __init__(self, expected_size=100):
        # Anything below 1 yields zero levels or an undefined logarithm.
        if expected_size < 1:
            raise ValueError('expected_size must be at least 1')

        self.size = 0
        self.maxlevels = int(1 + Log2(expected_size))
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        self.head = Node(np.nan,
                         [NIL] * self.maxlevels,
                         [1] * self.maxlevels)

    def __len__(self):
        return self.size

    def __getitem__(self, i):
        return self.get(i)

    cpdef get(self, Py_ssize_t i):
        """
        Return the value at 0-based rank ``i`` in ascending order.

        Raises
        ------
        IndexError
            If ``i`` is negative or not smaller than ``len(self)``.
        """
        cdef:
            Py_ssize_t level
            Node node

        # Without this guard a negative rank returns the head's NaN and a
        # too-large rank walks onto NIL, whose width list is empty.
        if i < 0 or i >= self.size:
            raise IndexError('skiplist index out of range')

        node = self.head
        # Widths are measured from the head, which sits one step before
        # rank 0.
        i += 1

        for level in range(self.maxlevels - 1, -1, -1):
            while node.width[level] <= i:
                i -= node.width[level]
                node = node.next[level]

        return node.value

    cpdef insert(self, double value):
        """
        Insert ``value``, keeping the collection sorted.

        Equal values are placed after the ones already stored.
        """
        cdef:
            Py_ssize_t level, steps, d
            Node node, prevnode, newnode, next_at_level
            list chain, steps_at_level
            double u

        # find first node on each level where node.next[levels].value > value
        chain = [None] * self.maxlevels
        steps_at_level = [0] * self.maxlevels
        node = self.head

        for level in range(self.maxlevels - 1, -1, -1):
            next_at_level = node.next[level]

            # The identity check stops at the sentinel even when value is
            # +inf; NIL.value <= inf is true and NIL has no links.
            while next_at_level is not NIL and next_at_level.value <= value:
                steps_at_level[level] = (steps_at_level[level] +
                                         node.width[level])
                node = next_at_level
                next_at_level = node.next[level]

            chain[level] = node

        # Pick the new node's height. random() lies in [0.0, 1.0); an
        # exact 0.0 would make Log2 return -inf, so it gets the maximum
        # height instead.
        u = random()
        if u > 0.0:
            d = min(self.maxlevels, 1 - int(Log2(u)))
        else:
            d = self.maxlevels

        # insert a link to the newnode at each level
        newnode = Node(value, [None] * d, [None] * d)
        steps = 0

        for level in range(d):
            prevnode = chain[level]
            newnode.next[level] = prevnode.next[level]
            prevnode.next[level] = newnode
            newnode.width[level] = (prevnode.width[level] - steps)
            prevnode.width[level] = steps + 1
            steps += steps_at_level[level]

        # Links above the new node's height now span one more element.
        for level in range(d, self.maxlevels):
            (<Node>chain[level]).width[level] += 1

        self.size += 1

    cpdef remove(self, double value):
        """
        Remove one occurrence of ``value``.

        Raises
        ------
        KeyError
            If ``value`` is not present.
        """
        cdef:
            Py_ssize_t level, d
            Node node, prevnode, tmpnode, next_at_level, target
            list chain

        # find first node on each level where node.next[levels].value >= value
        chain = [None] * self.maxlevels
        node = self.head

        for level in range(self.maxlevels - 1, -1, -1):
            next_at_level = node.next[level]
            # NIL holds +inf, which is never < value, so this loop cannot
            # run past the sentinel.
            while next_at_level.value < value:
                node = next_at_level
                next_at_level = node.next[level]

            chain[level] = node

        prevnode = chain[0]
        target = prevnode.next[0]
        # The sentinel must never match: remove(inf) with no stored inf
        # would otherwise "remove" NIL and corrupt widths and size.
        if target is NIL or value != target.value:
            raise KeyError('Not Found')

        # remove one link at each level
        d = len(target.next)

        for level in range(d):
            prevnode = chain[level]
            tmpnode = prevnode.next[level]
            prevnode.width[level] += tmpnode.width[level] - 1
            prevnode.next[level] = tmpnode.next[level]

        # Links above the removed node's height now span one fewer
        # element.
        for level in range(d, self.maxlevels):
            tmpnode = chain[level]
            tmpnode.width[level] -= 1

        self.size -= 1