Skip to content

pythonize cython code #22638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into [base branch] from [head branch] on Sep 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions .coveragerc

This file was deleted.

6 changes: 3 additions & 3 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

cimport cython
from cython cimport Py_ssize_t
import cython
from cython import Py_ssize_t

from libc.stdlib cimport malloc, free
from libc.string cimport memmove
Expand Down Expand Up @@ -114,7 +114,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):

@cython.wraparound(False)
@cython.boundscheck(False)
def is_lexsorted(list list_of_arrays):
def is_lexsorted(list_of_arrays: list) -> bint:
cdef:
Py_ssize_t i
Py_ssize_t n, nlevels
Expand Down
16 changes: 3 additions & 13 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# at https://github.com/veorq/SipHash

import cython
from cpython cimport PyBytes_Check, PyUnicode_Check
from libc.stdlib cimport malloc, free

import numpy as np
Expand Down Expand Up @@ -44,6 +43,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
char **vecs
char *cdata
object val
list datas = []

k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
Expand All @@ -57,12 +57,11 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
vecs = <char **> malloc(n * sizeof(char *))
lens = <uint64_t*> malloc(n * sizeof(uint64_t))

cdef list datas = []
for i in range(n):
val = arr[i]
if PyBytes_Check(val):
if isinstance(val, bytes):
data = <bytes>val
elif PyUnicode_Check(val):
elif isinstance(val, unicode):
data = <bytes>val.encode(encoding)
elif val is None or is_nan(val):
# null, stringify and encode
Expand Down Expand Up @@ -132,15 +131,6 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
v2[0] = _rotl(v2[0], 32)


# TODO: This appears unused; remove?
cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
raise ValueError("key should be a 16-byte bytestring, "
"got {key} (len {klen})"
.format(key=key, klen=len(key)))
return low_level_siphash(data, len(data), key)


@cython.cdivision(True)
cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen,
uint8_t* key) nogil:
Expand Down
15 changes: 6 additions & 9 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta, date

cimport cython

from cpython cimport PyTuple_Check, PyList_Check
from cpython.slice cimport PySlice_Check
import cython

import numpy as np
cimport numpy as cnp
Expand All @@ -30,15 +27,15 @@ cdef int64_t iNaT = util.get_nat()


cdef inline bint is_definitely_invalid_key(object val):
if PyTuple_Check(val):
if isinstance(val, tuple):
try:
hash(val)
except TypeError:
return True

# having a _data attribute means we are an NDFrame
return (PySlice_Check(val) or util.is_array(val)
or PyList_Check(val) or hasattr(val, '_data'))
return (isinstance(val, slice) or util.is_array(val)
or isinstance(val, list) or hasattr(val, '_data'))


cpdef get_value_at(ndarray arr, object loc, object tz=None):
Expand Down Expand Up @@ -88,7 +85,7 @@ cdef class IndexEngine:
void* data_ptr

loc = self.get_loc(key)
if PySlice_Check(loc) or util.is_array(loc):
if isinstance(loc, slice) or util.is_array(loc):
return arr[loc]
else:
return get_value_at(arr, loc, tz=tz)
Expand Down Expand Up @@ -640,7 +637,7 @@ cdef class BaseMultiIndexCodesEngine:
def get_loc(self, object key):
if is_definitely_invalid_key(key):
raise TypeError("'{key}' is an invalid key".format(key=key))
if not PyTuple_Check(key):
if not isinstance(key, tuple):
raise KeyError(key)
try:
indices = [0 if checknull(v) else lev.get_loc(v) + 1
Expand Down
47 changes: 28 additions & 19 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# -*- coding: utf-8 -*-

cimport cython
from cython cimport Py_ssize_t
import cython
from cython import Py_ssize_t

from cpython cimport PyObject
from cpython.slice cimport PySlice_Check

cdef extern from "Python.h":
Py_ssize_t PY_SSIZE_T_MAX
Expand All @@ -30,14 +29,15 @@ cdef class BlockPlacement:
cdef bint _has_slice, _has_array, _is_known_slice_like

def __init__(self, val):
cdef slice slc
cdef:
slice slc

self._as_slice = None
self._as_array = None
self._has_slice = False
self._has_array = False

if PySlice_Check(val):
if isinstance(val, slice):
slc = slice_canonize(val)

if slc.start != slc.stop:
Expand All @@ -55,7 +55,8 @@ cdef class BlockPlacement:
self._has_array = True

def __str__(self):
cdef slice s = self._ensure_has_slice()
cdef:
slice s = self._ensure_has_slice()
if s is not None:
v = self._as_slice
else:
Expand All @@ -66,15 +67,17 @@ cdef class BlockPlacement:
__repr__ = __str__

def __len__(self):
cdef slice s = self._ensure_has_slice()
cdef:
slice s = self._ensure_has_slice()
if s is not None:
return slice_len(s)
else:
return len(self._as_array)

def __iter__(self):
cdef slice s = self._ensure_has_slice()
cdef Py_ssize_t start, stop, step, _
cdef:
slice s = self._ensure_has_slice()
Py_ssize_t start, stop, step, _
if s is not None:
start, stop, step, _ = slice_get_indices_ex(s)
return iter(range(start, stop, step))
Expand All @@ -83,15 +86,17 @@ cdef class BlockPlacement:

@property
def as_slice(self):
cdef slice s = self._ensure_has_slice()
cdef:
slice s = self._ensure_has_slice()
if s is None:
raise TypeError('Not slice-like')
else:
return s

@property
def indexer(self):
cdef slice s = self._ensure_has_slice()
cdef:
slice s = self._ensure_has_slice()
if s is not None:
return s
else:
Expand All @@ -103,7 +108,8 @@ cdef class BlockPlacement:

@property
def as_array(self):
cdef Py_ssize_t start, stop, end, _
cdef:
Py_ssize_t start, stop, end, _
if not self._has_array:
start, stop, step, _ = slice_get_indices_ex(self._as_slice)
self._as_array = np.arange(start, stop, step,
Expand All @@ -113,17 +119,19 @@ cdef class BlockPlacement:

@property
def is_slice_like(self):
cdef slice s = self._ensure_has_slice()
cdef:
slice s = self._ensure_has_slice()
return s is not None

def __getitem__(self, loc):
cdef slice s = self._ensure_has_slice()
cdef:
slice s = self._ensure_has_slice()
if s is not None:
val = slice_getitem(s, loc)
else:
val = self._as_array[loc]

if not PySlice_Check(val) and val.ndim == 0:
if not isinstance(val, slice) and val.ndim == 0:
return val

return BlockPlacement(val)
Expand All @@ -139,8 +147,9 @@ cdef class BlockPlacement:
[o.as_array for o in others]))

cdef iadd(self, other):
cdef slice s = self._ensure_has_slice()
cdef Py_ssize_t other_int, start, stop, step, l
cdef:
slice s = self._ensure_has_slice()
Py_ssize_t other_int, start, stop, step, l

if isinstance(other, int) and s is not None:
other_int = <Py_ssize_t>other
Expand Down Expand Up @@ -184,7 +193,7 @@ cdef class BlockPlacement:
return self._as_slice


cdef slice_canonize(slice s):
cdef slice slice_canonize(slice s):
"""
Convert slice to canonical bounded form.
"""
Expand Down Expand Up @@ -282,7 +291,7 @@ def slice_getitem(slice slc not None, ind):

s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)

if PySlice_Check(ind):
if isinstance(ind, slice):
ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
s_len)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ cdef class Interval(IntervalMixin):
return ((self.left < key if self.open_left else self.left <= key) and
(key < self.right if self.open_right else key <= self.right))

def __richcmp__(self, other, int op):
def __richcmp__(self, other, op: int):
if hasattr(other, 'ndim'):
# let numpy (or IntervalIndex) handle vectorization
return NotImplemented
Expand Down
Loading