Skip to content

REF: cython cleanup, typing, optimizations #23456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
"""
Template for each `dtype` helper function using 1-d template

# 1-d template
- pad
- pad_1d
- pad_2d
- backfill
- backfill_1d
- backfill_2d
- is_monotonic
- arrmap

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
raise AssertionError("len(index) != len(labels)")

nobs = np.zeros((<object> out).shape, dtype=np.int64)
{{if name=='object'}}
{{if name == 'object'}}
resx = np.empty((<object> out).shape, dtype=object)
{{else}}
resx = np.empty_like(out)
Expand Down
8 changes: 3 additions & 5 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

cimport cython

from cpython cimport (PyObject, Py_INCREF, PyList_Check, PyTuple_Check,
PyMem_Malloc, PyMem_Realloc, PyMem_Free,
PyString_Check, PyBytes_Check,
PyUnicode_Check)
from cpython cimport (PyObject, Py_INCREF,
PyMem_Malloc, PyMem_Realloc, PyMem_Free)

from libc.stdlib cimport malloc, free

Expand Down Expand Up @@ -153,7 +151,7 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
cdef:
int ret = 0
Py_ssize_t i, n = len(labels)
kh_int64_t * table = kh_init_int64()
kh_int64_t *table = kh_init_int64()
Int64Vector idx = Int64Vector()
ndarray[int64_t, ndim=1] arr
Int64VectorData *ud = idx.data
Expand Down
29 changes: 14 additions & 15 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# VectorData
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand Down Expand Up @@ -53,9 +53,9 @@ ctypedef fused vector_data:
cdef inline bint needs_resize(vector_data *data) nogil:
return data.n == data.m

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Vector
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand Down Expand Up @@ -134,8 +134,7 @@ cdef class StringVector:
bint external_view_exists

def __cinit__(self):
self.data = <StringVectorData *>PyMem_Malloc(
sizeof(StringVectorData))
self.data = <StringVectorData *>PyMem_Malloc(sizeof(StringVectorData))
if not self.data:
raise MemoryError()
self.external_view_exists = False
Expand Down Expand Up @@ -184,7 +183,7 @@ cdef class StringVector:
self.data.m = self.data.n
return ao

cdef inline void append(self, char * x):
cdef inline void append(self, char *x):

if needs_resize(self.data):
self.resize()
Expand Down Expand Up @@ -240,9 +239,9 @@ cdef class ObjectVector:
for i in range(len(x)):
self.append(x[i])

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# HashTable
#----------------------------------------------------------------------
# ----------------------------------------------------------------------


cdef class HashTable:
Expand Down Expand Up @@ -283,9 +282,9 @@ cdef class {{name}}HashTable(HashTable):

def sizeof(self, deep=False):
""" return the size of my table in bytes """
return self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags
return self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags

cpdef get_item(self, {{dtype}}_t val):
cdef khiter_t k
Expand Down Expand Up @@ -679,7 +678,7 @@ cdef class StringHashTable(HashTable):
for i in range(n):
val = values[i]

if PyUnicode_Check(val) or PyString_Check(val):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should note somewhere that we prefer isinstance checks for strings but use the cpython methods for datetimes.

if isinstance(val, (str, unicode)):
v = util.get_c_string(val)
else:
v = util.get_c_string(self.na_string_sentinel)
Expand Down Expand Up @@ -712,7 +711,7 @@ cdef class StringHashTable(HashTable):
for i in range(n):
val = values[i]

if PyUnicode_Check(val) or PyString_Check(val):
if isinstance(val, (str, unicode)):
v = util.get_c_string(val)
else:
v = util.get_c_string(self.na_string_sentinel)
Expand Down Expand Up @@ -773,7 +772,7 @@ cdef class StringHashTable(HashTable):
for i in range(n):
val = values[i]

if ((PyUnicode_Check(val) or PyString_Check(val))
if (isinstance(val, (str, unicode))
and not (use_na_value and val == na_value)):
v = util.get_c_string(val)
vecs[i] = v
Expand Down
24 changes: 10 additions & 14 deletions pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ Template for each `dtype` helper function for hashtable
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# VectorData
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand Down Expand Up @@ -80,7 +80,7 @@ cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
{{endif}}
cdef:
Py_ssize_t i=0
Py_ssize_t i = 0
kh_{{ttype}}_t *table

{{if dtype != 'object'}}
Expand Down Expand Up @@ -141,7 +141,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
{{dtype}}_t value
{{endif}}
Py_ssize_t k, i, n = len(values)
kh_{{ttype}}_t * table = kh_init_{{ttype}}()
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')

kh_resize_{{ttype}}(table, min(n, _SIZE_HINT_LIMIT))
Expand Down Expand Up @@ -202,9 +202,9 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
return out


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Membership
#----------------------------------------------------------------------
# ----------------------------------------------------------------------


@cython.wraparound(False)
Expand Down Expand Up @@ -237,7 +237,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
int ret = 0
ndarray[uint8_t] result
{{scalar}} val
kh_{{ttype}}_t * table = kh_init_{{ttype}}()
kh_{{ttype}}_t *table = kh_init_{{ttype}}()

# construct the table
n = len(values)
Expand Down Expand Up @@ -275,9 +275,9 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
{{endfor}}


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Mode Computations
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand Down Expand Up @@ -305,17 +305,13 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
{{endif}}
cdef:
int count, max_count = 1
int j = -1 # so you can do +=
int j = -1 # so you can do +=
Py_ssize_t k
kh_{{table_type}}_t *table
ndarray[{{ctype}}] modes

table = kh_init_{{table_type}}()
{{if dtype == 'object'}}
build_count_table_{{dtype}}(values, table, dropna)
{{else}}
build_count_table_{{dtype}}(values, table, dropna)
{{endif}}

modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}})

Expand Down
12 changes: 9 additions & 3 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bint:
return True


@cython.wraparound(False)
@cython.boundscheck(False)
def astype_intsafe(ndarray[object] arr, new_dtype):
cdef:
Py_ssize_t i, n = len(arr)
Expand All @@ -494,6 +496,8 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
return result


@cython.wraparound(False)
@cython.boundscheck(False)
def astype_unicode(arr: ndarray,
skipna: bool=False) -> ndarray[object]:
"""
Expand Down Expand Up @@ -528,6 +532,8 @@ def astype_unicode(arr: ndarray,
return result


@cython.wraparound(False)
@cython.boundscheck(False)
def astype_str(arr: ndarray,
skipna: bool=False) -> ndarray[object]:
"""
Expand Down Expand Up @@ -562,6 +568,8 @@ def astype_str(arr: ndarray,
return result


@cython.wraparound(False)
@cython.boundscheck(False)
def clean_index_list(list obj):
"""
Utility used in pandas.core.index.ensure_index
Expand All @@ -583,11 +591,9 @@ def clean_index_list(list obj):

# don't force numpy coerce with nan's
inferred = infer_dtype(obj)
if inferred in ['string', 'bytes', 'unicode',
'mixed', 'mixed-integer']:
if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']:
return np.asarray(obj, dtype=object), 0
elif inferred in ['integer']:

# TODO: we infer an integer but it *could* be a uint64
try:
return np.asarray(obj, dtype='int64'), 0
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/missing.pxd
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from tslibs.nattype cimport is_null_datetimelike

cpdef bint checknull(object val)
cpdef bint checknull_old(object val)

Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ cdef inline _extract_result(object res):
res = res[0]
return res


cdef class Slider:
"""
Only handles contiguous data for now
Expand Down
22 changes: 14 additions & 8 deletions pandas/_libs/sparse.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ cdef class IntIndex(SparseIndex):
ndarray[int32_t, ndim=1] indices):
pass


cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
cdef:
Py_ssize_t init_len, i, npoints, result_indexer = 0
Expand Down Expand Up @@ -315,6 +316,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
lens = lens[:result_indexer]
return locs, lens


# -----------------------------------------------------------------------------
# BlockIndex

Expand Down Expand Up @@ -670,12 +672,14 @@ cdef class BlockMerge(object):
self.xi = yi
self.yi = xi


cdef class BlockIntersection(BlockMerge):
"""
not done yet
"""
pass


cdef class BlockUnion(BlockMerge):
"""
Object-oriented approach makes sharing state between recursive functions a
Expand Down Expand Up @@ -805,10 +809,11 @@ include "sparse_op_helper.pxi"
# Indexing operations

def get_reindexer(ndarray[object, ndim=1] values, dict index_map):
cdef object idx
cdef Py_ssize_t i
cdef Py_ssize_t new_length = len(values)
cdef ndarray[int32_t, ndim=1] indexer
cdef:
object idx
Py_ssize_t i
Py_ssize_t new_length = len(values)
ndarray[int32_t, ndim=1] indexer

indexer = np.empty(new_length, dtype=np.int32)

Expand Down Expand Up @@ -861,10 +866,11 @@ def reindex_integer(ndarray[float64_t, ndim=1] values,
# SparseArray mask create operations

def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value):
cdef object value
cdef Py_ssize_t i
cdef Py_ssize_t new_length = len(arr)
cdef ndarray[int8_t, ndim=1] mask
cdef:
object value
Py_ssize_t i
Py_ssize_t new_length = len(arr)
ndarray[int8_t, ndim=1] mask

mask = np.ones(new_length, dtype=np.int8)

Expand Down
Loading