REF: cython cleanup, typing, optimizations #23464

Merged: 2 commits, Nov 3, 2018
4 changes: 3 additions & 1 deletion pandas/_libs/algos.pyx
@@ -77,6 +77,8 @@ class NegInfinity(object):
__ge__ = lambda self, other: isinstance(other, NegInfinity)


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
"""
Efficiently find the unique first-differences of the given array.
@@ -793,7 +795,7 @@ arrmap_bool = arrmap["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic(ndarray[algos_t] arr, bint timelike):
def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
"""
Returns
-------
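Context for the algos.pyx changes: @cython.wraparound(False) and @cython.boundscheck(False) drop negative-index handling and per-access bounds checks from the generated C, which is safe here because the loops only use in-range, non-negative indices; likewise, declaring ndim=1 on the is_monotonic argument lets Cython emit direct one-dimensional buffer indexing. A rough sketch of what unique_deltas computes, with illustrative values (not taken from the PR):

>>> import numpy as np
>>> from pandas._libs.algos import unique_deltas
>>> unique_deltas(np.array([1, 3, 5, 6, 8], dtype=np.int64))
array([1, 2])

The first differences are [2, 2, 1, 2]; the unique deltas, sorted, are [1, 2].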
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyx
@@ -353,7 +353,7 @@ def group_any_all(ndarray[uint8_t] out,
The returned values will either be 0 or 1 (False or True, respectively).
"""
cdef:
Py_ssize_t i, N=len(labels)
Py_ssize_t i, N = len(labels)
int64_t lab
uint8_t flag_val

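For orientation, group_any_all is the kernel behind the groupby any/all reductions; the 0/1 flags it writes into out surface as booleans at the Python level. An illustrative example (assumed mapping to the public API):

>>> import pandas as pd
>>> df = pd.DataFrame({'g': [0, 0, 1], 'v': [True, False, False]})
>>> df.groupby('g')['v'].any().tolist()
[True, False]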
20 changes: 0 additions & 20 deletions pandas/_libs/groupby_helper.pxi.in
@@ -667,11 +667,6 @@ def group_max(ndarray[groupby_t, ndim=2] out,
out[i, j] = maxx[i, j]


group_max_float64 = group_max["float64_t"]
group_max_float32 = group_max["float32_t"]
group_max_int64 = group_max["int64_t"]


@cython.wraparound(False)
@cython.boundscheck(False)
def group_min(ndarray[groupby_t, ndim=2] out,
@@ -734,11 +729,6 @@ def group_min(ndarray[groupby_t, ndim=2] out,
out[i, j] = minx[i, j]


group_min_float64 = group_min["float64_t"]
group_min_float32 = group_min["float32_t"]
group_min_int64 = group_min["int64_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin(ndarray[groupby_t, ndim=2] out,
@@ -787,11 +777,6 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
out[i, j] = mval


group_cummin_float64 = group_cummin["float64_t"]
group_cummin_float32 = group_cummin["float32_t"]
group_cummin_int64 = group_cummin["int64_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax(ndarray[groupby_t, ndim=2] out,
@@ -837,8 +822,3 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
if val > mval:
accum[lab, j] = mval = val
out[i, j] = mval


group_cummax_float64 = group_cummax["float64_t"]
group_cummax_float32 = group_cummax["float32_t"]
group_cummax_int64 = group_cummax["int64_t"]
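The deleted aliases above (group_max_float64 and friends) are removable because Cython already lets callers subscript a fused-type function to select a specialization. A minimal sketch of the mechanism, with an assumed simplified version of the template in groupby_helper.pxi.in:

# sketch only; groupby_t mirrors the fused type used by the real template
cimport cython
from numpy cimport float32_t, float64_t, int64_t, ndarray

ctypedef fused groupby_t:
    float64_t
    float32_t
    int64_t

@cython.wraparound(False)
@cython.boundscheck(False)
def group_max_sketch(ndarray[groupby_t, ndim=2] out):
    pass

# subscripting the fused function picks a specialization directly,
# which is exactly what the removed module-level aliases duplicated:
# group_max_sketch["float64_t"]  <-> the old group_max_float64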
26 changes: 13 additions & 13 deletions pandas/_libs/hashtable_class_helper.pxi.in
@@ -86,12 +86,12 @@ cdef class {{name}}Vector:
self.data.n = 0
self.data.m = _INIT_VEC_CAP
self.ao = np.empty(self.data.m, dtype={{idtype}})
self.data.data = <{{arg}}*> self.ao.data
self.data.data = <{{arg}}*>self.ao.data

cdef resize(self):
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
self.ao.resize(self.data.m, refcheck=False)
self.data.data = <{{arg}}*> self.ao.data
self.data.data = <{{arg}}*>self.ao.data

def __dealloc__(self):
if self.data is not NULL:
@@ -140,7 +140,7 @@ cdef class StringVector:
self.external_view_exists = False
self.data.n = 0
self.data.m = _INIT_VEC_CAP
self.data.data = <char **> malloc(self.data.m * sizeof(char *))
self.data.data = <char **>malloc(self.data.m * sizeof(char *))
if not self.data.data:
raise MemoryError()

@@ -153,7 +153,7 @@
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)

orig_data = self.data.data
self.data.data = <char **> malloc(self.data.m * sizeof(char *))
self.data.data = <char **>malloc(self.data.m * sizeof(char *))
if not self.data.data:
raise MemoryError()
for i in range(m):
@@ -208,22 +208,22 @@ cdef class ObjectVector:
self.n = 0
self.m = _INIT_VEC_CAP
self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
self.data = <PyObject**> self.ao.data
self.data = <PyObject**>self.ao.data

def __len__(self):
return self.n

cdef inline append(self, object o):
cdef inline append(self, object obj):
if self.n == self.m:
if self.external_view_exists:
raise ValueError("external reference but "
"Vector.resize() needed")
self.m = max(self.m * 2, _INIT_VEC_CAP)
self.ao.resize(self.m, refcheck=False)
self.data = <PyObject**> self.ao.data
self.data = <PyObject**>self.ao.data

Py_INCREF(o)
self.data[self.n] = <PyObject*> o
Py_INCREF(obj)
self.data[self.n] = <PyObject*>obj
self.n += 1

def to_array(self):
@@ -768,7 +768,7 @@ cdef class StringHashTable(HashTable):
use_na_value = na_value is not None

# assign pointers and pre-filter out missing
vecs = <const char **> malloc(n * sizeof(char *))
vecs = <const char **>malloc(n * sizeof(char *))
for i in range(n):
val = values[i]

@@ -844,9 +844,9 @@ cdef class PyObjectHashTable(HashTable):

def sizeof(self, deep=False):
""" return the size of my table in bytes """
return self.table.n_buckets * (sizeof(PyObject *) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags
return self.table.n_buckets * (sizeof(PyObject *) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags

cpdef get_item(self, object val):
cdef khiter_t k
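As a quick sanity check on the sizeof formula above: on a typical 64-bit build, sizeof(PyObject *) and sizeof(Py_ssize_t) are 8 bytes each and sizeof(uint32_t) is 4, so each bucket costs 20 bytes and a table with 1,024 buckets would report 20,480 bytes. The sizes are platform-dependent, which is why the method computes the total rather than hard-coding a constant.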
22 changes: 11 additions & 11 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -45,11 +45,11 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
val = values[i]

if not checknull(val) or not dropna:
k = kh_get_{{ttype}}(table, <PyObject*> val)
k = kh_get_{{ttype}}(table, <PyObject*>val)
if k != table.n_buckets:
table.vals[k] += 1
else:
k = kh_put_{{ttype}}(table, <PyObject*> val, &ret)
k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
table.vals[k] = 1
{{else}}
with nogil:
@@ -103,7 +103,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
{{if dtype == 'object'}}
for k in range(table.n_buckets):
if kh_exist_{{ttype}}(table, k):
result_keys[i] = <{{dtype}}> table.keys[k]
result_keys[i] = <{{dtype}}>table.keys[k]
result_counts[i] = table.vals[k]
i += 1
{{else}}
@@ -152,7 +152,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
if keep == 'last':
{{if dtype == 'object'}}
for i from n > i >= 0:
kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
out[i] = ret == 0
{{else}}
with nogil:
@@ -163,7 +163,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
elif keep == 'first':
{{if dtype == 'object'}}
for i in range(n):
kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
out[i] = ret == 0
{{else}}
with nogil:
@@ -175,13 +175,13 @@
{{if dtype == 'object'}}
for i in range(n):
value = values[i]
k = kh_get_{{ttype}}(table, <PyObject*> value)
k = kh_get_{{ttype}}(table, <PyObject*>value)
if k != table.n_buckets:
out[table.vals[k]] = 1
out[i] = 1
else:
k = kh_put_{{ttype}}(table, <PyObject*> value, &ret)
table.keys[k] = <PyObject*> value
k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
table.keys[k] = <PyObject*>value
table.vals[k] = i
out[i] = 0
{{else}}
@@ -245,7 +245,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):

{{if dtype == 'object'}}
for i in range(n):
kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
{{else}}
with nogil:
for i in range(n):
@@ -259,7 +259,7 @@
{{if dtype == 'object'}}
for i in range(n):
val = arr[i]
k = kh_get_{{ttype}}(table, <PyObject*> val)
k = kh_get_{{ttype}}(table, <PyObject*>val)
Contributor: Are we actually linting this? IIRC these were changed to add the space last year. I don't care which way, but we should have one true way.

Member (Author): I'm not aware of a way to lint for this. I've been pretty consistent about no-space, so at this point that constitutes a pretty large majority of existing cases.

result[i] = (k != table.n_buckets)
{{else}}
with nogil:
@@ -342,7 +342,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
else:
continue

modes[j] = <object> table.keys[k]
modes[j] = <object>table.keys[k]
{{endif}}

kh_destroy_{{table_type}}(table)
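A note on the duplicated_{{dtype}} kernel above: khash's kh_put_* sets ret to 0 when the key was already in the table, so out[i] = ret == 0 flags repeats, and iterating in reverse (for i from n > i >= 0) yields keep='last' semantics. At the Python level this is what Series.duplicated returns; an illustrative example:

>>> import pandas as pd
>>> s = pd.Series(['a', 'b', 'a'])
>>> s.duplicated(keep='first').tolist()
[False, False, True]
>>> s.duplicated(keep='last').tolist()
[True, False, False]
>>> s.duplicated(keep=False).tolist()
[True, False, True]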
4 changes: 2 additions & 2 deletions pandas/_libs/join.pyx
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

cimport cython
from cython cimport Py_ssize_t
import cython
from cython import Py_ssize_t
Contributor: Does this actually matter?

Member (Author): No. The only difference is that one is valid Python, getting us one step closer to being able to lint these files.


import numpy as np
cimport numpy as cnp
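On the import cython change: cimport is Cython-only syntax that a plain-Python parser rejects, whereas import cython and from cython import Py_ssize_t are ordinary imports resolved by Cython's pure-Python shadow module, so the file moves one step closer to being parseable by standard lint tools. A minimal sketch of the pattern (hypothetical function name):

import cython                   # valid Python and valid Cython
from cython import Py_ssize_t   # shadow-module alias; behaves as int under CPython

@cython.boundscheck(False)      # no-op decorator when run as pure Python
@cython.wraparound(False)
def demo(values):
    # the real pandas .pyx files still contain cdef and other Cython-only
    # syntax, so this change only helps at the import level
    return len(values)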