Skip to content

REF: cython cleanups and optimizations #23382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
38 changes: 16 additions & 22 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ class NegInfinity(object):
__ge__ = lambda self, other: isinstance(other, NegInfinity)


cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[int64_t, ndim=1] unique_deltas(int64_t[:] arr):
"""
Efficiently find the unique first-differences of the given array.

Expand Down Expand Up @@ -151,7 +153,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:

@cython.boundscheck(False)
@cython.wraparound(False)
def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
def groupsort_indexer(int64_t[:] index, Py_ssize_t ngroups):
"""
compute a 1-d indexer that is an ordering of the passed index,
ordered by the groups. This is a reverse of the label
Expand Down Expand Up @@ -236,7 +238,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
Py_ssize_t i, j, xi, yi, N, K
bint minpv
ndarray[float64_t, ndim=2] result
ndarray[uint8_t, ndim=2] mask
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor

Expand Down Expand Up @@ -301,7 +303,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
ndarray[float64_t, ndim=2] result
ndarray[float64_t, ndim=1] maskedx
ndarray[float64_t, ndim=1] maskedy
ndarray[uint8_t, ndim=2] mask
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor

Expand Down Expand Up @@ -373,7 +375,7 @@ ctypedef fused algos_t:
# TODO: unused; needed?
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices(ndarray[algos_t] index):
cpdef map_indices(algos_t[:] index):
"""
Produce a dict mapping the values of the input array to their respective
locations.
Expand All @@ -397,7 +399,7 @@ cpdef map_indices(ndarray[algos_t] index):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
def pad(algos_t[:] old, algos_t[:] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
Expand Down Expand Up @@ -475,9 +477,7 @@ pad_bool = pad["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
cdef:
Py_ssize_t i, N
algos_t val
Expand Down Expand Up @@ -521,9 +521,7 @@ pad_inplace_bool = pad_inplace["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
def pad_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
cdef:
Py_ssize_t i, j, N, K
algos_t val
Expand Down Expand Up @@ -595,7 +593,7 @@ D

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
def backfill(algos_t[:] old, algos_t[:] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
Expand Down Expand Up @@ -674,9 +672,7 @@ backfill_bool = backfill["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
cdef:
Py_ssize_t i, N
algos_t val
Expand Down Expand Up @@ -720,9 +716,7 @@ backfill_inplace_bool = backfill_inplace["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
def backfill_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
cdef:
Py_ssize_t i, j, N, K
algos_t val
Expand Down Expand Up @@ -768,11 +762,11 @@ backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"]

@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap(ndarray[algos_t] index, object func):
def arrmap(algos_t[:] index, object func):
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)
object[:] result = np.empty(length, dtype=np.object_)

from pandas._libs.lib import maybe_convert_objects

Expand All @@ -793,7 +787,7 @@ arrmap_bool = arrmap["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic(ndarray[algos_t] arr, bint timelike):
def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
"""
Returns
-------
Expand Down
16 changes: 3 additions & 13 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
"""
Template for each `dtype` helper function using 1-d template

# 1-d template
- pad
- pad_1d
- pad_2d
- backfill
- backfill_1d
- backfill_2d
- is_monotonic
- arrmap

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

Expand Down Expand Up @@ -39,7 +29,7 @@ def get_dispatch(dtypes):
@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
ndarray[{{dest_type}}, ndim=2] out,
{{dest_type}}[:, :] out,
Py_ssize_t periods, int axis):
cdef:
Py_ssize_t i, j, sx, sy
Expand Down Expand Up @@ -82,8 +72,8 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,


def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
ndarray[int64_t] indexer, Py_ssize_t loc,
ndarray[{{dest_type}}] out):
int64_t[:] indexer, Py_ssize_t loc,
{{dest_type}}[:, :] out):
cdef:
Py_ssize_t i, j, k

Expand Down
34 changes: 17 additions & 17 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,18 @@ cdef inline float64_t kth_smallest_c(float64_t* a,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
ndarray[int64_t] counts,
def group_median_float64(float64_t[:, :] out,
int64_t[:] counts,
ndarray[float64_t, ndim=2] values,
ndarray[int64_t] labels,
int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, ngroups, size
ndarray[int64_t] _counts
ndarray data
int64_t[:] _counts
ndarray[float64_t, ndim=2] data
float64_t* ptr

assert min_count == -1, "'min_count' only used in add and prod"
Expand Down Expand Up @@ -217,7 +217,7 @@ def group_cumsum(numeric[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
int ngroups, int periods):
cdef:
Py_ssize_t N, i, j, ii
Expand Down Expand Up @@ -269,8 +269,8 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
ndarray[uint8_t] mask, object direction,
def group_fillna_indexer(int64_t[:] out, ndarray[int64_t] labels,
uint8_t[:] mask, object direction,
int64_t limit):
"""Indexes how to fill values forwards or backwards within a group

Expand All @@ -291,16 +291,16 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
"""
cdef:
Py_ssize_t i, N
ndarray[int64_t] sorted_labels
int64_t[:] sorted_labels
int64_t idx, curr_fill_idx=-1, filled_vals=0

N = len(out)

# Make sure all arrays are the same size
assert N == len(labels) == len(mask)

sorted_labels = np.argsort(labels, kind='mergesort').astype(
np.int64, copy=False)
sorted_labels = np.argsort(labels, kind='mergesort').astype(np.int64,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer the former.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will revert.

copy=False)
if direction == 'bfill':
sorted_labels = sorted_labels[::-1]

Expand All @@ -327,10 +327,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_any_all(ndarray[uint8_t] out,
ndarray[int64_t] labels,
ndarray[uint8_t] values,
ndarray[uint8_t] mask,
def group_any_all(uint8_t[:] out,
int64_t[:] labels,
uint8_t[:] values,
uint8_t[:] mask,
object val_test,
bint skipna):
"""Aggregated boolean values to show truthfulness of group elements
Expand All @@ -353,7 +353,7 @@ def group_any_all(ndarray[uint8_t] out,
The returned values will either be 0 or 1 (False or True, respectively).
"""
cdef:
Py_ssize_t i, N=len(labels)
Py_ssize_t i, N = len(labels)
int64_t lab
uint8_t flag_val

Expand All @@ -370,7 +370,7 @@ def group_any_all(ndarray[uint8_t] out,
else:
raise ValueError("'bool_func' must be either 'any' or 'all'!")

out.fill(1 - flag_val)
out[:] = 1 - flag_val

with nogil:
for i in range(N):
Expand Down
Loading