Skip to content

REF: Use more cython memoryviews #58271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 17, 2024
22 changes: 10 additions & 12 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
Py_ssize_t i, xi, yi, N, K
int64_t minpv
float64_t[:, ::1] result
ndarray[uint8_t, ndim=2] mask
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy

Expand Down Expand Up @@ -407,7 +407,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
Py_ssize_t i, xi, yi, N, K
ndarray[float64_t, ndim=2] result
ndarray[float64_t, ndim=2] ranked_mat
ndarray[float64_t, ndim=1] rankedx, rankedy
float64_t[::1] rankedx, rankedy
float64_t[::1] maskedx, maskedy
ndarray[uint8_t, ndim=2] mask
int64_t nobs = 0
Expand Down Expand Up @@ -566,8 +566,8 @@ def get_fill_indexer(const uint8_t[:] mask, limit=None):
@cython.boundscheck(False)
@cython.wraparound(False)
def pad(
ndarray[numeric_object_t] old,
ndarray[numeric_object_t] new,
const numeric_object_t[:] old,
const numeric_object_t[:] new,
limit=None
) -> ndarray:
# -> ndarray[intp_t, ndim=1]
Expand Down Expand Up @@ -691,8 +691,8 @@ def pad_2d_inplace(numeric_object_t[:, :] values, uint8_t[:, :] mask, limit=None
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill(
ndarray[numeric_object_t] old,
ndarray[numeric_object_t] new,
const numeric_object_t[:] old,
const numeric_object_t[:] new,
limit=None
) -> ndarray: # -> ndarray[intp_t, ndim=1]
"""
Expand Down Expand Up @@ -786,7 +786,7 @@ def backfill_2d_inplace(numeric_object_t[:, :] values,

@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):
def is_monotonic(const numeric_object_t[:] arr, bint timelike):
"""
Returns
-------
Expand Down Expand Up @@ -1089,8 +1089,7 @@ cdef void rank_sorted_1d(
float64_t[::1] out,
int64_t[::1] grp_sizes,
const intp_t[:] sort_indexer,
# TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
numeric_object_t[:] masked_vals,
const numeric_object_t[:] masked_vals,
const uint8_t[:] mask,
bint check_mask,
Py_ssize_t N,
Expand Down Expand Up @@ -1378,16 +1377,15 @@ ctypedef fused out_t:
@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d(
ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr"
const diff_t[:, :] arr,
ndarray[out_t, ndim=2] out,
Py_ssize_t periods,
int axis,
bint datetimelike=False,
):
cdef:
Py_ssize_t i, j, sx, sy, start, stop
bint f_contig = arr.flags.f_contiguous
# bint f_contig = arr.is_f_contig() # TODO(cython3) once arr is memoryview
bint f_contig = arr.is_f_contig()
diff_t left, right

# Disable for unsupported dtype combinations,
Expand Down
24 changes: 10 additions & 14 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,9 @@ def group_shift_indexer(
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(
ndarray[intp_t] out,
ndarray[intp_t] labels,
ndarray[uint8_t] mask,
Py_ssize_t[::1] out,
const intp_t[::1] labels,
const uint8_t[:] mask,
int64_t limit,
bint compute_ffill,
int ngroups,
Expand Down Expand Up @@ -1179,13 +1179,13 @@ def group_ohlc(
@cython.boundscheck(False)
@cython.wraparound(False)
def group_quantile(
ndarray[float64_t, ndim=2] out,
float64_t[:, ::1] out,
ndarray[numeric_t, ndim=1] values,
ndarray[intp_t] labels,
const intp_t[::1] labels,
const uint8_t[:] mask,
const float64_t[:] qs,
ndarray[int64_t] starts,
ndarray[int64_t] ends,
const int64_t[::1] starts,
const int64_t[::1] ends,
str interpolation,
uint8_t[:, ::1] result_mask,
bint is_datetimelike,
Expand Down Expand Up @@ -1388,7 +1388,7 @@ cdef inline void _check_below_mincount(
uint8_t[:, ::1] result_mask,
Py_ssize_t ncounts,
Py_ssize_t K,
int64_t[:, ::1] nobs,
const int64_t[:, ::1] nobs,
int64_t min_count,
mincount_t[:, ::1] resx,
) noexcept:
Expand Down Expand Up @@ -1435,14 +1435,12 @@ cdef inline void _check_below_mincount(
out[i, j] = 0


# TODO(cython3): GH#31710 use memoryviews once cython 0.30 is released so we can
# use `const numeric_object_t[:, :] values`
@cython.wraparound(False)
@cython.boundscheck(False)
def group_last(
numeric_object_t[:, ::1] out,
int64_t[::1] counts,
ndarray[numeric_object_t, ndim=2] values,
const numeric_object_t[:, :] values,
const intp_t[::1] labels,
const uint8_t[:, :] mask,
uint8_t[:, ::1] result_mask=None,
Expand Down Expand Up @@ -1502,14 +1500,12 @@ def group_last(
)


# TODO(cython3): GH#31710 use memoryviews once cython 0.30 is released so we can
# use `const numeric_object_t[:, :] values`
@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth(
numeric_object_t[:, ::1] out,
int64_t[::1] counts,
ndarray[numeric_object_t, ndim=2] values,
const numeric_object_t[:, :] values,
const intp_t[::1] labels,
const uint8_t[:, :] mask,
uint8_t[:, ::1] result_mask=None,
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import numpy as np

from numpy cimport (
import_array,
ndarray,
uint8_t,
uint64_t,
)
Expand All @@ -23,7 +22,7 @@ from pandas._libs.util cimport is_nan

@cython.boundscheck(False)
def hash_object_array(
ndarray[object] arr, str key, str encoding="utf8"
object[:] arr, str key, str encoding="utf8"
) -> np.ndarray[np.uint64]:
"""
Parameters
Expand Down
Loading