Skip to content

Commit 7d6a80d

Browse files
committed
PERF: declare contiguity in libgroupby (pandas-dev#40295)
1 parent 2e8c134 commit 7d6a80d

File tree

4 files changed

+48
-46
lines changed

4 files changed

+48
-46
lines changed

pandas/_libs/algos_common_helper.pxi.in

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ def ensure_platform_int(object arr):
1818
if (<ndarray>arr).descr.type_num == PLATFORM_INT:
1919
return arr
2020
else:
21-
return arr.astype(np.intp)
21+
# equiv: arr.astype(np.intp)
22+
return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
2223
else:
2324
return np.array(arr, dtype=np.intp)
2425

@@ -28,7 +29,8 @@ def ensure_object(object arr):
2829
if (<ndarray>arr).descr.type_num == NPY_OBJECT:
2930
return arr
3031
else:
31-
return arr.astype(np.object_)
32+
# equiv: arr.astype(object)
33+
return cnp.PyArray_Cast(<ndarray>arr, NPY_OBJECT)
3234
else:
3335
return np.array(arr, dtype=np.object_)
3436

pandas/_libs/groupby.pyx

+42-42
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
169169

170170
@cython.boundscheck(False)
171171
@cython.wraparound(False)
172-
def group_cumprod_float64(float64_t[:, :] out,
172+
def group_cumprod_float64(float64_t[:, ::1] out,
173173
const float64_t[:, :] values,
174174
const int64_t[:] labels,
175175
int ngroups,
@@ -200,7 +200,7 @@ def group_cumprod_float64(float64_t[:, :] out,
200200
cdef:
201201
Py_ssize_t i, j, N, K, size
202202
float64_t val
203-
float64_t[:, :] accum
203+
float64_t[:, ::1] accum
204204
int64_t lab
205205

206206
N, K = (<object>values).shape
@@ -226,7 +226,7 @@ def group_cumprod_float64(float64_t[:, :] out,
226226

227227
@cython.boundscheck(False)
228228
@cython.wraparound(False)
229-
def group_cumsum(numeric[:, :] out,
229+
def group_cumsum(numeric[:, ::1] out,
230230
ndarray[numeric, ndim=2] values,
231231
const int64_t[:] labels,
232232
int ngroups,
@@ -257,7 +257,7 @@ def group_cumsum(numeric[:, :] out,
257257
cdef:
258258
Py_ssize_t i, j, N, K, size
259259
numeric val, y, t
260-
numeric[:, :] accum, compensation
260+
numeric[:, ::1] accum, compensation
261261
int64_t lab
262262

263263
N, K = (<object>values).shape
@@ -295,14 +295,14 @@ def group_cumsum(numeric[:, :] out,
295295

296296
@cython.boundscheck(False)
297297
@cython.wraparound(False)
298-
def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
298+
def group_shift_indexer(int64_t[::1] out, const int64_t[:] labels,
299299
int ngroups, int periods):
300300
cdef:
301301
Py_ssize_t N, i, j, ii
302302
int offset = 0, sign
303303
int64_t lab, idxer, idxer_slot
304-
int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
305-
int64_t[:, :] label_indexer
304+
int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64)
305+
int64_t[:, ::1] label_indexer
306306

307307
N, = (<object>labels).shape
308308

@@ -409,10 +409,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
409409

410410
@cython.boundscheck(False)
411411
@cython.wraparound(False)
412-
def group_any_all(uint8_t[:] out,
413-
const uint8_t[:] values,
412+
def group_any_all(uint8_t[::1] out,
413+
const uint8_t[::1] values,
414414
const int64_t[:] labels,
415-
const uint8_t[:] mask,
415+
const uint8_t[::1] mask,
416416
object val_test,
417417
bint skipna):
418418
"""
@@ -478,8 +478,8 @@ ctypedef fused complexfloating_t:
478478

479479
@cython.wraparound(False)
480480
@cython.boundscheck(False)
481-
def _group_add(complexfloating_t[:, :] out,
482-
int64_t[:] counts,
481+
def _group_add(complexfloating_t[:, ::1] out,
482+
int64_t[::1] counts,
483483
ndarray[complexfloating_t, ndim=2] values,
484484
const int64_t[:] labels,
485485
Py_ssize_t min_count=0):
@@ -489,8 +489,8 @@ def _group_add(complexfloating_t[:, :] out,
489489
cdef:
490490
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
491491
complexfloating_t val, count, t, y
492-
complexfloating_t[:, :] sumx, compensation
493-
int64_t[:, :] nobs
492+
complexfloating_t[:, ::1] sumx, compensation
493+
int64_t[:, ::1] nobs
494494
Py_ssize_t len_values = len(values), len_labels = len(labels)
495495

496496
if len_values != len_labels:
@@ -537,8 +537,8 @@ group_add_complex128 = _group_add['double complex']
537537

538538
@cython.wraparound(False)
539539
@cython.boundscheck(False)
540-
def _group_prod(floating[:, :] out,
541-
int64_t[:] counts,
540+
def _group_prod(floating[:, ::1] out,
541+
int64_t[::1] counts,
542542
ndarray[floating, ndim=2] values,
543543
const int64_t[:] labels,
544544
Py_ssize_t min_count=0):
@@ -548,8 +548,8 @@ def _group_prod(floating[:, :] out,
548548
cdef:
549549
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
550550
floating val, count
551-
floating[:, :] prodx
552-
int64_t[:, :] nobs
551+
floating[:, ::1] prodx
552+
int64_t[:, ::1] nobs
553553
Py_ssize_t len_values = len(values), len_labels = len(labels)
554554

555555
if len_values != len_labels:
@@ -590,17 +590,17 @@ group_prod_float64 = _group_prod['double']
590590
@cython.wraparound(False)
591591
@cython.boundscheck(False)
592592
@cython.cdivision(True)
593-
def _group_var(floating[:, :] out,
594-
int64_t[:] counts,
593+
def _group_var(floating[:, ::1] out,
594+
int64_t[::1] counts,
595595
ndarray[floating, ndim=2] values,
596596
const int64_t[:] labels,
597597
Py_ssize_t min_count=-1,
598598
int64_t ddof=1):
599599
cdef:
600600
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
601601
floating val, ct, oldmean
602-
floating[:, :] mean
603-
int64_t[:, :] nobs
602+
floating[:, ::1] mean
603+
int64_t[:, ::1] nobs
604604
Py_ssize_t len_values = len(values), len_labels = len(labels)
605605

606606
assert min_count == -1, "'min_count' only used in add and prod"
@@ -648,16 +648,16 @@ group_var_float64 = _group_var['double']
648648

649649
@cython.wraparound(False)
650650
@cython.boundscheck(False)
651-
def _group_mean(floating[:, :] out,
652-
int64_t[:] counts,
651+
def _group_mean(floating[:, ::1] out,
652+
int64_t[::1] counts,
653653
ndarray[floating, ndim=2] values,
654-
const int64_t[:] labels,
654+
const int64_t[::1] labels,
655655
Py_ssize_t min_count=-1):
656656
cdef:
657657
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
658658
floating val, count, y, t
659-
floating[:, :] sumx, compensation
660-
int64_t[:, :] nobs
659+
floating[:, ::1] sumx, compensation
660+
int64_t[:, ::1] nobs
661661
Py_ssize_t len_values = len(values), len_labels = len(labels)
662662

663663
assert min_count == -1, "'min_count' only used in add and prod"
@@ -704,8 +704,8 @@ group_mean_float64 = _group_mean['double']
704704

705705
@cython.wraparound(False)
706706
@cython.boundscheck(False)
707-
def _group_ohlc(floating[:, :] out,
708-
int64_t[:] counts,
707+
def _group_ohlc(floating[:, ::1] out,
708+
int64_t[::1] counts,
709709
ndarray[floating, ndim=2] values,
710710
const int64_t[:] labels,
711711
Py_ssize_t min_count=-1):
@@ -898,8 +898,8 @@ cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil:
898898
# use `const rank_t[:, :] values`
899899
@cython.wraparound(False)
900900
@cython.boundscheck(False)
901-
def group_last(rank_t[:, :] out,
902-
int64_t[:] counts,
901+
def group_last(rank_t[:, ::1] out,
902+
int64_t[::1] counts,
903903
ndarray[rank_t, ndim=2] values,
904904
const int64_t[:] labels,
905905
Py_ssize_t min_count=-1):
@@ -990,8 +990,8 @@ def group_last(rank_t[:, :] out,
990990
# use `const rank_t[:, :] values`
991991
@cython.wraparound(False)
992992
@cython.boundscheck(False)
993-
def group_nth(rank_t[:, :] out,
994-
int64_t[:] counts,
993+
def group_nth(rank_t[:, ::1] out,
994+
int64_t[::1] counts,
995995
ndarray[rank_t, ndim=2] values,
996996
const int64_t[:] labels,
997997
int64_t min_count=-1, int64_t rank=1
@@ -1083,7 +1083,7 @@ def group_nth(rank_t[:, :] out,
10831083

10841084
@cython.boundscheck(False)
10851085
@cython.wraparound(False)
1086-
def group_rank(float64_t[:, :] out,
1086+
def group_rank(float64_t[:, ::1] out,
10871087
ndarray[rank_t, ndim=2] values,
10881088
const int64_t[:] labels,
10891089
int ngroups,
@@ -1154,8 +1154,8 @@ ctypedef fused groupby_t:
11541154

11551155
@cython.wraparound(False)
11561156
@cython.boundscheck(False)
1157-
def group_max(groupby_t[:, :] out,
1158-
int64_t[:] counts,
1157+
def group_max(groupby_t[:, ::1] out,
1158+
int64_t[::1] counts,
11591159
ndarray[groupby_t, ndim=2] values,
11601160
const int64_t[:] labels,
11611161
Py_ssize_t min_count=-1):
@@ -1167,7 +1167,7 @@ def group_max(groupby_t[:, :] out,
11671167
groupby_t val, count, nan_val
11681168
ndarray[groupby_t, ndim=2] maxx
11691169
bint runtime_error = False
1170-
int64_t[:, :] nobs
1170+
int64_t[:, ::1] nobs
11711171

11721172
# TODO(cython 3.0):
11731173
# Instead of `labels.shape[0]` use `len(labels)`
@@ -1229,8 +1229,8 @@ def group_max(groupby_t[:, :] out,
12291229

12301230
@cython.wraparound(False)
12311231
@cython.boundscheck(False)
1232-
def group_min(groupby_t[:, :] out,
1233-
int64_t[:] counts,
1232+
def group_min(groupby_t[:, ::1] out,
1233+
int64_t[::1] counts,
12341234
ndarray[groupby_t, ndim=2] values,
12351235
const int64_t[:] labels,
12361236
Py_ssize_t min_count=-1):
@@ -1242,7 +1242,7 @@ def group_min(groupby_t[:, :] out,
12421242
groupby_t val, count, nan_val
12431243
ndarray[groupby_t, ndim=2] minx
12441244
bint runtime_error = False
1245-
int64_t[:, :] nobs
1245+
int64_t[:, ::1] nobs
12461246

12471247
# TODO(cython 3.0):
12481248
# Instead of `labels.shape[0]` use `len(labels)`
@@ -1302,7 +1302,7 @@ def group_min(groupby_t[:, :] out,
13021302

13031303
@cython.boundscheck(False)
13041304
@cython.wraparound(False)
1305-
def group_cummin(groupby_t[:, :] out,
1305+
def group_cummin(groupby_t[:, ::1] out,
13061306
ndarray[groupby_t, ndim=2] values,
13071307
const int64_t[:] labels,
13081308
int ngroups,
@@ -1362,7 +1362,7 @@ def group_cummin(groupby_t[:, :] out,
13621362

13631363
@cython.boundscheck(False)
13641364
@cython.wraparound(False)
1365-
def group_cummax(groupby_t[:, :] out,
1365+
def group_cummax(groupby_t[:, ::1] out,
13661366
ndarray[groupby_t, ndim=2] values,
13671367
const int64_t[:] labels,
13681368
int ngroups,

pandas/_libs/missing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def _create_binary_propagating_op(name, is_divmod=False):
390390
return method
391391

392392

393-
def _create_unary_propagating_op(name):
393+
def _create_unary_propagating_op(name: str):
394394
def method(self):
395395
return NA
396396

pandas/core/groupby/ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ def _cython_operation(
652652
result = self._aggregate(result, counts, values, codes, func, min_count)
653653
elif kind == "transform":
654654
result = maybe_fill(
655-
np.empty_like(values, dtype=out_dtype), fill_value=np.nan
655+
np.empty(values.shape, dtype=out_dtype), fill_value=np.nan
656656
)
657657

658658
# TODO: min_count

0 commit comments

Comments
 (0)