Skip to content

Commit a93a1a7

Browse files
jbrockmendel authored and JulianWgs committed
TYP: libgroupby int64->intp (pandas-dev#40635)
1 parent fc23ebd commit a93a1a7

14 files changed

+106 -100 lines changed

pandas/_libs/algos.pyx

+5 -4
Original file line number | Diff line number | Diff line change
@@ -490,7 +490,7 @@ def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarra
490490
int64_t total_discordant = 0
491491
float64_t kendall_tau
492492
int64_t n_obs
493-
const int64_t[:] labels_n
493+
const intp_t[:] labels_n
494494

495495
N, K = (<object>mat).shape
496496

@@ -499,7 +499,7 @@ def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarra
499499

500500
ranked_mat = np.empty((N, K), dtype=np.float64)
501501
# For compatibility when calling rank_1d
502-
labels_n = np.zeros(N, dtype=np.int64)
502+
labels_n = np.zeros(N, dtype=np.intp)
503503

504504
for i in range(K):
505505
ranked_mat[:, i] = rank_1d(mat[:, i], labels_n)
@@ -961,7 +961,7 @@ ctypedef fused rank_t:
961961
@cython.boundscheck(False)
962962
def rank_1d(
963963
ndarray[rank_t, ndim=1] values,
964-
const int64_t[:] labels,
964+
const intp_t[:] labels,
965965
ties_method="average",
966966
bint ascending=True,
967967
bint pct=False,
@@ -973,7 +973,8 @@ def rank_1d(
973973
Parameters
974974
----------
975975
values : array of rank_t values to be ranked
976-
labels : array containing unique label for each group, with its ordering
976+
labels : np.ndarray[np.intp]
977+
Array containing unique label for each group, with its ordering
977978
matching up to the corresponding record in `values`. If not called
978979
from a groupby operation, will be an array of 0's
979980
ties_method : {'average', 'min', 'max', 'first', 'dense'}, default

pandas/_libs/groupby.pyx

+46 -42
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
105105
def group_median_float64(ndarray[float64_t, ndim=2] out,
106106
ndarray[int64_t] counts,
107107
ndarray[float64_t, ndim=2] values,
108-
ndarray[int64_t] labels,
108+
ndarray[intp_t] labels,
109109
Py_ssize_t min_count=-1):
110110
"""
111111
Only aggregates on axis=0
@@ -122,7 +122,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
122122
ngroups = len(counts)
123123
N, K = (<object>values).shape
124124

125-
indexer, _counts = groupsort_indexer(ensure_platform_int(labels), ngroups)
125+
indexer, _counts = groupsort_indexer(labels, ngroups)
126126
counts[:] = _counts[1:]
127127

128128
data = np.empty((K, N), dtype=np.float64)
@@ -145,7 +145,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
145145
@cython.wraparound(False)
146146
def group_cumprod_float64(float64_t[:, ::1] out,
147147
const float64_t[:, :] values,
148-
const int64_t[:] labels,
148+
const intp_t[:] labels,
149149
int ngroups,
150150
bint is_datetimelike,
151151
bint skipna=True):
@@ -158,7 +158,7 @@ def group_cumprod_float64(float64_t[:, ::1] out,
158158
Array to store cumprod in.
159159
values : float64 array
160160
Values to take cumprod of.
161-
labels : int64 array
161+
labels : np.ndarray[np.intp]
162162
Labels to group by.
163163
ngroups : int
164164
Number of groups, larger than all entries of `labels`.
@@ -175,7 +175,7 @@ def group_cumprod_float64(float64_t[:, ::1] out,
175175
Py_ssize_t i, j, N, K, size
176176
float64_t val
177177
float64_t[:, ::1] accum
178-
int64_t lab
178+
intp_t lab
179179

180180
N, K = (<object>values).shape
181181
accum = np.ones((ngroups, K), dtype=np.float64)
@@ -202,7 +202,7 @@ def group_cumprod_float64(float64_t[:, ::1] out,
202202
@cython.wraparound(False)
203203
def group_cumsum(numeric[:, ::1] out,
204204
ndarray[numeric, ndim=2] values,
205-
const int64_t[:] labels,
205+
const intp_t[:] labels,
206206
int ngroups,
207207
is_datetimelike,
208208
bint skipna=True):
@@ -215,7 +215,7 @@ def group_cumsum(numeric[:, ::1] out,
215215
Array to store cumsum in.
216216
values : array
217217
Values to take cumsum of.
218-
labels : int64 array
218+
labels : np.ndarray[np.intp]
219219
Labels to group by.
220220
ngroups : int
221221
Number of groups, larger than all entries of `labels`.
@@ -232,7 +232,7 @@ def group_cumsum(numeric[:, ::1] out,
232232
Py_ssize_t i, j, N, K, size
233233
numeric val, y, t
234234
numeric[:, ::1] accum, compensation
235-
int64_t lab
235+
intp_t lab
236236

237237
N, K = (<object>values).shape
238238
accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)
@@ -269,12 +269,12 @@ def group_cumsum(numeric[:, ::1] out,
269269

270270
@cython.boundscheck(False)
271271
@cython.wraparound(False)
272-
def group_shift_indexer(int64_t[::1] out, const int64_t[:] labels,
272+
def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
273273
int ngroups, int periods):
274274
cdef:
275-
Py_ssize_t N, i, j, ii
275+
Py_ssize_t N, i, j, ii, lab
276276
int offset = 0, sign
277-
int64_t lab, idxer, idxer_slot
277+
int64_t idxer, idxer_slot
278278
int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64)
279279
int64_t[:, ::1] label_indexer
280280

@@ -321,7 +321,7 @@ def group_shift_indexer(int64_t[::1] out, const int64_t[:] labels,
321321

322322
@cython.wraparound(False)
323323
@cython.boundscheck(False)
324-
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
324+
def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
325325
ndarray[uint8_t] mask, object direction,
326326
int64_t limit, bint dropna):
327327
"""
@@ -331,8 +331,9 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
331331
----------
332332
out : array of int64_t values which this method will write its results to
333333
Missing values will be written to with a value of -1
334-
labels : array containing unique label for each group, with its ordering
335-
matching up to the corresponding record in `values`
334+
labels : np.ndarray[np.intp]
335+
Array containing unique label for each group, with its ordering
336+
matching up to the corresponding record in `values`.
336337
mask : array of int64_t values where a 1 indicates a missing value
337338
direction : {'ffill', 'bfill'}
338339
Direction for fill to be applied (forwards or backwards, respectively)
@@ -344,17 +345,18 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
344345
This method modifies the `out` parameter rather than returning an object
345346
"""
346347
cdef:
347-
Py_ssize_t i, N
348-
int64_t[:] sorted_labels
349-
int64_t idx, curr_fill_idx=-1, filled_vals=0
348+
Py_ssize_t i, N, idx
349+
intp_t[:] sorted_labels
350+
intp_t curr_fill_idx=-1
351+
int64_t filled_vals = 0
350352

351353
N = len(out)
352354

353355
# Make sure all arrays are the same size
354356
assert N == len(labels) == len(mask)
355357

356358
sorted_labels = np.argsort(labels, kind='mergesort').astype(
357-
np.int64, copy=False)
359+
np.intp, copy=False)
358360
if direction == 'bfill':
359361
sorted_labels = sorted_labels[::-1]
360362

@@ -385,7 +387,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
385387
@cython.wraparound(False)
386388
def group_any_all(uint8_t[::1] out,
387389
const uint8_t[::1] values,
388-
const int64_t[:] labels,
390+
const intp_t[:] labels,
389391
const uint8_t[::1] mask,
390392
object val_test,
391393
bint skipna):
@@ -395,7 +397,8 @@ def group_any_all(uint8_t[::1] out,
395397
Parameters
396398
----------
397399
out : array of values which this method will write its results to
398-
labels : array containing unique label for each group, with its
400+
labels : np.ndarray[np.intp]
401+
Array containing unique label for each group, with its
399402
ordering matching up to the corresponding record in `values`
400403
values : array containing the truth value of each element
401404
mask : array indicating whether a value is na or not
@@ -411,7 +414,7 @@ def group_any_all(uint8_t[::1] out,
411414
"""
412415
cdef:
413416
Py_ssize_t i, N = len(labels)
414-
int64_t lab
417+
intp_t lab
415418
uint8_t flag_val
416419

417420
if val_test == 'all':
@@ -455,7 +458,7 @@ ctypedef fused complexfloating_t:
455458
def _group_add(complexfloating_t[:, ::1] out,
456459
int64_t[::1] counts,
457460
ndarray[complexfloating_t, ndim=2] values,
458-
const int64_t[:] labels,
461+
const intp_t[:] labels,
459462
Py_ssize_t min_count=0):
460463
"""
461464
Only aggregates on axis=0 using Kahan summation
@@ -514,7 +517,7 @@ group_add_complex128 = _group_add['double complex']
514517
def _group_prod(floating[:, ::1] out,
515518
int64_t[::1] counts,
516519
ndarray[floating, ndim=2] values,
517-
const int64_t[:] labels,
520+
const intp_t[:] labels,
518521
Py_ssize_t min_count=0):
519522
"""
520523
Only aggregates on axis=0
@@ -567,7 +570,7 @@ group_prod_float64 = _group_prod['double']
567570
def _group_var(floating[:, ::1] out,
568571
int64_t[::1] counts,
569572
ndarray[floating, ndim=2] values,
570-
const int64_t[:] labels,
573+
const intp_t[:] labels,
571574
Py_ssize_t min_count=-1,
572575
int64_t ddof=1):
573576
cdef:
@@ -625,7 +628,7 @@ group_var_float64 = _group_var['double']
625628
def _group_mean(floating[:, ::1] out,
626629
int64_t[::1] counts,
627630
ndarray[floating, ndim=2] values,
628-
const int64_t[::1] labels,
631+
const intp_t[::1] labels,
629632
Py_ssize_t min_count=-1):
630633
cdef:
631634
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
@@ -681,7 +684,7 @@ group_mean_float64 = _group_mean['double']
681684
def _group_ohlc(floating[:, ::1] out,
682685
int64_t[::1] counts,
683686
ndarray[floating, ndim=2] values,
684-
const int64_t[:] labels,
687+
const intp_t[:] labels,
685688
Py_ssize_t min_count=-1):
686689
"""
687690
Only aggregates on axis=0
@@ -732,7 +735,7 @@ group_ohlc_float64 = _group_ohlc['double']
732735
@cython.wraparound(False)
733736
def group_quantile(ndarray[float64_t] out,
734737
ndarray[numeric, ndim=1] values,
735-
ndarray[int64_t] labels,
738+
ndarray[intp_t] labels,
736739
ndarray[uint8_t] mask,
737740
float64_t q,
738741
object interpolation):
@@ -743,7 +746,7 @@ def group_quantile(ndarray[float64_t] out,
743746
----------
744747
out : ndarray
745748
Array of aggregated values that will be written to.
746-
labels : ndarray
749+
labels : ndarray[np.intp]
747750
Array containing the unique group labels.
748751
values : ndarray
749752
Array containing the values to apply the function against.
@@ -758,7 +761,7 @@ def group_quantile(ndarray[float64_t] out,
758761
cdef:
759762
Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz
760763
Py_ssize_t grp_start=0, idx=0
761-
int64_t lab
764+
intp_t lab
762765
uint8_t interp
763766
float64_t q_idx, frac, val, next_val
764767
ndarray[int64_t] counts, non_na_counts, sort_arr
@@ -875,7 +878,7 @@ cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil:
875878
def group_last(rank_t[:, ::1] out,
876879
int64_t[::1] counts,
877880
ndarray[rank_t, ndim=2] values,
878-
const int64_t[:] labels,
881+
const intp_t[:] labels,
879882
Py_ssize_t min_count=-1):
880883
"""
881884
Only aggregates on axis=0
@@ -967,7 +970,7 @@ def group_last(rank_t[:, ::1] out,
967970
def group_nth(rank_t[:, ::1] out,
968971
int64_t[::1] counts,
969972
ndarray[rank_t, ndim=2] values,
970-
const int64_t[:] labels,
973+
const intp_t[:] labels,
971974
int64_t min_count=-1, int64_t rank=1
972975
):
973976
"""
@@ -1059,7 +1062,7 @@ def group_nth(rank_t[:, ::1] out,
10591062
@cython.wraparound(False)
10601063
def group_rank(float64_t[:, ::1] out,
10611064
ndarray[rank_t, ndim=2] values,
1062-
const int64_t[:] labels,
1065+
const intp_t[:] labels,
10631066
int ngroups,
10641067
bint is_datetimelike, object ties_method="average",
10651068
bint ascending=True, bint pct=False, object na_option="keep"):
@@ -1070,7 +1073,8 @@ def group_rank(float64_t[:, ::1] out,
10701073
----------
10711074
out : array of float64_t values which this method will write its results to
10721075
values : array of rank_t values to be ranked
1073-
labels : array containing unique label for each group, with its ordering
1076+
labels : np.ndarray[np.intp]
1077+
Array containing unique label for each group, with its ordering
10741078
matching up to the corresponding record in `values`
10751079
ngroups : int
10761080
This parameter is not used, is needed to match signatures of other
@@ -1131,7 +1135,7 @@ ctypedef fused groupby_t:
11311135
cdef group_min_max(groupby_t[:, ::1] out,
11321136
int64_t[::1] counts,
11331137
ndarray[groupby_t, ndim=2] values,
1134-
const int64_t[:] labels,
1138+
const intp_t[:] labels,
11351139
Py_ssize_t min_count=-1,
11361140
bint compute_max=True):
11371141
"""
@@ -1145,7 +1149,7 @@ cdef group_min_max(groupby_t[:, ::1] out,
11451149
Input as a zeroed array, populated by group sizes during algorithm
11461150
values : array
11471151
Values to find column-wise min/max of.
1148-
labels : int64 array
1152+
labels : np.ndarray[np.intp]
11491153
Labels to group by.
11501154
min_count : Py_ssize_t, default -1
11511155
The minimum number of non-NA group elements, NA result if threshold
@@ -1230,7 +1234,7 @@ cdef group_min_max(groupby_t[:, ::1] out,
12301234
def group_max(groupby_t[:, ::1] out,
12311235
int64_t[::1] counts,
12321236
ndarray[groupby_t, ndim=2] values,
1233-
const int64_t[:] labels,
1237+
const intp_t[:] labels,
12341238
Py_ssize_t min_count=-1):
12351239
"""See group_min_max.__doc__"""
12361240
group_min_max(out, counts, values, labels, min_count=min_count, compute_max=True)
@@ -1241,7 +1245,7 @@ def group_max(groupby_t[:, ::1] out,
12411245
def group_min(groupby_t[:, ::1] out,
12421246
int64_t[::1] counts,
12431247
ndarray[groupby_t, ndim=2] values,
1244-
const int64_t[:] labels,
1248+
const intp_t[:] labels,
12451249
Py_ssize_t min_count=-1):
12461250
"""See group_min_max.__doc__"""
12471251
group_min_max(out, counts, values, labels, min_count=min_count, compute_max=False)
@@ -1251,7 +1255,7 @@ def group_min(groupby_t[:, ::1] out,
12511255
@cython.wraparound(False)
12521256
def group_cummin_max(groupby_t[:, ::1] out,
12531257
ndarray[groupby_t, ndim=2] values,
1254-
const int64_t[:] labels,
1258+
const intp_t[:] labels,
12551259
int ngroups,
12561260
bint is_datetimelike,
12571261
bint compute_max):
@@ -1264,7 +1268,7 @@ def group_cummin_max(groupby_t[:, ::1] out,
12641268
Array to store cummin/max in.
12651269
values : array
12661270
Values to take cummin/max of.
1267-
labels : int64 array
1271+
labels : np.ndarray[np.intp]
12681272
Labels to group by.
12691273
ngroups : int
12701274
Number of groups, larger than all entries of `labels`.
@@ -1282,7 +1286,7 @@ def group_cummin_max(groupby_t[:, ::1] out,
12821286
Py_ssize_t i, j, N, K, size
12831287
groupby_t val, mval
12841288
ndarray[groupby_t, ndim=2] accum
1285-
int64_t lab
1289+
intp_t lab
12861290

12871291
N, K = (<object>values).shape
12881292
accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype)
@@ -1319,7 +1323,7 @@ def group_cummin_max(groupby_t[:, ::1] out,
13191323
@cython.wraparound(False)
13201324
def group_cummin(groupby_t[:, ::1] out,
13211325
ndarray[groupby_t, ndim=2] values,
1322-
const int64_t[:] labels,
1326+
const intp_t[:] labels,
13231327
int ngroups,
13241328
bint is_datetimelike):
13251329
"""See group_cummin_max.__doc__"""
@@ -1330,7 +1334,7 @@ def group_cummin(groupby_t[:, ::1] out,
13301334
@cython.wraparound(False)
13311335
def group_cummax(groupby_t[:, ::1] out,
13321336
ndarray[groupby_t, ndim=2] values,
1333-
const int64_t[:] labels,
1337+
const intp_t[:] labels,
13341338
int ngroups,
13351339
bint is_datetimelike):
13361340
"""See group_cummin_max.__doc__"""

0 commit comments

Comments (0)