Skip to content

Commit 5b48692

Browse files
jbrockmendel authored and Nico Cernek committed
REF: avoid getattr pattern for rank_1d, rank_2d (pandas-dev#29137)
1 parent 0bc506b commit 5b48692

File tree

4 files changed, with 33 additions (+33) and 49 deletions (-49)

pandas/_libs/algos.pyx

+5-17
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,7 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
311311
ranked_mat = np.empty((N, K), dtype=np.float64)
312312

313313
for i in range(K):
314-
ranked_mat[:, i] = rank_1d_float64(mat[:, i])
314+
ranked_mat[:, i] = rank_1d(mat[:, i])
315315

316316
for xi in range(K):
317317
for yi in range(xi + 1):
@@ -337,8 +337,8 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
337337
j += 1
338338

339339
if not all_ranks:
340-
maskedx = rank_1d_float64(maskedx)
341-
maskedy = rank_1d_float64(maskedy)
340+
maskedx = rank_1d(maskedx)
341+
maskedy = rank_1d(maskedy)
342342

343343
mean = (nobs + 1) / 2.
344344

@@ -1005,12 +1005,6 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
10051005
return ranks
10061006

10071007

1008-
rank_1d_object = rank_1d["object"]
1009-
rank_1d_float64 = rank_1d["float64_t"]
1010-
rank_1d_uint64 = rank_1d["uint64_t"]
1011-
rank_1d_int64 = rank_1d["int64_t"]
1012-
1013-
10141008
def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
10151009
ascending=True, na_option='keep', pct=False):
10161010
"""
@@ -1083,8 +1077,8 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
10831077
except TypeError:
10841078
values = in_arr
10851079
for i in range(len(values)):
1086-
ranks[i] = rank_1d_object(in_arr[i], ties_method=ties_method,
1087-
ascending=ascending, pct=pct)
1080+
ranks[i] = rank_1d(in_arr[i], ties_method=ties_method,
1081+
ascending=ascending, pct=pct)
10881082
if axis == 0:
10891083
return ranks.T
10901084
else:
@@ -1179,12 +1173,6 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
11791173
return ranks
11801174

11811175

1182-
rank_2d_object = rank_2d["object"]
1183-
rank_2d_float64 = rank_2d["float64_t"]
1184-
rank_2d_uint64 = rank_2d["uint64_t"]
1185-
rank_2d_int64 = rank_2d["int64_t"]
1186-
1187-
11881176
# generated from template
11891177
include "algos_common_helper.pxi"
11901178
include "algos_take_helper.pxi"

pandas/_libs/groupby.pyx

+16-11
Original file line number | Diff line number | Diff line change
@@ -455,7 +455,7 @@ def _group_add(complexfloating_t[:, :] out,
455455
if len(values) != len(labels):
456456
raise ValueError("len(index) != len(labels)")
457457

458-
nobs = np.zeros((len(out), out.shape[1]), dtype=np.int64)
458+
nobs = np.zeros((<object>out).shape, dtype=np.int64)
459459
sumx = np.zeros_like(out)
460460

461461
N, K = (<object>values).shape
@@ -507,12 +507,13 @@ def _group_prod(floating[:, :] out,
507507
cdef:
508508
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
509509
floating val, count
510-
floating[:, :] prodx, nobs
510+
floating[:, :] prodx
511+
int64_t[:, :] nobs
511512

512513
if not len(values) == len(labels):
513514
raise ValueError("len(index) != len(labels)")
514515

515-
nobs = np.zeros_like(out)
516+
nobs = np.zeros((<object>out).shape, dtype=np.int64)
516517
prodx = np.ones_like(out)
517518

518519
N, K = (<object>values).shape
@@ -555,14 +556,15 @@ def _group_var(floating[:, :] out,
555556
cdef:
556557
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
557558
floating val, ct, oldmean
558-
floating[:, :] nobs, mean
559+
floating[:, :] mean
560+
int64_t[:, :] nobs
559561

560562
assert min_count == -1, "'min_count' only used in add and prod"
561563

562564
if not len(values) == len(labels):
563565
raise ValueError("len(index) != len(labels)")
564566

565-
nobs = np.zeros_like(out)
567+
nobs = np.zeros((<object>out).shape, dtype=np.int64)
566568
mean = np.zeros_like(out)
567569

568570
N, K = (<object>values).shape
@@ -610,14 +612,15 @@ def _group_mean(floating[:, :] out,
610612
cdef:
611613
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
612614
floating val, count
613-
floating[:, :] sumx, nobs
615+
floating[:, :] sumx
616+
int64_t[:, :] nobs
614617

615618
assert min_count == -1, "'min_count' only used in add and prod"
616619

617620
if not len(values) == len(labels):
618621
raise ValueError("len(index) != len(labels)")
619622

620-
nobs = np.zeros_like(out)
623+
nobs = np.zeros((<object>out).shape, dtype=np.int64)
621624
sumx = np.zeros_like(out)
622625

623626
N, K = (<object>values).shape
@@ -1243,15 +1246,16 @@ def group_max(groupby_t[:, :] out,
12431246
cdef:
12441247
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
12451248
groupby_t val, count, nan_val
1246-
ndarray[groupby_t, ndim=2] maxx, nobs
1249+
ndarray[groupby_t, ndim=2] maxx
12471250
bint runtime_error = False
1251+
int64_t[:, :] nobs
12481252

12491253
assert min_count == -1, "'min_count' only used in add and prod"
12501254

12511255
if not len(values) == len(labels):
12521256
raise AssertionError("len(index) != len(labels)")
12531257

1254-
nobs = np.zeros_like(out)
1258+
nobs = np.zeros((<object>out).shape, dtype=np.int64)
12551259

12561260
maxx = np.empty_like(out)
12571261
if groupby_t is int64_t:
@@ -1314,15 +1318,16 @@ def group_min(groupby_t[:, :] out,
13141318
cdef:
13151319
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
13161320
groupby_t val, count, nan_val
1317-
ndarray[groupby_t, ndim=2] minx, nobs
1321+
ndarray[groupby_t, ndim=2] minx
13181322
bint runtime_error = False
1323+
int64_t[:, :] nobs
13191324

13201325
assert min_count == -1, "'min_count' only used in add and prod"
13211326

13221327
if not len(values) == len(labels):
13231328
raise AssertionError("len(index) != len(labels)")
13241329

1325-
nobs = np.zeros_like(out)
1330+
nobs = np.zeros((<object>out).shape, dtype=np.int64)
13261331

13271332
minx = np.empty_like(out)
13281333
if groupby_t is int64_t:

pandas/core/algorithms.py

+11-20
Original file line number | Diff line number | Diff line change
@@ -245,11 +245,17 @@ def _get_hashtable_algo(values):
245245
return (htable, table, values, dtype, ndtype)
246246

247247

248-
def _get_data_algo(values, func_map):
248+
def _get_values_for_rank(values):
249249
if is_categorical_dtype(values):
250250
values = values._values_for_rank()
251251

252252
values, dtype, ndtype = _ensure_data(values)
253+
return values, dtype, ndtype
254+
255+
256+
def _get_data_algo(values, func_map):
257+
values, dtype, ndtype = _get_values_for_rank(values)
258+
253259
if ndtype == "object":
254260

255261
# it's cheaper to use a String Hash Table than Object; we infer
@@ -900,17 +906,17 @@ def rank(values, axis=0, method="average", na_option="keep", ascending=True, pct
900906
(e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
901907
"""
902908
if values.ndim == 1:
903-
f, values = _get_data_algo(values, _rank1d_functions)
904-
ranks = f(
909+
values, _, _ = _get_values_for_rank(values)
910+
ranks = algos.rank_1d(
905911
values,
906912
ties_method=method,
907913
ascending=ascending,
908914
na_option=na_option,
909915
pct=pct,
910916
)
911917
elif values.ndim == 2:
912-
f, values = _get_data_algo(values, _rank2d_functions)
913-
ranks = f(
918+
values, _, _ = _get_values_for_rank(values)
919+
ranks = algos.rank_2d(
914920
values,
915921
axis=axis,
916922
ties_method=method,
@@ -1000,21 +1006,6 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None):
10001006
return arr + b
10011007

10021008

1003-
_rank1d_functions = {
1004-
"float64": algos.rank_1d_float64,
1005-
"int64": algos.rank_1d_int64,
1006-
"uint64": algos.rank_1d_uint64,
1007-
"object": algos.rank_1d_object,
1008-
}
1009-
1010-
_rank2d_functions = {
1011-
"float64": algos.rank_2d_float64,
1012-
"int64": algos.rank_2d_int64,
1013-
"uint64": algos.rank_2d_uint64,
1014-
"object": algos.rank_2d_object,
1015-
}
1016-
1017-
10181009
def quantile(x, q, interpolation_method="fraction"):
10191010
"""
10201011
Compute sample quantile or quantiles of the input array. For example, q=0.5

pandas/tests/test_algos.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1619,7 +1619,7 @@ def test_scipy_compat(self):
16191619
def _check(arr):
16201620
mask = ~np.isfinite(arr)
16211621
arr = arr.copy()
1622-
result = libalgos.rank_1d_float64(arr)
1622+
result = libalgos.rank_1d(arr)
16231623
arr[mask] = np.inf
16241624
exp = rankdata(arr)
16251625
exp[mask] = np.nan

0 commit comments

Comments
 (0)