
Commit 23cbc47

BUG: groupby.rank for dt64tz, period dtypes (#38187)

1 parent 4749fd6

File tree: 4 files changed, +79 -6 lines

doc/source/whatsnew/v1.2.0.rst (+1)

@@ -768,6 +768,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
 - Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
 - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`)
+- Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`)

 Reshaping
 ^^^^^^^^^
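
A minimal sketch of the behavior this entry describes (the column names and data here are illustrative, not from the commit):

import pandas as pd

# A tz-aware datetime column, grouped and ranked.  Before this fix the float
# ranks were cast back to datetime64[ns, US/Pacific], producing bogus
# timestamps; with the fix the result keeps float64 dtype.
df = pd.DataFrame(
    {
        "key": ["a", "a", "b"],
        "val": pd.to_datetime(
            ["2018-01-02", "2018-01-08", "2018-01-02"]
        ).tz_localize("US/Pacific"),
    }
)

result = df.groupby("key").rank()
print(result["val"].dtype)  # float64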

pandas/core/groupby/groupby.py (+1 -1)

@@ -995,7 +995,7 @@ def _cython_transform(

         try:
             result, _ = self.grouper._cython_operation(
-                "transform", obj.values, how, axis, **kwargs
+                "transform", obj._values, how, axis, **kwargs
             )
         except NotImplementedError:
             continue
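
The one-line change matters because .values and ._values differ for extension dtypes: for a tz-aware Series, .values densifies to a timezone-naive numpy array, while the internal ._values returns the backing DatetimeArray, which _cython_operation can then route through the ExtensionArray path (see _ea_wrap_cython_operation below). A quick illustration (._values is internal pandas API):

import pandas as pd

ser = pd.Series(
    pd.to_datetime(["2018-01-02", "2018-01-06"]).tz_localize("US/Pacific")
)

# .values converts to a plain numpy datetime64[ns] array (UTC wall times),
# silently dropping the timezone.
print(type(ser.values), ser.values.dtype)  # <class 'numpy.ndarray'> datetime64[ns]

# ._values keeps the DatetimeArray, preserving the dtype for dispatch.
print(type(ser._values).__name__)  # DatetimeArray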

pandas/core/groupby/ops.py (+4 -1)

@@ -491,8 +491,11 @@ def _ea_wrap_cython_operation(
             res_values, names = self._cython_operation(
                 kind, values, how, axis, min_count, **kwargs
             )
+            if how in ["rank"]:
+                # preserve float64 dtype
+                return res_values, names
+
             res_values = res_values.astype("i8", copy=False)
-            # FIXME: this is wrong for rank, but not tested.
             result = type(orig_values)._simple_new(res_values, dtype=orig_values.dtype)
             return result, names
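
The early return exists because rank, unlike the other kernels handled here, produces results that cannot be expressed in the input dtype: ranks are float64, fractional under tied values and NaN for missing ones. The generic wrap-up below the new branch casts through "i8" and reinterprets those integers as ordinals of the original datetime/period dtype, which is exactly the miscast the removed FIXME pointed at. A small demonstration of why that round-trip is wrong:

import numpy as np

# float64 ranks as the cython kernel returns them: averaged ties, NaN for
# missing values.
ranks = np.array([2.0, 2.0, 5.0, 2.0, np.nan])

# Casting through "i8" turns rank 2.0 into ordinal 2 (the timestamp
# 1970-01-01 00:00:00.000000002 for datetime64[ns]), and NaN into an
# arbitrary integer (typically INT64_MIN, with a numpy invalid-value warning).
print(ranks.astype("i8"))  # e.g. [2 2 5 2 -9223372036854775808]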

pandas/tests/groupby/test_rank.py (+73 -4)

@@ -42,15 +42,40 @@ def test_rank_apply():
 @pytest.mark.parametrize(
     "vals",
     [
-        [2, 2, 8, 2, 6],
+        np.array([2, 2, 8, 2, 6], dtype=dtype)
+        for dtype in ["i8", "i4", "i2", "i1", "u8", "u4", "u2", "u1", "f8", "f4", "f2"]
+    ]
+    + [
         [
             pd.Timestamp("2018-01-02"),
             pd.Timestamp("2018-01-02"),
             pd.Timestamp("2018-01-08"),
             pd.Timestamp("2018-01-02"),
             pd.Timestamp("2018-01-06"),
         ],
+        [
+            pd.Timestamp("2018-01-02", tz="US/Pacific"),
+            pd.Timestamp("2018-01-02", tz="US/Pacific"),
+            pd.Timestamp("2018-01-08", tz="US/Pacific"),
+            pd.Timestamp("2018-01-02", tz="US/Pacific"),
+            pd.Timestamp("2018-01-06", tz="US/Pacific"),
+        ],
+        [
+            pd.Timestamp("2018-01-02") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-02") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-08") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-02") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-06") - pd.Timestamp(0),
+        ],
+        [
+            pd.Timestamp("2018-01-02").to_period("D"),
+            pd.Timestamp("2018-01-02").to_period("D"),
+            pd.Timestamp("2018-01-08").to_period("D"),
+            pd.Timestamp("2018-01-02").to_period("D"),
+            pd.Timestamp("2018-01-06").to_period("D"),
+        ],
     ],
+    ids=lambda x: type(x[0]),
 )
 @pytest.mark.parametrize(
     "ties_method,ascending,pct,exp",
@@ -79,7 +104,12 @@ def test_rank_apply():
 )
 def test_rank_args(grps, vals, ties_method, ascending, pct, exp):
     key = np.repeat(grps, len(vals))
-    vals = vals * len(grps)
+
+    orig_vals = vals
+    vals = list(vals) * len(grps)
+    if isinstance(orig_vals, np.ndarray):
+        vals = np.array(vals, dtype=orig_vals.dtype)
+
     df = DataFrame({"key": key, "val": vals})
     result = df.groupby("key").rank(method=ties_method, ascending=ascending, pct=pct)

@@ -142,7 +172,10 @@ def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp):
 @pytest.mark.parametrize(
     "vals",
     [
-        [2, 2, np.nan, 8, 2, 6, np.nan, np.nan],
+        np.array([2, 2, np.nan, 8, 2, 6, np.nan, np.nan], dtype=dtype)
+        for dtype in ["f8", "f4", "f2"]
+    ]
+    + [
         [
             pd.Timestamp("2018-01-02"),
             pd.Timestamp("2018-01-02"),
@@ -153,7 +186,38 @@ def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp):
             np.nan,
             np.nan,
         ],
+        [
+            pd.Timestamp("2018-01-02", tz="US/Pacific"),
+            pd.Timestamp("2018-01-02", tz="US/Pacific"),
+            np.nan,
+            pd.Timestamp("2018-01-08", tz="US/Pacific"),
+            pd.Timestamp("2018-01-02", tz="US/Pacific"),
+            pd.Timestamp("2018-01-06", tz="US/Pacific"),
+            np.nan,
+            np.nan,
+        ],
+        [
+            pd.Timestamp("2018-01-02") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-02") - pd.Timestamp(0),
+            np.nan,
+            pd.Timestamp("2018-01-08") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-02") - pd.Timestamp(0),
+            pd.Timestamp("2018-01-06") - pd.Timestamp(0),
+            np.nan,
+            np.nan,
+        ],
+        [
+            pd.Timestamp("2018-01-02").to_period("D"),
+            pd.Timestamp("2018-01-02").to_period("D"),
+            np.nan,
+            pd.Timestamp("2018-01-08").to_period("D"),
+            pd.Timestamp("2018-01-02").to_period("D"),
+            pd.Timestamp("2018-01-06").to_period("D"),
+            np.nan,
+            np.nan,
+        ],
     ],
+    ids=lambda x: type(x[0]),
 )
 @pytest.mark.parametrize(
     "ties_method,ascending,na_option,pct,exp",
@@ -346,7 +410,12 @@ def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp):
 )
 def test_rank_args_missing(grps, vals, ties_method, ascending, na_option, pct, exp):
     key = np.repeat(grps, len(vals))
-    vals = vals * len(grps)
+
+    orig_vals = vals
+    vals = list(vals) * len(grps)
+    if isinstance(orig_vals, np.ndarray):
+        vals = np.array(vals, dtype=orig_vals.dtype)
+
     df = DataFrame({"key": key, "val": vals})
     result = df.groupby("key").rank(
         method=ties_method, ascending=ascending, na_option=na_option, pct=pct
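
A condensed, standalone version of what the new parametrizations assert (the group labels and the ties_method/ascending choice are illustrative; the real test sweeps many combinations): rank depends only on ordering, so a period-dtype column must produce the same float64 ranks as its integer or naive-datetime equivalents.

import numpy as np
import pandas as pd
from pandas import DataFrame

grps = ["qux", "quux"]
vals = [
    pd.Timestamp("2018-01-02").to_period("D"),
    pd.Timestamp("2018-01-02").to_period("D"),
    pd.Timestamp("2018-01-08").to_period("D"),
    pd.Timestamp("2018-01-02").to_period("D"),
    pd.Timestamp("2018-01-06").to_period("D"),
]

# Same setup as the test: repeat the value block once per group.
key = np.repeat(grps, len(vals))
df = DataFrame({"key": key, "val": list(vals) * len(grps)})

result = df.groupby("key").rank(method="average", ascending=True)
assert result["val"].dtype == np.float64

# The three ties share average rank 2.0; 2018-01-06 ranks 4.0, 2018-01-08 5.0.
print(result["val"].tolist()[:5])  # [2.0, 2.0, 5.0, 2.0, 4.0]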
