From 1ad607a52664a3cbd1be452e7580df99af726001 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 10 Dec 2021 00:22:30 +0000 Subject: [PATCH 1/2] Backport PR #44828: REGR: resampling DataFrame with DateTimeIndex with empty groups and uint8, uint16 or uint32 columns incorrectly raising RuntimeError --- doc/source/whatsnew/v1.3.5.rst | 1 + pandas/core/groupby/ops.py | 7 +++--- pandas/tests/resample/test_datetime_index.py | 24 ++++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 048cd978c4478..49d37eff63323 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`) - Fixed regression in :func:`merge_asof` raising error when array was supplied as join key (:issue:`42844`) +- Fixed regression when resampling :class:`DataFrame` with :class:`DatetimeIndex` with empty groups and ``uint8``, ``uint16`` or ``uint32`` columns incorrectly raising ``RuntimeError`` (:issue:`43329`) - Fixed regression in creating a :class:`DataFrame` from a timezone-aware :class:`Timestamp` scalar near a Daylight Savings Time transition (:issue:`42505`) - Fixed performance regression in :func:`read_csv` (:issue:`44106`) - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index dbc225454766d..c12ec8043b61a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -546,9 +546,10 @@ def _call_cython_op( elif is_bool_dtype(dtype): values = values.astype("int64") elif is_integer_dtype(dtype): - # e.g. 
uint8 -> uint64, int16 -> int64 - dtype_str = dtype.kind + "8" - values = values.astype(dtype_str, copy=False) + # GH#43329 If the dtype is explicitly of type uint64 the type is not + # changed to prevent overflow. + if dtype != np.uint64: + values = values.astype(np.int64, copy=False) elif is_numeric: if not is_complex_dtype(dtype): values = ensure_float64(values) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 5594659fb4b03..614780604a8ee 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1827,3 +1827,27 @@ def test_resample_aggregate_functions_min_count(func): index=DatetimeIndex(["2020-03-31"], dtype="datetime64[ns]", freq="Q-DEC"), ) tm.assert_series_equal(result, expected) + + +def test_resample_unsigned_int(any_unsigned_int_numpy_dtype): + # gh-43329 + df = DataFrame( + index=date_range(start="2000-01-01", end="2000-01-03 23", freq="12H"), + columns=["x"], + data=[0, 1, 0] * 2, + dtype=any_unsigned_int_numpy_dtype, + ) + df = df.loc[(df.index < "2000-01-02") | (df.index > "2000-01-03"), :] + + if any_unsigned_int_numpy_dtype == "uint64": + with pytest.raises(RuntimeError, match="empty group with uint64_t"): + result = df.resample("D").max() + else: + result = df.resample("D").max() + + expected = DataFrame( + [1, np.nan, 0], + columns=["x"], + index=date_range(start="2000-01-01", end="2000-01-03 23", freq="D"), + ) + tm.assert_frame_equal(result, expected) From 90bb167ddd8237d4c58d8618771eb727e3d56b0a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 10 Dec 2021 11:06:04 +0000 Subject: [PATCH 2/2] rename fixture --- pandas/tests/resample/test_datetime_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 614780604a8ee..2006ffdbcf9e0 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ 
b/pandas/tests/resample/test_datetime_index.py @@ -1829,17 +1829,17 @@ def test_resample_aggregate_functions_min_count(func): tm.assert_series_equal(result, expected) -def test_resample_unsigned_int(any_unsigned_int_numpy_dtype): +def test_resample_unsigned_int(uint_dtype): # gh-43329 df = DataFrame( index=date_range(start="2000-01-01", end="2000-01-03 23", freq="12H"), columns=["x"], data=[0, 1, 0] * 2, - dtype=any_unsigned_int_numpy_dtype, + dtype=uint_dtype, ) df = df.loc[(df.index < "2000-01-02") | (df.index > "2000-01-03"), :] - if any_unsigned_int_numpy_dtype == "uint64": + if uint_dtype == "uint64": with pytest.raises(RuntimeError, match="empty group with uint64_t"): result = df.resample("D").max() else: