We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e3603e4 commit 374e506 (Copy full SHA for 374e506)
pandas/_libs/algos.pyi
@@ -132,6 +132,7 @@ def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ...
132
def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ...
133
def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ...
134
def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ...
135
+def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ...
136
def take_1d_int8_int8(
137
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
138
) -> None: ...
pandas/_libs/algos_common_helper.pxi.in
@@ -41,12 +41,12 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
41
('int16', 'INT16', 'int16'),
42
('int32', 'INT32', 'int32'),
43
('int64', 'INT64', 'int64'),
44
+ ('uint64', 'UINT64', 'uint64'),
45
# Disabling uint and complex dtypes because we do not use them
- # (and compiling them increases wheel size)
46
+ # (and compiling them increases wheel size) (except uint64)
47
# ('uint8', 'UINT8', 'uint8'),
48
# ('uint16', 'UINT16', 'uint16'),
49
# ('uint32', 'UINT32', 'uint32'),
- # ('uint64', 'UINT64', 'uint64'),
50
# ('complex64', 'COMPLEX64', 'complex64'),
51
# ('complex128', 'COMPLEX128', 'complex128')
52
]
pandas/_libs/groupby.pyx
@@ -513,14 +513,7 @@ ctypedef fused mean_t:
513
514
ctypedef fused sum_t:
515
mean_t
516
- int8_t
517
- int16_t
518
- int32_t
519
int64_t
520
-
521
- uint8_t
522
- uint16_t
523
- uint32_t
524
uint64_t
525
object
526
pandas/core/dtypes/common.py
@@ -100,6 +100,7 @@ def ensure_float(arr):
100
ensure_int8 = algos.ensure_int8
101
ensure_platform_int = algos.ensure_platform_int
102
ensure_object = algos.ensure_object
103
+ensure_uint64 = algos.ensure_uint64
104
105
106
def ensure_str(value: bytes | Any) -> str:
pandas/core/groupby/ops.py
@@ -46,6 +46,7 @@
ensure_float64,
ensure_int64,
ensure_platform_int,
+ ensure_uint64,
is_1d_only_ea_dtype,
is_bool_dtype,
is_complex_dtype,
@@ -224,6 +225,13 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
224
225
# result may still include NaN, so we have to cast
226
values = ensure_float64(values)
227
228
+ elif how == "sum":
229
+ # Avoid overflow during group op
230
+ if values.dtype.kind == "i":
231
+ values = ensure_int64(values)
232
+ else:
233
+ values = ensure_uint64(values)
234
+
235
return values
236
237
# TODO: general case implementation overridable by EAs.
pandas/tests/groupby/test_groupby.py
@@ -2829,3 +2829,16 @@ def test_groupby_sum_support_mask(any_numeric_ea_dtype):
2829
dtype=any_numeric_ea_dtype,
2830
)
2831
tm.assert_frame_equal(result, expected)
2832
2833
2834
+@pytest.mark.parametrize("val, dtype", [(111, "int"), (222, "uint")])
2835
+def test_groupby_sum_overflow(val, dtype):
2836
+ # GH#37493
2837
+ df = DataFrame({"a": 1, "b": [val, val]}, dtype=f"{dtype}8")
2838
+ result = df.groupby("a").sum()
2839
+ expected = DataFrame(
2840
+ {"b": [val * 2]},
2841
+ index=Index([1], name="a", dtype=f"{dtype}64"),
2842
+ dtype=f"{dtype}64",
2843
+ )
2844
+ tm.assert_frame_equal(result, expected)
0 commit comments