diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 129f5cedb86c2..75971f4ca109e 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -776,6 +776,7 @@ Timezones Numeric ^^^^^^^ - Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`) +- Bug in :meth:`Series.__floordiv__` and :meth:`Series.__truediv__` for :class:`ArrowDtype` with integral dtypes raising for large divisors (:issue:`56706`) - Bug in :meth:`Series.__floordiv__` for :class:`ArrowDtype` with integral dtypes raising for large values (:issue:`56645`) - Bug in :meth:`Series.pow` not filling missing values correctly (:issue:`55512`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b1164301e6d79..c7de54c6ee84b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -109,7 +109,7 @@ def cast_for_truediv( arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar - ) -> pa.ChunkedArray: + ) -> tuple[pa.ChunkedArray, pa.Array | pa.Scalar]: # Ensure int / int -> float mirroring Python/Numpy behavior # as pc.divide_checked(int, int) -> int if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( @@ -120,8 +120,15 @@ def cast_for_truediv( # Intentionally not using arrow_array.cast because it could be a scalar # value in reflected case, and safe=False only added to # scalar cast in pyarrow 13. - return pc.cast(arrow_array, pa.float64(), safe=False) - return arrow_array + # In arrow, common type between integral and float64 is float64, + # but integral type is safe casted to float64, to mirror python + # and numpy, we want an unsafe cast, so we cast both operands + # to float64 before invoking arrow. 
+ return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast( + pa_object, pa.float64(), safe=False + ) + + return arrow_array, pa_object def floordiv_compat( left: pa.ChunkedArray | pa.Array | pa.Scalar, @@ -129,8 +136,8 @@ def floordiv_compat( ) -> pa.ChunkedArray: # Ensure int // int -> int mirroring Python/Numpy behavior # as pc.floor(pc.divide_checked(int, int)) -> float - converted_left = cast_for_truediv(left, right) - result = pc.floor(pc.divide(converted_left, right)) + converted_left, converted_right = cast_for_truediv(left, right) + result = pc.floor(pc.divide(converted_left, converted_right)) if pa.types.is_integer(left.type) and pa.types.is_integer(right.type): result = result.cast(left.type) return result @@ -142,8 +149,8 @@ def floordiv_compat( "rsub": lambda x, y: pc.subtract_checked(y, x), "mul": pc.multiply_checked, "rmul": lambda x, y: pc.multiply_checked(y, x), - "truediv": lambda x, y: pc.divide(cast_for_truediv(x, y), y), - "rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)), + "truediv": lambda x, y: pc.divide(*cast_for_truediv(x, y)), + "rtruediv": lambda x, y: pc.divide(*cast_for_truediv(y, x)), "floordiv": lambda x, y: floordiv_compat(x, y), "rfloordiv": lambda x, y: floordiv_compat(y, x), "mod": NotImplemented, diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ed1b7b199a16f..24b2ac4fa8b22 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3246,6 +3246,24 @@ def test_arrow_floordiv_large_values(): tm.assert_series_equal(result, expected) +def test_arrow_true_division_large_divisor(): + # GH 56706 + a = pd.Series([0], dtype="int64[pyarrow]") + b = pd.Series([18014398509481983], dtype="int64[pyarrow]") + expected = pd.Series([0], dtype="float64[pyarrow]") + result = a / b + tm.assert_series_equal(result, expected) + + +def test_arrow_floor_division_large_divisor(): + # GH 56706 + a = pd.Series([0], dtype="int64[pyarrow]") + b = 
pd.Series([18014398509481983], dtype="int64[pyarrow]") + expected = pd.Series([0], dtype="int64[pyarrow]") + result = a // b + tm.assert_series_equal(result, expected) + + def test_string_to_datetime_parsing_cast(): # GH 56266 string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]