pandas-dev · mroeschke · Jan 5, 2024 · Dec 29, 2023 · Dec 29, 2023 · Dec 29, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -776,6 +776,7 @@ Timezones
 Numeric
 ^^^^^^^
 - Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`)
+- Bug in :meth:`Series.__floordiv__` and :meth:`Series.__truediv__` for :class:`ArrowDtype` with integral dtypes raising for large divisors (:issue:`56706`)
 - Bug in :meth:`Series.__floordiv__` for :class:`ArrowDtype` with integral dtypes raising for large values (:issue:`56645`)
 - Bug in :meth:`Series.pow` not filling missing values correctly (:issue:`55512`)
 

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -109,7 +109,7 @@
 
     def cast_for_truediv(
         arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
-    ) -> pa.ChunkedArray:
+    ) -> tuple[pa.ChunkedArray, pa.Array | pa.Scalar]:
         # Ensure int / int -> float mirroring Python/Numpy behavior
         # as pc.divide_checked(int, int) -> int
         if pa.types.is_integer(arrow_array.type) and pa.types.is_integer(
@@ -120,19 +120,62 @@ def cast_for_truediv(
             # Intentionally not using arrow_array.cast because it could be a scalar
             # value in reflected case, and safe=False only added to
             # scalar cast in pyarrow 13.
-            return pc.cast(arrow_array, pa.float64(), safe=False)
-        return arrow_array
+            # In arrow, common type between integral and float64 is float64,
+            # but integral type is safe casted to float64, to mirror python
+            # and numpy, we want an unsafe cast, so we cast both operands to
+            # to float64 before invoking arrow.
+            return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast(
+                pa_object, pa.float64(), safe=False
+            )
+
+        return arrow_array, pa_object
 
     def floordiv_compat(
         left: pa.ChunkedArray | pa.Array | pa.Scalar,
         right: pa.ChunkedArray | pa.Array | pa.Scalar,
     ) -> pa.ChunkedArray:
-        # Ensure int // int -> int mirroring Python/Numpy behavior
-        # as pc.floor(pc.divide_checked(int, int)) -> float
-        converted_left = cast_for_truediv(left, right)
-        result = pc.floor(pc.divide(converted_left, right))
         if pa.types.is_integer(left.type) and pa.types.is_integer(right.type):
+            # Use divide_checked to ensure cases like -9223372036854775808 // -1
+            # don't silently overflow.
+            divided = pc.divide_checked(left, right)
+            # If one operand is a signed integer, we need to ensure division
+            # rounds towards negative infinity instead of 0. If divided.type
+            # is a signed integer, that means at least one operand is signed
+            # otherwise divided.type would be unsigned integer.
+            if pa.types.is_signed_integer(divided.type):
+                # GH 56676: avoid storing intermediate calculating in
+                # floating point type.
+                has_remainder = pc.not_equal(pc.multiply(divided, right), left)
+                result = pc.if_else(
+                    pc.and_(
+                        has_remainder,
+                        pc.less(
+                            pc.bit_wise_xor(left, right),
+                            pa.scalar(0, type=divided.type),
+                        ),
+                    ),
+                    # GH 55561: floordiv should round towards negative infinity.
+                    # pc.divide_checked for integral types rounds towards 0.
+                    # Avoid using subtract_checked which would incorrectly raise
+                    # for -9223372036854775808 // 1, because if integer overflow
+                    # occurs, then has_remainder should be false, and overflowed
+                    # value is discarded.
+                    pc.subtract(divided, pa.scalar(1, type=divided.type)),
+                    divided,
+                )
+            else:
+                # For 2 unsigned integer operands, integer division is
+                # same as floor division.
+                result = divided
+            # Ensure compatibility with older versions of pandas where
+            # int8 // int64 returned int8 rather than int64.
             result = result.cast(left.type)
+        else:
+            # Use divide instead of divide_checked to match numpy
+            # floordiv where divide by 0 returns infinity for floating
+            # point types.
+            divided = pc.divide(left, right)
+            result = pc.floor(divided)
         return result
 
     ARROW_ARITHMETIC_FUNCS = {
@@ -142,8 +185,8 @@ def floordiv_compat(
         "rsub": lambda x, y: pc.subtract_checked(y, x),
         "mul": pc.multiply_checked,
         "rmul": lambda x, y: pc.multiply_checked(y, x),
-        "truediv": lambda x, y: pc.divide(cast_for_truediv(x, y), y),
-        "rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)),
+        "truediv": lambda x, y: pc.divide(*cast_for_truediv(x, y)),
+        "rtruediv": lambda x, y: pc.divide(*cast_for_truediv(y, x)),
         "floordiv": lambda x, y: floordiv_compat(x, y),
         "rfloordiv": lambda x, y: floordiv_compat(y, x),
         "mod": NotImplemented,

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -3239,13 +3239,82 @@ def test_arrow_floordiv():
 
 
 def test_arrow_floordiv_large_values():
-    # GH 55561
+    # GH 56645
     a = pd.Series([1425801600000000000], dtype="int64[pyarrow]")
     expected = pd.Series([1425801600000], dtype="int64[pyarrow]")
     result = a // 1_000_000
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("dtype", ["int64[pyarrow]", "uint64[pyarrow]"])
+def test_arrow_floordiv_large_integral_result(dtype):
+    # GH 56676
+    a = pd.Series([18014398509481983], dtype=dtype)
+    result = a // 1
+    tm.assert_series_equal(result, a)
+
+
+@pytest.mark.parametrize("pa_type", tm.SIGNED_INT_PYARROW_DTYPES)
+def test_arrow_floordiv_larger_divisor(pa_type):
+    # GH 56676
+    dtype = ArrowDtype(pa_type)
+    a = pd.Series([-23], dtype=dtype)
+    result = a // 24
+    expected = pd.Series([-1], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("pa_type", tm.SIGNED_INT_PYARROW_DTYPES)
+def test_arrow_floordiv_integral_invalid(pa_type):
+    # GH 56676
+    min_value = np.iinfo(pa_type.to_pandas_dtype()).min
+    a = pd.Series([min_value], dtype=ArrowDtype(pa_type))
+    with pytest.raises(pa.lib.ArrowInvalid, match="overflow|not in range"):
+        a // -1
+    with pytest.raises(pa.lib.ArrowInvalid, match="divide by zero"):
+        a // 0
+
+
+@pytest.mark.parametrize("dtype", tm.FLOAT_PYARROW_DTYPES_STR_REPR)
+def test_arrow_floordiv_floating_0_divisor(dtype):
+    # GH 56676
+    a = pd.Series([2], dtype=dtype)
+    result = a // 0
+    expected = pd.Series([float("inf")], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES)
+def test_arrow_integral_floordiv_large_values(pa_type):
+    # GH 56676
+    max_value = np.iinfo(pa_type.to_pandas_dtype()).max
+    dtype = ArrowDtype(pa_type)
+    a = pd.Series([max_value], dtype=dtype)
+    b = pd.Series([1], dtype=dtype)
+    result = a // b
+    tm.assert_series_equal(result, a)
+
+
+@pytest.mark.parametrize("dtype", ["int64[pyarrow]", "uint64[pyarrow]"])
+def test_arrow_true_division_large_divisor(dtype):
+    # GH 56706
+    a = pd.Series([0], dtype=dtype)
+    b = pd.Series([18014398509481983], dtype=dtype)
+    expected = pd.Series([0], dtype="float64[pyarrow]")
+    result = a / b
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", ["int64[pyarrow]", "uint64[pyarrow]"])
+def test_arrow_floor_division_large_divisor(dtype):
+    # GH 56706
+    a = pd.Series([0], dtype=dtype)
+    b = pd.Series([18014398509481983], dtype=dtype)
+    expected = pd.Series([0], dtype=dtype)
+    result = a // b
+    tm.assert_series_equal(result, expected)
+
+
 def test_string_to_datetime_parsing_cast():
     # GH 56266
     string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]