Skip to content

Commit 263b8a2

Browse files
author
Rohan Jain
committed
fix truediv for large divisor
1 parent 167507e commit 263b8a2

File tree

3 files changed

+31
-5
lines changed

3 files changed

+31
-5
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,7 @@ Timezones
776776
Numeric
777777
^^^^^^^
778778
- Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`)
779+
- Bug in :meth:`Series.__floordiv__` and :meth:`Series.__truediv__` for :class:`ArrowDtype` with integral dtypes raising for large divisors (:issue:`56706`)
779780
- Bug in :meth:`Series.__floordiv__` for :class:`ArrowDtype` with integral dtypes raising for large values (:issue:`56645`)
780781
- Bug in :meth:`Series.pow` not filling missing values correctly (:issue:`55512`)
781782

pandas/core/arrays/arrow/array.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@
109109

110110
def cast_for_truediv(
111111
arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
112-
) -> pa.ChunkedArray:
112+
) -> tuple[pa.ChunkedArray, pa.Array | pa.Scalar]:
113113
# Ensure int / int -> float mirroring Python/Numpy behavior
114114
# as pc.divide_checked(int, int) -> int
115115
if pa.types.is_integer(arrow_array.type) and pa.types.is_integer(
@@ -120,8 +120,15 @@ def cast_for_truediv(
120120
# Intentionally not using arrow_array.cast because it could be a scalar
121121
# value in reflected case, and safe=False only added to
122122
# scalar cast in pyarrow 13.
123-
return pc.cast(arrow_array, pa.float64(), safe=False)
124-
return arrow_array
123+
# In arrow, common type between integral and float64 is float64,
124+
# but integral type is safe casted to float64, to mirror python
125+
# and numpy, we want an unsafe cast, so we cast both operands to
126+
float64 before invoking arrow.
127+
return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast(
128+
pa_object, pa.float64(), safe=False
129+
)
130+
131+
return arrow_array, pa_object
125132

126133
def floordiv_compat(
127134
left: pa.ChunkedArray | pa.Array | pa.Scalar,
@@ -170,8 +177,8 @@ def floordiv_compat(
170177
"rsub": lambda x, y: pc.subtract_checked(y, x),
171178
"mul": pc.multiply_checked,
172179
"rmul": lambda x, y: pc.multiply_checked(y, x),
173-
"truediv": lambda x, y: pc.divide(cast_for_truediv(x, y), y),
174-
"rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)),
180+
"truediv": lambda x, y: pc.divide(*cast_for_truediv(x, y)),
181+
"rtruediv": lambda x, y: pc.divide(*cast_for_truediv(y, x)),
175182
"floordiv": lambda x, y: floordiv_compat(x, y),
176183
"rfloordiv": lambda x, y: floordiv_compat(y, x),
177184
"mod": NotImplemented,

pandas/tests/extension/test_arrow.py

+18
Original file line numberDiff line numberDiff line change
@@ -3286,6 +3286,24 @@ def test_arrow_floordiv_no_overflow():
32863286
tm.assert_series_equal(result, a)
32873287

32883288

3289+
def test_arrow_true_division_large_divisor():
3290+
# GH 56706
3291+
a = pd.Series([0], dtype="int64[pyarrow]")
3292+
b = pd.Series([18014398509481983], dtype="int64[pyarrow]")
3293+
expected = pd.Series([0], dtype="float64[pyarrow]")
3294+
result = a / b
3295+
tm.assert_series_equal(result, expected)
3296+
3297+
3298+
def test_arrow_floor_division_large_divisor():
3299+
# GH 56706
3300+
a = pd.Series([0], dtype="int64[pyarrow]")
3301+
b = pd.Series([18014398509481983], dtype="int64[pyarrow]")
3302+
expected = pd.Series([0], dtype="int64[pyarrow]")
3303+
result = a // b
3304+
tm.assert_series_equal(result, expected)
3305+
3306+
32893307
def test_string_to_datetime_parsing_cast():
32903308
# GH 56266
32913309
string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]

0 commit comments

Comments
 (0)