diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 129f5cedb86c2..e0507991d82ad 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -777,6 +777,7 @@ Numeric ^^^^^^^ - Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`) - Bug in :meth:`Series.__floordiv__` for :class:`ArrowDtype` with integral dtypes raising for large values (:issue:`56645`) +- Bug in :meth:`Series.__mod__` and :meth:`Series.__divmod__` for :class:`ArrowDtype` raising ``NotImplementedError`` (:issue:`56693`) - Bug in :meth:`Series.pow` not filling missing values correctly (:issue:`55512`) Conversion diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b1164301e6d79..e398c330467de 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -135,6 +135,18 @@ def floordiv_compat( result = result.cast(left.type) return result + def divmod_compat( + left: pa.ChunkedArray | pa.Array | pa.Scalar, + right: pa.ChunkedArray | pa.Array | pa.Scalar, + ) -> tuple[pa.ChunkedArray, pa.ChunkedArray]: + # Modulus is derived from floor division: (x % y) = x - (x // y) * y + # TODO: Should be replaced with corresponding arrow compute + # method when available + # https://lists.apache.org/thread/h3t6nz1ys2k2hnbrjvwyoxkf70cps8sh + floordiv_result = floordiv_compat(left, right) + modulus_result = pc.subtract(left, pc.multiply(floordiv_result, right)) + return floordiv_result, modulus_result + ARROW_ARITHMETIC_FUNCS = { "add": pc.add_checked, "radd": lambda x, y: pc.add_checked(y, x), @@ -146,10 +158,10 @@ def floordiv_compat( "rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)), "floordiv": lambda x, y: floordiv_compat(x, y), "rfloordiv": lambda x, y: floordiv_compat(y, x), - "mod": NotImplemented, - "rmod": NotImplemented, - "divmod": NotImplemented, - "rdivmod": NotImplemented, + "mod": lambda x, y: divmod_compat(x, y)[1], + "rmod": lambda x, y: divmod_compat(y, 
x)[1], + "divmod": lambda x, y: divmod_compat(x, y), + "rdivmod": lambda x, y: divmod_compat(y, x), "pow": pc.power_checked, "rpow": lambda x, y: pc.power_checked(y, x), } @@ -750,6 +762,8 @@ def _evaluate_op_method(self, other, op, arrow_funcs): raise NotImplementedError(f"{op.__name__} not implemented.") result = pc_func(self._pa_array, other) + if op is divmod or op is roperator.rdivmod: + return type(self)(result[0]), type(self)(result[1]) return type(self)(result) def _logical_method(self, other, op): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ed1b7b199a16f..15238213f5f52 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3238,6 +3238,75 @@ def test_arrow_floordiv(): tm.assert_series_equal(result, expected) +modulus_test_cases = [ + [-7, 3], + [-7, -4], + [-7, -8], + [-7, 8], + [7, 8], + [7, 3], + [7, -3], + [-1.2, 1.3], + [1.2, 1.3], + [-1.2, -1.3], + [-1.2, 0.5], + [1.2, 0.5], + [-1.2, -0.5], +] + + +@pytest.mark.parametrize("left, right", modulus_test_cases) +def test_arrow_modulus(left, right): + # GH 56693 + dtype = "int64[pyarrow]" if isinstance(left, int) else "double[pyarrow]" + a = pd.Series([left], dtype=dtype) + b = pd.Series([right], dtype=dtype) + result = a % b + # Use stdlib python modulus to baseline. + expected = pd.Series([left % right], dtype=dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("left, right", modulus_test_cases) +def test_arrow_reflected_modulus(left, right): + # GH 56693 + dtype = "int64[pyarrow]" if isinstance(left, int) else "double[pyarrow]" + a = pd.Series([left], dtype=dtype) + result = right % a + # Use stdlib python modulus to baseline. 
+ expected = pd.Series([right % left], dtype=dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("left, right", modulus_test_cases) +def test_arrow_divmod(left, right): + # GH 56693 + dtype = "int64[pyarrow]" if isinstance(left, int) else "double[pyarrow]" + a = pd.Series([left], dtype=dtype) + b = pd.Series([right], dtype=dtype) + result_floordiv, result_modulus = divmod(a, b) + # Use stdlib python divmod to baseline. + stdlib_baseline = divmod(left, right) + expected_floordiv = pd.Series([stdlib_baseline[0]], dtype=dtype) + expected_modulus = pd.Series([stdlib_baseline[1]], dtype=dtype) + tm.assert_series_equal(result_floordiv, expected_floordiv) + tm.assert_series_equal(result_modulus, expected_modulus) + + +@pytest.mark.parametrize("left, right", modulus_test_cases) +def test_arrow_reflected_divmod(left, right): + # GH 56693 + dtype = "int64[pyarrow]" if isinstance(left, int) else "double[pyarrow]" + a = pd.Series([left], dtype=dtype) + result_floordiv, result_modulus = divmod(right, a) + # Use stdlib python divmod to baseline. + stdlib_baseline = divmod(right, left) + expected_floordiv = pd.Series([stdlib_baseline[0]], dtype=dtype) + expected_modulus = pd.Series([stdlib_baseline[1]], dtype=dtype) + tm.assert_series_equal(result_floordiv, expected_floordiv) + tm.assert_series_equal(result_modulus, expected_modulus) + + def test_arrow_floordiv_large_values(): # GH 55561 a = pd.Series([1425801600000000000], dtype="int64[pyarrow]")