From dd3b35ad9690221b13999521b96385ba769adbe0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 11 Dec 2020 15:56:08 +0100 Subject: [PATCH 1/2] REF: move __array_ufunc__ to base NumericArray --- pandas/core/arrays/floating.py | 50 ----------------- pandas/core/arrays/integer.py | 49 ----------------- pandas/core/arrays/numeric.py | 57 ++++++++++++++++++++ pandas/tests/arrays/integer/test_function.py | 13 +++-- 4 files changed, 63 insertions(+), 106 deletions(-) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 1077538f6a21d..de1506fe30d04 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -1,4 +1,3 @@ -import numbers from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union import warnings @@ -22,7 +21,6 @@ from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna -from pandas.core import ops from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric @@ -287,54 +285,6 @@ def _from_sequence_of_strings( scalars = to_numeric(strings, errors="raise") return cls._from_sequence(scalars, dtype=dtype, copy=copy) - _HANDLED_TYPES = (np.ndarray, numbers.Number) - - def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): - # For FloatingArray inputs, we apply the ufunc to ._data - # and mask the result. - if method == "reduce": - # Not clear how to handle missing values in reductions. Raise. - raise NotImplementedError("The 'reduce' method is not supported.") - out = kwargs.get("out", ()) - - for x in inputs + out: - if not isinstance(x, self._HANDLED_TYPES + (FloatingArray,)): - return NotImplemented - - # for binary ops, use our custom dunder methods - result = ops.maybe_dispatch_ufunc_to_dunder_op( - self, ufunc, method, *inputs, **kwargs - ) - if result is not NotImplemented: - return result - - mask = np.zeros(len(self), dtype=bool) - inputs2 = [] - for x in inputs: - if isinstance(x, FloatingArray): - mask |= x._mask - inputs2.append(x._data) - else: - inputs2.append(x) - - def reconstruct(x): - # we don't worry about scalar `x` here, since we - # raise for reduce up above. - - # TODO - if is_float_dtype(x.dtype): - m = mask.copy() - return FloatingArray(x, m) - else: - x[mask] = np.nan - return x - - result = getattr(ufunc, method)(*inputs2, **kwargs) - if isinstance(result, tuple): - tuple(reconstruct(x) for x in result) - else: - return reconstruct(result) - def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index fa427e94fe08f..69a23b634975a 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,4 +1,3 @@ -import numbers from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union import warnings @@ -22,7 +21,6 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core import ops from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric @@ -369,53 +367,6 @@ def _from_sequence_of_strings( scalars = to_numeric(strings, errors="raise") return cls._from_sequence(scalars, dtype=dtype, copy=copy) - _HANDLED_TYPES = (np.ndarray, numbers.Number) - - def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): - # For IntegerArray inputs, we apply the ufunc to ._data - # and mask the result. - if method == "reduce": - # Not clear how to handle missing values in reductions. Raise. - raise NotImplementedError("The 'reduce' method is not supported.") - out = kwargs.get("out", ()) - - for x in inputs + out: - if not isinstance(x, self._HANDLED_TYPES + (IntegerArray,)): - return NotImplemented - - # for binary ops, use our custom dunder methods - result = ops.maybe_dispatch_ufunc_to_dunder_op( - self, ufunc, method, *inputs, **kwargs - ) - if result is not NotImplemented: - return result - - mask = np.zeros(len(self), dtype=bool) - inputs2 = [] - for x in inputs: - if isinstance(x, IntegerArray): - mask |= x._mask - inputs2.append(x._data) - else: - inputs2.append(x) - - def reconstruct(x): - # we don't worry about scalar `x` here, since we - # raise for reduce up above. - - if is_integer_dtype(x.dtype): - m = mask.copy() - return IntegerArray(x, m) - else: - x[mask] = np.nan - return x - - result = getattr(ufunc, method)(*inputs2, **kwargs) - if isinstance(result, tuple): - return tuple(reconstruct(x) for x in result) - else: - return reconstruct(result) - def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 5447a84c86ac1..11982bbb9a2ae 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -1,4 +1,5 @@ import datetime +import numbers import numpy as np @@ -13,6 +14,8 @@ is_list_like, ) +from pandas.core import ops + from .masked import BaseMaskedArray @@ -90,3 +93,57 @@ def _arith_method(self, other, op): ) return self._maybe_mask_result(result, mask, other, op_name) + + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): + # For NumericArray inputs, we apply the ufunc to ._data + # and mask the result. + if method == "reduce": + # Not clear how to handle missing values in reductions. Raise. + raise NotImplementedError("The 'reduce' method is not supported.") + out = kwargs.get("out", ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (NumericArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + mask = np.zeros(len(self), dtype=bool) + inputs2 = [] + for x in inputs: + if isinstance(x, NumericArray): + mask |= x._mask + inputs2.append(x._data) + else: + inputs2.append(x) + + def reconstruct(x): + # we don't worry about scalar `x` here, since we + # raise for reduce up above. + + if is_integer_dtype(x.dtype): + from pandas.core.arrays import IntegerArray + + m = mask.copy() + return IntegerArray(x, m) + elif is_float_dtype(x.dtype): + from pandas.core.arrays import FloatingArray + + m = mask.copy() + return FloatingArray(x, m) + else: + x[mask] = np.nan + return x + + result = getattr(ufunc, method)(*inputs2, **kwargs) + if isinstance(result, tuple): + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 521547cc7357d..753a6216a5c1c 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -3,7 +3,7 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import integer_array +from pandas.core.arrays import FloatingArray, integer_array @pytest.mark.parametrize("ufunc", [np.abs, np.sign]) @@ -26,13 +26,13 @@ def test_ufuncs_single_float(ufunc): a = integer_array([1, 2, -3, np.nan]) with np.errstate(invalid="ignore"): result = ufunc(a) - expected = ufunc(a.astype(float)) - tm.assert_numpy_array_equal(result, expected) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) + tm.assert_extension_array_equal(result, expected) s = pd.Series(a) with np.errstate(invalid="ignore"): result = ufunc(s) - expected = ufunc(s.astype(float)) + expected = pd.Series(expected) tm.assert_series_equal(result, expected) @@ -68,14 +68,13 @@ def test_ufunc_binary_output(): a = integer_array([1, 2, np.nan]) result = np.modf(a) expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float")) + expected = (pd.array(expected[0]), pd.array(expected[1])) assert isinstance(result, tuple) assert len(result) == 2 for x, y in zip(result, expected): - # TODO(FloatArray): This will return an extension array. - # y = integer_array(y) - tm.assert_numpy_array_equal(x, y) + tm.assert_extension_array_equal(x, y) @pytest.mark.parametrize("values", [[0, 1], [0, None]]) From 389b259d5d3591dacb45f24881b8f8155a042783 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 12 Jan 2021 09:22:12 +0100 Subject: [PATCH 2/2] try fix mypy --- pandas/core/arrays/numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 3a487360fd909..ed9e37bd68184 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -1,6 +1,6 @@ import datetime import numbers -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, List, Union import numpy as np @@ -156,7 +156,7 @@ def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): return result mask = np.zeros(len(self), dtype=bool) - inputs2 = [] + inputs2: List[Any] = [] for x in inputs: if isinstance(x, NumericArray): mask |= x._mask