diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 363ec10d58bb6..12af80f1bce80 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -208,6 +208,25 @@ will 2. call ``result = op(values, ExtensionArray)`` 3. re-box the result in a ``Series`` +.. _extending.extension.ufunc: + +NumPy Universal Functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` implements ``__array_ufunc__``. As part of the implementation, +pandas unboxes the ``ExtensionArray`` from the :class:`Series`, applies the ufunc, +and re-boxes it if necessary. + +If applicable, we highly recommend that you implement ``__array_ufunc__`` in your +extension array to avoid coercion to an ndarray. See +`the numpy documentation `__ +for an example. + +As part of your implementation, we require that you defer to pandas when a pandas +container (:class:`Series`, :class:`DataFrame`, :class:`Index`) is detected in ``inputs``. +If any of those is present, you should return ``NotImplemented``. Pandas will take care of +unboxing the array from the container and re-calling the ufunc with the unwrapped input. + .. _extending.extension.testing: Testing extension arrays diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst index 914c55115567a..33e5d390447d7 100644 --- a/doc/source/getting_started/dsintro.rst +++ b/doc/source/getting_started/dsintro.rst @@ -731,28 +731,62 @@ DataFrame interoperability with NumPy functions .. _dsintro.numpy_interop: Elementwise NumPy ufuncs (log, exp, sqrt, ...) and various other NumPy functions -can be used with no issues on DataFrame, assuming the data within are numeric: +can be used with no issues on Series and DataFrame, assuming the data within +are numeric: .. ipython:: python np.exp(df) np.asarray(df) -The dot method on DataFrame implements matrix multiplication: +DataFrame is not intended to be a drop-in replacement for ndarray as its +indexing semantics and data model are quite different in places from an n-dimensional +array. + +:class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's +`universal functions `_. + +The ufunc is applied to the underlying array in a Series. .. ipython:: python - df.T.dot(df) + ser = pd.Series([1, 2, 3, 4]) + np.exp(ser) -Similarly, the dot method on Series implements dot product: +Like other parts of the library, pandas will automatically align labeled inputs +as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` +on two :class:`Series` with differently ordered labels will align before the operation. .. ipython:: python - s1 = pd.Series(np.arange(5, 10)) - s1.dot(s1) + ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + ser2 = pd.Series([1, 3, 5], index=['b', 'a', 'c']) + ser1 + ser2 + np.remainder(ser1, ser2) -DataFrame is not intended to be a drop-in replacement for ndarray as its -indexing semantics are quite different in places from a matrix. +As usual, the union of the two indices is taken, and non-overlapping values are filled +with missing values. + +.. ipython:: python + + ser3 = pd.Series([2, 4, 6], index=['b', 'c', 'd']) + ser3 + np.remainder(ser1, ser3) + +When a binary ufunc is applied to a :class:`Series` and :class:`Index`, the Series +implementation takes precedence and a Series is returned. + +.. 
ipython:: python
+
+    ser = pd.Series([1, 2, 3])
+    idx = pd.Index([4, 5, 6])
+
+    np.maximum(ser, idx)
+
+NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays,
+for example :class:`SparseArray` (see :ref:`sparse.calculation`). If possible,
+the ufunc is applied without converting the underlying data to an ndarray.

 Console display
 ~~~~~~~~~~~~~~~
diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index a2f93dcf337d7..4f44fcaab63d4 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -5,6 +5,7 @@ Computational tools
 ===================

+
 Statistical functions
 ---------------------

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 30ae4ebe21ca4..8850ee79a893b 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -886,6 +886,7 @@ Sparse
 - Introduce a better error message in :meth:`Series.sparse.from_coo` so it returns a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`)
 - Bug in :func:`numpy.modf` on a :class:`SparseArray`. Now a tuple of :class:`SparseArray` is returned (:issue:`26946`).
+

 Build Changes
 ^^^^^^^^^^^^^

@@ -896,6 +897,7 @@ ExtensionArray
 - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`).
 - :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`)
+- Added ``Series.__array_ufunc__`` to better handle NumPy ufuncs applied to Series backed by extension arrays (:issue:`23293`).
 - Keyword argument ``deep`` has been removed from :meth:`ExtensionArray.copy` (:issue:`27083`)

 Other
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 6340cc732d6c1..0762a607f20ae 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -107,6 +107,17 @@ class ExtensionArray:
     attributes called ``.values`` or ``._values`` to ensure full compatibility
     with pandas internals. But other names as ``.data``, ``._data``,
     ``._items``, ... can be freely used.
+
+    If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
+    that
+
+    1. You defer by returning ``NotImplemented`` when any Series are present
+       in `inputs`. Pandas will extract the arrays and call the ufunc again.
+    2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
+       Pandas inspects this to determine whether the ufunc is valid for the
+       types present.
+
+    See :ref:`extending.extension.ufunc` for more.
     """
     # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
     # Don't override this.
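The docstring above and the new ``extending.rst`` section spell out the protocol a third-party array should follow. As a minimal sketch (not part of this patch), the hypothetical wrapper below shows the two steps on a toy class; ``MyArray`` and its ``_ndarray`` attribute are illustrative only, and a real ``ExtensionArray`` subclass would implement the rest of the interface as well::

    import numbers

    import numpy as np
    import pandas as pd

    class MyArray:
        """Toy array illustrating the __array_ufunc__ protocol pandas expects."""

        _HANDLED_TYPES = (np.ndarray, numbers.Number)

        def __init__(self, values):
            self._ndarray = np.asarray(values)

        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            out = kwargs.get('out', ())

            # 1. Defer when a pandas container is present: pandas unboxes the
            #    array and re-calls the ufunc with the unwrapped input.
            if any(isinstance(x, (pd.Series, pd.Index, pd.DataFrame))
                   for x in inputs):
                return NotImplemented

            # 2. Handle only the types declared in _HANDLED_TYPES, plus
            #    instances of this class itself.
            for x in inputs + out:
                if not isinstance(x, self._HANDLED_TYPES + (MyArray,)):
                    return NotImplemented

            # Unwrap, apply the ufunc to the raw data, and re-wrap the result.
            unwrapped = tuple(x._ndarray if isinstance(x, MyArray) else x
                              for x in inputs)
            result = getattr(ufunc, method)(*unwrapped, **kwargs)
            if isinstance(result, tuple):  # e.g. np.divmod
                return tuple(MyArray(x) for x in result)
            return MyArray(result)

        def __repr__(self):
            return 'MyArray({!r})'.format(self._ndarray)

    arr = MyArray([1, 2, 3])
    np.add(arr, 1)     # handled here -> a MyArray wrapping array([2, 3, 4])
    np.divmod(arr, 2)  # tuple of MyArray

``SparseArray`` and the ``DecimalArray`` test fixture in this patch follow the same shape, with ``SparseArray`` additionally routing binary ufuncs through the new ``ops.maybe_dispatch_ufunc_to_dunder_op`` helper.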
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 68c7b79becb55..b77a4f985067d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -26,6 +26,7 @@ from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna +from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import factorize, take, take_1d, unique1d @@ -1292,6 +1293,20 @@ def __array__(self, dtype=None): ret = np.asarray(ret) return ret + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # for all other cases, raise for now (similarly as what happens in + # Series.__array_prepare__) + raise TypeError("Object with dtype {dtype} cannot perform " + "the numpy op {op}".format( + dtype=self.dtype, + op=ufunc.__name__)) + def __setstate__(self, state): """Necessary for making this object picklable""" if not isinstance(state, dict): diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 88de497a3329f..644c2f634240f 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,3 +1,4 @@ +import numbers import sys from typing import Type import warnings @@ -17,7 +18,7 @@ from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, notna -from pandas.core import nanops +from pandas.core import nanops, ops from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.tools.numeric import to_numeric @@ -344,6 +345,52 @@ def __array__(self, dtype=None): """ return self._coerce_to_ndarray() + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # For IntegerArray inputs, we apply the ufunc to ._data + # and mask the result. + if method == 'reduce': + # Not clear how to handle missing values in reductions. Raise. + raise NotImplementedError("The 'reduce' method is not supported.") + out = kwargs.get('out', ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (IntegerArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + mask = np.zeros(len(self), dtype=bool) + inputs2 = [] + for x in inputs: + if isinstance(x, IntegerArray): + mask |= x._mask + inputs2.append(x._data) + else: + inputs2.append(x) + + def reconstruct(x): + # we don't worry about scalar `x` here, since we + # raise for reduce up above. 
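+            # Integer-dtype results are re-wrapped as IntegerArray using the
+            # combined mask of the inputs; otherwise NaN is written into the
+            # masked positions of the ndarray result.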
+
+            if is_integer_dtype(x.dtype):
+                m = mask.copy()
+                return IntegerArray(x, m)
+            else:
+                x[mask] = np.nan
+                return x
+
+        result = getattr(ufunc, method)(*inputs2, **kwargs)
+        if isinstance(result, tuple):
+            return tuple(reconstruct(x) for x in result)
+        else:
+            return reconstruct(result)
+
     def __iter__(self):
         for i in range(len(self)):
             if self._mask[i]:
diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
index 97ab6ec8235ef..29cc899fa6a9b 100644
--- a/pandas/core/arrays/sparse.py
+++ b/pandas/core/arrays/sparse.py
@@ -38,6 +38,7 @@
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.missing import interpolate_2d
+import pandas.core.ops as ops
 import pandas.io.formats.printing as printing

@@ -1665,42 +1666,11 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             if not isinstance(x, self._HANDLED_TYPES + (SparseArray,)):
                 return NotImplemented

-        special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv',
-                   'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'}
-        aliases = {
-            'subtract': 'sub',
-            'multiply': 'mul',
-            'floor_divide': 'floordiv',
-            'true_divide': 'truediv',
-            'power': 'pow',
-            'remainder': 'mod',
-            'divide': 'div',
-            'equal': 'eq',
-            'not_equal': 'ne',
-            'less': 'lt',
-            'less_equal': 'le',
-            'greater': 'gt',
-            'greater_equal': 'ge',
-        }
-
-        flipped = {
-            'lt': '__gt__',
-            'le': '__ge__',
-            'gt': '__lt__',
-            'ge': '__le__',
-            'eq': '__eq__',
-            'ne': '__ne__',
-        }
-
-        op_name = ufunc.__name__
-        op_name = aliases.get(op_name, op_name)
-
-        if op_name in special and kwargs.get('out') is None:
-            if isinstance(inputs[0], type(self)):
-                return getattr(self, '__{}__'.format(op_name))(inputs[1])
-            else:
-                name = flipped.get(op_name, '__r{}__'.format(op_name))
-                return getattr(self, name)(inputs[0])
+        # for binary ops, use our custom dunder methods
+        result = ops.maybe_dispatch_ufunc_to_dunder_op(
+            self, ufunc, method, *inputs, **kwargs)
+        if result is not NotImplemented:
+            return result

         if len(inputs) == 1:
             # No alignment necessary.
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index a4d31cb227f19..5dd8455073212 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -6,7 +6,7 @@
 import datetime
 import operator
 import textwrap
-from typing import Dict, Optional
+from typing import Any, Callable, Dict, Optional
 import warnings

 import numpy as np
@@ -29,6 +29,7 @@
 from pandas.core.dtypes.missing import isna, notna

 import pandas as pd
+from pandas._typing import ArrayLike
 import pandas.core.common as com
 import pandas.core.missing as missing

@@ -1660,7 +1661,14 @@ def na_op(x, y):
                                               lambda val: op(val, y))
             raise

-        result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
+        if isinstance(result, tuple):
+            # e.g. divmod
+            result = tuple(
+                missing.fill_zeros(r, x, y, op_name, fill_zeros)
+                for r in result
+            )
+        else:
+            result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
         return result

     def wrapper(left, right):
@@ -2349,3 +2357,78 @@ def wrapper(self, other):

     wrapper.__name__ = op_name
     return wrapper
+
+
+def maybe_dispatch_ufunc_to_dunder_op(
+    self: ArrayLike,
+    ufunc: Callable,
+    method: str,
+    *inputs: ArrayLike,
+    **kwargs: Any
+):
+    """
+    Dispatch a ufunc to the equivalent dunder method.
+
+    Parameters
+    ----------
+    self : ArrayLike
+        The array whose dunder method we dispatch to
+    ufunc : Callable
+        A NumPy ufunc
+    method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'}
+    inputs : ArrayLike
+        The input arrays.
+    kwargs : Any
+        The additional keyword arguments, e.g.
``out``. + + Returns + ------- + result : Any + The result of applying the ufunc + """ + # special has the ufuncs we dispatch to the dunder op on + special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', + 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder', + 'matmul'} + aliases = { + 'subtract': 'sub', + 'multiply': 'mul', + 'floor_divide': 'floordiv', + 'true_divide': 'truediv', + 'power': 'pow', + 'remainder': 'mod', + 'divide': 'div', + 'equal': 'eq', + 'not_equal': 'ne', + 'less': 'lt', + 'less_equal': 'le', + 'greater': 'gt', + 'greater_equal': 'ge', + } + + # For op(., Array) -> Array.__r{op}__ + flipped = { + 'lt': '__gt__', + 'le': '__ge__', + 'gt': '__lt__', + 'ge': '__le__', + 'eq': '__eq__', + 'ne': '__ne__', + } + + op_name = ufunc.__name__ + op_name = aliases.get(op_name, op_name) + + def not_implemented(*args, **kwargs): + return NotImplemented + + if (method == '__call__' and op_name in special + and kwargs.get('out') is None): + if isinstance(inputs[0], type(self)): + name = '__{}__'.format(op_name) + return getattr(self, name, not_implemented)(inputs[1]) + else: + name = flipped.get(op_name, '__r{}__'.format(op_name)) + return getattr(self, name, not_implemented)(inputs[0]) + else: + return NotImplemented diff --git a/pandas/core/series.py b/pandas/core/series.py index f415bc9fd3561..9179099562832 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,6 +5,7 @@ from io import StringIO from shutil import get_terminal_size from textwrap import dedent +from typing import Any, Callable import warnings import numpy as np @@ -714,6 +715,84 @@ def view(self, dtype=None): # ---------------------------------------------------------------------- # NDArray Compat + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) + + def __array_ufunc__( + self, + ufunc: Callable, + method: str, + *inputs: Any, + **kwargs: Any + ): + # TODO: handle DataFrame + from pandas.core.internals.construction import extract_array + cls = type(self) + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # Determine if we should defer. + no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__) + + for item in inputs: + higher_priority = ( + hasattr(item, '__array_priority__') and + item.__array_priority__ > self.__array_priority__ + ) + has_array_ufunc = ( + hasattr(item, '__array_ufunc__') and + type(item).__array_ufunc__ not in no_defer and + not isinstance(item, self._HANDLED_TYPES) + ) + if higher_priority or has_array_ufunc: + return NotImplemented + + # align all the inputs. + names = [getattr(x, 'name') for x in inputs if hasattr(x, 'name')] + types = tuple(type(x) for x in inputs) + # TODO: dataframe + alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)] + + if len(alignable) > 1: + # This triggers alignment. + # At the moment, there aren't any ufuncs with more than two inputs + # so this ends up just being x1.index | x2.index, but we write + # it to handle *args. + index = alignable[0].index + for s in alignable[1:]: + index |= s.index + inputs = tuple(x.reindex(index) if issubclass(t, Series) else x + for x, t in zip(inputs, types)) + else: + index = self.index + + inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + if len(set(names)) == 1: + # we require names to be hashable, right? 
+ name = names[0] # type: Any + else: + name = None + + def construct_return(result): + if lib.is_scalar(result): + return result + return self._constructor(result, + index=index, + name=name, + copy=False) + + if type(result) is tuple: + # multiple return values + return tuple(construct_return(x) for x in result) + elif method == 'at': + # no return value + return None + else: + return construct_return(result) def __array__(self, dtype=None): """ @@ -776,30 +855,6 @@ def __array__(self, dtype=None): dtype = 'M8[ns]' return np.asarray(self.array, dtype) - def __array_wrap__(self, result, context=None): - """ - Gets called after a ufunc. - """ - return self._constructor(result, index=self.index, - copy=False).__finalize__(self) - - def __array_prepare__(self, result, context=None): - """ - Gets called prior to a ufunc. - """ - - # nice error message for non-ufunc types - if (context is not None and - (not isinstance(self._values, (np.ndarray, ExtensionArray)) - or isinstance(self._values, Categorical))): - obj = context[1][0] - raise TypeError("{obj} with dtype {dtype} cannot perform " - "the numpy op {op}".format( - obj=type(obj).__name__, - dtype=getattr(obj, 'dtype', None), - op=context[0].__name__)) - return result - # ---------------------------------------------------------------------- # Unary Methods diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b1091d38c10d0..908e197ec1d28 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -670,6 +670,10 @@ def test_comparison_tzawareness_compat_scalars(self, op, box_with_array): @pytest.mark.parametrize('other', [datetime(2016, 1, 1), Timestamp('2016-01-01'), np.datetime64('2016-01-01')]) + # Bug in NumPy? https://github.com/numpy/numpy/issues/13841 + # Raising in __eq__ will fallback to NumPy, which warns, fails, + # then re-raises the original exception. So we just need to ignore. 
+ @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture, box_with_array): tz = tz_aware_fixture diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index f58f8981317df..31c7f47bcf5bd 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -891,6 +891,25 @@ def test_ufunc_coercions(self, holder): exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) + @pytest.mark.parametrize('holder', [pd.Int64Index, pd.UInt64Index, + pd.Float64Index, pd.Series]) + def test_ufunc_multiple_return_values(self, holder): + obj = holder([1, 2, 3], name='x') + box = pd.Series if holder is pd.Series else pd.Index + + result = np.modf(obj) + assert isinstance(result, tuple) + exp1 = pd.Float64Index([0., 0., 0.], name='x') + exp2 = pd.Float64Index([1., 2., 3.], name='x') + tm.assert_equal(result[0], tm.box_expected(exp1, box)) + tm.assert_equal(result[1], tm.box_expected(exp2, box)) + + def test_ufunc_at(self): + s = pd.Series([0, 1, 2], index=[1, 2, 3], name='x') + np.add.at(s, [0, 2], 10) + expected = pd.Series([10, 1, 12], index=[1, 2, 3], name='x') + tm.assert_series_equal(s, expected) + class TestObjectDtypeEquivalence: # Tests that arithmetic operations match operations executed elementwise diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 65f7628370ad4..fb62a90a6007e 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -717,6 +717,74 @@ def test_astype_nansafe(): arr.astype('uint32') +@pytest.mark.parametrize( + 'ufunc', [np.abs, np.sign]) +def test_ufuncs_single_int(ufunc): + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a) + expected = integer_array(ufunc(a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(integer_array(ufunc(a.astype(float)))) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + 'ufunc', [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = integer_array([1, 2, -3, np.nan]) + with np.errstate(invalid='ignore'): + result = ufunc(a) + expected = ufunc(a.astype(float)) + tm.assert_numpy_array_equal(result, expected) + + s = pd.Series(a) + with np.errstate(invalid='ignore'): + result = ufunc(s) + expected = ufunc(s.astype(float)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + 'ufunc', [np.add, np.subtract]) +def test_ufuncs_binary_int(ufunc): + # two IntegerArrays + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a, a) + expected = integer_array(ufunc(a.astype(float), a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = ufunc(a, arr) + expected = integer_array(ufunc(a.astype(float), arr)) + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = integer_array(ufunc(arr, a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with scalar + result = ufunc(a, 1) + expected = integer_array(ufunc(a.astype(float), 1)) + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = integer_array(ufunc(1, a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize('values', [ + [0, 1], [0, None] +]) +def 
test_ufunc_reduce_raises(values): + a = integer_array(values) + with pytest.raises(NotImplementedError): + np.add.reduce(a) + + # TODO(jreback) - these need testing / are broken # shift diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2b1bb53e962be..d097a599730b8 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -84,6 +84,29 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): def _from_factorized(cls, values, original): return cls(values) + _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # + if not all(isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) + for t in inputs): + return NotImplemented + + inputs = tuple(x._data if isinstance(x, DecimalArray) else x + for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + + def reconstruct(x): + if isinstance(x, (decimal.Decimal, numbers.Number)): + return x + else: + return DecimalArray._from_sequence(x) + + if isinstance(result, tuple): + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) + def __getitem__(self, item): if isinstance(item, numbers.Integral): return self._data[item] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 4625c79e1bc3d..80885e4045e64 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -390,6 +390,14 @@ def test_divmod_array(reverse, expected_div, expected_mod): tm.assert_extension_array_equal(mod, expected_mod) +def test_ufunc_fallback(data): + a = data[:5] + s = pd.Series(a, index=range(3, 8)) + result = np.abs(s) + expected = pd.Series(np.abs(a), index=range(3, 8)) + tm.assert_series_equal(result, expected) + + def test_formatting_values_deprecated(): class DecimalArray2(DecimalArray): def _formatting_values(self): @@ -400,3 +408,39 @@ def _formatting_values(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): repr(ser) + + +def test_array_ufunc(): + a = to_decimal([1, 2, 3]) + result = np.exp(a) + expected = to_decimal(np.exp(a._data)) + tm.assert_extension_array_equal(result, expected) + + +def test_array_ufunc_series(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.exp(s) + expected = pd.Series(to_decimal(np.exp(a._data))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_scalar_other(): + # check _HANDLED_TYPES + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.add(s, decimal.Decimal(1)) + expected = pd.Series(np.add(a, decimal.Decimal(1))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_defer(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + + expected = pd.Series(to_decimal([2, 4, 6])) + r1 = np.add(s, a) + r2 = np.add(a, s) + + tm.assert_series_equal(r1, expected) + tm.assert_series_equal(r2, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index aed08b78fe640..df69bb35115cf 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -484,18 +484,18 @@ def test_matmul(self): b = DataFrame(np.random.randn(3, 4), index=['1', '2', '3'], columns=['p', 'q', 'r', 's']).T - # Series @ DataFrame + # Series @ DataFrame -> Series result = operator.matmul(a, b) expected = Series(np.dot(a.values, b.values), index=['1', '2', '3']) 
assert_series_equal(result, expected) - # DataFrame @ Series + # DataFrame @ Series -> Series result = operator.matmul(b.T, a) expected = Series(np.dot(b.T.values, a.T.values), index=['1', '2', '3']) assert_series_equal(result, expected) - # Series @ Series + # Series @ Series -> scalar result = operator.matmul(a, a) expected = np.dot(a.values, a.values) assert_almost_equal(result, expected) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 05d19452b1eac..1a0eeb51c4921 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -1,3 +1,4 @@ +from collections import deque import string import numpy as np @@ -12,14 +13,12 @@ np.logaddexp, ] SPARSE = [ - pytest.param(True, - marks=pytest.mark.xfail(reason="Series.__array_ufunc__")), - False, + True, + False ] SPARSE_IDS = ['sparse', 'dense'] SHUFFLE = [ - pytest.param(True, marks=pytest.mark.xfail(reason="GH-26945", - strict=False)), + True, False ] @@ -43,7 +42,7 @@ def test_unary_ufunc(ufunc, sparse): array = np.random.randint(0, 10, 10, dtype='int64') array[::2] = 0 if sparse: - array = pd.SparseArray(array, dtype=pd.SparseDtype('int', 0)) + array = pd.SparseArray(array, dtype=pd.SparseDtype('int64', 0)) index = list(string.ascii_letters[:10]) name = "name" @@ -61,8 +60,8 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): # Test that ufunc(Series(a), array) == Series(ufunc(a, b)) a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) name = "name" # op(Series, array) preserves the name. series = pd.Series(a1, name=name) @@ -82,18 +81,15 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) -@pytest.mark.parametrize("flip", [ - pytest.param(True, marks=pytest.mark.xfail(reason="Index should defer")), - False -], ids=['flipped', 'straight']) +@pytest.mark.parametrize("flip", [True, False], ids=['flipped', 'straight']) def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): # Test that # * func(Series(a), Series(b)) == Series(ufunc(a, b)) # * ufunc(Index, Series) dispatches to Series (returns a Series) a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) name = "name" # op(Series, array) preserves the name. series = pd.Series(a1, name=name) @@ -121,14 +117,10 @@ def test_binary_ufunc_with_series(flip, shuffle, sparse, ufunc, # Test that # * func(Series(a), Series(b)) == Series(ufunc(a, b)) # with alignment between the indices - - if flip and shuffle: - pytest.xfail(reason="Fix with Series.__array_ufunc__") - a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) name = "name" # op(Series, array) preserves the name. 
series = pd.Series(a1, name=name) @@ -138,8 +130,6 @@ def test_binary_ufunc_with_series(flip, shuffle, sparse, ufunc, if shuffle: other = other.take(idx) - a2 = a2.take(idx) - # alignment, so the expected index is the first index in the op. if flip: index = other.align(series)[0].index else: @@ -198,10 +188,13 @@ def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, pytest.skip("sparse divmod not implemented.") a1, a2 = arrays_for_binary_ufunc + # work around https://github.com/pandas-dev/pandas/issues/26987 + a1[a1 == 0] = 1 + a2[a2 == 0] = 1 if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) s1 = pd.Series(a1) s2 = pd.Series(a2) @@ -241,7 +234,6 @@ def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) -@pytest.mark.xfail(reason="Series.__array_ufunc__") def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc): # Drop the names when they differ. @@ -251,3 +243,70 @@ def test_binary_ufunc_drops_series_name(ufunc, sparse, result = ufunc(s1, s2) assert result.name is None + + +def test_object_series_ok(): + class Dummy: + def __init__(self, value): + self.value = value + + def __add__(self, other): + return self.value + other.value + + arr = np.array([Dummy(0), Dummy(1)]) + ser = pd.Series(arr) + tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) + tm.assert_series_equal(np.add(ser, Dummy(1)), + pd.Series(np.add(ser, Dummy(1)))) + + +@pytest.mark.parametrize('values', [ + pd.array([1, 3, 2]), + pytest.param( + pd.array([1, 10, 0], dtype='Sparse[int]'), + marks=pytest.mark.xfail(resason='GH-27080. Bug in SparseArray') + ), + pd.to_datetime(['2000', '2010', '2001']), + pd.to_datetime(['2000', '2010', '2001']).tz_localize("CET"), + pd.to_datetime(['2000', '2010', '2001']).to_period(freq="D"), + +]) +def test_reduce(values): + a = pd.Series(values) + assert np.maximum.reduce(a) == values[1] + + +@pytest.mark.parametrize('type_', [ + list, + deque, + tuple, +]) +def test_binary_ufunc_other_types(type_): + a = pd.Series([1, 2, 3], name='name') + b = type_([3, 4, 5]) + + result = np.add(a, b) + expected = pd.Series(np.add(a.to_numpy(), b), name='name') + tm.assert_series_equal(result, expected) + + +def test_object_dtype_ok(): + + class Thing: + def __init__(self, value): + self.value = value + + def __add__(self, other): + other = getattr(other, 'value', other) + return type(self)(self.value + other) + + def __eq__(self, other): + return type(other) is Thing and self.value == other.value + + def __repr__(self): + return 'Thing({})'.format(self.value) + + s = pd.Series([Thing(1), Thing(2)]) + result = np.add(s, Thing(1)) + expected = pd.Series([Thing(2), Thing(3)]) + tm.assert_series_equal(result, expected)
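Taken together, the documentation and tests above describe the user-facing behaviour. The following short usage sketch is not part of the patch and assumes a pandas build that includes these changes::

    import numpy as np
    import pandas as pd

    ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='x')
    ser2 = pd.Series([1, 3, 5], index=['b', 'a', 'c'], name='y')

    # Labeled inputs are aligned before the ufunc is applied; because the
    # two names differ, the result's name is dropped.
    np.remainder(ser1, ser2)

    # Ufuncs with multiple outputs, e.g. np.modf or np.divmod, return a
    # tuple of Series.
    fractional, integral = np.modf(pd.Series([1.5, 2.25], name='x'))

    # Series backed by extension arrays keep their extension type where
    # possible, here the nullable Int64 dtype.
    masked = pd.Series(pd.array([1, 2, None], dtype='Int64'))
    np.add(masked, 1)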