From 2fffac3b464da3db1e1931c18ccba886d67d6564 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 11:00:11 +0200 Subject: [PATCH 01/48] Add __array_ufunc__ to Series / Array --- pandas/core/arrays/integer.py | 28 ++++++++++++++++++++++ pandas/core/series.py | 20 ++++++++++++++-- pandas/tests/arrays/test_integer.py | 37 +++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 17e92c3976e2c..a580b09ba8df9 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,3 +1,4 @@ +import numbers import sys import warnings import copy @@ -293,6 +294,33 @@ def __array__(self, dtype=None): """ return self._coerce_to_ndarray() + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + + out = kwargs.get('out', ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (IntegerArray,)): + return NotImplemented + + if method == '__call__': + if ufunc.signature is None and ufunc.nout == 1: + args = [a._data for a in inputs] + masks = [a._mask for a in inputs] + result = ufunc(*args, **kwargs) + mask = np.logical_or.reduce(masks) + if result.dtype.kind in ('i', 'u'): + return IntegerArray(result, mask) + else: + result[mask] = np.nan + return result + + # fall back to array for other ufuncs + return np.array(self).__array_ufunc__( + ufunc, method, *inputs, **kwargs) + return NotImplemented + def __iter__(self): for i in range(len(self)): if self._mask[i]: diff --git a/pandas/core/series.py b/pandas/core/series.py index 7ebbe0dfb4bb7..074cc201f24a9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -623,6 +623,23 @@ def view(self, dtype=None): return self._constructor(self._values.view(dtype), index=self.index).__finalize__(self) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + inputs = tuple( + x._values if isinstance(x, 
type(self)) else x + for x in inputs + ) + if hasattr(self._values, '__array_ufunc__'): + result = self._values.__array_ufunc__( + ufunc, method, *inputs, **kwargs) + else: + result = np.array(self._values).__array_ufunc__( + ufunc, method, *inputs, **kwargs) + if result is NotImplemented: + raise TypeError("The '{0}' operation is not supported for " + "dtype {1}.".format(ufunc.__name__, self.dtype)) + return self._constructor(result, index=self.index, + copy=False).__finalize__(self) + def __array__(self, result=None): """ the array interface, return my values @@ -640,10 +657,9 @@ def __array_prepare__(self, result, context=None): """ Gets called prior to a ufunc """ - # nice error message for non-ufunc types if (context is not None and - not isinstance(self._values, (np.ndarray, ABCSparseArray))): + not isinstance(self._values, (np.ndarray, ExtensionArray))): obj = context[1][0] raise TypeError("{obj} with dtype {dtype} cannot perform " "the numpy op {op}".format( diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index e6dae0ffaec28..316d8b9196891 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -690,6 +690,43 @@ def test_astype_nansafe(): arr.astype('uint32') +@pytest.mark.parametrize( + 'ufunc', [np.abs, ]) +def test_ufuncs_single_int(ufunc): + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a) + expected = integer_array(ufunc(a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(integer_array(ufunc(a.astype(float)))) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + 'ufunc', [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a) + expected = ufunc(a.astype(float)) + tm.assert_numpy_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = 
ufunc(s.astype(float)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + 'ufunc', [np.add, np.subtract]) +def test_ufuncs_binary_int(ufunc): + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a, a) + expected = integer_array(ufunc(a.astype(float), a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + # TODO(jreback) - these need testing / are broken # shift From c5a4664136e6f53f9ec4140c5272d82daaa7b4a8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 15:13:58 +0200 Subject: [PATCH 02/48] expand IntegerArray.__array_ufunc__ --- pandas/core/arrays/integer.py | 71 +++++++++++++++++++++++------ pandas/tests/arrays/test_integer.py | 37 +++++++++++++-- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index a580b09ba8df9..1518c035f005a 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -8,7 +8,7 @@ from pandas._libs import lib from pandas.util._decorators import cache_readonly from pandas.compat import u, range, string_types -from pandas.compat import set_function_name +from pandas.compat import set_function_name, PY2 from pandas.core import nanops from pandas.core.dtypes.cast import astype_nansafe @@ -304,22 +304,67 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if not isinstance(x, self._HANDLED_TYPES + (IntegerArray,)): return NotImplemented - if method == '__call__': - if ufunc.signature is None and ufunc.nout == 1: - args = [a._data for a in inputs] - masks = [a._mask for a in inputs] - result = ufunc(*args, **kwargs) - mask = np.logical_or.reduce(masks) - if result.dtype.kind in ('i', 'u'): - return IntegerArray(result, mask) - else: - result[mask] = np.nan - return result + special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', + 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'} + if PY2: + special.add('div') + aliases = { + 'subtract': 
'sub', + 'multiply': 'mul', + 'floor_divide': 'floordiv', + 'true_divide': 'truediv', + 'power': 'pow', + 'remainder': 'mod', + 'divide': 'div', + 'equal': 'eq', + 'not_equal': 'ne', + 'less': 'lt', + 'less_equal': 'le', + 'greater': 'gt', + 'greater_equal': 'ge', + } + + flipped = { + 'lt': '__gt__', + 'le': '__ge__', + 'gt': '__lt__', + 'ge': '__le__', + 'eq': '__eq__', + 'ne': '__ne__', + } + + op_name = ufunc.__name__ + op_name = aliases.get(op_name, op_name) + + if (method == '__call__' and op_name in special + and kwargs.get('out') is None): + if isinstance(inputs[0], type(self)): + return getattr(self, '__{}__'.format(op_name))(inputs[1]) + else: + name = flipped.get(op_name, '__r{}__'.format(op_name)) + return getattr(self, name)(inputs[0]) + + if (method == '__call__' + and ufunc.signature is None + and ufunc.nout == 1): + # only supports IntegerArray for now + args = [a._data for a in inputs] + masks = [a._mask for a in inputs] + result = ufunc(*args, **kwargs) + mask = np.logical_or.reduce(masks) + if result.dtype.kind in ('i', 'u'): + return IntegerArray(result, mask) + else: + result[mask] = np.nan + return result # fall back to array for other ufuncs + inputs = tuple( + np.array(x) if isinstance(x, type(self)) else x + for x in inputs + ) return np.array(self).__array_ufunc__( ufunc, method, *inputs, **kwargs) - return NotImplemented def __iter__(self): for i in range(len(self)): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 316d8b9196891..eae475a0ef78a 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -691,7 +691,7 @@ def test_astype_nansafe(): @pytest.mark.parametrize( - 'ufunc', [np.abs, ]) + 'ufunc', [np.abs, np.positive, np.negative]) def test_ufuncs_single_int(ufunc): a = integer_array([1, 2, -3, np.nan]) result = ufunc(a) @@ -708,24 +708,51 @@ def test_ufuncs_single_int(ufunc): 'ufunc', [np.log, np.exp, np.sin, np.cos, np.sqrt]) def 
test_ufuncs_single_float(ufunc): a = integer_array([1, 2, -3, np.nan]) - result = ufunc(a) - expected = ufunc(a.astype(float)) + with np.errstate(invalid='ignore'): + result = ufunc(a) + expected = ufunc(a.astype(float)) tm.assert_numpy_array_equal(result, expected) s = pd.Series(a) - result = ufunc(s) - expected = ufunc(s.astype(float)) + with np.errstate(invalid='ignore'): + result = ufunc(s) + expected = ufunc(s.astype(float)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( 'ufunc', [np.add, np.subtract]) def test_ufuncs_binary_int(ufunc): + # two IntegerArrays a = integer_array([1, 2, -3, np.nan]) result = ufunc(a, a) expected = integer_array(ufunc(a.astype(float), a.astype(float))) tm.assert_extension_array_equal(result, expected) + # IntegerArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = ufunc(a, arr) + expected = integer_array(ufunc(a.astype(float), arr)) + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = integer_array(ufunc(arr, a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with scalar + result = ufunc(a, 1) + expected = integer_array(ufunc(a.astype(float), 1)) + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = integer_array(ufunc(1, a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + +def test_ufunc_fallback(): + a = integer_array([1, 2, -3, np.nan]) + assert pd.isna(np.add.reduce(a)) + # TODO(jreback) - these need testing / are broken From dd332a47042bbd8b58ac2188da57fe40a62172a3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 15:48:54 +0200 Subject: [PATCH 03/48] fix Series.__array_ufunc__ and consolidate dispatch --- pandas/core/arrays/integer.py | 44 ++++------------------------------- pandas/core/arrays/sparse.py | 44 +++++------------------------------ pandas/core/ops.py | 43 ++++++++++++++++++++++++++++++++++ pandas/core/series.py | 6 +++++ 4 
files changed, 60 insertions(+), 77 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 1518c035f005a..8997089648255 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -304,45 +304,11 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if not isinstance(x, self._HANDLED_TYPES + (IntegerArray,)): return NotImplemented - special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', - 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'} - if PY2: - special.add('div') - aliases = { - 'subtract': 'sub', - 'multiply': 'mul', - 'floor_divide': 'floordiv', - 'true_divide': 'truediv', - 'power': 'pow', - 'remainder': 'mod', - 'divide': 'div', - 'equal': 'eq', - 'not_equal': 'ne', - 'less': 'lt', - 'less_equal': 'le', - 'greater': 'gt', - 'greater_equal': 'ge', - } - - flipped = { - 'lt': '__gt__', - 'le': '__ge__', - 'gt': '__lt__', - 'ge': '__le__', - 'eq': '__eq__', - 'ne': '__ne__', - } - - op_name = ufunc.__name__ - op_name = aliases.get(op_name, op_name) - - if (method == '__call__' and op_name in special - and kwargs.get('out') is None): - if isinstance(inputs[0], type(self)): - return getattr(self, '__{}__'.format(op_name))(inputs[1]) - else: - name = flipped.get(op_name, '__r{}__'.format(op_name)) - return getattr(self, name)(inputs[0]) + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not None: + return result if (method == '__call__' and ufunc.signature is None diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 920a9f8286f0d..5fe183abd309b 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -47,6 +47,7 @@ from pandas._libs import lib import pandas.core.algorithms as algos import pandas.io.formats.printing as printing +import pandas.core.ops as ops # 
---------------------------------------------------------------------------- @@ -1447,44 +1448,11 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if not isinstance(x, self._HANDLED_TYPES + (SparseArray,)): return NotImplemented - special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', - 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'} - if compat.PY2: - special.add('div') - aliases = { - 'subtract': 'sub', - 'multiply': 'mul', - 'floor_divide': 'floordiv', - 'true_divide': 'truediv', - 'power': 'pow', - 'remainder': 'mod', - 'divide': 'div', - 'equal': 'eq', - 'not_equal': 'ne', - 'less': 'lt', - 'less_equal': 'le', - 'greater': 'gt', - 'greater_equal': 'ge', - } - - flipped = { - 'lt': '__gt__', - 'le': '__ge__', - 'gt': '__lt__', - 'ge': '__le__', - 'eq': '__eq__', - 'ne': '__ne__', - } - - op_name = ufunc.__name__ - op_name = aliases.get(op_name, op_name) - - if op_name in special and kwargs.get('out') is None: - if isinstance(inputs[0], type(self)): - return getattr(self, '__{}__'.format(op_name))(inputs[1]) - else: - name = flipped.get(op_name, '__r{}__'.format(op_name)) - return getattr(self, name)(inputs[0]) + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not None: + return result if len(inputs) == 1: # No alignment necessary. 
diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 42d9fbd4b7585..d4c8e6d100c3b 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2146,3 +2146,46 @@ def wrapper(self, other): wrapper.__name__ = op_name return wrapper + + +def maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs): + + special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', + 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'} + if compat.PY2: + special.add('div') + aliases = { + 'subtract': 'sub', + 'multiply': 'mul', + 'floor_divide': 'floordiv', + 'true_divide': 'truediv', + 'power': 'pow', + 'remainder': 'mod', + 'divide': 'div', + 'equal': 'eq', + 'not_equal': 'ne', + 'less': 'lt', + 'less_equal': 'le', + 'greater': 'gt', + 'greater_equal': 'ge', + } + + flipped = { + 'lt': '__gt__', + 'le': '__ge__', + 'gt': '__lt__', + 'ge': '__le__', + 'eq': '__eq__', + 'ne': '__ne__', + } + + op_name = ufunc.__name__ + op_name = aliases.get(op_name, op_name) + + if (method == '__call__' and op_name in special + and kwargs.get('out') is None): + if isinstance(inputs[0], type(self)): + return getattr(self, '__{}__'.format(op_name))(inputs[1]) + else: + name = flipped.get(op_name, '__r{}__'.format(op_name)) + return getattr(self, name)(inputs[0]) diff --git a/pandas/core/series.py b/pandas/core/series.py index 074cc201f24a9..d59fc72c297c3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -628,6 +628,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): x._values if isinstance(x, type(self)) else x for x in inputs ) + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not None: + return result + if hasattr(self._values, '__array_ufunc__'): result = self._values.__array_ufunc__( ufunc, method, *inputs, **kwargs) From 71c058e6a57a85953a6e31463b3ffc1289275325 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 
Oct 2018 15:58:31 +0200 Subject: [PATCH 04/48] test Series array_ufunc fallback to numpy array for DecimalArray --- pandas/tests/extension/decimal/test_decimal.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index be1c61166e4b1..084abee49fb92 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -364,3 +364,11 @@ def test_divmod_array(reverse, expected_div, expected_mod): tm.assert_extension_array_equal(div, expected_div) tm.assert_extension_array_equal(mod, expected_mod) + + +def test_ufunc_fallback(data): + a = data[:5] + s = pd.Series(a, index=range(3, 8)) + result = np.abs(s) + expected = pd.Series(np.abs(a.astype(object)), index=range(3, 8)) + tm.assert_series_equal(result, expected) From a0d11d9d2fcfabe06e9422aed2366733a71a1345 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 16:06:21 +0200 Subject: [PATCH 05/48] fix import --- pandas/core/arrays/integer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 8997089648255..7204471a9ac16 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -8,9 +8,10 @@ from pandas._libs import lib from pandas.util._decorators import cache_readonly from pandas.compat import u, range, string_types -from pandas.compat import set_function_name, PY2 +from pandas.compat import set_function_name from pandas.core import nanops +from pandas.core import ops from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.core.dtypes.common import ( From 4cfeb9b23b3ac5428307b3fab3f9e826292676b1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 17:18:18 +0200 Subject: [PATCH 06/48] first dispatch before getting underlying values (eg for 
Series[Period] underlying values are object array -> not always doing the correct thing) --- pandas/core/series.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d59fc72c297c3..cc15bcdf42afd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -624,16 +624,17 @@ def view(self, dtype=None): index=self.index).__finalize__(self) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - inputs = tuple( - x._values if isinstance(x, type(self)) else x - for x in inputs - ) # for binary ops, use our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) if result is not None: return result + inputs = tuple( + x._values if isinstance(x, type(self)) else x + for x in inputs + ) + if hasattr(self._values, '__array_ufunc__'): result = self._values.__array_ufunc__( ufunc, method, *inputs, **kwargs) From 607f8a6c0862010af731f57d271fadbd3132c673 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 17:33:43 +0200 Subject: [PATCH 07/48] fix Categorical: disallow all ufunc apart from ops --- pandas/core/arrays/categorical.py | 14 ++++++++++++++ pandas/core/series.py | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 8735284617f31..d88994c37a50e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -39,6 +39,7 @@ NoNewAttributesMixin, _shared_docs) import pandas.core.common as com from pandas.core.missing import interpolate_2d +from pandas.core import ops from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, cache_readonly, deprecate_kwarg, Substitution) @@ -1271,6 +1272,19 @@ def __array__(self, dtype=None): ret = np.asarray(ret) return ret + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # for binary ops, use our custom 
dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs) + if result is not None: + return result + # for all other cases, raise for now (similarly as what happens in + # Series.__array_prepare__) + raise TypeError("Object with dtype {dtype} cannot perform " + "the numpy op {op}".format( + dtype=self.dtype, + op=ufunc.__name__)) + def __setstate__(self, state): """Necessary for making this object picklable""" if not isinstance(state, dict): diff --git a/pandas/core/series.py b/pandas/core/series.py index cc15bcdf42afd..4fb95de95dad7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -666,7 +666,8 @@ def __array_prepare__(self, result, context=None): """ # nice error message for non-ufunc types if (context is not None and - not isinstance(self._values, (np.ndarray, ExtensionArray))): + (not isinstance(self._values, (np.ndarray, ExtensionArray)) + or isinstance(self._values, Categorical))): obj = context[1][0] raise TypeError("{obj} with dtype {dtype} cannot perform " "the numpy op {op}".format( From c4fcae7d10a1b1501f975ad013e54732643106d8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 18:05:34 +0200 Subject: [PATCH 08/48] simplify calling ufunc on underlying values --- pandas/core/arrays/integer.py | 3 +-- pandas/core/series.py | 10 +--------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 7204471a9ac16..46c4dd0daf9c1 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -330,8 +330,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): np.array(x) if isinstance(x, type(self)) else x for x in inputs ) - return np.array(self).__array_ufunc__( - ufunc, method, *inputs, **kwargs) + return getattr(ufunc, method)(*inputs, **kwargs) def __iter__(self): for i in range(len(self)): diff --git a/pandas/core/series.py b/pandas/core/series.py index 
4fb95de95dad7..2a26f41b9d7c7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -635,15 +635,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): for x in inputs ) - if hasattr(self._values, '__array_ufunc__'): - result = self._values.__array_ufunc__( - ufunc, method, *inputs, **kwargs) - else: - result = np.array(self._values).__array_ufunc__( - ufunc, method, *inputs, **kwargs) - if result is NotImplemented: - raise TypeError("The '{0}' operation is not supported for " - "dtype {1}.".format(ufunc.__name__, self.dtype)) + result = getattr(ufunc, method)(*inputs, **kwargs) return self._constructor(result, index=self.index, copy=False).__finalize__(self) From 65dea1bfd464b9d2f0110faca90a1f8952c7e95d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Oct 2018 21:12:18 +0200 Subject: [PATCH 09/48] fix categorical not existing ops --- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/integer.py | 2 +- pandas/core/arrays/sparse.py | 2 +- pandas/core/ops.py | 10 ++++++++-- pandas/core/series.py | 2 +- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d88994c37a50e..b9c6c75cee853 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1276,7 +1276,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # for binary ops, use our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) - if result is not None: + if result is not NotImplemented: return result # for all other cases, raise for now (similarly as what happens in # Series.__array_prepare__) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 46c4dd0daf9c1..4030220855201 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -308,7 +308,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # for binary ops, use 
our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) - if result is not None: + if result is not NotImplemented: return result if (method == '__call__' diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 5fe183abd309b..906586e3f3e89 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1451,7 +1451,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # for binary ops, use our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) - if result is not None: + if result is not NotImplemented: return result if len(inputs) == 1: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index d4c8e6d100c3b..1bbe6f3704ed9 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2182,10 +2182,16 @@ def maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs): op_name = ufunc.__name__ op_name = aliases.get(op_name, op_name) + def not_implemented(*args, **kwargs): + return NotImplemented + if (method == '__call__' and op_name in special and kwargs.get('out') is None): if isinstance(inputs[0], type(self)): - return getattr(self, '__{}__'.format(op_name))(inputs[1]) + name = '__{}__'.format(op_name) + return getattr(self, name, not_implemented)(inputs[1]) else: name = flipped.get(op_name, '__r{}__'.format(op_name)) - return getattr(self, name)(inputs[0]) + return getattr(self, name, not_implemented)(inputs[0]) + else: + return NotImplemented diff --git a/pandas/core/series.py b/pandas/core/series.py index 2a26f41b9d7c7..bba109e4dff81 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -627,7 +627,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # for binary ops, use our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) - if result is not None: + if result is not NotImplemented: return result 
inputs = tuple( From 134df141ea67814bd1e8d05df064b38560effecd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Oct 2018 12:01:57 +0200 Subject: [PATCH 10/48] np.positive not available for older numpy versions --- pandas/tests/arrays/test_integer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index eae475a0ef78a..cf8d17c99e16c 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -691,7 +691,7 @@ def test_astype_nansafe(): @pytest.mark.parametrize( - 'ufunc', [np.abs, np.positive, np.negative]) + 'ufunc', [np.abs, np.sign]) def test_ufuncs_single_int(ufunc): a = integer_array([1, 2, -3, np.nan]) result = ufunc(a) From 5239b70e53fda71631f7a74f003f52ff2031fb2e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Oct 2018 12:12:18 +0200 Subject: [PATCH 11/48] fix multiple return values --- pandas/core/series.py | 12 ++++++++++-- pandas/tests/arithmetic/test_numeric.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bba109e4dff81..c50658418b073 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -636,8 +636,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ) result = getattr(ufunc, method)(*inputs, **kwargs) - return self._constructor(result, index=self.index, - copy=False).__finalize__(self) + + def construct_return(result): + return self._constructor(result, index=self.index, + copy=False).__finalize__(self) + + if type(result) is tuple: + # multiple return values + return tuple(construct_return(x) for x in result) + else: + return construct_return(result) def __array__(self, result=None): """ diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 25845dd8b3151..584cac229fc8c 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ 
b/pandas/tests/arithmetic/test_numeric.py @@ -833,6 +833,19 @@ def test_ufunc_coercions(self, holder): exp = tm.box_expected(exp, box) tm.assert_equal(result, exp) + @pytest.mark.parametrize('holder', [pd.Int64Index, pd.UInt64Index, + pd.Float64Index, pd.Series]) + def test_ufunc_multiple_return_values(self, holder): + obj = holder([1, 2, 3], name='x') + box = pd.Series if holder is pd.Series else pd.Index + + result = np.modf(obj) + assert isinstance(result, tuple) + exp1 = pd.Float64Index([0., 0., 0.], name='x') + exp2 = pd.Float64Index([1., 2., 3.], name='x') + tm.assert_equal(result[0], tm.box_expected(exp1, box)) + tm.assert_equal(result[1], tm.box_expected(exp2, box)) + class TestObjectDtypeEquivalence(object): # Tests that arithmetic operations match operations executed elementwise From 3d9188509f91637d76734753f09c54decf261e46 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Oct 2018 13:29:58 +0200 Subject: [PATCH 12/48] skip IntegerArray tests for older numpy versions --- pandas/core/series.py | 1 + pandas/tests/arrays/test_integer.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index c50658418b073..d4066db00a94c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -630,6 +630,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if result is not NotImplemented: return result + import pdb; pdb.set_trace() inputs = tuple( x._values if isinstance(x, type(self)) else x for x in inputs diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index cf8d17c99e16c..1c77a0b856ebf 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -4,6 +4,7 @@ import pandas.util.testing as tm import pytest +from pandas import _np_version_under1p13 from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar from pandas.core.dtypes.generic import ABCIndexClass @@ -690,6 +691,7 @@ def 
test_astype_nansafe(): arr.astype('uint32') +@pytest.mark.skipif(_np_version_under1p13) @pytest.mark.parametrize( 'ufunc', [np.abs, np.sign]) def test_ufuncs_single_int(ufunc): @@ -704,6 +706,7 @@ def test_ufuncs_single_int(ufunc): tm.assert_series_equal(result, expected) +@pytest.mark.skipif(_np_version_under1p13) @pytest.mark.parametrize( 'ufunc', [np.log, np.exp, np.sin, np.cos, np.sqrt]) def test_ufuncs_single_float(ufunc): @@ -720,6 +723,7 @@ def test_ufuncs_single_float(ufunc): tm.assert_series_equal(result, expected) +@pytest.mark.skipif(_np_version_under1p13) @pytest.mark.parametrize( 'ufunc', [np.add, np.subtract]) def test_ufuncs_binary_int(ufunc): From 429f15c6839d7a114804a5f9ed98433d465a8023 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Oct 2018 13:37:54 +0200 Subject: [PATCH 13/48] also deal with no return value --- pandas/core/series.py | 3 +++ pandas/tests/arithmetic/test_numeric.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index d4066db00a94c..a096b594d54d6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -645,6 +645,9 @@ def construct_return(result): if type(result) is tuple: # multiple return values return tuple(construct_return(x) for x in result) + elif method == 'at': + # no return value + return None else: return construct_return(result) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 584cac229fc8c..613cf43f19d96 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -846,6 +846,12 @@ def test_ufunc_multiple_return_values(self, holder): tm.assert_equal(result[0], tm.box_expected(exp1, box)) tm.assert_equal(result[1], tm.box_expected(exp2, box)) + def test_ufunc_at(self): + s = pd.Series([0, 1, 2], index=[1, 2, 3], name='x') + np.add.at(s, [0, 2], 10) + expected = pd.Series([10, 1, 12], index=[1, 2, 3], name='x') + tm.assert_series_equal(s, 
expected) + class TestObjectDtypeEquivalence(object): # Tests that arithmetic operations match operations executed elementwise From 41f41580c87b565b7067273873b43df769541a33 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Oct 2018 14:43:39 +0200 Subject: [PATCH 14/48] clean-up debugging left-over --- pandas/core/series.py | 1 - pandas/tests/arrays/test_integer.py | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a096b594d54d6..f3b0ad34642d3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -630,7 +630,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if result is not NotImplemented: return result - import pdb; pdb.set_trace() inputs = tuple( x._values if isinstance(x, type(self)) else x for x in inputs diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 1c77a0b856ebf..f218da55c5562 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -691,7 +691,8 @@ def test_astype_nansafe(): arr.astype('uint32') -@pytest.mark.skipif(_np_version_under1p13) +@pytest.mark.skipif(_np_version_under1p13, + reason='ufunc protocol added in 1.13') @pytest.mark.parametrize( 'ufunc', [np.abs, np.sign]) def test_ufuncs_single_int(ufunc): @@ -706,7 +707,8 @@ def test_ufuncs_single_int(ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(_np_version_under1p13) +@pytest.mark.skipif(_np_version_under1p13, + reason='ufunc protocol added in 1.13') @pytest.mark.parametrize( 'ufunc', [np.log, np.exp, np.sin, np.cos, np.sqrt]) def test_ufuncs_single_float(ufunc): @@ -723,7 +725,8 @@ def test_ufuncs_single_float(ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(_np_version_under1p13) +@pytest.mark.skipif(_np_version_under1p13, + reason='ufunc protocol added in 1.13') @pytest.mark.parametrize( 'ufunc', [np.add, np.subtract]) def test_ufuncs_binary_int(ufunc): 
From 0d6a663ff896b4eca24cbbce3a433ed2de15cbcd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Jun 2019 09:52:24 -0500 Subject: [PATCH 15/48] TST: Additional tests for Series ufuncs This adds a set of tests for ufuncs on Series. The goal is to establish the correct behavior prior to implementing `Series.__array_ufunc__`. There are two kinds of xfails right now 1. Series[Sparse] fails because `Series.__array_ufunc__` doesn't yet dispatch to `Series.array.__array_ufunc__` 2. `ufunc(series, series)` when the two series are unaligned. It's been determined that these should align, but isn't currently implemented. --- pandas/tests/series/test_ufunc.py | 152 ++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 pandas/tests/series/test_ufunc.py diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py new file mode 100644 index 0000000000000..406fb950b2bf8 --- /dev/null +++ b/pandas/tests/series/test_ufunc.py @@ -0,0 +1,152 @@ +import string + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm + +UNARY_UFUNCS = [np.positive, np.floor, np.exp] +BINARY_UFUNCS = [np.add, np.logaddexp] # -> dunder op +SPARSE = [ + pytest.param(True, + marks=pytest.mark.xfail(reason="Series.__array_ufunc__")), + False, +] +SPARSE_IDS = ['sparse', 'dense'] +SHUFFLE = [ + pytest.param(True, marks=pytest.mark.xfail(reason="GH-26945")), + False +] + + +@pytest.fixture +def arrays_for_binary_ufunc(): + """ + A pair of random, length-100 integer-dtype arrays, that are mostly 0. 
+ """ + a1 = np.random.randint(0, 10, 100) + a2 = np.random.randint(0, 10, 100) + a1[::3] = 0 + a2[::4] = 0 + return a1, a2 + + +@pytest.mark.parametrize("ufunc", UNARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_unary_ufunc(ufunc, sparse): + array = np.random.randint(0, 10, 10) + array[::2] = 0 + if sparse: + array = pd.SparseArray(array, dtype=pd.SparseDtype('int', 0)) + + index = list(string.ascii_letters[:10]) + name = "name" + series = pd.Series(array, index=index, name=name) + + result = ufunc(series) + expected = pd.Series(ufunc(array), index=index, name=name) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.parametrize("box_other", [True, False]) +def test_binary_ufunc(ufunc, sparse, shuffle, box_other, + arrays_for_binary_ufunc): + # Check the invariant that + # ufunc(Series(a), Series(b)) == Series(ufunc(a, b)) + # with alignment. + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + + name = "name" + # TODO: verify name when the differ? Take the first? Drop? + s1 = pd.Series(a1, name=name) + s2 = pd.Series(a2, name=name) + + # handle shufling / alignment + # If boxing -- ufunc(series, series) -- then we don't need to shuffle + # the other array for the expected, since we align. 
+ # If not boxing -- ufunc(series, array) -- then we do need to shuffle + # the other array, since we *dont'* align + idx = np.random.permutation(len(s1)) + if box_other and shuffle: + # ensure we align before applying the ufunc + s2 = s2.take(idx) + elif shuffle: + a2 = a2.take(idx) + + result = ufunc(s1, s2) + expected = pd.Series(ufunc(a1, a2), name=name) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False]) +def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): + array, _ = arrays_for_binary_ufunc + if sparse: + array = pd.SparseArray(array) + other = 2 + series = pd.Series(array, name="name") + + a, b = series, other + c, d = array, other + if flip: + c, d = b, c + a, b = b, a + + expected = pd.Series(ufunc(a, b), name="name") + result = pd.Series(ufunc(c, d), name="name") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.divmod]) # any others? 
+@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") +def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, + arrays_for_binary_ufunc): + a1, a2 = arrays_for_binary_ufunc + + if sparse: + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + + s1 = pd.Series(a1) + s2 = pd.Series(a2) + + if shuffle: + # ensure we align before applying the ufunc + s2 = s2.sample(frac=1) + + expected = ufunc(a1, a2) + assert isinstance(expected, tuple) + + result = ufunc(s1, s2) + assert isinstance(result, tuple) + tm.assert_series_equal(result[0], pd.Series(expected[0])) + tm.assert_series_equal(result[1], pd.Series(expected[1])) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): + array, _ = arrays_for_binary_ufunc + + if sparse: + array = pd.SparseArray(array) + + series = pd.Series(array, name="name") + result = np.modf(series) + expected = np.modf(array) + + assert isinstance(result, tuple) + assert isinstance(expected, tuple) + + tm.assert_series_equal(result[0], pd.Series(expected[0], name="name")) + tm.assert_series_equal(result[1], pd.Series(expected[1], name="name")) From 8f46391a0ce923010fcc5ab78268efd3181a13cf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Jun 2019 13:00:55 -0500 Subject: [PATCH 16/48] fixup release note --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e6bc422b52e89..a11e6dce7a9a3 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -769,7 +769,7 @@ Sparse - Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) - Bug in 
:class:`SparseDataFrame` when adding a column in which the length of values does not match length of index, ``AssertionError`` is raised instead of raising ``ValueError`` (:issue:`25484`) - Introduce a better error message in :meth:`Series.sparse.from_coo` so it returns a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`) -- Bug in :func:`numpy.modf` on a :class:`SparseArray`. Now a tuple of :class:`SparseArray` is returned. +- Bug in :func:`numpy.modf` on a :class:`SparseArray`. Now a tuple of :class:`SparseArray` is returned (:issue:`26946`). Other ^^^^^ From 44e3c7ebe49839028443c2bdf05b7613f40a889a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Jun 2019 14:33:08 -0500 Subject: [PATCH 17/48] fixups --- pandas/tests/series/test_ufunc.py | 35 +++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 406fb950b2bf8..c45a5144d45e8 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -7,7 +7,10 @@ import pandas.util.testing as tm UNARY_UFUNCS = [np.positive, np.floor, np.exp] -BINARY_UFUNCS = [np.add, np.logaddexp] # -> dunder op +BINARY_UFUNCS = [ + np.add, # dunder op + np.logaddexp, +] SPARSE = [ pytest.param(True, marks=pytest.mark.xfail(reason="Series.__array_ufunc__")), @@ -52,8 +55,12 @@ def test_unary_ufunc(ufunc, sparse): @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) -@pytest.mark.parametrize("box_other", [True, False]) +@pytest.mark.parametrize("box_other", [True, False], + ids=['other-boxed', 'other-raw']) +@pytest.mark.parametrize("flip", [True, False], + ids=['flipped', 'straight']) def test_binary_ufunc(ufunc, sparse, shuffle, box_other, + flip, arrays_for_binary_ufunc): # Check the invariant that # ufunc(Series(a), Series(b)) == Series(ufunc(a, b)) @@ -80,8 +87,15 @@ def 
test_binary_ufunc(ufunc, sparse, shuffle, box_other, elif shuffle: a2 = a2.take(idx) - result = ufunc(s1, s2) - expected = pd.Series(ufunc(a1, a2), name=name) + a, b = s1, s2 + c, d = a1, a2 + + if flip: + a, b = b, a + c, d = d, c + + result = ufunc(a, b) + expected = pd.Series(ufunc(c, d), name=name) tm.assert_series_equal(result, expected) @@ -150,3 +164,16 @@ def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): tm.assert_series_equal(result[0], pd.Series(expected[0], name="name")) tm.assert_series_equal(result[1], pd.Series(expected[1], name="name")) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.xfail(reason="Series.__array_ufunc__") +def test_binary_ufunc_drops_series_name(ufunc, sparse, + arrays_for_binary_ufunc): + a1, a2 = arrays_for_binary_ufunc + s1 = pd.Series(a1, name='a') + s2 = pd.Series(a2, name='b') + + result = ufunc(s1, s2) + assert result.name is None From e1799138e7bcb2b6907b25ee47ac78d687251181 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Jun 2019 16:07:31 -0500 Subject: [PATCH 18/48] remove stale comment --- pandas/tests/series/test_ufunc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index c45a5144d45e8..beb29d4ee81d3 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -71,7 +71,6 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) name = "name" - # TODO: verify name when the differ? Take the first? Drop? s1 = pd.Series(a1, name=name) s2 = pd.Series(a2, name=name) @@ -171,6 +170,7 @@ def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): @pytest.mark.xfail(reason="Series.__array_ufunc__") def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc): + # Drop the names when they differ. 
a1, a2 = arrays_for_binary_ufunc s1 = pd.Series(a1, name='a') s2 = pd.Series(a2, name='b') From 0b1e745c05af118849a05e3112ec86d6eb244e7a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Jun 2019 11:45:02 -0500 Subject: [PATCH 19/48] xfail ufunc(series, index) --- pandas/tests/series/test_ufunc.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index beb29d4ee81d3..d230c1e93e9ac 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -55,8 +55,7 @@ def test_unary_ufunc(ufunc, sparse): @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) -@pytest.mark.parametrize("box_other", [True, False], - ids=['other-boxed', 'other-raw']) +@pytest.mark.parametrize("box_other", ['series', 'index', 'raw']) @pytest.mark.parametrize("flip", [True, False], ids=['flipped', 'straight']) def test_binary_ufunc(ufunc, sparse, shuffle, box_other, @@ -72,7 +71,13 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, name = "name" s1 = pd.Series(a1, name=name) - s2 = pd.Series(a2, name=name) + if box_other == 'series': + s2 = pd.Series(a2, name=name) + elif box_other == 'index': + # Index should defer to Series + s2 = pd.Index(a2, naame=name) + else: + s2 = a2 # handle shufling / alignment # If boxing -- ufunc(series, series) -- then we don't need to shuffle @@ -95,6 +100,8 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, result = ufunc(a, b) expected = pd.Series(ufunc(c, d), name=name) + if box_other == 'index' and flip: + raise pytest.xfail("Index should defer to Series") tm.assert_series_equal(result, expected) From 9be1dfff323a30283c330e3f2438a033ecee0542 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Jun 2019 13:50:41 -0500 Subject: [PATCH 20/48] 32-bit compat (cherry picked from commit 
ba5cb55e511ea484bf4a63d4c489767e5bed87a2) --- pandas/tests/series/test_ufunc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index d230c1e93e9ac..5695028a0fad0 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -28,8 +28,8 @@ def arrays_for_binary_ufunc(): """ A pair of random, length-100 integer-dtype arrays, that are mostly 0. """ - a1 = np.random.randint(0, 10, 100) - a2 = np.random.randint(0, 10, 100) + a1 = np.random.randint(0, 10, 100, dtype='int64') + a2 = np.random.randint(0, 10, 100, dtype='int64') a1[::3] = 0 a2[::4] = 0 return a1, a2 @@ -38,7 +38,7 @@ def arrays_for_binary_ufunc(): @pytest.mark.parametrize("ufunc", UNARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) def test_unary_ufunc(ufunc, sparse): - array = np.random.randint(0, 10, 10) + array = np.random.randint(0, 10, 10, dtype='int64') array[::2] = 0 if sparse: array = pd.SparseArray(array, dtype=pd.SparseDtype('int', 0)) From 775c2efde0f84dc7f1a2c214bc6d8a0bd21f1d4d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Jun 2019 15:03:24 -0500 Subject: [PATCH 21/48] fixup --- pandas/tests/series/test_ufunc.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 5695028a0fad0..f91d648464a3d 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -18,7 +18,8 @@ ] SPARSE_IDS = ['sparse', 'dense'] SHUFFLE = [ - pytest.param(True, marks=pytest.mark.xfail(reason="GH-26945")), + pytest.param(True, marks=pytest.mark.xfail(reason="GH-26945", + strict=False)), False ] @@ -79,17 +80,15 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, else: s2 = a2 - # handle shufling / alignment - # If boxing -- ufunc(series, series) -- then we don't need to shuffle - # the other array for the expected, since we 
align. - # If not boxing -- ufunc(series, array) -- then we do need to shuffle - # the other array, since we *dont'* align idx = np.random.permutation(len(s1)) - if box_other and shuffle: - # ensure we align before applying the ufunc + + if shuffle: s2 = s2.take(idx) - elif shuffle: - a2 = a2.take(idx) + if box_other != 'series': + # when other is a Series, we align, so we don't + # need to shuffle the array for expected. In all + # other cases, we do. + a2 = a2.take(idx) a, b = s1, s2 c, d = a1, a2 From 4d7f249b8c2ae4f11a56e652d02d8b7565991d6c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Jun 2019 14:16:01 -0500 Subject: [PATCH 22/48] wip --- pandas/core/series.py | 47 +++++++- pandas/tests/series/test_ufunc.py | 192 ++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+), 6 deletions(-) create mode 100644 pandas/tests/series/test_ufunc.py diff --git a/pandas/core/series.py b/pandas/core/series.py index 4a934705f28e4..d8aa5cf0669cb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3,6 +3,7 @@ """ from collections import OrderedDict from io import StringIO +import numbers from shutil import get_terminal_size from textwrap import dedent import warnings @@ -700,24 +701,58 @@ def view(self, dtype=None): # ---------------------------------------------------------------------- # NDArray Compat + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray, numbers.Number) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # for binary ops, use our custom dunder methods + from pandas.core.internals.construction import extract_array + result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: return result - inputs = tuple( - x._values if isinstance(x, type(self)) else x - for x in inputs - ) + # align all the inputs. + # TODO: is there a more efficient way to do this? 
+ types = tuple(type(x) for x in inputs) + series = [x for x, t in zip(inputs, types) if issubclass(t, Series)] + names = [getattr(x, 'name') for x in inputs if hasattr(x, 'name')] + + if len(series) > 1: + index = series[0].index + for s in series[1:]: + index |= s.index + inputs = [x.reindex(index) for x, t in zip(inputs, types) + if issubclass(t, Series)] + else: + index = self.index + + # Type checks: can we do this, given the inputs. + # It's expected that the following classes defer to us when + # any Series is present. + # 1. Index. + # 2. ExtensionArray. + + inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) + handled_types = sum([getattr(x, '_HANDLED_TYPES', ()) for x in inputs], + self._HANDLED_TYPES + (Series,)) + if not all(isinstance(t, handled_types) for t in inputs): + # there's an unknown object present. Bail out. + # TODO: Handle Series[object] + return NotImplemented result = getattr(ufunc, method)(*inputs, **kwargs) + if len(set(names)) == 1: + # we require names to be hashable, right? + name = names[0] + else: + name = None def construct_return(result): - return self._constructor(result, index=self.index, - copy=False).__finalize__(self) + return self._constructor(result, + index=index, + name=name, + copy=False) if type(result) is tuple: # multiple return values diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py new file mode 100644 index 0000000000000..d077e4fc6bf94 --- /dev/null +++ b/pandas/tests/series/test_ufunc.py @@ -0,0 +1,192 @@ +import string + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm + +UNARY_UFUNCS = [np.positive, np.floor, np.exp] +BINARY_UFUNCS = [ + np.add, # dunder op + np.logaddexp, +] +SPARSE = [ + True, + False +] +SPARSE_IDS = ['sparse', 'dense'] +SHUFFLE = [ + True, + False +] + + +@pytest.fixture +def arrays_for_binary_ufunc(): + """ + A pair of random, length-100 integer-dtype arrays, that are mostly 0. 
+ """ + a1 = np.random.randint(0, 10, 100, dtype='int64') + a2 = np.random.randint(0, 10, 100, dtype='int64') + a1[::3] = 0 + a2[::4] = 0 + return a1, a2 + + +@pytest.mark.parametrize("ufunc", UNARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_unary_ufunc(ufunc, sparse): + array = np.random.randint(0, 10, 10, dtype='int64') + array[::2] = 0 + if sparse: + array = pd.SparseArray(array, dtype=pd.SparseDtype('int', 0)) + + index = list(string.ascii_letters[:10]) + name = "name" + series = pd.Series(array, index=index, name=name) + + result = ufunc(series) + expected = pd.Series(ufunc(array), index=index, name=name) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.parametrize("box_other", ['series', 'index', 'raw']) +@pytest.mark.parametrize("flip", [True, False], + ids=['flipped', 'straight']) +def test_binary_ufunc(ufunc, sparse, shuffle, box_other, + flip, + arrays_for_binary_ufunc): + # Check the invariant that + # ufunc(Series(a), Series(b)) == Series(ufunc(a, b)) + # with alignment. + + pd.options.display.max_rows = 5 + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + + name = "name" + s1 = pd.Series(a1, name=name) + if box_other == 'series': + s2 = pd.Series(a2, name=name) + elif box_other == 'index': + # Index should defer to Series + # astype for https://github.com/pandas-dev/pandas/issues/26972 + s2 = pd.Index(a2, name=name).astype('int64') + else: + s2 = a2 + + idx = np.random.permutation(len(s1)) + + if shuffle: + s2 = s2.take(idx) + if box_other != 'series': + # when other is a Series, we align, so we don't + # need to shuffle the array for expected. In all + # other cases, we do. 
+ a2 = a2.take(idx) + + a, b = s1, s2 + c, d = a1, a2 + + if flip: + a, b = b, a + c, d = d, c + + result = ufunc(a, b) + if shuffle and box_other != 'series': + index = s1.index + else: + # shuffle & union or no alignment + index = np.arange(len(s1)) + + expected = pd.Series(ufunc(c, d), name=name, index=index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False]) +def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): + array, _ = arrays_for_binary_ufunc + if sparse: + array = pd.SparseArray(array) + other = 2 + series = pd.Series(array, name="name") + + a, b = series, other + c, d = array, other + if flip: + c, d = b, c + a, b = b, a + + expected = pd.Series(ufunc(c, d), name="name") + result = ufunc(a, b) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.divmod]) # any others? +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") +def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, + arrays_for_binary_ufunc): + if sparse and ufunc is np.divmod: + pytest.skip("sparse divmod not implemented.") + + a1, a2 = arrays_for_binary_ufunc + + if sparse: + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + + s1 = pd.Series(a1) + s2 = pd.Series(a2) + + if shuffle: + # ensure we align before applying the ufunc + s2 = s2.sample(frac=1) + + expected = ufunc(a1, a2) + assert isinstance(expected, tuple) + + result = ufunc(s1, s2) + assert isinstance(result, tuple) + tm.assert_series_equal(result[0], pd.Series(expected[0])) + tm.assert_series_equal(result[1], pd.Series(expected[1])) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def 
test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): + array, _ = arrays_for_binary_ufunc + + if sparse: + array = pd.SparseArray(array) + + series = pd.Series(array, name="name") + result = np.modf(series) + expected = np.modf(array) + + assert isinstance(result, tuple) + assert isinstance(expected, tuple) + + tm.assert_series_equal(result[0], pd.Series(expected[0], name="name")) + tm.assert_series_equal(result[1], pd.Series(expected[1], name="name")) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +def test_binary_ufunc_drops_series_name(ufunc, sparse, + arrays_for_binary_ufunc): + # Drop the names when they differ. + a1, a2 = arrays_for_binary_ufunc + s1 = pd.Series(a1, name='a') + s2 = pd.Series(a2, name='b') + + result = ufunc(s1, s2) + assert result.name is None From 0b359d76889cc79a3db7a00dac66697a182d3a79 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Jun 2019 13:00:55 -0500 Subject: [PATCH 23/48] fixup release note --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e6bc422b52e89..a11e6dce7a9a3 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -769,7 +769,7 @@ Sparse - Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) - Bug in :class:`SparseDataFrame` when adding a column in which the length of values does not match length of index, ``AssertionError`` is raised instead of raising ``ValueError`` (:issue:`25484`) - Introduce a better error message in :meth:`Series.sparse.from_coo` so it returns a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`) -- Bug in :func:`numpy.modf` on a :class:`SparseArray`. Now a tuple of :class:`SparseArray` is returned. +- Bug in :func:`numpy.modf` on a :class:`SparseArray`. 
Now a tuple of :class:`SparseArray` is returned (:issue:`26946`). Other ^^^^^ From 64d8908f1a67780d3d952acab676a99c3d4c02dc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 09:20:20 -0500 Subject: [PATCH 24/48] more --- pandas/tests/series/test_ufunc.py | 83 +++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index f91d648464a3d..2e9e331c7542f 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -53,10 +53,71 @@ def test_unary_ufunc(ufunc, sparse): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False], ids=['flipped', 'straight']) +def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): + # Test that ufunc(Series(a), array) == Series(ufunc(a, b)) + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + + name = "name" # op(Series, array) preserves the name. 
+ series = pd.Series(a1, name=name) + other = a2 + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = reversed(array_args) + series_args = reversed(series_args) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [ + pytest.param(True, marks=pytest.mark.xfail(reason="Index should defer")), + False +], ids=['flipped', 'straight']) +def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): + # Test that + # * func(Series(a), Series(b)) == Series(ufunc(a, b)) + # * ufunc(Index, Series) dispatches to Series (returns a Series) + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + + name = "name" # op(Series, array) preserves the name. 
+ series = pd.Series(a1, name=name) + other = pd.Index(a2, name=name).astype("int64") + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = reversed(array_args) + series_args = reversed(series_args) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +def test_binary_ufunc_with_series(): + pass + + @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) -@pytest.mark.parametrize("box_other", ['series', 'index', 'raw']) +@pytest.mark.parametrize("box_other", ['series', 'index', 'ndarray']) @pytest.mark.parametrize("flip", [True, False], ids=['flipped', 'straight']) def test_binary_ufunc(ufunc, sparse, shuffle, box_other, @@ -76,7 +137,9 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, s2 = pd.Series(a2, name=name) elif box_other == 'index': # Index should defer to Series - s2 = pd.Index(a2, naame=name) + # astype for https://github.com/pandas-dev/pandas/issues/26972 + s2 = pd.Index(a2, name=name).astype('int64') + else: s2 = a2 @@ -98,7 +161,13 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, c, d = d, c result = ufunc(a, b) - expected = pd.Series(ufunc(c, d), name=name) + if shuffle and box_other != 'series': + index = s1.index + else: + # shuffle & union or no alignment + index = np.arange(len(s1)) + + expected = pd.Series(ufunc(c, d), name=name, index=index) if box_other == 'index' and flip: raise pytest.xfail("Index should defer to Series") tm.assert_series_equal(result, expected) @@ -120,8 +189,9 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): c, d = b, c a, b = b, a - expected = pd.Series(ufunc(a, b), name="name") - result = pd.Series(ufunc(c, d), name="name") + expected = pd.Series(ufunc(c, d), name="name") + result = ufunc(a, b) + 
tm.assert_series_equal(result, expected) @@ -131,6 +201,9 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc): + if sparse and ufunc is np.divmod: + pytest.skip("sparse divmod not implemented.") + a1, a2 = arrays_for_binary_ufunc if sparse: From d1788b01c0be9de6b1a8ed71f47004a860322a2b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 10:15:58 -0500 Subject: [PATCH 25/48] lint --- pandas/tests/series/test_ufunc.py | 99 +++++++++++++++---------------- 1 file changed, 47 insertions(+), 52 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 2e9e331c7542f..05d19452b1eac 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -39,6 +39,7 @@ def arrays_for_binary_ufunc(): @pytest.mark.parametrize("ufunc", UNARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) def test_unary_ufunc(ufunc, sparse): + # Test that ufunc(Series) == Series(ufunc) array = np.random.randint(0, 10, 10, dtype='int64') array[::2] = 0 if sparse: @@ -110,66 +111,51 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): tm.assert_series_equal(result, expected) -def test_binary_ufunc_with_series(): - pass - - @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) -@pytest.mark.parametrize("shuffle", SHUFFLE) -@pytest.mark.parametrize("box_other", ['series', 'index', 'ndarray']) -@pytest.mark.parametrize("flip", [True, False], - ids=['flipped', 'straight']) -def test_binary_ufunc(ufunc, sparse, shuffle, box_other, - flip, - arrays_for_binary_ufunc): - # Check the invariant that - # ufunc(Series(a), Series(b)) == Series(ufunc(a, b)) - # with alignment. 
+@pytest.mark.parametrize("shuffle", [True, False], ids=['unaligned', + 'aligned']) +@pytest.mark.parametrize("flip", [True, False], ids=['flipped', 'straight']) +def test_binary_ufunc_with_series(flip, shuffle, sparse, ufunc, + arrays_for_binary_ufunc): + # Test that + # * func(Series(a), Series(b)) == Series(ufunc(a, b)) + # with alignment between the indices + + if flip and shuffle: + pytest.xfail(reason="Fix with Series.__array_ufunc__") + a1, a2 = arrays_for_binary_ufunc if sparse: a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) - name = "name" - s1 = pd.Series(a1, name=name) - if box_other == 'series': - s2 = pd.Series(a2, name=name) - elif box_other == 'index': - # Index should defer to Series - # astype for https://github.com/pandas-dev/pandas/issues/26972 - s2 = pd.Index(a2, name=name).astype('int64') - - else: - s2 = a2 + name = "name" # op(Series, array) preserves the name. + series = pd.Series(a1, name=name) + other = pd.Series(a2, name=name) - idx = np.random.permutation(len(s1)) + idx = np.random.permutation(len(a1)) if shuffle: - s2 = s2.take(idx) - if box_other != 'series': - # when other is a Series, we align, so we don't - # need to shuffle the array for expected. In all - # other cases, we do. - a2 = a2.take(idx) + other = other.take(idx) + a2 = a2.take(idx) + # alignment, so the expected index is the first index in the op. 
+ if flip: + index = other.align(series)[0].index + else: + index = series.align(other)[0].index + else: + index = series.index - a, b = s1, s2 - c, d = a1, a2 + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) if flip: - a, b = b, a - c, d = d, c - - result = ufunc(a, b) - if shuffle and box_other != 'series': - index = s1.index - else: - # shuffle & union or no alignment - index = np.arange(len(s1)) + array_args = tuple(reversed(array_args)) + series_args = tuple(reversed(series_args)) # ufunc(array, series) - expected = pd.Series(ufunc(c, d), name=name, index=index) - if box_other == 'index' and flip: - raise pytest.xfail("Index should defer to Series") + expected = pd.Series(ufunc(*array_args), index=index, name=name) + result = ufunc(*series_args) tm.assert_series_equal(result, expected) @@ -177,20 +163,24 @@ def test_binary_ufunc(ufunc, sparse, shuffle, box_other, @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("flip", [True, False]) def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): + # Test that + # * ufunc(Series, scalar) == Series(ufunc(array, scalar)) + # * ufunc(Series, scalar) == ufunc(scalar, Series) array, _ = arrays_for_binary_ufunc if sparse: array = pd.SparseArray(array) other = 2 series = pd.Series(array, name="name") - a, b = series, other - c, d = array, other + series_args = (series, other) + array_args = (array, other) + if flip: - c, d = b, c - a, b = b, a + series_args = tuple(reversed(series_args)) + array_args = tuple(reversed(array_args)) - expected = pd.Series(ufunc(c, d), name="name") - result = ufunc(a, b) + expected = pd.Series(ufunc(*array_args), name="name") + result = ufunc(*series_args) tm.assert_series_equal(result, expected) @@ -201,6 +191,9 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") def test_multiple_ouput_binary_ufuncs(ufunc, sparse, 
shuffle, arrays_for_binary_ufunc): + # Test that + # the same conditions from binary_ufunc_scalar apply to + # ufuncs with multiple outputs. if sparse and ufunc is np.divmod: pytest.skip("sparse divmod not implemented.") @@ -228,6 +221,8 @@ def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): + # Test that the same conditions from unary input apply to multi-output + # ufuncs array, _ = arrays_for_binary_ufunc if sparse: From 971e3473eb41a48e5b3ac8cbcd809f28203af314 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 11:13:12 -0500 Subject: [PATCH 26/48] fixup! more --- pandas/core/ops.py | 10 +++++++++- pandas/tests/series/test_ufunc.py | 15 ++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index bf6d4561d783f..fbc2033e8ddec 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1634,12 +1634,20 @@ def _arith_method_SERIES(cls, op, special): def na_op(x, y): import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: result = masked_arith_op(x, y, op) - result = missing.fill_zeros(result, x, y, op_name, fill_zeros) + if isinstance(result, tuple): + # e.g. 
divmod + result = tuple( + missing.fill_zeros(x, x, y, op_name, fill_zeros) + for x in result + ) + else: + result = missing.fill_zeros(result, x, y, op_name, fill_zeros) return result def safe_na_op(lvalues, rvalues): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 10362da3a9e74..32ee6853c0457 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -80,10 +80,7 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) -@pytest.mark.parametrize("flip", [ - pytest.param(True, marks=pytest.mark.xfail(reason="Index should defer")), - False -], ids=['flipped', 'straight']) +@pytest.mark.parametrize("flip", [True, False], ids=['flipped', 'straight']) def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): # Test that # * func(Series(a), Series(b)) == Series(ufunc(a, b)) @@ -119,10 +116,6 @@ def test_binary_ufunc_with_series(flip, shuffle, sparse, ufunc, # Test that # * func(Series(a), Series(b)) == Series(ufunc(a, b)) # with alignment between the indices - - if flip and shuffle: - pytest.xfail(reason="Fix with Series.__array_ufunc__") - a1, a2 = arrays_for_binary_ufunc if sparse: a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) @@ -136,8 +129,6 @@ def test_binary_ufunc_with_series(flip, shuffle, sparse, ufunc, if shuffle: other = other.take(idx) - a2 = a2.take(idx) - # alignment, so the expected index is the first index in the op. 
if flip: index = other.align(series)[0].index else: @@ -196,6 +187,9 @@ def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, pytest.skip("sparse divmod not implemented.") a1, a2 = arrays_for_binary_ufunc + # work around https://github.com/pandas-dev/pandas/issues/26987 + a1[a1 == 0] = 1 + a2[a2 == 0] = 1 if sparse: a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) @@ -239,7 +233,6 @@ def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("ufunc", BINARY_UFUNCS) -@pytest.mark.xfail(reason="Series.__array_ufunc__") def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc): # Drop the names when they differ. From 95e8aef3504f4fa66448dc2d72d9b1a1d9f64695 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 11:44:54 -0500 Subject: [PATCH 27/48] remove dead code --- pandas/core/series.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d8aa5cf0669cb..f99f8e8a7853c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -824,30 +824,6 @@ def __array__(self, dtype=None): dtype = 'M8[ns]' return np.asarray(self.array, dtype) - def __array_wrap__(self, result, context=None): - """ - Gets called after a ufunc. - """ - return self._constructor(result, index=self.index, - copy=False).__finalize__(self) - - def __array_prepare__(self, result, context=None): - """ - Gets called prior to a ufunc. 
- """ - - # nice error message for non-ufunc types - if (context is not None and - (not isinstance(self._values, (np.ndarray, ExtensionArray)) - or isinstance(self._values, Categorical))): - obj = context[1][0] - raise TypeError("{obj} with dtype {dtype} cannot perform " - "the numpy op {op}".format( - obj=type(obj).__name__, - dtype=getattr(obj, 'dtype', None), - op=context[0].__name__)) - return result - # ---------------------------------------------------------------------- # Unary Methods From 7bfd584b9be5b8a276274d4105618fbed17be3e9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 11:52:06 -0500 Subject: [PATCH 28/48] todos --- pandas/core/series.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f99f8e8a7853c..09eb8a98da05f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -704,41 +704,45 @@ def view(self, dtype=None): _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray, numbers.Number) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # for binary ops, use our custom dunder methods + # TODO: handle DataFrame from pandas.core.internals.construction import extract_array + # for binary ops, use our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: return result # align all the inputs. - # TODO: is there a more efficient way to do this? - types = tuple(type(x) for x in inputs) - series = [x for x, t in zip(inputs, types) if issubclass(t, Series)] names = [getattr(x, 'name') for x in inputs if hasattr(x, 'name')] - - if len(series) > 1: - index = series[0].index - for s in series[1:]: + types = tuple(type(x) for x in inputs) + # TODO: dataframe + alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)] + + if len(alignable) > 1: + # This triggers alignment. 
+ # TODO: I'm sure there's a better way to get the expected index. + index = alignable[0].index + for s in alignable[1:]: index |= s.index inputs = [x.reindex(index) for x, t in zip(inputs, types) if issubclass(t, Series)] else: index = self.index - # Type checks: can we do this, given the inputs. + # dtype check: can we do this, given the inputs? # It's expected that the following classes defer to us when - # any Series is present. - # 1. Index. - # 2. ExtensionArray. + # any Series is present in inputes. + # 1. Index. + # 2. ExtensionArray. inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) handled_types = sum([getattr(x, '_HANDLED_TYPES', ()) for x in inputs], self._HANDLED_TYPES + (Series,)) - if not all(isinstance(t, handled_types) for t in inputs): - # there's an unknown object present. Bail out. - # TODO: Handle Series[object] + any_object = any(getattr(x, 'dtype', None) == 'object' for x in inputs) + # defer when an unknown object and not object dtype. + if (not all(isinstance(t, handled_types) for t in inputs) and + not any_object): return NotImplemented result = getattr(ufunc, method)(*inputs, **kwargs) From feee01566f227423033a461bdbfb6eafd1f5964d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 15:06:05 -0500 Subject: [PATCH 29/48] remove compat --- pandas/tests/arrays/test_integer.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 92b49878c3718..537ad533c412d 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -4,7 +4,6 @@ from pandas.core.dtypes.generic import ABCIndexClass import pandas as pd -from pandas import _np_version_under1p13 from pandas.api.types import is_float, is_float_dtype, is_integer, is_scalar from pandas.core.arrays import IntegerArray, integer_array from pandas.core.arrays.integer import ( @@ -718,8 +717,6 @@ def test_astype_nansafe(): arr.astype('uint32') 
-@pytest.mark.skipif(_np_version_under1p13, - reason='ufunc protocol added in 1.13') @pytest.mark.parametrize( 'ufunc', [np.abs, np.sign]) def test_ufuncs_single_int(ufunc): @@ -734,8 +731,6 @@ def test_ufuncs_single_int(ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(_np_version_under1p13, - reason='ufunc protocol added in 1.13') @pytest.mark.parametrize( 'ufunc', [np.log, np.exp, np.sin, np.cos, np.sqrt]) def test_ufuncs_single_float(ufunc): @@ -752,8 +747,6 @@ def test_ufuncs_single_float(ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(_np_version_under1p13, - reason='ufunc protocol added in 1.13') @pytest.mark.parametrize( 'ufunc', [np.add, np.subtract]) def test_ufuncs_binary_int(ufunc): From 3702b9b466386fc2aa91f9392a866d97ac4a9f22 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 15:11:00 -0500 Subject: [PATCH 30/48] object dtype tests --- pandas/tests/series/test_ufunc.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 32ee6853c0457..fcae7b4c1f422 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -242,3 +242,18 @@ def test_binary_ufunc_drops_series_name(ufunc, sparse, result = ufunc(s1, s2) assert result.name is None + + +def test_object_series_ok(): + class Dummy: + def __init__(self, value): + self.value = value + + def __add__(self, other): + return self.value + other.value + + arr = np.array([Dummy(0), Dummy(1)]) + ser = pd.Series(arr) + tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) + tm.assert_series_equal(np.add(ser, Dummy(1)), + pd.Series(np.add(ser, Dummy(1)))) From a0f84ed5e73d58bc215a8638d1c5217c7e611052 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 15:56:21 -0500 Subject: [PATCH 31/48] wip --- pandas/core/arrays/categorical.py | 1 + pandas/core/arrays/integer.py | 48 +++++++++++++++++------------ 
pandas/tests/arrays/test_integer.py | 4 +++ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 47a1ed9ae3f39..7bcaf88fdb65f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1313,6 +1313,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: return result + # for all other cases, raise for now (similarly as what happens in # Series.__array_prepare__) raise TypeError("Object with dtype {dtype} cannot perform " diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 3db95ffe4d479..971d9d22de3bd 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -349,7 +349,8 @@ def __array__(self, dtype=None): _HANDLED_TYPES = (np.ndarray, numbers.Number) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - + # For IntegerArray inputs, we apply the ufunc to ._data + # and mask the result. out = kwargs.get('out', ()) for x in inputs + out: @@ -362,26 +363,33 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if result is not NotImplemented: return result - if (method == '__call__' - and ufunc.signature is None - and ufunc.nout == 1): - # only supports IntegerArray for now - args = [a._data for a in inputs] - masks = [a._mask for a in inputs] - result = ufunc(*args, **kwargs) - mask = np.logical_or.reduce(masks) - if result.dtype.kind in ('i', 'u'): - return IntegerArray(result, mask) + mask = np.zeros(len(self), dtype=bool) + inputs2 = [] + for x in inputs: + if isinstance(x, IntegerArray): + mask |= x._mask + inputs2.append(x._data) + else: + inputs2.append(x) + + def reconstruct(x): + if np.isscalar(x): + # reductions. 
+ if mask.any(): + return np.nan + return x + if is_integer_dtype(x.dtype): + m = mask.copy() + return IntegerArray(x, m) else: - result[mask] = np.nan - return result - - # fall back to array for other ufuncs - inputs = tuple( - np.array(x) if isinstance(x, type(self)) else x - for x in inputs - ) - return getattr(ufunc, method)(*inputs, **kwargs) + x[mask] = np.nan + return x + + result = getattr(ufunc, method)(*inputs2, **kwargs) + if isinstance(result, tuple): + tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) def __iter__(self): for i in range(len(self)): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 537ad533c412d..c4e12fe8d79e7 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -781,6 +781,10 @@ def test_ufunc_fallback(): assert pd.isna(np.add.reduce(a)) +def test_modf(): + pass + + # TODO(jreback) - these need testing / are broken # shift From d83fe7aa19f074d3d5c2fbfb7ffa675f63ad18d5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 21 Jun 2019 16:08:53 -0500 Subject: [PATCH 32/48] doc, types --- pandas/core/ops.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fbc2033e8ddec..a9e638a56e6ff 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,7 +6,7 @@ import datetime import operator import textwrap -from typing import Dict, Optional +from typing import Any, Callable, Dict, Optional, Tuple import warnings import numpy as np @@ -29,6 +29,7 @@ from pandas.core.dtypes.missing import isna, notna import pandas as pd +from pandas._typing import ArrayLike import pandas.core.common as com import pandas.core.missing as missing @@ -2362,8 +2363,32 @@ def wrapper(self, other): return wrapper -def maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs): +def maybe_dispatch_ufunc_to_dunder_op( + self: ArrayLike, + ufunc: 
Callable, + method: str, + *inputs: Tuple[Any], + **kwargs: Dict, +): + """ + Dispatch a ufunc to the equivalent dunder method. + + Parameters + ---------- + self : ArrayLike + The array whose dunder method we dispatch to + ufunc : Callable + A NumPy ufunc + method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'} + inputs : Tuple + kwargs : Dict + Returns + ------- + result : Any + The result of applying the ufunc + """ + # special has the ufuncs we dispatch to the dunder op on special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'} aliases = { @@ -2382,6 +2407,7 @@ def maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs): 'greater_equal': 'ge', } + # For op(., Array) -> Array.__r{op}__ flipped = { 'lt': '__gt__', 'le': '__ge__', From edad466eb37646ea827405720491c8071921882f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 22 Jun 2019 15:03:12 -0500 Subject: [PATCH 33/48] compat --- pandas/core/ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index a9e638a56e6ff..c2d4bb53ab95e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,7 +6,7 @@ import datetime import operator import textwrap -from typing import Any, Callable, Dict, Optional, Tuple +from typing import Callable, Dict, Optional import warnings import numpy as np @@ -2367,8 +2367,8 @@ def maybe_dispatch_ufunc_to_dunder_op( self: ArrayLike, ufunc: Callable, method: str, - *inputs: Tuple[Any], - **kwargs: Dict, + *inputs, + **kwargs, ): """ Dispatch a ufunc to the equivalent dunder method. 
From e4ae8dc14ebec44f13e2d79ffe538665607d43fd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 23 Jun 2019 14:51:31 -0500 Subject: [PATCH 34/48] fixups --- pandas/core/ops.py | 4 ++-- pandas/core/series.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index c2d4bb53ab95e..c0bc71d986717 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1644,8 +1644,8 @@ def na_op(x, y): if isinstance(result, tuple): # e.g. divmod result = tuple( - missing.fill_zeros(x, x, y, op_name, fill_zeros) - for x in result + missing.fill_zeros(r, x, y, op_name, fill_zeros) + for r in result ) else: result = missing.fill_zeros(result, x, y, op_name, fill_zeros) diff --git a/pandas/core/series.py b/pandas/core/series.py index 09eb8a98da05f..edd9799ead34c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -701,7 +701,8 @@ def view(self, dtype=None): # ---------------------------------------------------------------------- # NDArray Compat - _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray, numbers.Number) + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray, numbers.Number, + list) # what other builtins? array? deque? ... 
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # TODO: handle DataFrame From a9bd6effc16391e85e756a325fa8f13f3528bd6b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 24 Jun 2019 06:37:41 -0500 Subject: [PATCH 35/48] added matmul --- pandas/core/ops.py | 3 ++- pandas/tests/series/test_analytics.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index c0bc71d986717..cd04570c097aa 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2390,7 +2390,8 @@ def maybe_dispatch_ufunc_to_dunder_op( """ # special has the ufuncs we dispatch to the dunder op on special = {'add', 'sub', 'mul', 'pow', 'mod', 'floordiv', 'truediv', - 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder'} + 'divmod', 'eq', 'ne', 'lt', 'gt', 'le', 'ge', 'remainder', + 'matmul'} aliases = { 'subtract': 'sub', 'multiply': 'mul', diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index aed08b78fe640..df69bb35115cf 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -484,18 +484,18 @@ def test_matmul(self): b = DataFrame(np.random.randn(3, 4), index=['1', '2', '3'], columns=['p', 'q', 'r', 's']).T - # Series @ DataFrame + # Series @ DataFrame -> Series result = operator.matmul(a, b) expected = Series(np.dot(a.values, b.values), index=['1', '2', '3']) assert_series_equal(result, expected) - # DataFrame @ Series + # DataFrame @ Series -> Series result = operator.matmul(b.T, a) expected = Series(np.dot(b.T.values, a.T.values), index=['1', '2', '3']) assert_series_equal(result, expected) - # Series @ Series + # Series @ Series -> scalar result = operator.matmul(a, a) expected = np.dot(a.values, a.values) assert_almost_equal(result, expected) From 1a8b8072c4d0c3bc6b1f1f45a928feaba0d93060 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 24 Jun 2019 08:00:53 -0500 Subject: [PATCH 36/48] start docs --- 
doc/source/user_guide/computation.rst | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 71cbf58dff871..d679b385b1884 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -5,6 +5,53 @@ Computational tools =================== +.. _computation.ufuncs: + +Universal Functions +------------------- + +:class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's +`universal functions `_ +to be applied. + +The ufunc is applied to the underlying array in a Series. + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4]) + np.exp(ser) + +Like other parts of of the library, pandas will automatically align labeled inputs +as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` +on two :class:`Series` with differently ordered labels will align before the operation. + +.. ipython:: python + + ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + ser2 = pd.Series([1, 3, 5], index=['b', 'a', 'c']) + ser1 + ser2 + np.remainder(ser1, ser2) + +As usual, the union of the two indices is taken, and non-overlapping values are filled +with missing values. + +.. ipython:: python + + ser3 = pd.Series([2, 4, 6], index=['b', 'c', 'd']) + ser3 + np.remainder(ser1, ser3) + +When a binary ufunc is applied to a :class:`Series` and :class:`Index`, the Series +implementation takes precedence and a Series is returned. + +.. 
ipython:: python + + ser = pd.Series([1, 2, 3]) + idx = pd.Index([4, 5, 6]) + + np.maximum(ser, idx) + Statistical Functions --------------------- From 0b0466dfbb584147ee0c2d9ae992ea0c631f7f9d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 24 Jun 2019 08:40:53 -0500 Subject: [PATCH 37/48] compat --- pandas/core/ops.py | 2 +- pandas/tests/series/test_ufunc.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index cd04570c097aa..d90948e969373 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2368,7 +2368,7 @@ def maybe_dispatch_ufunc_to_dunder_op( ufunc: Callable, method: str, *inputs, - **kwargs, + **kwargs ): """ Dispatch a ufunc to the equivalent dunder method. diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index fcae7b4c1f422..34617283f690b 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -41,7 +41,7 @@ def test_unary_ufunc(ufunc, sparse): array = np.random.randint(0, 10, 10, dtype='int64') array[::2] = 0 if sparse: - array = pd.SparseArray(array, dtype=pd.SparseDtype('int', 0)) + array = pd.SparseArray(array, dtype=pd.SparseDtype('int64', 0)) index = list(string.ascii_letters[:10]) name = "name" @@ -59,8 +59,8 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): # Test that ufunc(Series(a), array) == Series(ufunc(a, b)) a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) name = "name" # op(Series, array) preserves the name. 
series = pd.Series(a1, name=name) @@ -87,8 +87,8 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): # * ufunc(Index, Series) dispatches to Series (returns a Series) a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) name = "name" # op(Series, array) preserves the name. series = pd.Series(a1, name=name) @@ -118,8 +118,8 @@ def test_binary_ufunc_with_series(flip, shuffle, sparse, ufunc, # with alignment between the indices a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) name = "name" # op(Series, array) preserves the name. 
series = pd.Series(a1, name=name) @@ -192,8 +192,8 @@ def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, a2[a2 == 0] = 1 if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0)) + a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int64', 0)) + a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int64', 0)) s1 = pd.Series(a1) s2 = pd.Series(a2) From d3089bdb3df26a9aa68fca6e4048acbb89ae1c3b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Jun 2019 09:44:45 -0500 Subject: [PATCH 38/48] ignore for numpydev --- pandas/tests/arithmetic/test_datetime64.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 64b4e162483f1..3e591fba279e5 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -657,6 +657,10 @@ def test_comparison_tzawareness_compat(self, op, box_with_array): @pytest.mark.parametrize('other', [datetime(2016, 1, 1), Timestamp('2016-01-01'), np.datetime64('2016-01-01')]) + # Bug in NumPy? https://github.com/numpy/numpy/issues/13841 + # Raising in __eq__ will fallback to NumPy, which warns, fails, + # then re-raises the original exception. So we just need to ignore. 
+ @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture, box_with_array): tz = tz_aware_fixture From 15a3fb10d45644d9544f891c7adccb9e11222026 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Jun 2019 11:55:13 -0500 Subject: [PATCH 39/48] handle reduce --- doc/source/user_guide/computation.rst | 5 ++--- pandas/core/series.py | 6 ++++-- pandas/tests/series/test_ufunc.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index d679b385b1884..eaf727bf8a425 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -11,8 +11,7 @@ Universal Functions ------------------- :class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's -`universal functions `_ -to be applied. +`universal functions `_. The ufunc is applied to the underlying array in a Series. @@ -21,7 +20,7 @@ The ufunc is applied to the underlying array in a Series. ser = pd.Series([1, 2, 3, 4]) np.exp(ser) -Like other parts of of the library, pandas will automatically align labeled inputs +Like other parts of the library, pandas will automatically align labeled inputs as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` on two :class:`Series` with differently ordered labels will align before the operation. 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 6548eb68d5261..7b0114a73849c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -726,8 +726,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): index = alignable[0].index for s in alignable[1:]: index |= s.index - inputs = [x.reindex(index) for x, t in zip(inputs, types) - if issubclass(t, Series)] + inputs = [x.reindex(index) if issubclass(t, Series) else x + for x, t in zip(inputs, types)] else: index = self.index @@ -754,6 +754,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): name = None def construct_return(result): + if lib.is_scalar(result): + return result return self._constructor(result, index=index, name=name, diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 34617283f690b..6bb21d2f69688 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -257,3 +257,19 @@ def __add__(self, other): tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1)))) + + +@pytest.mark.parametrize('values', [ + pd.array([1, 3, 2]), + pytest.param( + pd.array([1, 10, 0], dtype='Sparse[int]'), + marks=pytest.mark.xfail(resason='GH-27080. 
Bug in SparseArray') + ), + pd.to_datetime(['2000', '2010', '2001']), + pd.to_datetime(['2000', '2010', '2001']).tz_localize("CET"), + pd.to_datetime(['2000', '2010', '2001']).to_period(freq="D"), + +]) +def test_reduce(values): + a = pd.Series(values) + assert np.maximum.reduce(a) == values[1] From 4f4bd93bbfcbc7279d7bc8e828ac307a5c1c6b30 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Jun 2019 16:43:23 -0500 Subject: [PATCH 40/48] update --- pandas/core/series.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7b0114a73849c..816fbcf4b46ee 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -722,7 +722,9 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if len(alignable) > 1: # This triggers alignment. - # TODO: I'm sure there's a better way to get the expected index. + # At the moment, there aren't any ufuncs with more than two inputs + # so this ends up just being x1.index | x2.index, but we write + # it to handle *args. index = alignable[0].index for s in alignable[1:]: index |= s.index From 5dbff4904eddeab867bb3568005ad0151d934975 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Jun 2019 09:54:36 -0500 Subject: [PATCH 41/48] fixups --- doc/source/development/extending.rst | 22 ++++++++++++ doc/source/getting_started/dsintro.rst | 50 +++++++++++++++++++++----- doc/source/user_guide/computation.rst | 45 ----------------------- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/arrays/base.py | 11 ++++++ pandas/core/ops.py | 12 ++++--- pandas/core/series.py | 11 ++++-- pandas/tests/series/test_ufunc.py | 15 ++++++++ 8 files changed, 107 insertions(+), 60 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 363ec10d58bb6..300a6b81d37ed 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -208,6 +208,28 @@ will 2. 
call ``result = op(values, ExtensionArray)`` 3. re-box the result in a ``Series`` +.. _extending.extension.ufunc: + +NumPy Universal Functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` implements ``__array_ufunc__``. As part of the implementation, +pandas unboxes the ``ExtensionArray`` from the :class:`Series`, applies the ufunc, +and re-boxes it if necessary. + +If applicable, we highly recommend that you implement ``__array_ufunc__`` in your +extension array to avoid coercion to an ndarray. See +`the numpy documentation `__ +for an example. + +As part of your implementation, we require that you + +1. Define a ``_HANDLED_TYPES`` attribute, a tuple, containing the types your + array can handle +2. Defer to the :class:`Series` implementation by returning ``NotImplemented`` + if there are any :class:`Series` in the ``types``. This ensures consistent + metadata handling, and associativity for binary operations. + .. _extending.extension.testing: Testing extension arrays diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst index 914c55115567a..33e5d390447d7 100644 --- a/doc/source/getting_started/dsintro.rst +++ b/doc/source/getting_started/dsintro.rst @@ -731,28 +731,62 @@ DataFrame interoperability with NumPy functions .. _dsintro.numpy_interop: Elementwise NumPy ufuncs (log, exp, sqrt, ...) and various other NumPy functions -can be used with no issues on DataFrame, assuming the data within are numeric: +can be used with no issues on Series and DataFrame, assuming the data within +are numeric: .. ipython:: python np.exp(df) np.asarray(df) -The dot method on DataFrame implements matrix multiplication: +DataFrame is not intended to be a drop-in replacement for ndarray as its +indexing semantics and data model are quite different in places from an n-dimensional +array. + +:class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's +`universal functions `_.
+ +The ufunc is applied to the underlying array in a Series. .. ipython:: python - df.T.dot(df) + ser = pd.Series([1, 2, 3, 4]) + np.exp(ser) -Similarly, the dot method on Series implements dot product: +Like other parts of the library, pandas will automatically align labeled inputs +as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` +on two :class:`Series` with differently ordered labels will align before the operation. .. ipython:: python - s1 = pd.Series(np.arange(5, 10)) - s1.dot(s1) + ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + ser2 = pd.Series([1, 3, 5], index=['b', 'a', 'c']) + ser1 + ser2 + np.remainder(ser1, ser2) -DataFrame is not intended to be a drop-in replacement for ndarray as its -indexing semantics are quite different in places from a matrix. +As usual, the union of the two indices is taken, and non-overlapping values are filled +with missing values. + +.. ipython:: python + + ser3 = pd.Series([2, 4, 6], index=['b', 'c', 'd']) + ser3 + np.remainder(ser1, ser3) + +When a binary ufunc is applied to a :class:`Series` and :class:`Index`, the Series +implementation takes precedence and a Series is returned. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + idx = pd.Index([4, 5, 6]) + + np.maximum(ser, idx) + +NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays, +for example :class:`SparseArray` (see :ref:`sparse.calculation`). If possible, +the ufunc is applied without converting the underlying data to an ndarray. Console display ~~~~~~~~~~~~~~~ diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 0c8ae58e3f142..4f44fcaab63d4 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -5,51 +5,6 @@ Computational tools =================== -.. 
_computation.ufuncs: - -Universal Functions -------------------- - -:class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's -`universal functions `_. - -The ufunc is applied to the underlying array in a Series. - -.. ipython:: python - - ser = pd.Series([1, 2, 3, 4]) - np.exp(ser) - -Like other parts of the library, pandas will automatically align labeled inputs -as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` -on two :class:`Series` with differently ordered labels will align before the operation. - -.. ipython:: python - - ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) - ser2 = pd.Series([1, 3, 5], index=['b', 'a', 'c']) - ser1 - ser2 - np.remainder(ser1, ser2) - -As usual, the union of the two indices is taken, and non-overlapping values are filled -with missing values. - -.. ipython:: python - - ser3 = pd.Series([2, 4, 6], index=['b', 'c', 'd']) - ser3 - np.remainder(ser1, ser3) - -When a binary ufunc is applied to a :class:`Series` and :class:`Index`, the Series -implementation takes precedence and a Series is returned. - -.. ipython:: python - - ser = pd.Series([1, 2, 3]) - idx = pd.Index([4, 5, 6]) - - np.maximum(ser, idx) Statistical functions --------------------- diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index c4592dc4918a5..c9cbe14562995 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -811,6 +811,7 @@ ExtensionArray - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`) - Keyword argument ``deep`` has been removed from :method:`ExtensionArray.copy` (:issue:`27083`) +- Added ``Series.__array_ufunc__`` to better handle NumPy ufuncs applied to Series backed by extension arrays (:issue:`23293`). 
Other ^^^^^ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6340cc732d6c1..0762a607f20ae 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -107,6 +107,17 @@ class ExtensionArray: attributes called ``.values`` or ``._values`` to ensure full compatibility with pandas internals. But other names as ``.data``, ``._data``, ``._items``, ... can be freely used. + + If implementing NumPy's ``__array_ufunc__`` interface, pandas expects + that + + 1. You defer by returning ``NotImplemented`` when any Series are present + in `inputs`. Pandas will extract the arrays and call the ufunc again. + 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. + Pandas inspects this to determine whether the ufunc is valid for the + types present. + + See :ref:`extending.extension.ufunc` for more. """ # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2c8f5adc863c3..bcab41e589b41 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,7 +6,7 @@ import datetime import operator import textwrap -from typing import Callable, Dict, Optional +from typing import Any, Callable, Dict, Optional import warnings import numpy as np @@ -2363,8 +2363,8 @@ def maybe_dispatch_ufunc_to_dunder_op( self: ArrayLike, ufunc: Callable, method: str, - *inputs, - **kwargs + *inputs: ArrayLike, + **kwargs: Any, ): """ Dispatch a ufunc to the equivalent dunder method. @@ -2376,8 +2376,10 @@ def maybe_dispatch_ufunc_to_dunder_op( ufunc : Callable A NumPy ufunc method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'} - inputs : Tuple - kwargs : Dict + inputs : ArrayLike + The input arrays. + kwargs : Any + The additional keyword arguments, e.g. ``out``.
Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 816fbcf4b46ee..0031b2ba35c4b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6,6 +6,7 @@ import numbers from shutil import get_terminal_size from textwrap import dedent +from typing import Any, Callable import warnings import numpy as np @@ -704,7 +705,13 @@ def view(self, dtype=None): _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray, numbers.Number, list) # what other builtins? array? deque? ... - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + def __array_ufunc__( + self, + ufunc: Callable, + method: str, + *inputs: Any, + **kwargs: Any + ): # TODO: handle DataFrame from pandas.core.internals.construction import extract_array @@ -735,7 +742,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # dtype check: can we do this, given the inputs? # It's expected that the following classes defer to us when - # any Series is present in inputes. + # any Series is present in inputs. # 1. Index. # 2. ExtensionArray. 
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 6bb21d2f69688..af67ae02b0940 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -1,3 +1,4 @@ +from collections import deque import string import numpy as np @@ -273,3 +274,17 @@ def __add__(self, other): def test_reduce(values): a = pd.Series(values) assert np.maximum.reduce(a) == values[1] + + +@pytest.mark.parametrize('type_', [ + list, + deque, + tuple, +]) +def test_binary_ufunc_other_types(type_): + a = pd.Series([1, 2, 3], name='name') + b = type_([3, 4, 5]) + + result = np.add(a, b) + expected = pd.Series(np.add(a.to_numpy(), b), name='name') + tm.assert_series_equal(result, expected) From 22372333e518baf169e192270eac39b67c399463 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Jun 2019 10:56:34 -0500 Subject: [PATCH 42/48] raise for reduce --- pandas/core/arrays/integer.py | 3 +++ pandas/tests/arrays/test_integer.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index ad013f9985920..5a0ca05f74ed5 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -350,6 +350,9 @@ def __array__(self, dtype=None): def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # For IntegerArray inputs, we apply the ufunc to ._data # and mask the result. + if method == 'reduce': + # Not clear how to handle missing values in reductions. Raise. 
+ raise NotImplementedError("The 'reduce' method is not supported.") out = kwargs.get('out', ()) for x in inputs + out: diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 4456372898658..e9e5bc772578b 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -781,8 +781,13 @@ def test_ufunc_fallback(): assert pd.isna(np.add.reduce(a)) -def test_modf(): - pass +@pytest.mark.parametrize('values', [ + [0, 1], [0, None] +]) +def test_ufunc_reduce_raises(values): + a = integer_array(values) + with pytest.raises(NotImplementedError): + np.add.reduce(a) # TODO(jreback) - these need testing / are broken From 5b5c547e63685f241fa4177c8f508bcabfc87728 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Jun 2019 12:12:20 -0500 Subject: [PATCH 43/48] more tests --- pandas/tests/extension/decimal/array.py | 23 +++++++++++ .../tests/extension/decimal/test_decimal.py | 38 ++++++++++++++++++- pandas/tests/series/test_ufunc.py | 22 +++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2b1bb53e962be..d097a599730b8 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -84,6 +84,29 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): def _from_factorized(cls, values, original): return cls(values) + _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # + if not all(isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) + for t in inputs): + return NotImplemented + + inputs = tuple(x._data if isinstance(x, DecimalArray) else x + for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + + def reconstruct(x): + if isinstance(x, (decimal.Decimal, numbers.Number)): + return x + else: + return DecimalArray._from_sequence(x) + + if isinstance(result, 
tuple): + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) + def __getitem__(self, item): if isinstance(item, numbers.Integral): return self._data[item] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index dfbea93b57c09..80885e4045e64 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -394,7 +394,7 @@ def test_ufunc_fallback(data): a = data[:5] s = pd.Series(a, index=range(3, 8)) result = np.abs(s) - expected = pd.Series(np.abs(a.astype(object)), index=range(3, 8)) + expected = pd.Series(np.abs(a), index=range(3, 8)) tm.assert_series_equal(result, expected) @@ -408,3 +408,39 @@ def _formatting_values(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): repr(ser) + + +def test_array_ufunc(): + a = to_decimal([1, 2, 3]) + result = np.exp(a) + expected = to_decimal(np.exp(a._data)) + tm.assert_extension_array_equal(result, expected) + + +def test_array_ufunc_series(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.exp(s) + expected = pd.Series(to_decimal(np.exp(a._data))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_scalar_other(): + # check _HANDLED_TYPES + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.add(s, decimal.Decimal(1)) + expected = pd.Series(np.add(a, decimal.Decimal(1))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_defer(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + + expected = pd.Series(to_decimal([2, 4, 6])) + r1 = np.add(s, a) + r2 = np.add(a, s) + + tm.assert_series_equal(r1, expected) + tm.assert_series_equal(r2, expected) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index af67ae02b0940..1a0eeb51c4921 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -288,3 +288,25 @@ def 
test_binary_ufunc_other_types(type_): result = np.add(a, b) expected = pd.Series(np.add(a.to_numpy(), b), name='name') tm.assert_series_equal(result, expected) + + +def test_object_dtype_ok(): + + class Thing: + def __init__(self, value): + self.value = value + + def __add__(self, other): + other = getattr(other, 'value', other) + return type(self)(self.value + other) + + def __eq__(self, other): + return type(other) is Thing and self.value == other.value + + def __repr__(self): + return 'Thing({})'.format(self.value) + + s = pd.Series([Thing(1), Thing(2)]) + result = np.add(s, Thing(1)) + expected = pd.Series([Thing(2), Thing(3)]) + tm.assert_series_equal(result, expected) From 10bc2cc98fd143d43ba62876a3de21ceb1dc1186 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Jun 2019 12:15:11 -0500 Subject: [PATCH 44/48] more tests --- pandas/core/series.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4fabe4fa5554e..bd66c6280b9f3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -739,8 +739,8 @@ def __array_ufunc__( index = alignable[0].index for s in alignable[1:]: index |= s.index - inputs = [x.reindex(index) if issubclass(t, Series) else x - for x, t in zip(inputs, types)] + inputs = tuple(x.reindex(index) if issubclass(t, Series) else x + for x, t in zip(inputs, types)) else: index = self.index @@ -762,7 +762,7 @@ def __array_ufunc__( result = getattr(ufunc, method)(*inputs, **kwargs) if len(set(names)) == 1: # we require names to be hashable, right? 
- name = names[0] + name = names[0] # type: Any else: name = None From 5380b77ba9b283e87307f93412210dda126710f1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Jun 2019 13:10:54 -0500 Subject: [PATCH 45/48] 35 compat --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index bcab41e589b41..8b5565febee9e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2364,7 +2364,7 @@ def maybe_dispatch_ufunc_to_dunder_op( ufunc: Callable, method: str, *inputs: ArrayLike, - **kwargs: Any, + **kwargs: Any ): """ Dispatch a ufunc to the equivalent dunder method. From 9f4d1101794d1a1ce1f1bf31718040d40280461f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Jun 2019 21:36:18 -0500 Subject: [PATCH 46/48] remove old test --- pandas/tests/arrays/test_integer.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index e9e5bc772578b..fb62a90a6007e 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -776,11 +776,6 @@ def test_ufuncs_binary_int(ufunc): tm.assert_extension_array_equal(result, expected) -def test_ufunc_fallback(): - a = integer_array([1, 2, -3, np.nan]) - assert pd.isna(np.add.reduce(a)) - - @pytest.mark.parametrize('values', [ [0, 1], [0, None] ]) From ab48bd820687fb04aaeec5027514b902e3af2910 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Jul 2019 13:09:47 -0500 Subject: [PATCH 47/48] fixup --- doc/source/development/extending.rst | 10 +++----- pandas/core/series.py | 34 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 300a6b81d37ed..b26f9a12a1601 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -222,13 +222,9 @@ extension array to avoid coercion to an ndarray. 
See `the numpy documentation `__ for an example. -As part of your implementation, we require that you - -1. Define a ``_HANDLED_TYPES`` attribute, a tuple, containing the types your - array can handle -2. Defer to the :class:`Series` implementatio by returning ``NotImplemented`` - if there are any :class:`Series` in the ``types``. This ensures consistent - metadata handling, and associativity for binary operations. +As part of your implementation, we require that you defer to ``Series.__array_ufunc__`` +by returning ``NotImplemented`` when a Series is detected in ``inputs``. Pandas will +extract the array from the Series and re-call the ufunc on the unwrapped inputs. .. _extending.extension.testing: diff --git a/pandas/core/series.py b/pandas/core/series.py index a0d9c2b245dbd..d0bfbc49e98ed 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3,7 +3,6 @@ """ from collections import OrderedDict from io import StringIO -import numbers from shutil import get_terminal_size from textwrap import dedent from typing import Any, Callable @@ -706,8 +705,6 @@ def view(self, dtype=None): # ---------------------------------------------------------------------- # NDArray Compat - _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray, numbers.Number, - list) # what other builtins? array? deque? ... def __array_ufunc__( self, @@ -718,6 +715,7 @@ def __array_ufunc__( ): # TODO: handle DataFrame from pandas.core.internals.construction import extract_array + cls = type(self) # for binary ops, use our custom dunder methods result = ops.maybe_dispatch_ufunc_to_dunder_op( @@ -725,6 +723,22 @@ def __array_ufunc__( if result is not NotImplemented: return result + # Determine if we should defer. 
+ no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__) + + for item in inputs: + higher_priority = ( + hasattr(item, '__array_priority__') and + item.__array_priority__ > self.__array_priority__ + ) + has_array_ufunc = ( + hasattr(item, '__array_ufunc__') and + type(item).__array_ufunc__ not in no_defer and + not is_extension_array_dtype(item) + ) + if higher_priority or has_array_ufunc: + return NotImplemented + # align all the inputs. names = [getattr(x, 'name') for x in inputs if hasattr(x, 'name')] types = tuple(type(x) for x in inputs) @@ -744,21 +758,7 @@ def __array_ufunc__( else: index = self.index - # dtype check: can we do this, given the inputs? - # It's expected that the following classes defer to us when - # any Series is present in inputs. - # 1. Index. - # 2. ExtensionArray. - inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) - handled_types = sum([getattr(x, '_HANDLED_TYPES', ()) for x in inputs], - self._HANDLED_TYPES + (Series,)) - any_object = any(getattr(x, 'dtype', None) == 'object' for x in inputs) - # defer when an unknown object and not object dtype. - if (not all(isinstance(t, handled_types) for t in inputs) and - not any_object): - return NotImplemented - result = getattr(ufunc, method)(*inputs, **kwargs) if len(set(names)) == 1: # we require names to be hashable, right? 
From 7486d260178176f9e9674c8036bc3063a067b24a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Jul 2019 13:47:32 -0500 Subject: [PATCH 48/48] Fixups --- doc/source/development/extending.rst | 9 +++++---- pandas/core/arrays/integer.py | 8 +++----- pandas/core/series.py | 3 ++- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index b26f9a12a1601..12af80f1bce80 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -217,14 +217,15 @@ NumPy Universal Functions pandas unboxes the ``ExtensionArray`` from the :class:`Series`, applies the ufunc, and re-boxes it if necessary. -If applicable, we highly recommend that your implement ``__array_ufunc__`` in your +If applicable, we highly recommend that you implement ``__array_ufunc__`` in your extension array to avoid coercion to an ndarray. See `the numpy documentation `__ for an example. -As part of your implementation, we require that you defer to ``Series.__array_ufunc__`` -by returning ``NotImplemented`` when a Series is detected in ``inputs``. Pandas will -extract the array from the Series and re-call the ufunc on the unwrapped inputs. +As part of your implementation, we require that you defer to pandas when a pandas +container (:class:`Series`, :class:`DataFrame`, :class:`Index`) is detected in ``inputs``. +If any of those is present, you should return ``NotImplemented``. Pandas will take care of +unboxing the array from the container and re-calling the ufunc with the unwrapped input. .. _extending.extension.testing: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 5a0ca05f74ed5..644c2f634240f 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -375,11 +375,9 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): inputs2.append(x) def reconstruct(x): - if np.isscalar(x): - # reductions. 
- if mask.any(): - return np.nan - return x + # we don't worry about scalar `x` here, since we + # raise for reduce up above. + if is_integer_dtype(x.dtype): m = mask.copy() return IntegerArray(x, m) diff --git a/pandas/core/series.py b/pandas/core/series.py index ba94886909fdf..9179099562832 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -715,6 +715,7 @@ def view(self, dtype=None): # ---------------------------------------------------------------------- # NDArray Compat + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) def __array_ufunc__( self, @@ -744,7 +745,7 @@ def __array_ufunc__( has_array_ufunc = ( hasattr(item, '__array_ufunc__') and type(item).__array_ufunc__ not in no_defer and - not is_extension_array_dtype(item) + not isinstance(item, self._HANDLED_TYPES) ) if higher_priority or has_array_ufunc: return NotImplemented