Skip to content

Commit c6eb725

Browse files
Implement DataFrame.__array_ufunc__ (#36955)
1 parent 15b8898 commit c6eb725

File tree

8 files changed

+269
-80
lines changed

8 files changed

+269
-80
lines changed

doc/source/whatsnew/v1.2.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ Other enhancements
238238
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
239239
-
240240
- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
241+
- Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`).
242+
- Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`).
241243
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
242244
- :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`)
243245
- Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`).
@@ -470,6 +472,7 @@ Deprecations
470472
- The default value of ``regex`` for :meth:`Series.str.replace` will change from ``True`` to ``False`` in a future release. In addition, single character regular expressions will *not* be treated as literal strings when ``regex=True`` is set. (:issue:`24804`)
471473
- Deprecated automatic alignment on comparison operations between :class:`DataFrame` and :class:`Series`; do ``frame, ser = frame.align(ser, axis=1, copy=False)`` before e.g. ``frame == ser`` (:issue:`28759`)
472474
- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)
475+
- Using "outer" ufuncs on DataFrames to return a 4d ndarray is now deprecated. Convert to an ndarray first (:issue:`23743`).
473476
- Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
474477
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
475478
- Deprecated use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
@@ -751,6 +754,7 @@ Other
751754

752755
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
753756
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
757+
- Fixed metadata propagation in :meth:`Series.abs` and ufuncs called on Series and DataFrames (:issue:`28283`)
754758
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly casting from ``PeriodDtype`` to object dtype (:issue:`34871`)
755759
- Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`)
756760
- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`) (:issue:`37381`)

pandas/core/arraylike.py

+143-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@
55
ExtensionArray
66
"""
77
import operator
8+
from typing import Any, Callable
9+
import warnings
810

9-
from pandas.core.ops import roperator
11+
import numpy as np
12+
13+
from pandas._libs import lib
14+
15+
from pandas.core.construction import extract_array
16+
from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op, roperator
1017
from pandas.core.ops.common import unpack_zerodim_and_defer
1118

1219

@@ -140,3 +147,138 @@ def __pow__(self, other):
140147
@unpack_zerodim_and_defer("__rpow__")
141148
def __rpow__(self, other):
142149
return self._arith_method(other, roperator.rpow)
150+
151+
152+
def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
153+
"""
154+
Compatibility with numpy ufuncs.
155+
156+
See also
157+
--------
158+
numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
159+
"""
160+
from pandas.core.generic import NDFrame
161+
from pandas.core.internals import BlockManager
162+
163+
cls = type(self)
164+
165+
# for binary ops, use our custom dunder methods
166+
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
167+
if result is not NotImplemented:
168+
return result
169+
170+
# Determine if we should defer.
171+
no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
172+
173+
for item in inputs:
174+
higher_priority = (
175+
hasattr(item, "__array_priority__")
176+
and item.__array_priority__ > self.__array_priority__
177+
)
178+
has_array_ufunc = (
179+
hasattr(item, "__array_ufunc__")
180+
and type(item).__array_ufunc__ not in no_defer
181+
and not isinstance(item, self._HANDLED_TYPES)
182+
)
183+
if higher_priority or has_array_ufunc:
184+
return NotImplemented
185+
186+
# align all the inputs.
187+
types = tuple(type(x) for x in inputs)
188+
alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
189+
190+
if len(alignable) > 1:
191+
# This triggers alignment.
192+
# At the moment, there aren't any ufuncs with more than two inputs
193+
# so this ends up just being x1.index | x2.index, but we write
194+
# it to handle *args.
195+
196+
if len(set(types)) > 1:
197+
# We currently don't handle ufunc(DataFrame, Series)
198+
# well. Previously this raised an internal ValueError. We might
199+
# support it someday, so raise a NotImplementedError.
200+
raise NotImplementedError(
201+
"Cannot apply ufunc {} to mixed DataFrame and Series "
202+
"inputs.".format(ufunc)
203+
)
204+
axes = self.axes
205+
for obj in alignable[1:]:
206+
# this relies on the fact that we aren't handling mixed
207+
# series / frame ufuncs.
208+
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
209+
axes[i] = ax1.union(ax2)
210+
211+
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
212+
inputs = tuple(
213+
x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
214+
for x, t in zip(inputs, types)
215+
)
216+
else:
217+
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
218+
219+
if self.ndim == 1:
220+
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
221+
name = names[0] if len(set(names)) == 1 else None
222+
reconstruct_kwargs = {"name": name}
223+
else:
224+
reconstruct_kwargs = {}
225+
226+
def reconstruct(result):
227+
if lib.is_scalar(result):
228+
return result
229+
if result.ndim != self.ndim:
230+
if method == "outer":
231+
if self.ndim == 2:
232+
# we already deprecated for Series
233+
msg = (
234+
"outer method for ufunc {} is not implemented on "
235+
"pandas objects. Returning an ndarray, but in the "
236+
"future this will raise a 'NotImplementedError'. "
237+
"Consider explicitly converting the DataFrame "
238+
"to an array with '.to_numpy()' first."
239+
)
240+
warnings.warn(msg.format(ufunc), FutureWarning, stacklevel=4)
241+
return result
242+
raise NotImplementedError
243+
return result
244+
if isinstance(result, BlockManager):
245+
# we went through BlockManager.apply
246+
result = self._constructor(result, **reconstruct_kwargs, copy=False)
247+
else:
248+
# we converted an array, lost our axes
249+
result = self._constructor(
250+
result, **reconstruct_axes, **reconstruct_kwargs, copy=False
251+
)
252+
# TODO: When we support multiple values in __finalize__, this
253+
# should pass alignable to `__finalize__` instead of self.
254+
# Then `np.add(a, b)` would consider attrs from both a and b
255+
# when a and b are NDFrames.
256+
if len(alignable) == 1:
257+
result = result.__finalize__(self)
258+
return result
259+
260+
if self.ndim > 1 and (
261+
len(inputs) > 1 or ufunc.nout > 1 # type: ignore[attr-defined]
262+
):
263+
# Just give up on preserving types in the complex case.
264+
# In theory we could preserve them for these cases.
265+
# * nout>1 is doable if BlockManager.apply took nout and
266+
# returned a Tuple[BlockManager].
267+
# * len(inputs) > 1 is doable when we know that we have
268+
# aligned blocks / dtypes.
269+
inputs = tuple(np.asarray(x) for x in inputs)
270+
result = getattr(ufunc, method)(*inputs)
271+
elif self.ndim == 1:
272+
# ufunc(series, ...)
273+
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
274+
result = getattr(ufunc, method)(*inputs, **kwargs)
275+
else:
276+
# ufunc(dataframe)
277+
mgr = inputs[0]._mgr
278+
result = mgr.apply(getattr(ufunc, method))
279+
280+
if ufunc.nout > 1: # type: ignore[attr-defined]
281+
result = tuple(reconstruct(x) for x in result)
282+
else:
283+
result = reconstruct(result)
284+
return result

pandas/core/frame.py

+1
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ class DataFrame(NDFrame, OpsMixin):
434434

435435
_internal_names_set = {"columns", "index"} | NDFrame._internal_names_set
436436
_typ = "dataframe"
437+
_HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray)
437438

438439
@property
439440
def _constructor(self) -> Type[DataFrame]:

pandas/core/generic.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
from pandas.core.dtypes.missing import isna, notna
8888

8989
import pandas as pd
90-
from pandas.core import indexing, missing, nanops
90+
from pandas.core import arraylike, indexing, missing, nanops
9191
import pandas.core.algorithms as algos
9292
from pandas.core.base import PandasObject, SelectionMixin
9393
import pandas.core.common as com
@@ -1927,6 +1927,11 @@ def __array_wrap__(
19271927
self, method="__array_wrap__"
19281928
)
19291929

1930+
def __array_ufunc__(
1931+
self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
1932+
):
1933+
return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs)
1934+
19301935
# ideally we would define this to avoid the getattr checks, but
19311936
# it is slower
19321937
# @property

pandas/core/series.py

+1-75
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
176176
"""
177177

178178
_typ = "series"
179+
_HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
179180

180181
_name: Label
181182
_metadata: List[str] = ["name"]
@@ -683,81 +684,6 @@ def view(self, dtype=None) -> "Series":
683684
# NDArray Compat
684685
_HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
685686

686-
def __array_ufunc__(
687-
self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
688-
):
689-
# TODO: handle DataFrame
690-
cls = type(self)
691-
692-
# for binary ops, use our custom dunder methods
693-
result = ops.maybe_dispatch_ufunc_to_dunder_op(
694-
self, ufunc, method, *inputs, **kwargs
695-
)
696-
if result is not NotImplemented:
697-
return result
698-
699-
# Determine if we should defer.
700-
no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
701-
702-
for item in inputs:
703-
higher_priority = (
704-
hasattr(item, "__array_priority__")
705-
and item.__array_priority__ > self.__array_priority__
706-
)
707-
has_array_ufunc = (
708-
hasattr(item, "__array_ufunc__")
709-
and type(item).__array_ufunc__ not in no_defer
710-
and not isinstance(item, self._HANDLED_TYPES)
711-
)
712-
if higher_priority or has_array_ufunc:
713-
return NotImplemented
714-
715-
# align all the inputs.
716-
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
717-
types = tuple(type(x) for x in inputs)
718-
# TODO: dataframe
719-
alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)]
720-
721-
if len(alignable) > 1:
722-
# This triggers alignment.
723-
# At the moment, there aren't any ufuncs with more than two inputs
724-
# so this ends up just being x1.index | x2.index, but we write
725-
# it to handle *args.
726-
index = alignable[0].index
727-
for s in alignable[1:]:
728-
index = index.union(s.index)
729-
inputs = tuple(
730-
x.reindex(index) if issubclass(t, Series) else x
731-
for x, t in zip(inputs, types)
732-
)
733-
else:
734-
index = self.index
735-
736-
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
737-
result = getattr(ufunc, method)(*inputs, **kwargs)
738-
739-
name = names[0] if len(set(names)) == 1 else None
740-
741-
def construct_return(result):
742-
if lib.is_scalar(result):
743-
return result
744-
elif result.ndim > 1:
745-
# e.g. np.subtract.outer
746-
if method == "outer":
747-
# GH#27198
748-
raise NotImplementedError
749-
return result
750-
return self._constructor(result, index=index, name=name, copy=False)
751-
752-
if type(result) is tuple:
753-
# multiple return values
754-
return tuple(construct_return(x) for x in result)
755-
elif method == "at":
756-
# no return value
757-
return None
758-
else:
759-
return construct_return(result)
760-
761687
def __array__(self, dtype=None) -> np.ndarray:
762688
"""
763689
Return the values as a NumPy array.

0 commit comments

Comments
 (0)